download.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. package imagedata
  2. import (
  3. "compress/gzip"
  4. "context"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "net/http/cookiejar"
  9. "strings"
  10. "time"
  11. "github.com/imgproxy/imgproxy/v3/config"
  12. "github.com/imgproxy/imgproxy/v3/ierrors"
  13. "github.com/imgproxy/imgproxy/v3/security"
  14. defaultTransport "github.com/imgproxy/imgproxy/v3/transport"
  15. azureTransport "github.com/imgproxy/imgproxy/v3/transport/azure"
  16. fsTransport "github.com/imgproxy/imgproxy/v3/transport/fs"
  17. gcsTransport "github.com/imgproxy/imgproxy/v3/transport/gcs"
  18. s3Transport "github.com/imgproxy/imgproxy/v3/transport/s3"
  19. swiftTransport "github.com/imgproxy/imgproxy/v3/transport/swift"
  20. )
  // downloadClient is the HTTP client every source image request goes through.
  // It is nil until initDownloading assigns it.
  var downloadClient *http.Client

  // enabledSchemes is the set of URL schemes BuildImageRequest accepts.
  // http and https are always present; initDownloading adds one entry for each
  // extra transport it registers.
  var enabledSchemes = map[string]struct{}{
  	"http":  {},
  	"https": {},
  }

  // imageHeadersToStore names the response headers that headersToStore copies
  // out of a source response.
  var imageHeadersToStore = []string{
  	"Cache-Control",
  	"Expires",
  	"ETag",
  	"Last-Modified",
  }

  // redirectAllRequestsTo, when non-empty, replaces the URL of every download.
  // For tests.
  var redirectAllRequestsTo string
  // msgSourceImageIsUnreachable is the text passed as the third argument of
  // ierrors.New for every download failure reported in this file.
  36. const msgSourceImageIsUnreachable = "Source image is unreachable"
  // DownloadOptions carries the per-download settings that requestImage
  // forwards to BuildImageRequest.
  37. type DownloadOptions struct {
  // Header holds extra request headers. BuildImageRequest sends only the first
  // value of each key.
  38. Header http.Header
  // CookieJar, when non-nil, supplies the cookies attached to the request.
  39. CookieJar *cookiejar.Jar
  40. }
  // ErrorNotModified is the error requestImage returns when the source answers
  // with 304 Not Modified.
  type ErrorNotModified struct {
  	// Message is the text reported by Error.
  	Message string
  	// Headers holds the response headers selected by headersToStore.
  	Headers map[string]string
  }

  // Compile-time check that *ErrorNotModified satisfies the error interface.
  var _ error = (*ErrorNotModified)(nil)

  // Error implements the error interface by returning Message.
  func (nm *ErrorNotModified) Error() string {
  	return nm.Message
  }
  48. func initDownloading() error {
  49. transport, err := defaultTransport.New(true)
  50. if err != nil {
  51. return err
  52. }
  53. registerProtocol := func(scheme string, rt http.RoundTripper) {
  54. transport.RegisterProtocol(scheme, rt)
  55. enabledSchemes[scheme] = struct{}{}
  56. }
  57. if config.LocalFileSystemRoot != "" {
  58. registerProtocol("local", fsTransport.New())
  59. }
  60. if config.S3Enabled {
  61. if t, err := s3Transport.New(); err != nil {
  62. return err
  63. } else {
  64. registerProtocol("s3", t)
  65. }
  66. }
  67. if config.GCSEnabled {
  68. if t, err := gcsTransport.New(); err != nil {
  69. return err
  70. } else {
  71. registerProtocol("gs", t)
  72. }
  73. }
  74. if config.ABSEnabled {
  75. if t, err := azureTransport.New(); err != nil {
  76. return err
  77. } else {
  78. registerProtocol("abs", t)
  79. }
  80. }
  81. if config.SwiftEnabled {
  82. if t, err := swiftTransport.New(); err != nil {
  83. return err
  84. } else {
  85. registerProtocol("swift", t)
  86. }
  87. }
  88. downloadClient = &http.Client{
  89. Transport: transport,
  90. CheckRedirect: func(req *http.Request, via []*http.Request) error {
  91. redirects := len(via)
  92. if redirects >= config.MaxRedirects {
  93. return fmt.Errorf("stopped after %d redirects", redirects)
  94. }
  95. return nil
  96. },
  97. }
  98. return nil
  99. }
  100. func headersToStore(res *http.Response) map[string]string {
  101. m := make(map[string]string)
  102. for _, h := range imageHeadersToStore {
  103. if val := res.Header.Get(h); len(val) != 0 {
  104. m[h] = val
  105. }
  106. }
  107. return m
  108. }
  109. func BuildImageRequest(ctx context.Context, imageURL string, header http.Header, jar *cookiejar.Jar) (*http.Request, context.CancelFunc, error) {
  110. reqCtx, reqCancel := context.WithTimeout(ctx, time.Duration(config.DownloadTimeout)*time.Second)
  111. // Non-http(s) URLs may contain percent symbol outside of the percent-encoded sequences.
  112. // Parsing such URLs will fail with an error.
  113. // To prevent this, we replace all percent symbols with %25.
  114. //
  115. // Also, such URLs may contain a hash symbol, which is a fragment identifier.
  116. // We replace them with %23 to prevent cutting off the fragment part.
  117. // Since we already replaced all percent symbols, we won't mix up %23 that were in the original URL
  118. // and %23 that appeared after the replacement.
  119. //
  120. // We will revert these replacements in `transport/common.GetBucketAndKey`.
  121. if !strings.HasPrefix(imageURL, "http://") && !strings.HasPrefix(imageURL, "https://") {
  122. imageURL = strings.ReplaceAll(imageURL, "%", "%25")
  123. imageURL = strings.ReplaceAll(imageURL, "#", "%23")
  124. }
  125. req, err := http.NewRequestWithContext(reqCtx, "GET", imageURL, nil)
  126. if err != nil {
  127. reqCancel()
  128. return nil, func() {}, ierrors.New(404, err.Error(), msgSourceImageIsUnreachable)
  129. }
  130. if _, ok := enabledSchemes[req.URL.Scheme]; !ok {
  131. reqCancel()
  132. return nil, func() {}, ierrors.New(
  133. 404,
  134. fmt.Sprintf("Unknown scheme: %s", req.URL.Scheme),
  135. msgSourceImageIsUnreachable,
  136. )
  137. }
  138. if jar != nil {
  139. for _, cookie := range jar.Cookies(req.URL) {
  140. req.AddCookie(cookie)
  141. }
  142. }
  143. req.Header.Set("User-Agent", config.UserAgent)
  144. for k, v := range header {
  145. if len(v) > 0 {
  146. req.Header.Set(k, v[0])
  147. }
  148. }
  149. return req, reqCancel, nil
  150. }
  151. func SendRequest(req *http.Request) (*http.Response, error) {
  152. var client *http.Client
  153. if req.URL.Scheme == "http" || req.URL.Scheme == "https" {
  154. clientCopy := *downloadClient
  155. jar, err := cookiejar.New(nil)
  156. if err != nil {
  157. return nil, err
  158. }
  159. clientCopy.Jar = jar
  160. client = &clientCopy
  161. } else {
  162. client = downloadClient
  163. }
  164. for {
  165. res, err := client.Do(req)
  166. if err == nil {
  167. return res, nil
  168. }
  169. if res != nil && res.Body != nil {
  170. res.Body.Close()
  171. }
  172. if strings.Contains(err.Error(), "client connection lost") {
  173. select {
  174. case <-req.Context().Done():
  175. return nil, err
  176. case <-time.After(100 * time.Microsecond):
  177. continue
  178. }
  179. }
  180. return nil, wrapError(err)
  181. }
  182. }
  183. func requestImage(ctx context.Context, imageURL string, opts DownloadOptions) (*http.Response, context.CancelFunc, error) {
  184. req, reqCancel, err := BuildImageRequest(ctx, imageURL, opts.Header, opts.CookieJar)
  185. if err != nil {
  186. reqCancel()
  187. return nil, func() {}, err
  188. }
  189. res, err := SendRequest(req)
  190. if err != nil {
  191. reqCancel()
  192. return nil, func() {}, err
  193. }
  194. if res.StatusCode == http.StatusNotModified {
  195. res.Body.Close()
  196. reqCancel()
  197. return nil, func() {}, &ErrorNotModified{Message: "Not Modified", Headers: headersToStore(res)}
  198. }
  199. if res.StatusCode != 200 {
  200. body, _ := io.ReadAll(res.Body)
  201. res.Body.Close()
  202. reqCancel()
  203. status := 404
  204. if res.StatusCode >= 500 {
  205. status = 500
  206. }
  207. msg := fmt.Sprintf("Status: %d; %s", res.StatusCode, string(body))
  208. return nil, func() {}, ierrors.New(status, msg, msgSourceImageIsUnreachable)
  209. }
  210. return res, reqCancel, nil
  211. }
  212. func download(ctx context.Context, imageURL string, opts DownloadOptions, secopts security.Options) (*ImageData, error) {
  213. // We use this for testing
  214. if len(redirectAllRequestsTo) > 0 {
  215. imageURL = redirectAllRequestsTo
  216. }
  217. res, reqCancel, err := requestImage(ctx, imageURL, opts)
  218. defer reqCancel()
  219. if res != nil {
  220. defer res.Body.Close()
  221. }
  222. if err != nil {
  223. return nil, err
  224. }
  225. body := res.Body
  226. contentLength := int(res.ContentLength)
  227. if res.Header.Get("Content-Encoding") == "gzip" {
  228. gzipBody, errGzip := gzip.NewReader(res.Body)
  229. if gzipBody != nil {
  230. defer gzipBody.Close()
  231. }
  232. if errGzip != nil {
  233. return nil, err
  234. }
  235. body = gzipBody
  236. contentLength = 0
  237. }
  238. imgdata, err := readAndCheckImage(body, contentLength, secopts)
  239. if err != nil {
  240. return nil, ierrors.Wrap(err, 0)
  241. }
  242. imgdata.Headers = headersToStore(res)
  243. return imgdata, nil
  244. }
  // RedirectAllRequestsTo stores u in redirectAllRequestsTo; while that value
  // is non-empty, download fetches it instead of the requested URL.
  // For tests.
  245. func RedirectAllRequestsTo(u string) {
  246. redirectAllRequestsTo = u
  247. }
  248. func StopRedirectingRequests() {
  249. redirectAllRequestsTo = ""
  250. }