Explorar el Código

Fix URL parsing error when a non-http(s) URL contains a `%` symbol outside of the percent-encoded sequence

DarthSim hace 5 meses
padre
commit
505faf5452
Se han modificado 3 ficheros con 27 adiciones y 12 borrados
  1. 1 0
      CHANGELOG.md
  2. 15 12
      imagedata/download.go
  3. 11 0
      transport/common/common.go

+ 1 - 0
CHANGELOG.md

@@ -8,6 +8,7 @@
 ### Fixed
 - Fix detecting of width and height of HEIF images that include `irot` boxes.
 - Set `Error` status for errorred traces in OpenTelemetry.
+- Fix URL parsing error when a non-http(s) URL contains a `%` symbol outside of the percent-encoded sequence.
 - (pro) Fix opject detection accuracy when using YOLOv8 or YOLOv10 models.
 - (pro) Fix usage of the `obj` and `objw` gravity types inside the `crop` processing option.
 - (pro) Fix detecting of width and height when orientation is specified in EXIF but EXIF info is not requested.

+ 15 - 12
imagedata/download.go

@@ -7,7 +7,6 @@ import (
 	"io"
 	"net/http"
 	"net/http/cookiejar"
-	"net/url"
 	"strings"
 	"time"
 
@@ -134,23 +133,27 @@ func headersToStore(res *http.Response) map[string]string {
 func BuildImageRequest(ctx context.Context, imageURL string, header http.Header, jar *cookiejar.Jar) (*http.Request, context.CancelFunc, error) {
 	reqCtx, reqCancel := context.WithTimeout(ctx, time.Duration(config.DownloadTimeout)*time.Second)
 
+	// Non-http(s) URLs may contain percent symbol outside of the percent-encoded sequences.
+	// Parsing such URLs will fail with an error.
+	// To prevent this, we replace all percent symbols with %25.
+	//
+	// Also, such URLs may contain a hash symbol, which is a fragment identifier.
+	// We replace them with %23 to prevent cutting off the fragment part.
+	// Since we already replaced all percent symbols, we won't mix up %23 that were in the original URL
+	// and %23 that appeared after the replacement.
+	//
+	// We will revert these replacements in `transport/common.GetBucketAndKey`.
+	if !strings.HasPrefix(imageURL, "http://") && !strings.HasPrefix(imageURL, "https://") {
+		imageURL = strings.ReplaceAll(imageURL, "%", "%25")
+		imageURL = strings.ReplaceAll(imageURL, "#", "%23")
+	}
+
 	req, err := http.NewRequestWithContext(reqCtx, "GET", imageURL, nil)
 	if err != nil {
 		reqCancel()
 		return nil, func() {}, ierrors.New(404, err.Error(), msgSourceImageIsUnreachable)
 	}
 
-	// S3, GCS, etc object keys may contain `#` symbol.
-	// `url.ParseRequestURI` unlike `url.Parse` does not cut-off the fragment part from the URL path.
-	if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
-		u, err := url.ParseRequestURI(imageURL)
-		if err != nil {
-			reqCancel()
-			return nil, func() {}, ierrors.New(404, err.Error(), msgSourceImageIsUnreachable)
-		}
-		req.URL = u
-	}
-
 	if _, ok := enabledSchemes[req.URL.Scheme]; !ok {
 		reqCancel()
 		return nil, func() {}, ierrors.New(

+ 11 - 0
transport/common/common.go

@@ -21,5 +21,16 @@ func GetBucketAndKey(u *url.URL) (bucket, key string) {
 
 	key = strings.TrimLeft(key, "/")
 
+	// We replaced all `%` with `%25` in `imagedata.BuildImageRequest` to prevent parsing errors.
+	// Also, we replaced all `#` with `%23` to prevent cutting off the fragment part.
+	// We need to revert these replacements.
+	//
+	// It's important to revert %23 first because the original URL may also contain %23,
+	// and we don't want to mix them up.
+	bucket = strings.ReplaceAll(bucket, "%23", "#")
+	bucket = strings.ReplaceAll(bucket, "%25", "%")
+	key = strings.ReplaceAll(key, "%23", "#")
+	key = strings.ReplaceAll(key, "%25", "%")
+
 	return
 }