common.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. package common
  2. import (
  3. "net/url"
  4. "strings"
  5. )
  6. func EscapeURL(u string) string {
  7. // Non-http(s) URLs may contain percent symbol outside of the percent-encoded sequences.
  8. // Parsing such URLs will fail with an error.
  9. // To prevent this, we replace all percent symbols with %25.
  10. //
  11. // Also, such URLs may contain a hash symbol (a fragment identifier) or a question mark
  12. // (a query string).
  13. // We replace them with %23 and %3F to make `url.Parse` treat them as a part of the path.
  14. // Since we already replaced all percent symbols, we won't mix up %23/%3F that were in the
  15. // original URL and %23/%3F that appeared after the replacement.
  16. //
  17. // We will revert these replacements in `GetBucketAndKey`.
  18. if !strings.HasPrefix(u, "http://") && !strings.HasPrefix(u, "https://") {
  19. u = strings.ReplaceAll(u, "%", "%25")
  20. u = strings.ReplaceAll(u, "?", "%3F")
  21. u = strings.ReplaceAll(u, "#", "%23")
  22. }
  23. return u
  24. }
  25. func GetBucketAndKey(u *url.URL, sep string) (bucket, key, query string) {
  26. bucket = u.Host
  27. // We can't use u.Path here because `url.Parse` unescapes the original URL's path.
  28. // So we have to use `u.RawPath` if it's available.
  29. // If it is not available, then `u.EscapedPath()` is the same as the original URL's path
  30. // before `url.Parse`.
  31. // See: https://cs.opensource.google/go/go/+/refs/tags/go1.22.4:src/net/url/url.go;l=680
  32. if len(u.RawPath) > 0 {
  33. key = u.RawPath
  34. } else {
  35. key = u.EscapedPath()
  36. }
  37. key = strings.TrimLeft(key, "/")
  38. // We percent-encoded `%`, `#`, and `?` in `EscapeURL` to prevent parsing errors.
  39. // Now we need to revert these replacements.
  40. //
  41. // It's important to revert %25 last because %23/%3F may appear in the original URL and
  42. // we don't want to mix them up.
  43. bucket = strings.ReplaceAll(bucket, "%23", "#")
  44. bucket = strings.ReplaceAll(bucket, "%3F", "?")
  45. bucket = strings.ReplaceAll(bucket, "%25", "%")
  46. key = strings.ReplaceAll(key, "%23", "#")
  47. key = strings.ReplaceAll(key, "%3F", "?")
  48. key = strings.ReplaceAll(key, "%25", "%")
  49. // Cut the query string if it's present.
  50. // Since we replaced `?` with `%3F` in `EscapeURL`, `url.Parse` will treat query
  51. // string as a part of the path.
  52. // Also, query string separator may be different from `?`, so we can't rely on `url.URL.RawQuery`.
  53. if len(sep) > 0 {
  54. key, query, _ = strings.Cut(key, sep)
  55. }
  56. return
  57. }