Viktor Sokolov преди 2 месеца
родител
ревизия
bd7de31f18
променени са 8 файла, в които са добавени 546 реда и са изтрити 0 реда
  1. 114 0
      imagedetect/detect.go
  2. 41 0
      imagedetect/detect_test.go
  3. 23 0
      imagedetect/errors.go
  4. 75 0
      imagedetect/registry.go
  5. 43 0
      imagedetect/registry_test.go
  6. 35 0
      imagedetect/svg.go
  7. 89 0
      stemext/stem_ext.go
  8. 126 0
      stemext/stem_ext_test.go

+ 114 - 0
imagedetect/detect.go

@@ -0,0 +1,114 @@
+package imagedetect
+
+import (
+	"io"
+
+	"github.com/imgproxy/imgproxy/v3/imagetype_new"
+)
+
+// Detect determines the image type of the data provided by r.
+//
+// It reads up to the first 64 bytes and checks them against the registered
+// magic byte signatures in registration order. If none of them matches, the
+// custom detectors are tried in registration order; before a detector runs,
+// more data is read from r when fewer than its BytesNeeded bytes have been
+// buffered so far. A short read (io.EOF or io.ErrUnexpectedEOF) is not treated
+// as an error: detection proceeds with whatever data is available.
+//
+// It returns imagetype_new.Unknown together with the read error when reading
+// fails for any other reason, and imagetype_new.Unknown together with the
+// error built by newUnknownFormatError when nothing matches.
+func Detect(r io.Reader) (imagetype_new.Type, error) {
+	// Snapshot the registry under its read lock so detection does not race
+	// with concurrent Register* calls. The registration code only ever appends
+	// to these slices, so the snapshots stay valid after the lock is released,
+	// and no lock is held while reading from r or running detector functions.
+	registry.mu.RLock()
+	magicBytes := registry.magicBytes
+	detectors := registry.detectors
+	registry.mu.RUnlock()
+
+	// Start with 64 bytes to cover magic bytes
+	buf := make([]byte, 64)
+
+	n, err := io.ReadFull(r, buf)
+	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+		return imagetype_new.Unknown, err
+	}
+
+	data := buf[:n]
+
+	// First try magic byte detection
+	for _, magic := range magicBytes {
+		if hasMagicBytes(data, magic) {
+			return magic.Type, nil
+		}
+	}
+
+	// Then try custom detectors
+	for _, detector := range detectors {
+		// Check if we have enough bytes for this detector
+		if len(data) < detector.BytesNeeded {
+			// Need to read more data
+			additionalBytes := detector.BytesNeeded - len(data)
+			extraBuf := make([]byte, additionalBytes)
+			extraN, err := io.ReadFull(r, extraBuf)
+
+			// It's fine if we can't read the required number of bytes
+			if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+				return imagetype_new.Unknown, err
+			}
+
+			// Extend our data buffer
+			data = append(data, extraBuf[:extraN]...)
+		}
+
+		// A detector error or an Unknown result means "no match": move on to
+		// the next detector.
+		if typ, err := detector.Func(data); err == nil && typ != imagetype_new.Unknown {
+			return typ, nil
+		}
+	}
+
+	return imagetype_new.Unknown, newUnknownFormatError()
+}
+
+// hasMagicBytes reports whether data starts with the signature of magic.
+// A '?' byte in the signature matches any byte at that position.
+// Data shorter than the signature never matches.
+func hasMagicBytes(data []byte, magic MagicBytes) bool {
+	sig := magic.Signature
+	if len(sig) > len(data) {
+		return false
+	}
+
+	for pos := 0; pos < len(sig); pos++ {
+		if sig[pos] == '?' || sig[pos] == data[pos] {
+			continue
+		}
+		return false
+	}
+
+	return true
+}
+
+// init registers the default magic byte signatures for common image formats.
+// Entries are registered in the order of the table below.
+func init() {
+	defaults := []struct {
+		signature []byte
+		typ       imagetype_new.Type
+	}{
+		// JPEG
+		{[]byte("\xff\xd8"), imagetype_new.JPEG},
+
+		// JXL
+		//
+		// NOTE: for a "naked" JXL codestream (0xff 0x0a) there is no way to make
+		// sure this is a JXL file other than fully decoding it. The data starts
+		// right after the marker, no additional marker bytes are provided.
+		// We are stuck with the potential false positives here.
+		//
+		// JXL codestream (can't use a string literal due to 0x0a)
+		{[]byte{0xff, 0x0a}, imagetype_new.JXL},
+		// JXL container (has null bytes)
+		{[]byte{0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x4C, 0x20, 0x0D, 0x0A, 0x87, 0x0A}, imagetype_new.JXL},
+
+		// PNG
+		{[]byte("\x89PNG\r\n\x1a\n"), imagetype_new.PNG},
+
+		// WEBP (RIFF container with WEBP fourcc), wildcards cover the size field
+		{[]byte("RIFF????WEBP"), imagetype_new.WEBP},
+
+		// GIF
+		{[]byte("GIF8?a"), imagetype_new.GIF},
+
+		// ICO (has null bytes)
+		{[]byte{0, 0, 1, 0}, imagetype_new.ICO},
+
+		// HEIC/HEIF, wildcards cover the size field
+		{[]byte("????ftypheic"), imagetype_new.HEIC},
+		{[]byte("????ftypheix"), imagetype_new.HEIC},
+		{[]byte("????ftyphevc"), imagetype_new.HEIC},
+		{[]byte("????ftypheim"), imagetype_new.HEIC},
+		{[]byte("????ftypheis"), imagetype_new.HEIC},
+		{[]byte("????ftyphevm"), imagetype_new.HEIC},
+		{[]byte("????ftyphevs"), imagetype_new.HEIC},
+		{[]byte("????ftypmif1"), imagetype_new.HEIC},
+
+		// AVIF
+		{[]byte("????ftypavif"), imagetype_new.AVIF},
+
+		// BMP
+		{[]byte("BM"), imagetype_new.BMP},
+
+		// TIFF, little-endian
+		{[]byte("II*\x00"), imagetype_new.TIFF},
+		// TIFF, big-endian
+		{[]byte("MM\x00*"), imagetype_new.TIFF},
+	}
+
+	for _, d := range defaults {
+		RegisterMagicBytes(d.signature, d.typ)
+	}
+}

+ 41 - 0
imagedetect/detect_test.go

@@ -0,0 +1,41 @@
+package imagedetect
+
+import (
+	"os"
+	"testing"
+
+	"github.com/imgproxy/imgproxy/v3/imagetype_new"
+	"github.com/stretchr/testify/require"
+)
+
+// TestDetect runs Detect against one sample file per supported format and
+// checks that the expected image type is reported without an error.
+func TestDetect(t *testing.T) {
+	tests := []struct {
+		name string
+		file string
+		want imagetype_new.Type
+	}{
+		{"JPEG", "../testdata/test-images/jpg/jpg.jpg", imagetype_new.JPEG},
+		{"JXL", "../testdata/test-images/jxl/jxl.jxl", imagetype_new.JXL},
+		{"PNG", "../testdata/test-images/png/png.png", imagetype_new.PNG},
+		{"WEBP", "../testdata/test-images/webp/webp.webp", imagetype_new.WEBP},
+		{"GIF", "../testdata/test-images/gif/gif.gif", imagetype_new.GIF},
+		{"ICO", "../testdata/test-images/ico/png-256x256.ico", imagetype_new.ICO},
+		{"SVG", "../testdata/test-images/svg/svg.svg", imagetype_new.SVG},
+		{"HEIC", "../testdata/test-images/heif/heif.heif", imagetype_new.HEIC},
+		{"BMP", "../testdata/test-images/bmp/24-bpp.bmp", imagetype_new.BMP},
+		{"TIFF", "../testdata/test-images/tiff/tiff.tiff", imagetype_new.TIFF},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			f, err := os.Open(tt.file)
+			require.NoError(t, err)
+			defer f.Close()
+
+			got, err := Detect(f)
+			require.NoError(t, err)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}

+ 23 - 0
imagedetect/errors.go

@@ -0,0 +1,23 @@
+package imagedetect
+
+import (
+	"net/http"
+
+	"github.com/imgproxy/imgproxy/v3/ierrors"
+)
+
+type (
+	UnknownFormatError struct{}
+)
+
+// newUnknownFormatError wraps an UnknownFormatError with ierrors.Wrap,
+// attaching the HTTP 422 (Unprocessable Entity) status code, the public
+// message "Invalid source image" and a should-report flag of false.
+func newUnknownFormatError() error {
+	cause := UnknownFormatError{}
+
+	return ierrors.Wrap(
+		cause,
+		1,
+		ierrors.WithStatusCode(http.StatusUnprocessableEntity),
+		ierrors.WithPublicMessage("Invalid source image"),
+		ierrors.WithShouldReport(false),
+	)
+}
+
+// Error implements the error interface.
+func (e UnknownFormatError) Error() string {
+	return "Source image type not supported"
+}

+ 75 - 0
imagedetect/registry.go

@@ -0,0 +1,75 @@
+package imagedetect
+
+import (
+	"sync"
+
+	"github.com/imgproxy/imgproxy/v3/imagetype_new"
+)
+
+type (
+	// DetectFunc inspects the given bytes and returns the detected image type.
+	DetectFunc func(b []byte) (imagetype_new.Type, error)
+
+	// MagicBytes pairs a magic byte signature with the image type it identifies.
+	// A '?' byte in Signature matches any byte.
+	MagicBytes struct {
+		Signature []byte
+		Type      imagetype_new.Type
+	}
+
+	// Detector is a registered detector function together with the number of
+	// bytes it needs.
+	Detector struct {
+		Func        DetectFunc
+		BytesNeeded int
+	}
+
+	// Registry holds the registered magic byte signatures and custom detectors.
+	// Registration is guarded by mu.
+	Registry struct {
+		mu         sync.RWMutex
+		detectors  []Detector
+		magicBytes []MagicBytes
+	}
+)
+
+// registry is the package-level Registry used by the package-level
+// RegisterDetector and RegisterMagicBytes functions.
+var registry = new(Registry)
+
+// RegisterDetector adds a custom detector function, together with the number
+// of bytes it needs, to the global registry.
+// Detectors are tried in the order they were registered.
+func RegisterDetector(detector DetectFunc, bytesNeeded int) {
+	registry.RegisterDetector(detector, bytesNeeded)
+}
+
+// RegisterMagicBytes adds a magic byte signature for the given image type to
+// the global registry.
+// Magic byte signatures are always tried before custom detectors.
+func RegisterMagicBytes(signature []byte, typ imagetype_new.Type) {
+	registry.RegisterMagicBytes(signature, typ)
+}
+
+// RegisterDetector appends a custom detector function and the number of bytes
+// it needs to this registry while holding the registry's write lock.
+func (r *Registry) RegisterDetector(detector DetectFunc, bytesNeeded int) {
+	entry := Detector{Func: detector, BytesNeeded: bytesNeeded}
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	r.detectors = append(r.detectors, entry)
+}
+
+// RegisterMagicBytes appends a magic byte signature for the given image type
+// to this registry while holding the registry's write lock.
+//
+// The signature is copied, so modifying the passed slice afterwards does not
+// change the registered entry.
+func (r *Registry) RegisterMagicBytes(signature []byte, typ imagetype_new.Type) {
+	// Detach the stored signature from the caller's backing array.
+	sig := make([]byte, len(signature))
+	copy(sig, signature)
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.magicBytes = append(r.magicBytes, MagicBytes{
+		Signature: sig,
+		Type:      typ,
+	})
+}

+ 43 - 0
imagedetect/registry_test.go

@@ -0,0 +1,43 @@
+package imagedetect
+
+import (
+	"testing"
+
+	"github.com/imgproxy/imgproxy/v3/imagetype_new"
+	"github.com/stretchr/testify/require"
+)
+
+// TestRegisterDetector checks that Registry.RegisterDetector stores the
+// detector function together with its byte requirement.
+func TestRegisterDetector(t *testing.T) {
+	// Work on a private registry so the global one stays untouched
+	reg := &Registry{}
+
+	// Detector that recognizes the two leading JPEG marker bytes
+	jpegDetector := func(data []byte) (imagetype_new.Type, error) {
+		if len(data) < 2 || data[0] != 0xFF || data[1] != 0xD8 {
+			return imagetype_new.Unknown, newUnknownFormatError()
+		}
+		return imagetype_new.JPEG, nil
+	}
+
+	reg.RegisterDetector(jpegDetector, 64)
+
+	// Exactly one detector with the requested settings must be present
+	require.Len(t, reg.detectors, 1)
+
+	registered := reg.detectors[0]
+	require.Equal(t, 64, registered.BytesNeeded)
+	require.NotNil(t, registered.Func)
+}
+
+// TestRegisterMagicBytes checks that Registry.RegisterMagicBytes stores the
+// signature together with its image type.
+func TestRegisterMagicBytes(t *testing.T) {
+	// Work on a private registry so the global one stays untouched
+	reg := &Registry{}
+
+	signature := []byte{0xFF, 0xD8}
+	reg.RegisterMagicBytes(signature, imagetype_new.JPEG)
+
+	// Exactly one entry with the JPEG signature and type must be present
+	require.Len(t, reg.magicBytes, 1)
+
+	registered := reg.magicBytes[0]
+	require.Equal(t, signature, registered.Signature)
+	require.Equal(t, imagetype_new.JPEG, registered.Type)
+}

+ 35 - 0
imagedetect/svg.go

@@ -0,0 +1,35 @@
+package imagedetect
+
+import (
+	"bytes"
+	"strings"
+
+	"github.com/imgproxy/imgproxy/v3/imagetype_new"
+
+	"github.com/tdewolff/parse/v2"
+	"github.com/tdewolff/parse/v2/xml"
+)
+
+func init() {
+	// Register SVG detector (needs at least 1000 bytes to reliably detect SVG)
+	RegisterDetector(IsSVG, 1000)
+}
+
+// IsSVG scans b with an XML lexer and returns imagetype_new.SVG as soon as it
+// meets a start tag named "svg" or "svg:svg" (compared after lowercasing).
+// If the lexer yields an error token first, it returns imagetype_new.Unknown.
+// The returned error is always nil.
+func IsSVG(b []byte) (imagetype_new.Type, error) {
+	lexer := xml.NewLexer(parse.NewInput(bytes.NewReader(b)))
+
+	for {
+		token, _ := lexer.Next()
+
+		if token == xml.ErrorToken {
+			return imagetype_new.Unknown, nil
+		}
+		if token != xml.StartTagToken {
+			continue
+		}
+
+		switch strings.ToLower(string(lexer.Text())) {
+		case "svg", "svg:svg":
+			return imagetype_new.SVG, nil
+		}
+	}
+}

+ 89 - 0
stemext/stem_ext.go

@@ -0,0 +1,89 @@
+// Package contentdisposition provides helpers that determine the file name
+// stem and extension used to build a correct Content-Disposition header.
+package contentdisposition
+
+import (
+	"mime"
+	"net/url"
+	"path/filepath"
+)
+
+const (
+	// fallbackStem is used when the stem cannot be determined from the URL.
+	fallbackStem = "image"
+)
+
+// StemExt holds the file name stem and the extension that are used for the
+// Content-Disposition header.
+type StemExt struct {
+	// stem is the file name without its extension; ext is the extension.
+	stem, ext string
+}
+
+// FromURL creates a new StemExt instance from the provided URL.
+// Returns a value type to avoid heap allocation.
+func FromURL(url *url.URL) StemExt {
+	_, filename := filepath.Split(url.Path)
+	ext := filepath.Ext(filename)
+
+	// Avoid strings.TrimSuffix allocation by using slice operation
+	var stem string
+	if ext != "" {
+		stem = filename[:len(filename)-len(ext)]
+	} else {
+		stem = filename
+	}
+
+	return StemExt{
+		stem: stem,
+		ext:  ext,
+	}
+}
+
+// SetExtFromContentTypeIfEmpty fills in the extension from the provided
+// content type, using the first entry returned by mime.ExtensionsByType.
+// Nothing changes when the content type is empty, when an extension is
+// already set, or when no extension is known for the content type.
+// It returns the receiver to allow method chaining.
+func (cd *StemExt) SetExtFromContentTypeIfEmpty(contentType string) *StemExt {
+	if contentType == "" || cd.ext != "" {
+		return cd
+	}
+
+	exts, err := mime.ExtensionsByType(contentType)
+	if err != nil || len(exts) == 0 {
+		return cd
+	}
+
+	cd.ext = exts[0]
+	return cd
+}
+
+// OverrideExt replaces the extension with ext unless ext is empty.
+// It returns the receiver to allow method chaining.
+func (cd *StemExt) OverrideExt(ext string) *StemExt {
+	if ext == "" {
+		return cd
+	}
+
+	cd.ext = ext
+	return cd
+}
+
+// OverrideStem replaces the stem with stem unless stem is empty.
+// It returns the receiver to allow method chaining.
+func (cd *StemExt) OverrideStem(stem string) *StemExt {
+	if stem == "" {
+		return cd
+	}
+
+	cd.stem = stem
+	return cd
+}
+
+// StemExtWithFallback returns the stem and the extension, substituting
+// fallbackStem for an empty stem.
+func (cd StemExt) StemExtWithFallback() (string, string) {
+	if cd.stem == "" {
+		return fallbackStem, cd.ext
+	}
+
+	return cd.stem, cd.ext
+}
+
+// StemExt returns the stem and the extension exactly as stored, without
+// applying any fallback.
+func (cd StemExt) StemExt() (string, string) {
+	stem, ext := cd.stem, cd.ext
+
+	return stem, ext
+}

+ 126 - 0
stemext/stem_ext_test.go

@@ -0,0 +1,126 @@
+package contentdisposition
+
+import (
+	"net/url"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestStemExt checks stem and extension detection from a URL, optionally
+// followed by the override and content type helpers. Every case parses its
+// URL, builds a StemExt with FromURL and passes it to fn, which returns the
+// stem and extension to compare against the expected values.
+func TestStemExt(t *testing.T) {
+	// Test cases for stem and ext detection; names must be unique so that
+	// each subtest can be addressed unambiguously.
+	tests := []struct {
+		name string
+		url  string
+		stem string
+		ext  string
+		fn   func(StemExt) (string, string)
+	}{
+		{
+			name: "BasicURL",
+			url:  "http://example.com/test.jpg",
+			stem: "test",
+			ext:  ".jpg",
+			fn: func(se StemExt) (string, string) {
+				return se.StemExt()
+			},
+		},
+		{
+			name: "EmptyFilename",
+			url:  "http://example.com/path/to/",
+			stem: "",
+			ext:  "",
+			fn: func(se StemExt) (string, string) {
+				return se.StemExt()
+			},
+		},
+		{
+			name: "EmptyFilenameWithContentType",
+			url:  "http://example.com/path/to/",
+			stem: "",
+			ext:  ".png",
+			fn: func(se StemExt) (string, string) {
+				return (&se).SetExtFromContentTypeIfEmpty("image/png").StemExt()
+			},
+		},
+		{
+			name: "EmptyFilenameWithContentTypeAndOverride",
+			url:  "http://example.com/path/to/",
+			stem: "example",
+			ext:  ".png",
+			fn: func(se StemExt) (string, string) {
+				return (&se).OverrideStem("example").SetExtFromContentTypeIfEmpty("image/png").StemExt()
+			},
+		},
+		{
+			name: "EmptyFilenameWithOverride",
+			url:  "http://example.com/path/to/",
+			stem: "example",
+			ext:  ".jpg",
+			fn: func(se StemExt) (string, string) {
+				return (&se).OverrideStem("example").OverrideExt(".jpg").StemExt()
+			},
+		},
+		{
+			name: "PresentFilenameWithOverride",
+			url:  "http://example.com/path/to/face",
+			stem: "face",
+			ext:  ".jpg",
+			fn: func(se StemExt) (string, string) {
+				return (&se).OverrideExt(".jpg").StemExt()
+			},
+		},
+		{
+			name: "PresentFilenameWithStemAndExtOverride",
+			url:  "http://example.com/path/to/123",
+			stem: "face",
+			ext:  ".jpg",
+			fn: func(se StemExt) (string, string) {
+				return (&se).OverrideStem("face").OverrideExt(".jpg").StemExt()
+			},
+		},
+		{
+			name: "EmptyFilenameWithFallback",
+			url:  "http://example.com/path/to/",
+			stem: "image",
+			ext:  ".png",
+			fn: func(se StemExt) (string, string) {
+				return (&se).SetExtFromContentTypeIfEmpty("image/png").StemExtWithFallback()
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			u, err := url.Parse(tc.url)
+			require.NoError(t, err)
+
+			se := FromURL(u)
+			stem, ext := tc.fn(se)
+
+			require.Equal(t, tc.stem, stem)
+			require.Equal(t, tc.ext, ext)
+		})
+	}
+}
+
+// BenchmarkFromURL measures building a StemExt from a URL and reading the
+// stem and extension back.
+func BenchmarkFromURL(b *testing.B) {
+	u, err := url.Parse("http://example.com/path/to/test.jpg")
+	if err != nil {
+		// Without a parsed URL the benchmark would dereference a nil pointer.
+		b.Fatal(err)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		se := FromURL(u)
+		_, _ = se.StemExt()
+	}
+}
+
+// BenchmarkMethodChaining measures building a StemExt from a URL without a
+// file name, chaining the content type and stem helpers, and reading the
+// result with the fallback applied.
+func BenchmarkMethodChaining(b *testing.B) {
+	u, err := url.Parse("http://example.com/path/to/")
+	if err != nil {
+		// Without a parsed URL the benchmark would dereference a nil pointer.
+		b.Fatal(err)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		se := FromURL(u)
+		(&se).SetExtFromContentTypeIfEmpty("image/png").OverrideStem("example")
+		_, _ = se.StemExtWithFallback()
+	}
+}