Kaynağa Gözat

Better error metrics

DarthSim 2 yıl önce
ebeveyn
işleme
20039f49a5

+ 4 - 15
metrics/datadog/datadog.go

@@ -2,16 +2,15 @@ package datadog
 
 import (
 	"context"
-	"errors"
 	"net/http"
 	"os"
-	"time"
 
 	log "github.com/sirupsen/logrus"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
 
 	"github.com/imgproxy/imgproxy/v3/config"
+	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
 	"github.com/imgproxy/imgproxy/v3/version"
 )
 
@@ -79,24 +78,14 @@ func StartSpan(ctx context.Context, name string) context.CancelFunc {
 	return func() {}
 }
 
-func SendError(ctx context.Context, err error) {
+func SendError(ctx context.Context, errType string, err error) {
 	if !enabled {
 		return
 	}
 
 	if rootSpan, ok := ctx.Value(spanCtxKey{}).(tracer.Span); ok {
-		rootSpan.Finish(tracer.WithError(err))
-	}
-}
-
-func SendTimeout(ctx context.Context, d time.Duration) {
-	if !enabled {
-		return
-	}
-
-	if rootSpan, ok := ctx.Value(spanCtxKey{}).(tracer.Span); ok {
-		rootSpan.SetTag("timeout_duration", d)
-		rootSpan.Finish(tracer.WithError(errors.New("Timeout")))
+		rootSpan.SetTag(ext.Error, err)
+		rootSpan.SetTag(ext.ErrorType, errformat.FormatErrType(errType, err))
 	}
 }
 

+ 21 - 0
metrics/errformat/errformat.go

@@ -0,0 +1,21 @@
+package errformat
+
+import (
+	"fmt"
+	"reflect"
+
+	"github.com/sirupsen/logrus"
+
+	"github.com/imgproxy/imgproxy/v3/ierrors"
+)
+
+func FormatErrType(errType string, err error) string {
+	errType += "_error"
+
+	if _, ok := err.(*ierrors.Error); !ok {
+		errType = fmt.Sprintf("%s (%s)", errType, reflect.TypeOf(err).String())
+		logrus.Warnf("ErrType: %s", errType)
+	}
+
+	return errType
+}

+ 2 - 9
metrics/metrics.go

@@ -3,7 +3,6 @@ package metrics
 import (
 	"context"
 	"net/http"
-	"time"
 
 	"github.com/imgproxy/imgproxy/v3/metrics/datadog"
 	"github.com/imgproxy/imgproxy/v3/metrics/newrelic"
@@ -76,12 +75,6 @@ func StartProcessingSegment(ctx context.Context) context.CancelFunc {
 
 func SendError(ctx context.Context, errType string, err error) {
 	prometheus.IncrementErrorsTotal(errType)
-	newrelic.SendError(ctx, err)
-	datadog.SendError(ctx, err)
-}
-
-func SendTimeout(ctx context.Context, d time.Duration) {
-	prometheus.IncrementErrorsTotal("timeout")
-	newrelic.SendTimeout(ctx, d)
-	datadog.SendTimeout(ctx, d)
+	newrelic.SendError(ctx, errType, err)
+	datadog.SendError(ctx, errType, err)
 }

+ 4 - 17
metrics/newrelic/newrelic.go

@@ -4,9 +4,9 @@ import (
 	"context"
 	"fmt"
 	"net/http"
-	"time"
 
 	"github.com/imgproxy/imgproxy/v3/config"
+	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
 	"github.com/newrelic/go-agent/v3/newrelic"
 )
 
@@ -78,28 +78,15 @@ func StartSegment(ctx context.Context, name string) context.CancelFunc {
 	return func() {}
 }
 
-func SendError(ctx context.Context, err error) {
-	if !enabled {
-		return
-	}
-
-	if txn, ok := ctx.Value(transactionCtxKey{}).(*newrelic.Transaction); ok {
-		txn.NoticeError(err)
-	}
-}
-
-func SendTimeout(ctx context.Context, d time.Duration) {
+func SendError(ctx context.Context, errType string, err error) {
 	if !enabled {
 		return
 	}
 
 	if txn, ok := ctx.Value(transactionCtxKey{}).(*newrelic.Transaction); ok {
 		txn.NoticeError(newrelic.Error{
-			Message: "Timeout",
-			Class:   "Timeout",
-			Attributes: map[string]interface{}{
-				"time": d.Seconds(),
-			},
+			Message: err.Error(),
+			Class:   errformat.FormatErrType(errType, err),
 		})
 	}
 }

+ 4 - 1
processing/pipeline.go

@@ -53,7 +53,10 @@ func (p pipeline) Run(ctx context.Context, img *vips.Image, po *options.Processi
 		if err := step(&pctx, img, po, imgdata); err != nil {
 			return err
 		}
-		router.CheckTimeout(ctx)
+
+		if err := router.CheckTimeout(ctx); err != nil {
+			return err
+		}
 	}
 
 	return nil

+ 3 - 1
processing/processing.go

@@ -214,7 +214,9 @@ func saveImageToFitBytes(ctx context.Context, po *options.ProcessingOptions, img
 		}
 		imgdata.Close()
 
-		router.CheckTimeout(ctx)
+		if err := router.CheckTimeout(ctx); err != nil {
+			return nil, err
+		}
 
 		delta := float64(len(imgdata.Data)) / float64(po.MaxBytes)
 		switch {

+ 52 - 23
processing_handler.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"net/http"
 	"net/http/cookiejar"
@@ -144,6 +145,33 @@ func respondWithNotModified(reqID string, r *http.Request, rw http.ResponseWrite
 	)
 }
 
+func sendErrAndPanic(ctx context.Context, errType string, err error) {
+	send := true
+
+	if ierr, ok := err.(*ierrors.Error); ok {
+		switch ierr.StatusCode {
+		case http.StatusServiceUnavailable:
+			errType = "timeout"
+		case 499:
+			// Don't need to send a "request cancelled" error
+			send = false
+		}
+	}
+
+	if send {
+		metrics.SendError(ctx, errType, err)
+	}
+
+	panic(err)
+}
+
+func checkErr(ctx context.Context, errType string, err error) {
+	if err == nil {
+		return
+	}
+	sendErrAndPanic(ctx, errType, err)
+}
+
 func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 	ctx := r.Context()
 
@@ -163,25 +191,29 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 		signature = path[:signatureEnd]
 		path = path[signatureEnd:]
 	} else {
-		panic(ierrors.New(404, fmt.Sprintf("Invalid path: %s", path), "Invalid URL"))
+		sendErrAndPanic(ctx, "path_parsing", ierrors.New(
+			404, fmt.Sprintf("Invalid path: %s", path), "Invalid URL",
+		))
 	}
 
 	if err := security.VerifySignature(signature, path); err != nil {
-		panic(ierrors.New(403, err.Error(), "Forbidden"))
+		sendErrAndPanic(ctx, "security", ierrors.New(403, err.Error(), "Forbidden"))
 	}
 
 	po, imageURL, err := options.ParsePath(path, r.Header)
-	if err != nil {
-		panic(err)
-	}
+	checkErr(ctx, "path_parsing", err)
 
 	if !security.VerifySourceURL(imageURL) {
-		panic(ierrors.New(404, fmt.Sprintf("Source URL is not allowed: %s", imageURL), "Invalid source"))
+		sendErrAndPanic(ctx, "security", ierrors.New(
+			404,
+			fmt.Sprintf("Source URL is not allowed: %s", imageURL),
+			"Invalid source",
+		))
 	}
 
 	// SVG is a special case. Though saving to svg is not supported, SVG->SVG is.
 	if !vips.SupportsSave(po.Format) && po.Format != imagetype.Unknown && po.Format != imagetype.SVG {
-		panic(ierrors.New(
+		sendErrAndPanic(ctx, "path_parsing", ierrors.New(
 			422,
 			fmt.Sprintf("Resulting image format is not supported: %s", po.Format),
 			"Invalid URL",
@@ -209,7 +241,7 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 		// We don't actually need to check timeout here,
 		// but it's an easy way to check if this is an actual timeout
 		// or the request was cancelled
-		router.CheckTimeout(ctx)
+		checkErr(ctx, "queue", router.CheckTimeout(ctx))
 	}
 	defer func() { <-processingSem }()
 
@@ -221,9 +253,8 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 		var cookieJar *cookiejar.Jar
 
 		if config.CookiePassthrough {
-			if cookieJar, err = cookies.JarFromRequest(r); err != nil {
-				panic(err)
-			}
+			cookieJar, err = cookies.JarFromRequest(r)
+			checkErr(ctx, "download", err)
 		}
 
 		return imagedata.Download(imageURL, "source image", imgRequestHeader, cookieJar)
@@ -258,7 +289,7 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 		originData = imagedata.FallbackImage
 	}
 
-	router.CheckTimeout(ctx)
+	checkErr(ctx, "timeout", router.CheckTimeout(ctx))
 
 	if config.ETagEnabled && statusCode == http.StatusOK {
 		imgDataMatch := etagHandler.SetActualImageData(originData)
@@ -271,16 +302,14 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 		}
 	}
 
-	router.CheckTimeout(ctx)
+	checkErr(ctx, "timeout", router.CheckTimeout(ctx))
 
 	if originData.Type == po.Format || po.Format == imagetype.Unknown {
 		// Don't process SVG
 		if originData.Type == imagetype.SVG {
 			if config.SanitizeSvg {
 				sanitized, svgErr := svg.Satitize(originData.Data)
-				if svgErr != nil {
-					panic(svgErr)
-				}
+				checkErr(ctx, "svg_processing", svgErr)
 
 				// Since we'll replace origin data, it's better to close it to return
 				// it's buffer to the pool
@@ -307,7 +336,7 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 	}
 
 	if !vips.SupportsLoad(originData.Type) {
-		panic(ierrors.New(
+		sendErrAndPanic(ctx, "processing", ierrors.New(
 			422,
 			fmt.Sprintf("Source image format is not supported: %s", originData.Type),
 			"Invalid URL",
@@ -316,20 +345,20 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 
 	// At this point we can't allow requested format to be SVG as we can't save SVGs
 	if po.Format == imagetype.SVG {
-		panic(ierrors.New(422, "Resulting image format is not supported: svg", "Invalid URL"))
+		sendErrAndPanic(ctx, "processing", ierrors.New(
+			422, "Resulting image format is not supported: svg", "Invalid URL",
+		))
 	}
 
 	resultData, err := func() (*imagedata.ImageData, error) {
 		defer metrics.StartProcessingSegment(ctx)()
 		return processing.ProcessImage(ctx, originData, po)
 	}()
-	if err != nil {
-		metrics.SendError(ctx, "processing", err)
-		panic(err)
-	}
+	checkErr(ctx, "processing", err)
+
 	defer resultData.Close()
 
-	router.CheckTimeout(ctx)
+	checkErr(ctx, "timeout", router.CheckTimeout(ctx))
 
 	respondWithImage(reqID, r, rw, statusCode, resultData, po, imageURL, originData)
 }

+ 5 - 7
router/timer.go

@@ -8,7 +8,6 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/ierrors"
-	"github.com/imgproxy/imgproxy/v3/metrics"
 )
 
 type timerSinceCtxKey = struct{}
@@ -27,7 +26,7 @@ func ctxTime(ctx context.Context) time.Duration {
 	return 0
 }
 
-func CheckTimeout(ctx context.Context) {
+func CheckTimeout(ctx context.Context) error {
 	select {
 	case <-ctx.Done():
 		d := ctxTime(ctx)
@@ -35,14 +34,13 @@ func CheckTimeout(ctx context.Context) {
 		err := ctx.Err()
 		switch err {
 		case context.Canceled:
-			panic(ierrors.New(499, fmt.Sprintf("Request was cancelled after %v", d), "Cancelled"))
+			return ierrors.New(499, fmt.Sprintf("Request was cancelled after %v", d), "Cancelled")
 		case context.DeadlineExceeded:
-			metrics.SendTimeout(ctx, d)
-			panic(ierrors.New(503, fmt.Sprintf("Timeout after %v", d), "Timeout"))
+			return ierrors.New(http.StatusServiceUnavailable, fmt.Sprintf("Request was timed out after %v", d), "Timeout")
 		default:
-			panic(err)
+			return err
 		}
 	default:
-		// Go ahead
+		return nil
 	}
 }