Ver Fonte

requests/images in progress stats and queue time metric

DarthSim há 3 anos atrás
pai
commit
cf2b3c5195

+ 2 - 0
CHANGELOG.md

@@ -7,6 +7,8 @@
 - Add support of JPEG files with differential Huffman coding or arithmetic coding.
 - Add `IMGPROXY_PREFERRED_FORMATS` config.
 - Add `IMGPROXY_REQUESTS_QUEUE_SIZE` config.
+- Add `requests_in_progress` and `images_in_progress` metrics.
+- Add queue segment/span to request traces.
 - Add sending additional metrics to Datadog and `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` config.
 - Add sending additional metrics to New Relic.
 

+ 3 - 0
docs/datadog.md

@@ -23,6 +23,7 @@ imgproxy can send its metrics to Datadog. To use this feature, do the following:
 imgproxy will send the following info to Datadog:
 
 * Response time
+* Queue time
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing image
@@ -31,6 +32,8 @@ imgproxy will send the following info to Datadog:
 
 When the `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` environment variable is set to `true` imgproxy will send the following additional metrics to Datadog:
 
+* `imgproxy.requests_in_progress`: the number of requests currently in progress
+* `imgproxy.images_in_progress`: the number of images currently in progress
 * `imgproxy.buffer.size`: a histogram of the download/gzip buffers sizes (in bytes)
 * `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
 * `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)

+ 3 - 0
docs/new_relic.md

@@ -11,12 +11,15 @@ imgproxy will send the following info to New Relic:
 
 * CPU and memory usage
 * Response time
+* Queue time
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing an image
 
 Additionally, imgproxy sends the following metrics over [Metrics API](https://docs.newrelic.com/docs/data-apis/ingest-apis/metric-api/introduction-metric-api/):
 
+* `imgproxy.requests_in_progress`: the number of requests currently in progress
+* `imgproxy.images_in_progress`: the number of images currently in progress
 * `imgproxy.buffer.size`: a summary of the download/gzip buffers sizes (in bytes)
 * `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
 * `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)

+ 13 - 4
docs/prometheus.md

@@ -3,16 +3,17 @@
 imgproxy can collect metrics for Prometheus. To use this feature, do the following:
 
 1. Set the `IMGPROXY_PROMETHEUS_BIND` environment variable to the address and port that will be listened to by the Prometheus server. Note that you can't bind the main server and Prometheus to the same port.
-2. _(optional)_ Set the `IMGPROXY_PROMETHEUS_NAMESPACE` to prepend prefix to the names of metrics, i.e. with `IMGPROXY_PROMETHEUS_NAMESPACE=imgproxy` Names will appear like `imgproxy_requests_total`.
+2. _(optional)_ Set the `IMGPROXY_PROMETHEUS_NAMESPACE` to prepend prefix to the names of metrics, i.e. with `IMGPROXY_PROMETHEUS_NAMESPACE=imgproxy` names will appear like `imgproxy_requests_total`.
 3. Collect the metrics from any path on the specified binding.
 
 imgproxy will collect the following metrics:
 
 * `requests_total`: a counter with the total number of HTTP requests imgproxy has processed
 * `errors_total`: a counter of the occurred errors separated by type (timeout, downloading, processing)
-* `request_duration_seconds`: a histogram of the response latency (in seconds)
-* `download_duration_seconds`: a histogram of the source image downloading latency (in seconds)
-* `processing_duration_seconds`: a histogram of the image processing latency (in seconds)
+* `request_duration_seconds`: a histogram of the request latency (in seconds)
+* `request_span_duration_seconds`: a histogram of the request latency (in seconds) separated by span (queue, downloading, processing)
+* `requests_in_progress`: the number of requests currently in progress
+* `images_in_progress`: the number of images currently in progress
 * `buffer_size_bytes`: a histogram of the download/gzip buffers sizes (in bytes)
 * `buffer_default_size_bytes`: calibrated default buffer size (in bytes)
 * `buffer_max_size_bytes`: calibrated maximum buffer size (in bytes)
@@ -20,3 +21,11 @@ imgproxy will collect the following metrics:
 * `vips_max_memory_bytes`: libvips maximum memory usage
 * `vips_allocs`: the number of active vips allocations
 * Some useful Go metrics like memstats and goroutines count
+
+### Deprecated metrics
+
+The following metrics are deprecated and may be removed in future versions. Use `request_span_duration_seconds` instead.
+
+* `download_duration_seconds`: a histogram of the source image downloading latency (in seconds)
+* `processing_duration_seconds`: a histogram of the image processing latency (in seconds)
+

+ 4 - 0
metrics/datadog/datadog.go

@@ -15,6 +15,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/version"
 )
 
@@ -176,6 +177,9 @@ func runMetricsCollector() {
 					statsdClient.Gauge(name, f(), nil, 1)
 				}
 			}()
+
+			statsdClient.Gauge("imgproxy.requests_in_progress", stats.RequestsInProgress(), nil, 1)
+			statsdClient.Gauge("imgproxy.images_in_progress", stats.ImagesInProgress(), nil, 1)
 		case <-statsdClientStop:
 			return
 		}

+ 14 - 0
metrics/metrics.go

@@ -46,6 +46,20 @@ func StartRequest(ctx context.Context, rw http.ResponseWriter, r *http.Request)
 	return ctx, cancel, rw
 }
 
+func StartQueueSegment(ctx context.Context) context.CancelFunc {
+	promCancel := prometheus.StartQueueSegment()
+	nrCancel := newrelic.StartSegment(ctx, "Queue")
+	ddCancel := datadog.StartSpan(ctx, "queue")
+
+	cancel := func() {
+		promCancel()
+		nrCancel()
+		ddCancel()
+	}
+
+	return cancel
+}
+
 func StartDownloadingSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartDownloadingSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Downloading image")

+ 13 - 0
metrics/newrelic/newrelic.go

@@ -15,6 +15,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 )
 
 type transactionCtxKey struct{}
@@ -246,6 +247,18 @@ func runMetricsCollector() {
 				}
 			}()
 
+			harvester.RecordMetric(telemetry.Gauge{
+				Name:      "imgproxy.requests_in_progress",
+				Value:     stats.RequestsInProgress(),
+				Timestamp: time.Now(),
+			})
+
+			harvester.RecordMetric(telemetry.Gauge{
+				Name:      "imgproxy.images_in_progress",
+				Value:     stats.ImagesInProgress(),
+				Timestamp: time.Now(),
+			})
+
 			harvester.HarvestNow(harvesterCtx)
 		case <-harvesterCtx.Done():
 			return

+ 65 - 13
metrics/prometheus/prometheus.go

@@ -11,20 +11,27 @@ import (
 	log "github.com/sirupsen/logrus"
 
 	"github.com/imgproxy/imgproxy/v3/config"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/reuseport"
 )
 
 var (
 	enabled = false
 
-	requestsTotal      prometheus.Counter
-	errorsTotal        *prometheus.CounterVec
-	requestDuration    prometheus.Histogram
-	downloadDuration   prometheus.Histogram
-	processingDuration prometheus.Histogram
-	bufferSize         *prometheus.HistogramVec
-	bufferDefaultSize  *prometheus.GaugeVec
-	bufferMaxSize      *prometheus.GaugeVec
+	requestsTotal prometheus.Counter
+	errorsTotal   *prometheus.CounterVec
+
+	requestDuration     prometheus.Histogram
+	requestSpanDuration *prometheus.HistogramVec
+	downloadDuration    prometheus.Histogram
+	processingDuration  prometheus.Histogram
+
+	bufferSize        *prometheus.HistogramVec
+	bufferDefaultSize *prometheus.GaugeVec
+	bufferMaxSize     *prometheus.GaugeVec
+
+	requestsInProgress prometheus.GaugeFunc
+	imagesInProgress   prometheus.GaugeFunc
 )
 
 func Init() {
@@ -50,6 +57,12 @@ func Init() {
 		Help:      "A histogram of the response latency.",
 	})
 
+	requestSpanDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "request_span_duration_seconds",
+		Help:      "A histogram of the queue latency.",
+	}, []string{"span"})
+
 	downloadDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: config.PrometheusNamespace,
 		Name:      "download_duration_seconds",
@@ -81,15 +94,30 @@ func Init() {
 		Help:      "A gauge of the buffer max size in bytes.",
 	}, []string{"type"})
 
+	requestsInProgress = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "requests_in_progress",
+		Help:      "A gauge of the number of requests currently being in progress.",
+	}, stats.RequestsInProgress)
+
+	imagesInProgress = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "images_in_progress",
+		Help:      "A gauge of the number of images currently being in progress.",
+	}, stats.ImagesInProgress)
+
 	prometheus.MustRegister(
 		requestsTotal,
 		errorsTotal,
 		requestDuration,
+		requestSpanDuration,
 		downloadDuration,
 		processingDuration,
 		bufferSize,
 		bufferDefaultSize,
 		bufferMaxSize,
+		requestsInProgress,
+		imagesInProgress,
 	)
 
 	enabled = true
@@ -131,19 +159,43 @@ func StartRequest() context.CancelFunc {
 	return startDuration(requestDuration)
 }
 
-func StartDownloadingSegment() context.CancelFunc {
-	return startDuration(downloadDuration)
+func StartQueueSegment() context.CancelFunc {
+	if !enabled {
+		return func() {}
+	}
+
+	return startDuration(requestSpanDuration.With(prometheus.Labels{"span": "queue"}))
 }
 
-func StartProcessingSegment() context.CancelFunc {
-	return startDuration(processingDuration)
+func StartDownloadingSegment() context.CancelFunc {
+	if !enabled {
+		return func() {}
+	}
+
+	cancel := startDuration(requestSpanDuration.With(prometheus.Labels{"span": "downloading"}))
+	cancelLegacy := startDuration(downloadDuration)
+
+	return func() {
+		cancel()
+		cancelLegacy()
+	}
 }
 
-func startDuration(m prometheus.Histogram) context.CancelFunc {
+func StartProcessingSegment() context.CancelFunc {
 	if !enabled {
 		return func() {}
 	}
 
+	cancel := startDuration(requestSpanDuration.With(prometheus.Labels{"span": "processing"}))
+	cancelLegacy := startDuration(processingDuration)
+
+	return func() {
+		cancel()
+		cancelLegacy()
+	}
+}
+
+func startDuration(m prometheus.Observer) context.CancelFunc {
 	t := time.Now()
 	return func() {
 		m.Observe(time.Since(t).Seconds())

+ 32 - 0
metrics/stats/stats.go

@@ -0,0 +1,32 @@
+package stats
+
+import "sync/atomic"
+
+var (
+	requestsInProgress int64
+	imagesInProgress   int64
+)
+
+func RequestsInProgress() float64 {
+	return float64(atomic.LoadInt64(&requestsInProgress))
+}
+
+func IncRequestsInProgress() {
+	atomic.AddInt64(&requestsInProgress, 1)
+}
+
+func DecRequestsInProgress() {
+	atomic.AddInt64(&requestsInProgress, -1)
+}
+
+func ImagesInProgress() float64 {
+	return float64(atomic.LoadInt64(&imagesInProgress))
+}
+
+func IncImagesInProgress() {
+	atomic.AddInt64(&imagesInProgress, 1)
+}
+
+func DecImagesInProgress() {
+	atomic.AddInt64(&imagesInProgress, -1)
+}

+ 20 - 7
processing_handler.go

@@ -19,6 +19,7 @@ import (
 	"github.com/imgproxy/imgproxy/v3/imagedata"
 	"github.com/imgproxy/imgproxy/v3/imagetype"
 	"github.com/imgproxy/imgproxy/v3/metrics"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/options"
 	"github.com/imgproxy/imgproxy/v3/processing"
 	"github.com/imgproxy/imgproxy/v3/router"
@@ -179,6 +180,9 @@ func checkErr(ctx context.Context, errType string, err error) {
 }
 
 func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
+	stats.IncRequestsInProgress()
+	defer stats.DecRequestsInProgress()
+
 	ctx := r.Context()
 
 	if queueSem != nil {
@@ -249,15 +253,24 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 	}
 
 	// The heavy part start here, so we need to restrict concurrency
-	processingSemToken, aquired := processingSem.Aquire(ctx)
-	if !aquired {
-		// We don't actually need to check timeout here,
-		// but it's an easy way to check if this is an actual timeout
-		// or the request was cancelled
-		checkErr(ctx, "queue", router.CheckTimeout(ctx))
-	}
+	var processingSemToken *semaphore.Token
+	func() {
+		defer metrics.StartQueueSegment(ctx)()
+
+		var aquired bool
+		processingSemToken, aquired = processingSem.Aquire(ctx)
+		if !aquired {
+			// We don't actually need to check timeout here,
+			// but it's an easy way to check if this is an actual timeout
+			// or the request was cancelled
+			checkErr(ctx, "queue", router.CheckTimeout(ctx))
+		}
+	}()
 	defer processingSemToken.Release()
 
+	stats.IncImagesInProgress()
+	defer stats.DecImagesInProgress()
+
 	statusCode := http.StatusOK
 
 	originData, err := func() (*imagedata.ImageData, error) {