浏览代码

Merge pull request #920 from imgproxy/feature/additional-metrics

Additional metrics
Sergey Alexandrovich 3 年之前
父节点
当前提交
badf8303a2
共有 17 个文件被更改,包括 517 次插入和 54 次删除
  1. 4 0
      CHANGELOG.md
  2. 4 4
      bufpool/bufpool.go
  3. 3 1
      config/config.go
  4. 7 4
      docs/configuration.md
  5. 15 0
      docs/datadog.md
  6. 12 0
      docs/new_relic.md
  7. 13 4
      docs/prometheus.md
  8. 2 0
      go.mod
  9. 4 0
      go.sum
  10. 98 1
      metrics/datadog/datadog.go
  11. 33 0
      metrics/metrics.go
  12. 180 5
      metrics/newrelic/newrelic.go
  13. 65 13
      metrics/prometheus/prometheus.go
  14. 32 0
      metrics/stats/stats.go
  15. 20 7
      processing_handler.go
  16. 15 15
      vips/vips.c
  17. 10 0
      vips/vips.go

+ 4 - 0
CHANGELOG.md

@@ -7,6 +7,10 @@
 - Add support of JPEG files with differential Huffman coding or arithmetic coding.
 - Add `IMGPROXY_PREFERRED_FORMATS` config.
 - Add `IMGPROXY_REQUESTS_QUEUE_SIZE` config.
+- Add `requests_in_progress` and `images_in_progress` metrics.
+- Add queue segment/span to request traces.
+- Add sending additional metrics to Datadog and `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` config.
+- Add sending additional metrics to New Relic.
 
 ### Change
 - Change `IMGPROXY_MAX_CLIENTS` default value to 2048.

+ 4 - 4
bufpool/bufpool.go

@@ -8,7 +8,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/imath"
-	"github.com/imgproxy/imgproxy/v3/metrics/prometheus"
+	"github.com/imgproxy/imgproxy/v3/metrics"
 )
 
 type intSlice []int
@@ -69,8 +69,8 @@ func (p *Pool) calibrateAndClean() {
 		runtime.GC()
 	}
 
-	prometheus.SetBufferDefaultSize(p.name, p.defaultSize)
-	prometheus.SetBufferMaxSize(p.name, p.maxSize)
+	metrics.SetBufferDefaultSize(p.name, p.defaultSize)
+	metrics.SetBufferMaxSize(p.name, p.maxSize)
 }
 
 func (p *Pool) Get(size int) *bytes.Buffer {
@@ -146,7 +146,7 @@ func (p *Pool) Put(buf *bytes.Buffer) {
 			p.buffers[i] = buf
 
 			if buf.Cap() > 0 {
-				prometheus.ObserveBufferSize(p.name, buf.Cap())
+				metrics.ObserveBufferSize(p.name, buf.Cap())
 			}
 
 			return

+ 3 - 1
config/config.go

@@ -131,7 +131,8 @@ var (
 	FallbackImageHTTPCode int
 	FallbackImageTTL      int
 
-	DataDogEnable bool
+	DataDogEnable        bool
+	DataDogEnableMetrics bool
 
 	NewRelicAppName string
 	NewRelicKey     string
@@ -475,6 +476,7 @@ func Configure() error {
 	configurators.Int(&FallbackImageTTL, "IMGPROXY_FALLBACK_IMAGE_TTL")
 
 	configurators.Bool(&DataDogEnable, "IMGPROXY_DATADOG_ENABLE")
+	configurators.Bool(&DataDogEnableMetrics, "IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS")
 
 	configurators.String(&NewRelicAppName, "IMGPROXY_NEW_RELIC_APP_NAME")
 	configurators.String(&NewRelicKey, "IMGPROXY_NEW_RELIC_KEY")

+ 7 - 4
docs/configuration.md

@@ -34,7 +34,7 @@ echo $(xxd -g 2 -l 64 -p /dev/random | tr -d '\n')
 * `IMGPROXY_DOWNLOAD_TIMEOUT`: the maximum duration (in seconds) for downloading the source image. Default: `5`
 * `IMGPROXY_CONCURRENCY`: the maximum number of image requests to be processed simultaneously. Requests that exceed this limit are put in the queue. Default: the number of CPU cores multiplied by two
 * `IMGPROXY_REQUESTS_QUEUE_SIZE`: the maximum number of image requests that can be put in the queue. Requests that exceed this limit are rejected with `429` HTTP status. When set to `0`, the requests queue is unlimited. Default: `0`
-* `IMGPROXY_MAX_CLIENTS`: the maximum number of simultaneous active connections. When set to `0`, connections number limitation is disabled. Default: `2048`
+* `IMGPROXY_MAX_CLIENTS`: the maximum number of simultaneous active connections. When set to `0`, connection limit is disabled. Default: `2048`
 * `IMGPROXY_TTL`: a duration (in seconds) sent via the `Expires` and `Cache-Control: max-age` HTTP headers. Default: `31536000` (1 year)
 * `IMGPROXY_CACHE_CONTROL_PASSTHROUGH`: when `true` and the source image response contains the `Expires` or `Cache-Control` headers, reuse those headers. Default: false
 * `IMGPROXY_SET_CANONICAL_HEADER`: when `true` and the source image has an `http` or `https` scheme, set a `rel="canonical"` HTTP header to the value of the source image URL. More details [here](https://developers.google.com/search/docs/advanced/crawling/consolidate-duplicate-urls#rel-canonical-header-method). Default: `false`
@@ -252,17 +252,17 @@ You can set up a fallback image that will be used in case imgproxy is unable to
 
 ## Preferred formats
 
-When the resulting image format is not explicitly specified in the imgproxy URL via the extension or the `format` processing option, imgproxy will choose one of the preferred formats as the resultant:
+When the resulting image format is not explicitly specified in the imgproxy URL via the extension or the `format` processing option, imgproxy will choose one of the preferred formats:
 
 * `IMGPROXY_PREFERRED_FORMATS`: a list of preferred formats, comma divided. Default: `jpeg,png,gif,webp,avif,ico`
 
 imgproxy is guided by the following rules when choosing the resulting format:
 
-1. If the preferred formats list contains the source image format, it will be used as the resultant
+1. If the preferred formats list contains the source image format, it will be used
 2. If the resulting image is animated, the resulting image format should support animations
 3. If the resulting image contains transparency, the resulting image format should support transparency
 4. imgproxy chooses the first preferred format that meets those requirements
-5. If none of the preferred formats meet the requirements, the first preferred format is used as the resultant
+5. If none of the preferred formats meet the requirements, the first preferred format is used
 
 **📝Note:** When AVIF/WebP support detection is enabled and the browser supports AVIF/WebP, it may be used as the resultant format even if the preferred formats list doesn't contain it.
 
@@ -385,6 +385,9 @@ Check out the [Prometheus](prometheus.md) guide to learn more.
 imgproxy can send its metrics to Datadog:
 
 * `IMGPROXY_DATADOG_ENABLE`: when `true`, enables sending metrics to Datadog. Default: false
+* `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS`: when `true`, enables sending additional metrics to Datadog. Default: false
+
+**⚠️Warning:** Since the additional metrics are treated by Datadog as custom, Datadog can additionally bill you for their usage. Please check out Datadog's [Custom Metrics Billing](https://docs.datadoghq.com/account_management/billing/custom_metrics/) page for additional details.
 
 Check out the [Datadog](datadog.md) guide to learn more.
 

+ 15 - 0
docs/datadog.md

@@ -18,10 +18,25 @@ imgproxy can send its metrics to Datadog. To use this feature, do the following:
     * `DD_RUNTIME_METRICS_ENABLED`: enables automatic collection of runtime metrics every 10 seconds. Default: `false`
     * `DD_TRACE_STARTUP_LOGS`: causes various startup info to be written when the tracer starts. Default: `true`
     * `DD_TRACE_DEBUG`: enables detailed logs. Default: `false`
+4. _(optional)_ Set the `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` environment variable to `true` to collect the [additional metrics](#additional-metrics).
 
 imgproxy will send the following info to Datadog:
 
 * Response time
+* Queue time
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing image
+
+## Additional metrics
+
+When the `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` environment variable is set to `true`, imgproxy will send the following additional metrics to Datadog:
+
+* `imgproxy.requests_in_progress`: the number of requests currently in progress
+* `imgproxy.images_in_progress`: the number of images currently in progress
+* `imgproxy.buffer.size`: a histogram of the download/gzip buffer sizes (in bytes)
+* `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
+* `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)
+* `imgproxy.vips.memory`: libvips memory usage (in bytes)
+* `imgproxy.vips.max_memory`: libvips maximum memory usage (in bytes)
+* `imgproxy.vips.allocs`: the number of active vips allocations

+ 12 - 0
docs/new_relic.md

@@ -11,6 +11,18 @@ imgproxy will send the following info to New Relic:
 
 * CPU and memory usage
 * Response time
+* Queue time
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing an image
+
+Additionally, imgproxy sends the following metrics over [Metrics API](https://docs.newrelic.com/docs/data-apis/ingest-apis/metric-api/introduction-metric-api/):
+
+* `imgproxy.requests_in_progress`: the number of requests currently in progress
+* `imgproxy.images_in_progress`: the number of images currently in progress
+* `imgproxy.buffer.size`: a summary of the download/gzip buffer sizes (in bytes)
+* `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
+* `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)
+* `imgproxy.vips.memory`: libvips memory usage (in bytes)
+* `imgproxy.vips.max_memory`: libvips maximum memory usage (in bytes)
+* `imgproxy.vips.allocs`: the number of active vips allocations

+ 13 - 4
docs/prometheus.md

@@ -3,16 +3,17 @@
 imgproxy can collect metrics for Prometheus. To use this feature, do the following:
 
 1. Set the `IMGPROXY_PROMETHEUS_BIND` environment variable to the address and port that will be listened to by the Prometheus server. Note that you can't bind the main server and Prometheus to the same port.
-2. _(optional)_ Set the `IMGPROXY_PROMETHEUS_NAMESPACE` to prepend prefix to the names of metrics, i.e. with `IMGPROXY_PROMETHEUS_NAMESPACE=imgproxy` Names will appear like `imgproxy_requests_total`.
+2. _(optional)_ Set the `IMGPROXY_PROMETHEUS_NAMESPACE` to prepend a prefix to the names of metrics, e.g. with `IMGPROXY_PROMETHEUS_NAMESPACE=imgproxy` names will appear like `imgproxy_requests_total`.
 3. Collect the metrics from any path on the specified binding.
 
 imgproxy will collect the following metrics:
 
 * `requests_total`: a counter with the total number of HTTP requests imgproxy has processed
 * `errors_total`: a counter of the occurred errors separated by type (timeout, downloading, processing)
-* `request_duration_seconds`: a histogram of the response latency (in seconds)
-* `download_duration_seconds`: a histogram of the source image downloading latency (in seconds)
-* `processing_duration_seconds`: a histogram of the image processing latency (in seconds)
+* `request_duration_seconds`: a histogram of the request latency (in seconds)
+* `request_span_duration_seconds`: a histogram of the request latency (in seconds) separated by span (queue, downloading, processing)
+* `requests_in_progress`: the number of requests currently in progress
+* `images_in_progress`: the number of images currently in progress
 * `buffer_size_bytes`: a histogram of the download/gzip buffers sizes (in bytes)
 * `buffer_default_size_bytes`: calibrated default buffer size (in bytes)
 * `buffer_max_size_bytes`: calibrated maximum buffer size (in bytes)
@@ -20,3 +21,11 @@ imgproxy will collect the following metrics:
 * `vips_max_memory_bytes`: libvips maximum memory usage
 * `vips_allocs`: the number of active vips allocations
 * Some useful Go metrics like memstats and goroutines count
+
+### Deprecated metrics
+
+The following metrics are deprecated and may be removed in future versions. Use `request_span_duration_seconds` instead.
+
+* `download_duration_seconds`: a histogram of the source image downloading latency (in seconds)
+* `processing_duration_seconds`: a histogram of the image processing latency (in seconds)
+

+ 2 - 0
go.mod

@@ -5,6 +5,7 @@ go 1.16
 require (
 	cloud.google.com/go/storage v1.22.1
 	github.com/Azure/azure-storage-blob-go v0.15.0
+	github.com/DataDog/datadog-go/v5 v5.1.1 // indirect
 	github.com/Microsoft/go-winio v0.5.2 // indirect
 	github.com/StackExchange/wmi v1.2.1 // indirect
 	github.com/airbrake/gobrake/v5 v5.5.1
@@ -20,6 +21,7 @@ require (
 	github.com/matoous/go-nanoid/v2 v2.0.0
 	github.com/ncw/swift/v2 v2.0.1
 	github.com/newrelic/go-agent/v3 v3.16.1
+	github.com/newrelic/newrelic-telemetry-sdk-go v0.8.1 // indirect
 	github.com/onsi/ginkgo v1.16.5 // indirect
 	github.com/prometheus/client_golang v1.12.2
 	github.com/sirupsen/logrus v1.8.1

+ 4 - 0
go.sum

@@ -102,6 +102,8 @@ github.com/DataDog/datadog-go v4.8.2+incompatible h1:qbcKSx29aBLD+5QLvlQZlGmRMF/
 github.com/DataDog/datadog-go v4.8.2+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
 github.com/DataDog/datadog-go/v5 v5.0.2 h1:UFtEe7662/Qojxkw1d6SboAeA0CPI3naKhVASwFn+04=
 github.com/DataDog/datadog-go/v5 v5.0.2/go.mod h1:ZI9JFB4ewXbw1sBnF4sxsR2k1H3xjV+PUAOUsHvKpcU=
+github.com/DataDog/datadog-go/v5 v5.1.1 h1:JLZ6s2K1pG2h9GkvEvMdEGqMDyVLEAccdX5TltWcLMU=
+github.com/DataDog/datadog-go/v5 v5.1.1/go.mod h1:KhiYb2Badlv9/rofz+OznKoEF5XKTonWyhx5K83AP8E=
 github.com/DataDog/gostackparse v0.5.0/go.mod h1:lTfqcJKqS9KnXQGnyQMCugq3u1FP6UZMfWR0aitKFMM=
 github.com/DataDog/sketches-go v1.0.0 h1:chm5KSXO7kO+ywGWJ0Zs6tdmWU8PBXSbywFVciL6BG4=
 github.com/DataDog/sketches-go v1.0.0/go.mod h1:O+XkJHWk9w4hDwY2ZUDU31ZC9sNYlYo8DiFsxjYeo1k=
@@ -882,6 +884,8 @@ github.com/ncw/swift/v2 v2.0.1 h1:q1IN8hNViXEv8Zvg3Xdis4a3c4IlIGezkYz09zQL5J0=
 github.com/ncw/swift/v2 v2.0.1/go.mod h1:z0A9RVdYPjNjXVo2pDOPxZ4eu3oarO1P91fTItcb+Kg=
 github.com/newrelic/go-agent/v3 v3.16.1 h1:gH053irA4rIAySGSvMc2grKiKNhrM4gCzc+p3M+rqAE=
 github.com/newrelic/go-agent/v3 v3.16.1/go.mod h1:BFJOlbZWRlPTXKYIC1TTTtQKTnYntEJaU0VU507hDc0=
+github.com/newrelic/newrelic-telemetry-sdk-go v0.8.1 h1:6OX5VXMuj2salqNBc41eXKz6K+nV6OB/hhlGnAKCbwU=
+github.com/newrelic/newrelic-telemetry-sdk-go v0.8.1/go.mod h1:2kY6OeOxrJ+RIQlVjWDc/pZlT3MIf30prs6drzMfJ6E=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
 github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
 github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=

+ 98 - 1
metrics/datadog/datadog.go

@@ -2,21 +2,37 @@ package datadog
 
 import (
 	"context"
+	"net"
 	"net/http"
 	"os"
+	"sync"
+	"time"
 
+	"github.com/DataDog/datadog-go/v5/statsd"
 	log "github.com/sirupsen/logrus"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/version"
 )
 
 type spanCtxKey struct{}
 
-var enabled bool
+type GaugeFunc func() float64
+
+var (
+	enabled        bool
+	enabledMetrics bool
+
+	statsdClient     *statsd.Client
+	statsdClientStop chan struct{}
+
+	gaugeFuncs      = make(map[string]GaugeFunc)
+	gaugeFuncsMutex sync.RWMutex
+)
 
 func Init() {
 	if !config.DataDogEnable {
@@ -35,11 +51,44 @@ func Init() {
 	)
 
 	enabled = true
+
+	statsdHost, statsdPort := os.Getenv("DD_AGENT_HOST"), os.Getenv("DD_DOGSTATSD_PORT")
+	if len(statsdHost) == 0 {
+		statsdHost = "localhost"
+	}
+	if len(statsdPort) == 0 {
+		statsdPort = "8125"
+	}
+
+	if !config.DataDogEnableMetrics {
+		return
+	}
+
+	var err error
+	statsdClient, err = statsd.New(
+		net.JoinHostPort(statsdHost, statsdPort),
+		statsd.WithTags([]string{
+			"service:" + name,
+			"version:" + version.Version(),
+		}),
+	)
+	if err == nil {
+		statsdClientStop = make(chan struct{})
+		enabledMetrics = true
+		go runMetricsCollector()
+	} else {
+		log.Warnf("Can't initialize DogStatsD client: %s", err)
+	}
 }
 
 func Stop() {
 	if enabled {
 		tracer.Stop()
+
+		if statsdClient != nil {
+			close(statsdClientStop)
+			statsdClient.Close()
+		}
 	}
 }
 
@@ -89,6 +138,54 @@ func SendError(ctx context.Context, errType string, err error) {
 	}
 }
 
+func AddGaugeFunc(name string, f GaugeFunc) {
+	gaugeFuncsMutex.Lock()
+	defer gaugeFuncsMutex.Unlock()
+
+	gaugeFuncs["imgproxy."+name] = f
+}
+
+func ObserveBufferSize(t string, size int) {
+	if enabledMetrics {
+		statsdClient.Histogram("imgproxy.buffer.size", float64(size), []string{"type:" + t}, 1)
+	}
+}
+
+func SetBufferDefaultSize(t string, size int) {
+	if enabledMetrics {
+		statsdClient.Gauge("imgproxy.buffer.default_size", float64(size), []string{"type:" + t}, 1)
+	}
+}
+
+func SetBufferMaxSize(t string, size int) {
+	if enabledMetrics {
+		statsdClient.Gauge("imgproxy.buffer.max_size", float64(size), []string{"type:" + t}, 1)
+	}
+}
+
+func runMetricsCollector() {
+	tick := time.NewTicker(10 * time.Second)
+	defer tick.Stop()
+	for {
+		select {
+		case <-tick.C:
+			func() {
+				gaugeFuncsMutex.RLock()
+				defer gaugeFuncsMutex.RUnlock()
+
+				for name, f := range gaugeFuncs {
+					statsdClient.Gauge(name, f(), nil, 1)
+				}
+			}()
+
+			statsdClient.Gauge("imgproxy.requests_in_progress", stats.RequestsInProgress(), nil, 1)
+			statsdClient.Gauge("imgproxy.images_in_progress", stats.ImagesInProgress(), nil, 1)
+		case <-statsdClientStop:
+			return
+		}
+	}
+}
+
 type dataDogLogger struct {
 }
 

+ 33 - 0
metrics/metrics.go

@@ -22,6 +22,7 @@ func Init() error {
 }
 
 func Stop() {
+	newrelic.Stop()
 	datadog.Stop()
 }
 
@@ -45,6 +46,20 @@ func StartRequest(ctx context.Context, rw http.ResponseWriter, r *http.Request)
 	return ctx, cancel, rw
 }
 
+func StartQueueSegment(ctx context.Context) context.CancelFunc {
+	promCancel := prometheus.StartQueueSegment()
+	nrCancel := newrelic.StartSegment(ctx, "Queue")
+	ddCancel := datadog.StartSpan(ctx, "queue")
+
+	cancel := func() {
+		promCancel()
+		nrCancel()
+		ddCancel()
+	}
+
+	return cancel
+}
+
 func StartDownloadingSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartDownloadingSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Downloading image")
@@ -78,3 +93,21 @@ func SendError(ctx context.Context, errType string, err error) {
 	newrelic.SendError(ctx, errType, err)
 	datadog.SendError(ctx, errType, err)
 }
+
+func ObserveBufferSize(t string, size int) {
+	prometheus.ObserveBufferSize(t, size)
+	newrelic.ObserveBufferSize(t, size)
+	datadog.ObserveBufferSize(t, size)
+}
+
+func SetBufferDefaultSize(t string, size int) {
+	prometheus.SetBufferDefaultSize(t, size)
+	newrelic.SetBufferDefaultSize(t, size)
+	datadog.SetBufferDefaultSize(t, size)
+}
+
+func SetBufferMaxSize(t string, size int) {
+	prometheus.SetBufferMaxSize(t, size)
+	newrelic.SetBufferMaxSize(t, size)
+	datadog.SetBufferMaxSize(t, size)
+}

+ 180 - 5
metrics/newrelic/newrelic.go

@@ -3,19 +3,49 @@ package newrelic
 import (
 	"context"
 	"fmt"
+	"math"
 	"net/http"
+	"regexp"
+	"sync"
+	"time"
+
+	"github.com/newrelic/go-agent/v3/newrelic"
+	"github.com/newrelic/newrelic-telemetry-sdk-go/telemetry"
+	log "github.com/sirupsen/logrus"
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
-	"github.com/newrelic/go-agent/v3/newrelic"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 )
 
 type transactionCtxKey struct{}
 
+type GaugeFunc func() float64
+
+const (
+	defaultMetricURL = "https://metric-api.newrelic.com/metric/v1"
+	euMetricURL      = "https://metric-api.eu.newrelic.com/metric/v1"
+)
+
 var (
-	enabled = false
+	enabled          = false
+	enabledHarvester = false
+
+	app       *newrelic.Application
+	harvester *telemetry.Harvester
 
-	newRelicApp *newrelic.Application
+	harvesterCtx       context.Context
+	harvesterCtxCancel context.CancelFunc
+
+	gaugeFuncs      = make(map[string]GaugeFunc)
+	gaugeFuncsMutex sync.RWMutex
+
+	bufferSummaries      = make(map[string]*telemetry.Summary)
+	bufferSummariesMutex sync.RWMutex
+
+	interval = 10 * time.Second
+
+	licenseEuRegex = regexp.MustCompile(`(^eu.+?)x`)
 )
 
 func Init() error {
@@ -30,7 +60,7 @@ func Init() error {
 
 	var err error
 
-	newRelicApp, err = newrelic.NewApplication(
+	app, err = newrelic.NewApplication(
 		newrelic.ConfigAppName(name),
 		newrelic.ConfigLicense(config.NewRelicKey),
 		func(c *newrelic.Config) {
@@ -44,11 +74,47 @@ func Init() error {
 		return fmt.Errorf("Can't init New Relic agent: %s", err)
 	}
 
+	harvesterAttributes := map[string]interface{}{"appName": name}
+	for k, v := range config.NewRelicLabels {
+		harvesterAttributes[k] = v
+	}
+
+	metricsURL := defaultMetricURL
+	if licenseEuRegex.MatchString(config.NewRelicKey) {
+		metricsURL = euMetricURL
+	}
+
+	harvester, err = telemetry.NewHarvester(
+		telemetry.ConfigAPIKey(config.NewRelicKey),
+		telemetry.ConfigCommonAttributes(harvesterAttributes),
+		telemetry.ConfigHarvestPeriod(0), // Don't harvest automatically
+		telemetry.ConfigMetricsURLOverride(metricsURL),
+		telemetry.ConfigBasicErrorLogger(log.StandardLogger().WithField("from", "newrelic").WriterLevel(log.WarnLevel)),
+	)
+	if err == nil {
+		harvesterCtx, harvesterCtxCancel = context.WithCancel(context.Background())
+		enabledHarvester = true
+		go runMetricsCollector()
+	} else {
+		log.Warnf("Can't init New Relic telemetry harvester: %s", err)
+	}
+
 	enabled = true
 
 	return nil
 }
 
+func Stop() {
+	if enabled {
+		app.Shutdown(5 * time.Second)
+
+		if enabledHarvester {
+			harvesterCtxCancel()
+			harvester.HarvestNow(context.Background())
+		}
+	}
+}
+
 func Enabled() bool {
 	return enabled
 }
@@ -58,7 +124,7 @@ func StartTransaction(ctx context.Context, rw http.ResponseWriter, r *http.Reque
 		return ctx, func() {}, rw
 	}
 
-	txn := newRelicApp.StartTransaction("request")
+	txn := app.StartTransaction("request")
 	txn.SetWebRequestHTTP(r)
 	newRw := txn.SetWebResponse(rw)
 	cancel := func() { txn.End() }
@@ -90,3 +156,112 @@ func SendError(ctx context.Context, errType string, err error) {
 		})
 	}
 }
+
+func AddGaugeFunc(name string, f GaugeFunc) {
+	gaugeFuncsMutex.Lock()
+	defer gaugeFuncsMutex.Unlock()
+
+	gaugeFuncs["imgproxy."+name] = f
+}
+
+func ObserveBufferSize(t string, size int) {
+	if enabledHarvester {
+		bufferSummariesMutex.Lock()
+		defer bufferSummariesMutex.Unlock()
+
+		summary, ok := bufferSummaries[t]
+		if !ok {
+			summary = &telemetry.Summary{
+				Name:       "imgproxy.buffer.size",
+				Attributes: map[string]interface{}{"buffer_type": t},
+				Timestamp:  time.Now(),
+			}
+			bufferSummaries[t] = summary
+		}
+
+		sizef := float64(size)
+
+		summary.Count += 1
+		summary.Sum += sizef
+		summary.Min = math.Min(summary.Min, sizef)
+		summary.Max = math.Max(summary.Max, sizef)
+	}
+}
+
+func SetBufferDefaultSize(t string, size int) {
+	if enabledHarvester {
+		harvester.RecordMetric(telemetry.Gauge{
+			Name:       "imgproxy.buffer.default_size",
+			Value:      float64(size),
+			Attributes: map[string]interface{}{"buffer_type": t},
+			Timestamp:  time.Now(),
+		})
+	}
+}
+
+func SetBufferMaxSize(t string, size int) {
+	if enabledHarvester {
+		harvester.RecordMetric(telemetry.Gauge{
+			Name:       "imgproxy.buffer.max_size",
+			Value:      float64(size),
+			Attributes: map[string]interface{}{"buffer_type": t},
+			Timestamp:  time.Now(),
+		})
+	}
+}
+
+func runMetricsCollector() {
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+	for {
+		select {
+		case <-tick.C:
+			func() {
+				gaugeFuncsMutex.RLock()
+				defer gaugeFuncsMutex.RUnlock()
+
+				for name, f := range gaugeFuncs {
+					harvester.RecordMetric(telemetry.Gauge{
+						Name:      name,
+						Value:     f(),
+						Timestamp: time.Now(),
+					})
+				}
+			}()
+
+			func() {
+				bufferSummariesMutex.RLock()
+				defer bufferSummariesMutex.RUnlock()
+
+				now := time.Now()
+
+				for _, summary := range bufferSummaries {
+					summary.Interval = now.Sub(summary.Timestamp)
+					harvester.RecordMetric(*summary)
+
+					summary.Timestamp = now
+					summary.Count = 0
+					summary.Sum = 0
+					summary.Min = 0
+					summary.Max = 0
+				}
+			}()
+
+			harvester.RecordMetric(telemetry.Gauge{
+				Name:      "imgproxy.requests_in_progress",
+				Value:     stats.RequestsInProgress(),
+				Timestamp: time.Now(),
+			})
+
+			harvester.RecordMetric(telemetry.Gauge{
+				Name:      "imgproxy.images_in_progress",
+				Value:     stats.ImagesInProgress(),
+				Timestamp: time.Now(),
+			})
+
+			harvester.HarvestNow(harvesterCtx)
+		case <-harvesterCtx.Done():
+			return
+		}
+	}
+}

+ 65 - 13
metrics/prometheus/prometheus.go

@@ -11,20 +11,27 @@ import (
 	log "github.com/sirupsen/logrus"
 
 	"github.com/imgproxy/imgproxy/v3/config"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/reuseport"
 )
 
 var (
 	enabled = false
 
-	requestsTotal      prometheus.Counter
-	errorsTotal        *prometheus.CounterVec
-	requestDuration    prometheus.Histogram
-	downloadDuration   prometheus.Histogram
-	processingDuration prometheus.Histogram
-	bufferSize         *prometheus.HistogramVec
-	bufferDefaultSize  *prometheus.GaugeVec
-	bufferMaxSize      *prometheus.GaugeVec
+	requestsTotal prometheus.Counter
+	errorsTotal   *prometheus.CounterVec
+
+	requestDuration     prometheus.Histogram
+	requestSpanDuration *prometheus.HistogramVec
+	downloadDuration    prometheus.Histogram
+	processingDuration  prometheus.Histogram
+
+	bufferSize        *prometheus.HistogramVec
+	bufferDefaultSize *prometheus.GaugeVec
+	bufferMaxSize     *prometheus.GaugeVec
+
+	requestsInProgress prometheus.GaugeFunc
+	imagesInProgress   prometheus.GaugeFunc
 )
 
 func Init() {
@@ -50,6 +57,12 @@ func Init() {
 		Help:      "A histogram of the response latency.",
 	})
 
+	requestSpanDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "request_span_duration_seconds",
+		Help:      "A histogram of the queue latency.",
+	}, []string{"span"})
+
 	downloadDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: config.PrometheusNamespace,
 		Name:      "download_duration_seconds",
@@ -81,15 +94,30 @@ func Init() {
 		Help:      "A gauge of the buffer max size in bytes.",
 	}, []string{"type"})
 
+	requestsInProgress = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "requests_in_progress",
+		Help:      "A gauge of the number of requests currently being in progress.",
+	}, stats.RequestsInProgress)
+
+	imagesInProgress = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Namespace: config.PrometheusNamespace,
+		Name:      "images_in_progress",
+		Help:      "A gauge of the number of images currently being in progress.",
+	}, stats.ImagesInProgress)
+
 	prometheus.MustRegister(
 		requestsTotal,
 		errorsTotal,
 		requestDuration,
+		requestSpanDuration,
 		downloadDuration,
 		processingDuration,
 		bufferSize,
 		bufferDefaultSize,
 		bufferMaxSize,
+		requestsInProgress,
+		imagesInProgress,
 	)
 
 	enabled = true
@@ -131,19 +159,43 @@ func StartRequest() context.CancelFunc {
 	return startDuration(requestDuration)
 }
 
-func StartDownloadingSegment() context.CancelFunc {
-	return startDuration(downloadDuration)
+func StartQueueSegment() context.CancelFunc {
+	if !enabled {
+		return func() {}
+	}
+
+	return startDuration(requestSpanDuration.With(prometheus.Labels{"span": "queue"}))
 }
 
-func StartProcessingSegment() context.CancelFunc {
-	return startDuration(processingDuration)
+func StartDownloadingSegment() context.CancelFunc {
+	if !enabled {
+		return func() {}
+	}
+
+	cancel := startDuration(requestSpanDuration.With(prometheus.Labels{"span": "downloading"}))
+	cancelLegacy := startDuration(downloadDuration)
+
+	return func() {
+		cancel()
+		cancelLegacy()
+	}
 }
 
-func startDuration(m prometheus.Histogram) context.CancelFunc {
+func StartProcessingSegment() context.CancelFunc {
 	if !enabled {
 		return func() {}
 	}
 
+	cancel := startDuration(requestSpanDuration.With(prometheus.Labels{"span": "processing"}))
+	cancelLegacy := startDuration(processingDuration)
+
+	return func() {
+		cancel()
+		cancelLegacy()
+	}
+}
+
+func startDuration(m prometheus.Observer) context.CancelFunc {
 	t := time.Now()
 	return func() {
 		m.Observe(time.Since(t).Seconds())

+ 32 - 0
metrics/stats/stats.go

@@ -0,0 +1,32 @@
+package stats
+
+import "sync/atomic"
+
+var (
+	requestsInProgress int64
+	imagesInProgress   int64
+)
+
+func RequestsInProgress() float64 {
+	return float64(atomic.LoadInt64(&requestsInProgress))
+}
+
+func IncRequestsInProgress() {
+	atomic.AddInt64(&requestsInProgress, 1)
+}
+
+func DecRequestsInProgress() {
+	atomic.AddInt64(&requestsInProgress, -1)
+}
+
+func ImagesInProgress() float64 {
+	return float64(atomic.LoadInt64(&imagesInProgress))
+}
+
+func IncImagesInProgress() {
+	atomic.AddInt64(&imagesInProgress, 1)
+}
+
+func DecImagesInProgress() {
+	atomic.AddInt64(&imagesInProgress, -1)
+}

+ 20 - 7
processing_handler.go

@@ -19,6 +19,7 @@ import (
 	"github.com/imgproxy/imgproxy/v3/imagedata"
 	"github.com/imgproxy/imgproxy/v3/imagetype"
 	"github.com/imgproxy/imgproxy/v3/metrics"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
 	"github.com/imgproxy/imgproxy/v3/options"
 	"github.com/imgproxy/imgproxy/v3/processing"
 	"github.com/imgproxy/imgproxy/v3/router"
@@ -179,6 +180,9 @@ func checkErr(ctx context.Context, errType string, err error) {
 }
 
 func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
+	stats.IncRequestsInProgress()
+	defer stats.DecRequestsInProgress()
+
 	ctx := r.Context()
 
 	if queueSem != nil {
@@ -249,15 +253,24 @@ func handleProcessing(reqID string, rw http.ResponseWriter, r *http.Request) {
 	}
 
 	// The heavy part start here, so we need to restrict concurrency
-	processingSemToken, aquired := processingSem.Aquire(ctx)
-	if !aquired {
-		// We don't actually need to check timeout here,
-		// but it's an easy way to check if this is an actual timeout
-		// or the request was cancelled
-		checkErr(ctx, "queue", router.CheckTimeout(ctx))
-	}
+	var processingSemToken *semaphore.Token
+	func() {
+		defer metrics.StartQueueSegment(ctx)()
+
+		var aquired bool
+		processingSemToken, aquired = processingSem.Aquire(ctx)
+		if !aquired {
+			// We don't actually need to check timeout here,
+			// but it's an easy way to check if this is an actual timeout
+			// or the request was cancelled
+			checkErr(ctx, "queue", router.CheckTimeout(ctx))
+		}
+	}()
 	defer processingSemToken.Release()
 
+	stats.IncImagesInProgress()
+	defer stats.DecImagesInProgress()
+
 	statusCode := http.StatusOK
 
 	originData, err := func() (*imagedata.ImageData, error) {

+ 15 - 15
vips/vips.c

@@ -125,12 +125,12 @@ int
 vips_get_orientation(VipsImage *image) {
   int orientation;
 
-	if (
+  if (
     vips_image_get_typeof(image, VIPS_META_ORIENTATION) == G_TYPE_INT &&
     vips_image_get_int(image, VIPS_META_ORIENTATION, &orientation) == 0
   ) return orientation;
 
-	return 1;
+  return 1;
 }
 
 int
@@ -142,7 +142,7 @@ vips_get_palette_bit_depth(VipsImage *image) {
     vips_image_get_int(image, "palette-bit-depth", &palette_bit_depth) == 0
   ) return palette_bit_depth;
 
-	return 0;
+  return 0;
 }
 
 VipsBandFormat
@@ -205,7 +205,7 @@ vips_cast_go(VipsImage *in, VipsImage **out, VipsBandFormat format) {
 
 int
 vips_rad2float_go(VipsImage *in, VipsImage **out) {
-	return vips_rad2float(in, out, NULL);
+  return vips_rad2float(in, out, NULL);
 }
 
 int
@@ -216,7 +216,7 @@ vips_resize_go(VipsImage *in, VipsImage **out, double wscale, double hscale) {
   VipsBandFormat format = vips_band_format(in);
 
   VipsImage *base = vips_image_new();
-	VipsImage **t = (VipsImage **) vips_object_local_array(VIPS_OBJECT(base), 3);
+  VipsImage **t = (VipsImage **) vips_object_local_array(VIPS_OBJECT(base), 3);
 
   int res =
     vips_premultiply(in, &t[0], NULL) ||
@@ -395,7 +395,7 @@ vips_trim(VipsImage *in, VipsImage **out, double threshold,
           gboolean equal_hor, gboolean equal_ver) {
 
   VipsImage *base = vips_image_new();
-	VipsImage **t = (VipsImage **) vips_object_local_array(VIPS_OBJECT(base), 2);
+  VipsImage **t = (VipsImage **) vips_object_local_array(VIPS_OBJECT(base), 2);
 
   VipsImage *tmp = in;
 
@@ -476,9 +476,9 @@ vips_replicate_go(VipsImage *in, VipsImage **out, int width, int height) {
   if (vips_replicate(in, &tmp, 1 + width / in->Xsize, 1 + height / in->Ysize, NULL))
     return 1;
 
-	if (vips_extract_area(tmp, out, 0, 0, width, height, NULL)) {
+  if (vips_extract_area(tmp, out, 0, 0, width, height, NULL)) {
     clear_image(&tmp);
-		return 1;
+    return 1;
   }
 
   clear_image(&tmp);
@@ -510,24 +510,24 @@ vips_ensure_alpha(VipsImage *in, VipsImage **out) {
 int
 vips_apply_watermark(VipsImage *in, VipsImage *watermark, VipsImage **out, double opacity) {
   VipsImage *base = vips_image_new();
-	VipsImage **t = (VipsImage **) vips_object_local_array(VIPS_OBJECT(base), 6);
+  VipsImage **t = (VipsImage **) vips_object_local_array(VIPS_OBJECT(base), 6);
 
   if (vips_ensure_alpha(watermark, &t[0])) {
     clear_image(&base);
-		return 1;
+    return 1;
   }
 
-	if (opacity < 1) {
+  if (opacity < 1) {
     if (
       vips_extract_band(t[0], &t[1], 0, "n", t[0]->Bands - 1, NULL) ||
       vips_extract_band(t[0], &t[2], t[0]->Bands - 1, "n", 1, NULL) ||
-		  vips_linear1(t[2], &t[3], opacity, 0, NULL) ||
+      vips_linear1(t[2], &t[3], opacity, 0, NULL) ||
       vips_bandjoin2(t[1], t[3], &t[4], NULL)
     ) {
       clear_image(&base);
-			return 1;
-		}
-	} else {
+      return 1;
+    }
+  } else {
     if (vips_copy(t[0], &t[4], NULL)) {
       clear_image(&base);
       return 1;

+ 10 - 0
vips/vips.go

@@ -21,6 +21,8 @@ import (
 	"github.com/imgproxy/imgproxy/v3/ierrors"
 	"github.com/imgproxy/imgproxy/v3/imagedata"
 	"github.com/imgproxy/imgproxy/v3/imagetype"
+	"github.com/imgproxy/imgproxy/v3/metrics/datadog"
+	"github.com/imgproxy/imgproxy/v3/metrics/newrelic"
 	"github.com/imgproxy/imgproxy/v3/metrics/prometheus"
 )
 
@@ -95,6 +97,14 @@ func Init() error {
 		GetAllocs,
 	)
 
+	datadog.AddGaugeFunc("vips.memory", GetMem)
+	datadog.AddGaugeFunc("vips.max_memory", GetMemHighwater)
+	datadog.AddGaugeFunc("vips.allocs", GetAllocs)
+
+	newrelic.AddGaugeFunc("vips.memory", GetMem)
+	newrelic.AddGaugeFunc("vips.max_memory", GetMemHighwater)
+	newrelic.AddGaugeFunc("vips.allocs", GetAllocs)
+
 	return nil
 }