Browse Source

Additional metrics for Datadog

DarthSim 3 years ago
parent
commit
c4801761db
10 changed files with 140 additions and 6 deletions
  1. 1 0
      CHANGELOG.md
  2. 4 4
      bufpool/bufpool.go
  3. 3 1
      config/config.go
  4. 3 0
      docs/configuration.md
  5. 12 0
      docs/datadog.md
  6. 1 0
      go.mod
  7. 2 0
      go.sum
  8. 94 1
      metrics/datadog/datadog.go
  9. 15 0
      metrics/metrics.go
  10. 5 0
      vips/vips.go

+ 1 - 0
CHANGELOG.md

@@ -7,6 +7,7 @@
 - Add support of JPEG files with differential Huffman coding or arithmetic coding.
 - Add `IMGPROXY_PREFERRED_FORMATS` config.
 - Add `IMGPROXY_REQUESTS_QUEUE_SIZE` config.
+- Add sending additional metrics to Datadog and `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` config.
 
 ### Change
 - Change `IMGPROXY_MAX_CLIENTS` default value to 2048.

+ 4 - 4
bufpool/bufpool.go

@@ -8,7 +8,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/config"
 	"github.com/imgproxy/imgproxy/v3/imath"
-	"github.com/imgproxy/imgproxy/v3/metrics/prometheus"
+	"github.com/imgproxy/imgproxy/v3/metrics"
 )
 
 type intSlice []int
@@ -69,8 +69,8 @@ func (p *Pool) calibrateAndClean() {
 		runtime.GC()
 	}
 
-	prometheus.SetBufferDefaultSize(p.name, p.defaultSize)
-	prometheus.SetBufferMaxSize(p.name, p.maxSize)
+	metrics.SetBufferDefaultSize(p.name, p.defaultSize)
+	metrics.SetBufferMaxSize(p.name, p.maxSize)
 }
 
 func (p *Pool) Get(size int) *bytes.Buffer {
@@ -146,7 +146,7 @@ func (p *Pool) Put(buf *bytes.Buffer) {
 			p.buffers[i] = buf
 
 			if buf.Cap() > 0 {
-				prometheus.ObserveBufferSize(p.name, buf.Cap())
+				metrics.ObserveBufferSize(p.name, buf.Cap())
 			}
 
 			return

+ 3 - 1
config/config.go

@@ -131,7 +131,8 @@ var (
 	FallbackImageHTTPCode int
 	FallbackImageTTL      int
 
-	DataDogEnable bool
+	DataDogEnable        bool
+	DataDogEnableMetrics bool
 
 	NewRelicAppName string
 	NewRelicKey     string
@@ -475,6 +476,7 @@ func Configure() error {
 	configurators.Int(&FallbackImageTTL, "IMGPROXY_FALLBACK_IMAGE_TTL")
 
 	configurators.Bool(&DataDogEnable, "IMGPROXY_DATADOG_ENABLE")
+	configurators.Bool(&DataDogEnableMetrics, "IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS")
 
 	configurators.String(&NewRelicAppName, "IMGPROXY_NEW_RELIC_APP_NAME")
 	configurators.String(&NewRelicKey, "IMGPROXY_NEW_RELIC_KEY")

+ 3 - 0
docs/configuration.md

@@ -385,6 +385,9 @@ Check out the [Prometheus](prometheus.md) guide to learn more.
 imgproxy can send its metrics to Datadog:
 
 * `IMGPROXY_DATADOG_ENABLE`: when `true`, enables sending metrics to Datadog. Default: false
+* `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS`: when `true`, enables sending the additional metrics to Datadog. Default: false
+
+**⚠️Warning:** Since the additional metrics are treated by Datadog as custom, Datadog may bill you additionally for their usage. Please check out Datadog's [Custom Metrics Billing](https://docs.datadoghq.com/account_management/billing/custom_metrics/) page for additional details.
 
 Check out the [Datadog](datadog.md) guide to learn more.
 

+ 12 - 0
docs/datadog.md

@@ -18,6 +18,7 @@ imgproxy can send its metrics to Datadog. To use this feature, do the following:
     * `DD_RUNTIME_METRICS_ENABLED`: enables automatic collection of runtime metrics every 10 seconds. Default: `false`
     * `DD_TRACE_STARTUP_LOGS`: causes various startup info to be written when the tracer starts. Default: `true`
     * `DD_TRACE_DEBUG`: enables detailed logs. Default: `false`
+4. _(optional)_ Set the `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` environment variable to `true` to collect the [additional metrics](#additional-metrics).
 
 imgproxy will send the following info to Datadog:
 
@@ -25,3 +26,14 @@ imgproxy will send the following info to Datadog:
 * Image downloading time
 * Image processing time
 * Errors that occurred while downloading and processing image
+
+## Additional metrics
+
+When the `IMGPROXY_DATADOG_ENABLE_ADDITIONAL_METRICS` environment variable is set to `true`, imgproxy will send the following additional metrics to Datadog:
+
+* `imgproxy.buffer.size`: a histogram of the download/gzip buffer sizes (in bytes)
+* `imgproxy.buffer.default_size`: calibrated default buffer size (in bytes)
+* `imgproxy.buffer.max_size`: calibrated maximum buffer size (in bytes)
+* `imgproxy.vips.memory`: libvips memory usage (in bytes)
+* `imgproxy.vips.max_memory`: libvips maximum memory usage (in bytes)
+* `imgproxy.vips.allocs`: the number of active vips allocations

+ 1 - 0
go.mod

@@ -5,6 +5,7 @@ go 1.16
 require (
 	cloud.google.com/go/storage v1.22.1
 	github.com/Azure/azure-storage-blob-go v0.15.0
+	github.com/DataDog/datadog-go/v5 v5.1.1
 	github.com/Microsoft/go-winio v0.5.2 // indirect
 	github.com/StackExchange/wmi v1.2.1 // indirect
 	github.com/airbrake/gobrake/v5 v5.5.1

+ 2 - 0
go.sum

@@ -102,6 +102,8 @@ github.com/DataDog/datadog-go v4.8.2+incompatible h1:qbcKSx29aBLD+5QLvlQZlGmRMF/
 github.com/DataDog/datadog-go v4.8.2+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
 github.com/DataDog/datadog-go/v5 v5.0.2 h1:UFtEe7662/Qojxkw1d6SboAeA0CPI3naKhVASwFn+04=
 github.com/DataDog/datadog-go/v5 v5.0.2/go.mod h1:ZI9JFB4ewXbw1sBnF4sxsR2k1H3xjV+PUAOUsHvKpcU=
+github.com/DataDog/datadog-go/v5 v5.1.1 h1:JLZ6s2K1pG2h9GkvEvMdEGqMDyVLEAccdX5TltWcLMU=
+github.com/DataDog/datadog-go/v5 v5.1.1/go.mod h1:KhiYb2Badlv9/rofz+OznKoEF5XKTonWyhx5K83AP8E=
 github.com/DataDog/gostackparse v0.5.0/go.mod h1:lTfqcJKqS9KnXQGnyQMCugq3u1FP6UZMfWR0aitKFMM=
 github.com/DataDog/sketches-go v1.0.0 h1:chm5KSXO7kO+ywGWJ0Zs6tdmWU8PBXSbywFVciL6BG4=
 github.com/DataDog/sketches-go v1.0.0/go.mod h1:O+XkJHWk9w4hDwY2ZUDU31ZC9sNYlYo8DiFsxjYeo1k=

+ 94 - 1
metrics/datadog/datadog.go

@@ -2,9 +2,13 @@ package datadog
 
 import (
 	"context"
+	"net"
 	"net/http"
 	"os"
+	"sync"
+	"time"
 
+	"github.com/DataDog/datadog-go/v5/statsd"
 	log "github.com/sirupsen/logrus"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext"
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
@@ -16,7 +20,18 @@ import (
 
 type spanCtxKey struct{}
 
-var enabled bool
+// GaugeFunc is a callback that returns the current value of a gauge metric.
+// Callbacks registered with AddGaugeFunc are invoked by runMetricsCollector
+// on every tick and their results are sent to DogStatsD as gauges.
+type GaugeFunc func() float64
+
+var (
+	// enabled is set by Init and gates Stop. enabledMetrics is set only after
+	// the DogStatsD client has been created successfully; the buffer metric
+	// helpers are no-ops while it is false.
+	enabled        bool
+	enabledMetrics bool
+
+	// statsdClient is the DogStatsD client created in Init. statsdClientStop
+	// is closed by Stop to make runMetricsCollector return.
+	statsdClient     *statsd.Client
+	statsdClientStop chan struct{}
+
+	// gaugeFuncs maps full metric names to the callbacks that produce their
+	// values; gaugeFuncsMutex guards it.
+	gaugeFuncs      = make(map[string]GaugeFunc)
+	gaugeFuncsMutex sync.RWMutex
+)
 
 func Init() {
 	if !config.DataDogEnable {
@@ -35,11 +50,44 @@ func Init() {
 	)
 
 	enabled = true
+
+	statsdHost, statsdPort := os.Getenv("DD_AGENT_HOST"), os.Getenv("DD_DOGSTATSD_PORT")
+	if len(statsdHost) == 0 {
+		statsdHost = "localhost"
+	}
+	if len(statsdPort) == 0 {
+		statsdPort = "8125"
+	}
+
+	if !config.DataDogEnableMetrics {
+		return
+	}
+
+	var err error
+	statsdClient, err = statsd.New(
+		net.JoinHostPort(statsdHost, statsdPort),
+		statsd.WithTags([]string{
+			"service:" + name,
+			"version:" + version.Version(),
+		}),
+	)
+	if err == nil {
+		statsdClientStop = make(chan struct{})
+		enabledMetrics = true
+		go runMetricsCollector()
+	} else {
+		log.Warnf("Can't initialize DogStatsD client: %s", err)
+	}
 }
 
+// Stop shuts down Datadog reporting. It does nothing unless enabled is set.
+// When a DogStatsD client exists, statsdClientStop is closed first so that
+// runMetricsCollector returns, and then the client itself is closed.
 func Stop() {
 	if enabled {
 		tracer.Stop()
+
+		if statsdClient != nil {
+			close(statsdClientStop)
+			statsdClient.Close()
+		}
 	}
 }
 
@@ -89,6 +137,51 @@ func SendError(ctx context.Context, errType string, err error) {
 	}
 }
 
+// AddGaugeFunc registers f as the value source for the gauge metric
+// "imgproxy."+name. A later registration under the same name replaces the
+// earlier one. Registration is guarded by gaugeFuncsMutex.
+func AddGaugeFunc(name string, f GaugeFunc) {
+	metricName := "imgproxy." + name
+
+	gaugeFuncsMutex.Lock()
+	gaugeFuncs[metricName] = f
+	gaugeFuncsMutex.Unlock()
+}
+
+// ObserveBufferSize records size as a sample of the "imgproxy.buffer.size"
+// histogram, tagged with "type:"+t. It is a no-op unless additional metrics
+// are enabled.
+func ObserveBufferSize(t string, size int) {
+	if !enabledMetrics {
+		return
+	}
+
+	tags := []string{"type:" + t}
+	statsdClient.Histogram("imgproxy.buffer.size", float64(size), tags, 1)
+}
+
+// SetBufferDefaultSize sets the "imgproxy.buffer.default_size" gauge, tagged
+// with "type:"+t, to size. It is a no-op unless additional metrics are
+// enabled.
+func SetBufferDefaultSize(t string, size int) {
+	if !enabledMetrics {
+		return
+	}
+
+	tags := []string{"type:" + t}
+	statsdClient.Gauge("imgproxy.buffer.default_size", float64(size), tags, 1)
+}
+
+// SetBufferMaxSize sets the "imgproxy.buffer.max_size" gauge, tagged with
+// "type:"+t, to size. It is a no-op unless additional metrics are enabled.
+func SetBufferMaxSize(t string, size int) {
+	if !enabledMetrics {
+		return
+	}
+
+	tags := []string{"type:" + t}
+	statsdClient.Gauge("imgproxy.buffer.max_size", float64(size), tags, 1)
+}
+
+// runMetricsCollector samples every registered gauge callback once per
+// 10-second tick and sends the values to DogStatsD. It returns when
+// statsdClientStop is closed.
+func runMetricsCollector() {
+	ticker := time.NewTicker(10 * time.Second)
+	defer ticker.Stop()
+
+	// report sends one sample per registered gauge; the read lock is held for
+	// the whole pass so the map cannot change while it is being iterated.
+	report := func() {
+		gaugeFuncsMutex.RLock()
+		defer gaugeFuncsMutex.RUnlock()
+
+		for metric, read := range gaugeFuncs {
+			statsdClient.Gauge(metric, read(), nil, 1)
+		}
+	}
+
+	for {
+		select {
+		case <-statsdClientStop:
+			return
+		case <-ticker.C:
+			report()
+		}
+	}
+}
+
 // dataDogLogger is a stateless type; its methods are not visible here.
 // NOTE(review): presumably the logger adapter handed to the Datadog tracer —
 // confirm against its methods.
 type dataDogLogger struct {
 }
 

+ 15 - 0
metrics/metrics.go

@@ -78,3 +78,18 @@ func SendError(ctx context.Context, errType string, err error) {
 	newrelic.SendError(ctx, errType, err)
 	datadog.SendError(ctx, errType, err)
 }
+
+// ObserveBufferSize forwards an observed buffer size for the buffer kind t to
+// the Prometheus backend and then to the Datadog backend.
+func ObserveBufferSize(t string, size int) {
+	prometheus.ObserveBufferSize(t, size)
+	datadog.ObserveBufferSize(t, size)
+}
+
+// SetBufferDefaultSize forwards the default buffer size for the buffer kind t
+// to the Prometheus backend and then to the Datadog backend.
+func SetBufferDefaultSize(t string, size int) {
+	prometheus.SetBufferDefaultSize(t, size)
+	datadog.SetBufferDefaultSize(t, size)
+}
+
+// SetBufferMaxSize forwards the maximum buffer size for the buffer kind t to
+// the Prometheus backend and then to the Datadog backend.
+func SetBufferMaxSize(t string, size int) {
+	prometheus.SetBufferMaxSize(t, size)
+	datadog.SetBufferMaxSize(t, size)
+}

+ 5 - 0
vips/vips.go

@@ -21,6 +21,7 @@ import (
 	"github.com/imgproxy/imgproxy/v3/ierrors"
 	"github.com/imgproxy/imgproxy/v3/imagedata"
 	"github.com/imgproxy/imgproxy/v3/imagetype"
+	"github.com/imgproxy/imgproxy/v3/metrics/datadog"
 	"github.com/imgproxy/imgproxy/v3/metrics/prometheus"
 )
 
@@ -95,6 +96,10 @@ func Init() error {
 		GetAllocs,
 	)
 
+	datadog.AddGaugeFunc("vips.memory", GetMem)
+	datadog.AddGaugeFunc("vips.max_memory", GetMemHighwater)
+	datadog.AddGaugeFunc("vips.allocs", GetAllocs)
+
 	return nil
 }