Browse Source

Add more metrics to OTel

DarthSim 2 years ago
parent
commit
a4c876fc6d
6 changed files with 223 additions and 15 deletions
  1. 3 0
      CHANGELOG.md
  2. 4 0
      docs/open_telemetry.md
  3. 2 0
      go.mod
  4. 5 0
      go.sum
  5. 3 0
      metrics/metrics.go
  6. 206 15
      metrics/otel/otel.go

+ 3 - 0
CHANGELOG.md

@@ -1,6 +1,9 @@
 # Changelog
 
 ## [Unreleased]
+### Add
+- Add `process_resident_memory_bytes`, `process_virtual_memory_bytes`, `go_memstats_sys_bytes`, `go_memstats_heap_idle_bytes`, `go_memstats_heap_inuse_bytes`, `go_goroutines`, `go_threads`, `buffer_default_size_bytes`, `buffer_max_size_bytes`, and `buffer_size_bytes` metrics to OpenTelemetry.
+
 ### Change
 - Optimized memory buffer pooling for better performance and memory reuse.
 

+ 4 - 0
docs/open_telemetry.md

@@ -34,9 +34,13 @@ If `IMGPROXY_OPEN_TELEMETRY_ENABLE_METRICS` is set to `true`, imgproxy will also
 
 * `requests_in_progress`: the number of requests currently in progress
 * `images_in_progress`: the number of images currently in progress
+* `buffer_size_bytes`: a histogram of buffer sizes (in bytes)
+* `buffer_default_size_bytes`: calibrated default buffer size (in bytes)
+* `buffer_max_size_bytes`: calibrated maximum buffer size (in bytes)
 * `vips_memory_bytes`: libvips memory usage
 * `vips_max_memory_bytes`: libvips maximum memory usage
 * `vips_allocs`: the number of active vips allocations
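+* Process and Go runtime metrics: `process_resident_memory_bytes`, `process_virtual_memory_bytes`, `go_memstats_sys_bytes`, `go_memstats_heap_idle_bytes`, `go_memstats_heap_inuse_bytes`, `go_goroutines`, and `go_threads`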
 
 ## TLS Configuration
 

+ 2 - 0
go.mod

@@ -123,6 +123,8 @@ require (
 	github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 // indirect
 	github.com/shirou/gopsutil v3.21.11+incompatible // indirect
 	github.com/tinylib/msgp v1.1.8 // indirect
+	github.com/tklauser/go-sysconf v0.3.11 // indirect
+	github.com/tklauser/numcpus v0.6.0 // indirect
 	github.com/yusufpapurcu/wmi v1.2.2 // indirect
 	go.opencensus.io v0.24.0 // indirect
 	go.opentelemetry.io/contrib/propagators/b3 v1.15.0 // indirect

+ 5 - 0
go.sum

@@ -415,6 +415,10 @@ github.com/tdewolff/test v1.0.7 h1:8Vs0142DmPFW/bQeHRP3MV19m1gvndjUb1sn8yy74LM=
 github.com/tdewolff/test v1.0.7/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
 github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
 github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
+github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM=
+github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
+github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms=
+github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=
 github.com/trimmer-io/go-xmp v1.0.0 h1:zY8bolSga5kOjBAaHS6hrdxLgEoYuT875xTy0QDwZWs=
 github.com/trimmer-io/go-xmp v1.0.0/go.mod h1:Aaptr9sp1lLv7UnCAdQ+gSHZyY2miYaKmcNVj7HRBwA=
 github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -658,6 +662,7 @@ golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=

+ 3 - 0
metrics/metrics.go

@@ -137,6 +137,7 @@ func ObserveBufferSize(t string, size int) {
 	prometheus.ObserveBufferSize(t, size)
 	newrelic.ObserveBufferSize(t, size)
 	datadog.ObserveBufferSize(t, size)
+	otel.ObserveBufferSize(t, size)
 	cloudwatch.ObserveBufferSize(t, size)
 }
 
@@ -144,6 +145,7 @@ func SetBufferDefaultSize(t string, size int) {
 	prometheus.SetBufferDefaultSize(t, size)
 	newrelic.SetBufferDefaultSize(t, size)
 	datadog.SetBufferDefaultSize(t, size)
+	otel.SetBufferDefaultSize(t, size)
 	cloudwatch.SetBufferDefaultSize(t, size)
 }
 
@@ -151,5 +153,6 @@ func SetBufferMaxSize(t string, size int) {
 	prometheus.SetBufferMaxSize(t, size)
 	newrelic.SetBufferMaxSize(t, size)
 	datadog.SetBufferMaxSize(t, size)
+	otel.SetBufferMaxSize(t, size)
 	cloudwatch.SetBufferMaxSize(t, size)
 }

+ 206 - 15
metrics/otel/otel.go

@@ -7,10 +7,14 @@ import (
 	"errors"
 	"fmt"
 	"net/http"
+	"os"
+	"runtime"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/felixge/httpsnoop"
+	"github.com/shirou/gopsutil/process"
 	"github.com/sirupsen/logrus"
 	"go.opentelemetry.io/contrib/detectors/aws/ec2"
 	"go.opentelemetry.io/contrib/detectors/aws/ecs"
@@ -58,6 +62,11 @@ var (
 	meter         metric.Meter
 
 	propagator propagation.TextMapPropagator
+
+	bufferSizeHist     instrument.Int64Histogram
+	bufferDefaultSizes = make(map[string]int)
+	bufferMaxSizes     = make(map[string]int)
+	bufferStatsMutex   sync.Mutex
 )
 
 func Init() error {
@@ -157,20 +166,11 @@ func Init() error {
 
 	meter = meterProvider.Meter("imgproxy")
 
-	enabledMetrics = true
+	if err = addDefaultMetrics(); err != nil {
+		return err
+	}
 
-	AddGaugeFunc(
-		"requests_in_progress",
-		"A gauge of the number of requests currently being in progress.",
-		"1",
-		stats.RequestsInProgress,
-	)
-	AddGaugeFunc(
-		"images_in_progress",
-		"A gauge of the number of images currently being in progress.",
-		"1",
-		stats.ImagesInProgress,
-	)
+	enabledMetrics = true
 
 	return nil
 }
@@ -399,8 +399,176 @@ func SendError(ctx context.Context, errType string, err error) {
 	span.AddEvent(semconv.ExceptionEventName, trace.WithAttributes(attributes...))
 }
 
+func addDefaultMetrics() error { // addDefaultMetrics registers the built-in process, Go runtime, in-progress and buffer instruments on meter; it returns an error as soon as any instrument or the callback fails to register.
+	proc, err := process.NewProcess(int32(os.Getpid())) // handle to the current process; the callback below reads its memory info
+	if err != nil {
+		return fmt.Errorf("Can't initialize process data for OpenTelemetry: %s", err)
+	}
+
+	processResidentMemory, err := meter.Int64ObservableGauge(
+		"process_resident_memory_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("Resident memory size in bytes."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add process_resident_memory_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	processVirtualMemory, err := meter.Int64ObservableGauge(
+		"process_virtual_memory_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("Virtual memory size in bytes."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add process_virtual_memory_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	goMemstatsSys, err := meter.Int64ObservableGauge(
+		"go_memstats_sys_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("Number of bytes obtained from system."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add go_memstats_sys_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	goMemstatsHeapIdle, err := meter.Int64ObservableGauge(
+		"go_memstats_heap_idle_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("Number of heap bytes waiting to be used."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add go_memstats_heap_idle_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	goMemstatsHeapInuse, err := meter.Int64ObservableGauge(
+		"go_memstats_heap_inuse_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("Number of heap bytes that are in use."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add go_memstats_heap_inuse_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	goGoroutines, err := meter.Int64ObservableGauge(
+		"go_goroutines",
+		instrument.WithUnit("1"),
+		instrument.WithDescription("Number of goroutines that currently exist."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add go_goroutines gauge to OpenTelemetry: %s", err)
+	}
+
+	goThreads, err := meter.Int64ObservableGauge(
+		"go_threads",
+		instrument.WithUnit("1"),
+		instrument.WithDescription("Number of OS threads created."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add go_threads gauge to OpenTelemetry: %s", err)
+	}
+
+	requestsInProgressGauge, err := meter.Float64ObservableGauge(
+		"requests_in_progress",
+		instrument.WithUnit("1"),
+		instrument.WithDescription("A gauge of the number of requests currently being in progress."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add requests_in_progress gauge to OpenTelemetry: %s", err)
+	}
+
+	imagesInProgressGauge, err := meter.Float64ObservableGauge(
+		"images_in_progress",
+		instrument.WithUnit("1"),
+		instrument.WithDescription("A gauge of the number of images currently being in progress."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add images_in_progress gauge to OpenTelemetry: %s", err)
+	}
+
+	bufferDefaultSizeGauge, err := meter.Int64ObservableGauge(
+		"buffer_default_size_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("A gauge of the buffer default size in bytes."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add buffer_default_size_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	bufferMaxSizeGauge, err := meter.Int64ObservableGauge(
+		"buffer_max_size_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("A gauge of the buffer max size in bytes."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add buffer_max_size_bytes gauge to OpenTelemetry: %s", err)
+	}
+
+	_, err = meter.RegisterCallback( // one shared callback reports every observable gauge created above
+		func(ctx context.Context, o metric.Observer) error {
+			memStats, merr := proc.MemoryInfo()
+			if merr != nil {
+				return merr // NOTE(review): this early return happens before any Observe call, so a failed memory read skips all gauges in this callback run
+			}
+
+			o.ObserveInt64(processResidentMemory, int64(memStats.RSS))
+			o.ObserveInt64(processVirtualMemory, int64(memStats.VMS))
+
+			goMemStats := &runtime.MemStats{}
+			runtime.ReadMemStats(goMemStats)
+
+			o.ObserveInt64(goMemstatsSys, int64(goMemStats.Sys))
+			o.ObserveInt64(goMemstatsHeapIdle, int64(goMemStats.HeapIdle))
+			o.ObserveInt64(goMemstatsHeapInuse, int64(goMemStats.HeapInuse))
+
+			threadsNum, _ := runtime.ThreadCreateProfile(nil) // nil slice: only the record count is used, the ok result is deliberately ignored
+			o.ObserveInt64(goGoroutines, int64(runtime.NumGoroutine()))
+			o.ObserveInt64(goThreads, int64(threadsNum))
+
+			o.ObserveFloat64(requestsInProgressGauge, stats.RequestsInProgress())
+			o.ObserveFloat64(imagesInProgressGauge, stats.ImagesInProgress())
+
+			bufferStatsMutex.Lock() // guards bufferDefaultSizes and bufferMaxSizes, which SetBufferDefaultSize/SetBufferMaxSize write
+			defer bufferStatsMutex.Unlock()
+
+			for t, v := range bufferDefaultSizes { // one observation per buffer type, labelled with a "type" attribute
+				o.ObserveInt64(bufferDefaultSizeGauge, int64(v), attribute.String("type", t))
+			}
+			for t, v := range bufferMaxSizes {
+				o.ObserveInt64(bufferMaxSizeGauge, int64(v), attribute.String("type", t))
+			}
+			return nil
+		},
+		processResidentMemory,
+		processVirtualMemory,
+		goMemstatsSys,
+		goMemstatsHeapIdle,
+		goMemstatsHeapInuse,
+		goGoroutines,
+		goThreads,
+		requestsInProgressGauge,
+		imagesInProgressGauge,
+		bufferDefaultSizeGauge,
+		bufferMaxSizeGauge,
+	)
+	if err != nil {
+		return fmt.Errorf("Can't register OpenTelemetry callbacks: %s", err)
+	}
+
+	bufferSizeHist, err = meter.Int64Histogram( // stored in the package-level bufferSizeHist, which ObserveBufferSize records into directly
+		"buffer_size_bytes",
+		instrument.WithUnit("By"),
+		instrument.WithDescription("A histogram of the buffer size in bytes."),
+	)
+	if err != nil {
+		return fmt.Errorf("Can't add buffer_size_bytes histogram to OpenTelemetry: %s", err)
+	}
+
+	return nil
+}
+
 func AddGaugeFunc(name, desc, u string, f GaugeFunc) {
-	if !enabledMetrics {
+	if meter == nil {
 		return
 	}
 
@@ -415,7 +583,30 @@ func AddGaugeFunc(name, desc, u string, f GaugeFunc) {
 	)
 	if err != nil {
 		logrus.Warnf("Can't add %s gauge to OpenTelemetry: %s", name, err)
-		return
+	}
+}
+
+func ObserveBufferSize(t string, size int) { // ObserveBufferSize records size into the bufferSizeHist histogram, tagged with a "type" attribute of t.
+	if enabledMetrics { // does nothing while enabledMetrics is false
+		bufferSizeHist.Record(context.Background(), int64(size), attribute.String("type", t))
+	}
+}
+
+func SetBufferDefaultSize(t string, size int) { // SetBufferDefaultSize stores size as the default buffer size for buffer type t in bufferDefaultSizes.
+	if enabledMetrics { // does nothing while enabledMetrics is false
+		bufferStatsMutex.Lock() // the same mutex is taken by the gauge callback that iterates this map
+		defer bufferStatsMutex.Unlock()
+
+		bufferDefaultSizes[t] = size
+	}
+}
+
+func SetBufferMaxSize(t string, size int) { // SetBufferMaxSize stores size as the maximum buffer size for buffer type t in bufferMaxSizes.
+	if enabledMetrics { // does nothing while enabledMetrics is false
+		bufferStatsMutex.Lock() // the same mutex is taken by the gauge callback that iterates this map
+		defer bufferStatsMutex.Unlock()
+
+		bufferMaxSizes[t] = size
 	}
 }