Browse Source

OpenTelemetry support (#995)

Sergey Alexandrovich 2 years ago
parent
commit
36f67a6f96
10 changed files with 650 additions and 668 deletions
  1. 34 0
      config/config.go
  2. 1 0
      docs/_sidebar.md
  3. 16 0
      docs/configuration.md
  4. 47 0
      docs/open_telemetry.md
  5. 52 3
      go.mod
  6. 52 661
      go.sum
  7. 0 3
      metrics/errformat/errformat.go
  8. 19 1
      metrics/metrics.go
  9. 409 0
      metrics/otel/otel.go
  10. 20 0
      vips/vips.go

+ 34 - 0
config/config.go

@@ -141,6 +141,16 @@ var (
 	PrometheusBind      string
 	PrometheusNamespace string
 
+	OpenTelemetryEndpoint          string
+	OpenTelemetryProtocol          string
+	OpenTelemetryServiceName       string
+	OpenTelemetryEnableMetrics     bool
+	OpenTelemetryServerCert        string
+	OpenTelemetryClientCert        string
+	OpenTelemetryClientKey         string
+	OpenTelemetryPropagators       []string
+	OpenTelemetryConnectionTimeout int
+
 	BugsnagKey   string
 	BugsnagStage string
 
@@ -307,6 +317,16 @@ func Reset() {
 	PrometheusBind = ""
 	PrometheusNamespace = ""
 
+	OpenTelemetryEndpoint = ""
+	OpenTelemetryProtocol = "grpc"
+	OpenTelemetryServiceName = "imgproxy"
+	OpenTelemetryEnableMetrics = false
+	OpenTelemetryServerCert = ""
+	OpenTelemetryClientCert = ""
+	OpenTelemetryClientKey = ""
+	OpenTelemetryPropagators = make([]string, 0)
+	OpenTelemetryConnectionTimeout = 5
+
 	BugsnagKey = ""
 	BugsnagStage = "production"
 
@@ -485,6 +505,16 @@ func Configure() error {
 	configurators.String(&PrometheusBind, "IMGPROXY_PROMETHEUS_BIND")
 	configurators.String(&PrometheusNamespace, "IMGPROXY_PROMETHEUS_NAMESPACE")
 
+	configurators.String(&OpenTelemetryEndpoint, "IMGPROXY_OPEN_TELEMETRY_ENDPOINT")
+	configurators.String(&OpenTelemetryProtocol, "IMGPROXY_OPEN_TELEMETRY_PROTOCOL")
+	configurators.String(&OpenTelemetryServiceName, "IMGPROXY_OPEN_TELEMETRY_SERVICE_NAME")
+	configurators.Bool(&OpenTelemetryEnableMetrics, "IMGPROXY_OPEN_TELEMETRY_ENABLE_METRICS")
+	configurators.String(&OpenTelemetryServerCert, "IMGPROXY_OPEN_TELEMETRY_SERVER_CERT")
+	configurators.String(&OpenTelemetryClientCert, "IMGPROXY_OPEN_TELEMETRY_CLIENT_CERT")
+	configurators.String(&OpenTelemetryClientKey, "IMGPROXY_OPEN_TELEMETRY_CLIENT_KEY")
+	configurators.StringSlice(&OpenTelemetryPropagators, "IMGPROXY_OPEN_TELEMETRY_PROPAGATORS")
+	configurators.Int(&OpenTelemetryConnectionTimeout, "IMGPROXY_OPEN_TELEMETRY_CONNECTION_TIMEOUT")
+
 	configurators.String(&BugsnagKey, "IMGPROXY_BUGSNAG_KEY")
 	configurators.String(&BugsnagStage, "IMGPROXY_BUGSNAG_STAGE")
 	configurators.String(&HoneybadgerKey, "IMGPROXY_HONEYBADGER_KEY")
@@ -624,6 +654,10 @@ func Configure() error {
 		return fmt.Errorf("Can't use the same binding for the main server and Prometheus")
 	}
 
+	if OpenTelemetryConnectionTimeout < 1 {
+		return fmt.Errorf("OpenTelemetry connection timeout should be greater than zero")
+	}
+
 	if FreeMemoryInterval <= 0 {
 		return fmt.Errorf("Free memory interval should be greater than zero")
 	}

+ 1 - 0
docs/_sidebar.md

@@ -23,6 +23,7 @@
   * [New Relic](new_relic)
   * [Prometheus](prometheus)
   * [Datadog](datadog)
+  * [OpenTelemetry](open_telemetry)
 * Miscellaneous
   * [Image formats support](image_formats_support)
   * [About processing pipeline](about_processing_pipeline)

+ 16 - 0
docs/configuration.md

@@ -395,6 +395,22 @@ imgproxy can send its metrics to Datadog:
 
 Check out the [Datadog](datadog.md) guide to learn more.
 
+## OpenTelemetry metrics
+
+imgproxy can send request traces and, optionally, metrics to an OpenTelemetry collector:
+
+* `IMGPROXY_OPEN_TELEMETRY_ENDPOINT`: OpenTelemetry collector endpoint (`host:port`). Default: blank
+* `IMGPROXY_OPEN_TELEMETRY_PROTOCOL`: OpenTelemetry collector protocol. Supported protocols are `grpc`, `https`, and `http`. Default: `grpc`
+* `IMGPROXY_OPEN_TELEMETRY_SERVICE_NAME`: OpenTelemetry service name. Default: `imgproxy`
+* `IMGPROXY_OPEN_TELEMETRY_ENABLE_METRICS`: when `true`, imgproxy will send metrics over OpenTelemetry Metrics API. Default: `false`
+* `IMGPROXY_OPEN_TELEMETRY_SERVER_CERT`: OpenTelemetry collector TLS certificate, PEM-encoded. Default: blank
+* `IMGPROXY_OPEN_TELEMETRY_CLIENT_CERT`: OpenTelemetry client TLS certificate, PEM-encoded. Default: blank
+* `IMGPROXY_OPEN_TELEMETRY_CLIENT_KEY`: OpenTelemetry client TLS key, PEM-encoded. Default: blank
+* `IMGPROXY_OPEN_TELEMETRY_PROPAGATORS`: a comma-separated list of OpenTelemetry text map propagators. Supported propagators are `tracecontext`, `baggage`, `b3`, `b3multi`, `jaeger`, `xray`, and `ottrace`. Default: blank
+* `IMGPROXY_OPEN_TELEMETRY_CONNECTION_TIMEOUT`: the maximum duration (in seconds) for establishing a connection to the OpenTelemetry collector. Default: `5`
+
+Check out the [OpenTelemetry](open_telemetry.md) guide to learn more.
+
 ## Error reporting
 
 imgproxy can report occurred errors to Bugsnag, Honeybadger and Sentry:

+ 47 - 0
docs/open_telemetry.md

@@ -0,0 +1,47 @@
+# OpenTelemetry
+
+imgproxy can send request traces to an OpenTelemetry collector. To use this feature, do the following:
+
+1. Install & configure the [OpenTelemetry collector](https://opentelemetry.io/docs/collector/).
+2. Specify the collector endpoint (`host:port`) with `IMGPROXY_OPEN_TELEMETRY_ENDPOINT` and the collector protocol with `IMGPROXY_OPEN_TELEMETRY_PROTOCOL`. Supported protocols are:
+    * `grpc` _(default)_
+    * `https`
+    * `http`
+3. _(optional)_ Set the `IMGPROXY_OPEN_TELEMETRY_SERVICE_NAME` environment variable to be the desired service name.
+4. _(optional)_ Set the `IMGPROXY_OPEN_TELEMETRY_PROPAGATORS` environment variable to be the desired list of text map propagators. Supported propagators are:
+    * `tracecontext`: [W3C Trace Context](https://www.w3.org/TR/trace-context/)
+    * `baggage`: [W3C Baggage](https://www.w3.org/TR/baggage/)
+    * `b3`: [B3 Single](https://github.com/openzipkin/b3-propagation#single-header)
+    * `b3multi`: [B3 Multi](https://github.com/openzipkin/b3-propagation#multiple-headers)
+    * `jaeger`: [Jaeger](https://www.jaegertracing.io/docs/1.21/client-libraries/#propagation-format)
+    * `xray`: [AWS X-Ray](https://docs.aws.amazon.com/xray/latest/devguide/xray-concepts.html#xray-concepts-tracingheader)
+    * `ottrace`: [OT Trace](https://github.com/opentracing?q=basic&type=&language=)
+5. _(optional)_ [Set up TLS certificates](#tls-configuration).
+6. _(optional)_ Set `IMGPROXY_OPEN_TELEMETRY_ENABLE_METRICS` to `true` to enable sending metrics via OpenTelemetry Metrics API.
+
+imgproxy will send the following info to the collector:
+
+* Response time
+* Queue time
+* Image downloading time
+* Image processing time
+* Errors that occurred while downloading and processing an image
+
+If `IMGPROXY_OPEN_TELEMETRY_ENABLE_METRICS` is set to `true`, imgproxy will also send the following metrics to the collector:
+
+* `requests_in_progress`: the number of requests currently in progress
+* `images_in_progress`: the number of images currently in progress
+* `vips_memory_bytes`: libvips memory usage
+* `vips_max_memory_bytes`: libvips maximum memory usage
+* `vips_allocs`: the number of active vips allocations
+
+## TLS Configuration
+
+If your OpenTelemetry collector is secured with TLS, you may need to specify the collector's certificate on the imgproxy side:
+
+* `IMGPROXY_OPEN_TELEMETRY_SERVER_CERT`: OpenTelemetry collector TLS certificate, PEM-encoded. Default: blank
+
+If your collector uses mTLS for mutual authentication, you'll also need to specify the client's certificate/key pair:
+
+* `IMGPROXY_OPEN_TELEMETRY_CLIENT_CERT`: OpenTelemetry client TLS certificate, PEM-encoded. Default: blank
+* `IMGPROXY_OPEN_TELEMETRY_CLIENT_KEY`: OpenTelemetry client TLS key, PEM-encoded. Default: blank

+ 52 - 3
go.mod

@@ -24,11 +24,27 @@ require (
 	github.com/stretchr/testify v1.8.0
 	github.com/tdewolff/parse/v2 v2.6.4
 	github.com/trimmer-io/go-xmp v1.0.0
+	go.opentelemetry.io/contrib/detectors/aws/ec2 v1.10.0
+	go.opentelemetry.io/contrib/detectors/aws/ecs v1.10.0
+	go.opentelemetry.io/contrib/detectors/aws/eks v1.10.0
+	go.opentelemetry.io/contrib/propagators/autoprop v0.36.0
+	go.opentelemetry.io/contrib/propagators/aws v1.10.0
+	go.opentelemetry.io/otel v1.10.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.32.1
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.32.1
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.10.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.10.0
+	go.opentelemetry.io/otel/metric v0.32.1
+	go.opentelemetry.io/otel/sdk v1.10.0
+	go.opentelemetry.io/otel/sdk/metric v0.32.1
+	go.opentelemetry.io/otel/trace v1.10.0
 	go.uber.org/automaxprocs v1.5.1
 	golang.org/x/image v0.0.0-20220902085622-e7cb96979f69
 	golang.org/x/net v0.0.0-20221004154528-8021a29435af
 	golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec
 	google.golang.org/api v0.98.0
+	google.golang.org/grpc v1.49.0
 	gopkg.in/DataDog/dd-trace-go.v1 v1.42.1
 )
 
@@ -39,37 +55,53 @@ require (
 	cloud.google.com/go/pubsub v1.25.1 // indirect
 	github.com/Azure/azure-pipeline-go v0.2.3 // indirect
 	github.com/DataDog/datadog-agent/pkg/obfuscate v0.39.0 // indirect
-	github.com/DataDog/datadog-go v4.8.3+incompatible // indirect
 	github.com/DataDog/sketches-go v1.4.1 // indirect
 	github.com/Microsoft/go-winio v0.6.0 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
 	github.com/StackExchange/wmi v1.2.1 // indirect
 	github.com/alexbrainman/goissue34681 v0.0.0-20191006012335-3fc7a47baff5 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/bugsnag/panicwrap v1.3.4 // indirect
 	github.com/caio/go-tdigest v3.1.0+incompatible // indirect
+	github.com/cenkalti/backoff/v4 v4.1.3 // indirect
 	github.com/cespare/xxhash/v2 v2.1.2 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/dgraph-io/ristretto v0.1.0 // indirect
 	github.com/dustin/go-humanize v1.0.0 // indirect
+	github.com/emicklei/go-restful v2.9.5+incompatible // indirect
+	github.com/go-logr/logr v1.2.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-ole/go-ole v1.2.6 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.5 // indirect
+	github.com/go-openapi/swag v0.19.14 // indirect
 	github.com/gofrs/uuid v4.3.0+incompatible // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/glog v1.0.0 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/google/gnostic v0.5.7-v3refs // indirect
 	github.com/google/go-cmp v0.5.9 // indirect
+	github.com/google/gofuzz v1.2.0 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/googleapis/enterprise-certificate-proxy v0.2.0 // indirect
 	github.com/googleapis/gax-go/v2 v2.5.1 // indirect
 	github.com/gorilla/handlers v1.5.1 // indirect
 	github.com/gorilla/mux v1.8.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect
 	github.com/ianlancetaylor/cgosymbolizer v0.0.0-20220405231054-a1ae3e4bba26 // indirect
 	github.com/jmespath/go-jmespath v0.4.0 // indirect
 	github.com/jonboulle/clockwork v0.3.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/mattn/go-ieproxy v0.0.9 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/pborman/uuid v1.2.1 // indirect
 	github.com/philhofer/fwd v1.1.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
@@ -81,22 +113,39 @@ require (
 	github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
 	github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 // indirect
 	github.com/shirou/gopsutil v3.21.11+incompatible // indirect
-	github.com/stretchr/objx v0.4.0 // indirect
 	github.com/tinylib/msgp v1.1.6 // indirect
 	go.opencensus.io v0.23.0 // indirect
+	go.opentelemetry.io/contrib/propagators/b3 v1.10.0 // indirect
+	go.opentelemetry.io/contrib/propagators/jaeger v1.10.0 // indirect
+	go.opentelemetry.io/contrib/propagators/ot v1.10.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.32.1 // indirect
+	go.opentelemetry.io/proto/otlp v0.19.0 // indirect
 	go.uber.org/atomic v1.9.0 // indirect
+	go.uber.org/multierr v1.8.0 // indirect
 	golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
 	golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 // indirect
 	golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0 // indirect
+	golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
 	golang.org/x/text v0.3.7 // indirect
 	golang.org/x/time v0.0.0-20220922220347-f3bd1da661af // indirect
 	golang.org/x/tools v0.1.12 // indirect
 	golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
 	google.golang.org/appengine v1.6.7 // indirect
 	google.golang.org/genproto v0.0.0-20220930163606-c98284e70a91 // indirect
-	google.golang.org/grpc v1.49.0 // indirect
 	google.golang.org/protobuf v1.28.1 // indirect
+	gopkg.in/inf.v0 v0.9.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
+	k8s.io/api v0.24.3 // indirect
+	k8s.io/apimachinery v0.24.3 // indirect
+	k8s.io/client-go v0.24.3 // indirect
+	k8s.io/klog/v2 v2.60.1 // indirect
+	k8s.io/kube-openapi v0.0.0-20220328201542-3ee0da9b0b42 // indirect
+	k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9 // indirect
+	sigs.k8s.io/json v0.0.0-20211208200746-9f7c6b3444d2 // indirect
+	sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
+	sigs.k8s.io/yaml v1.2.0 // indirect
 )
 
 replace git.apache.org/thrift.git => github.com/apache/thrift v0.0.0-20180902110319-2566ecd5d999

File diff suppressed because it is too large
+ 52 - 661
go.sum


+ 0 - 3
metrics/errformat/errformat.go

@@ -4,8 +4,6 @@ import (
 	"fmt"
 	"reflect"
 
-	"github.com/sirupsen/logrus"
-
 	"github.com/imgproxy/imgproxy/v3/ierrors"
 )
 
@@ -14,7 +12,6 @@ func FormatErrType(errType string, err error) string {
 
 	if _, ok := err.(*ierrors.Error); !ok {
 		errType = fmt.Sprintf("%s (%s)", errType, reflect.TypeOf(err).String())
-		logrus.Warnf("ErrType: %s", errType)
 	}
 
 	return errType

+ 19 - 1
metrics/metrics.go

@@ -6,6 +6,7 @@ import (
 
 	"github.com/imgproxy/imgproxy/v3/metrics/datadog"
 	"github.com/imgproxy/imgproxy/v3/metrics/newrelic"
+	"github.com/imgproxy/imgproxy/v3/metrics/otel"
 	"github.com/imgproxy/imgproxy/v3/metrics/prometheus"
 )
 
@@ -18,29 +19,37 @@ func Init() error {
 
 	datadog.Init()
 
+	if err := otel.Init(); err != nil {
+		return err
+	}
+
 	return nil
 }
 
 func Stop() {
 	newrelic.Stop()
 	datadog.Stop()
+	otel.Stop()
 }
 
 func Enabled() bool {
 	return prometheus.Enabled() ||
 		newrelic.Enabled() ||
-		datadog.Enabled()
+		datadog.Enabled() ||
+		otel.Enabled()
 }
 
 func StartRequest(ctx context.Context, rw http.ResponseWriter, r *http.Request) (context.Context, context.CancelFunc, http.ResponseWriter) {
 	promCancel := prometheus.StartRequest()
 	ctx, nrCancel, rw := newrelic.StartTransaction(ctx, rw, r)
 	ctx, ddCancel, rw := datadog.StartRootSpan(ctx, rw, r)
+	ctx, otelCancel, rw := otel.StartRootSpan(ctx, rw, r)
 
 	cancel := func() {
 		promCancel()
 		nrCancel()
 		ddCancel()
+		otelCancel()
 	}
 
 	return ctx, cancel, rw
@@ -50,11 +59,13 @@ func StartQueueSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartQueueSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Queue")
 	ddCancel := datadog.StartSpan(ctx, "queue")
+	otelCancel := otel.StartSpan(ctx, "queue")
 
 	cancel := func() {
 		promCancel()
 		nrCancel()
 		ddCancel()
+		otelCancel()
 	}
 
 	return cancel
@@ -64,11 +75,13 @@ func StartDownloadingSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartDownloadingSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Downloading image")
 	ddCancel := datadog.StartSpan(ctx, "downloading_image")
+	otelCancel := otel.StartSpan(ctx, "downloading_image")
 
 	cancel := func() {
 		promCancel()
 		nrCancel()
 		ddCancel()
+		otelCancel()
 	}
 
 	return cancel
@@ -78,11 +91,13 @@ func StartProcessingSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartProcessingSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Processing image")
 	ddCancel := datadog.StartSpan(ctx, "processing_image")
+	otelCancel := otel.StartSpan(ctx, "processing_image")
 
 	cancel := func() {
 		promCancel()
 		nrCancel()
 		ddCancel()
+		otelCancel()
 	}
 
 	return cancel
@@ -92,11 +107,13 @@ func StartStreamingSegment(ctx context.Context) context.CancelFunc {
 	promCancel := prometheus.StartStreamingSegment()
 	nrCancel := newrelic.StartSegment(ctx, "Streaming image")
 	ddCancel := datadog.StartSpan(ctx, "streaming_image")
+	otelCancel := otel.StartSpan(ctx, "streaming_image")
 
 	cancel := func() {
 		promCancel()
 		nrCancel()
 		ddCancel()
+		otelCancel()
 	}
 
 	return cancel
@@ -106,6 +123,7 @@ func SendError(ctx context.Context, errType string, err error) {
 	prometheus.IncrementErrorsTotal(errType)
 	newrelic.SendError(ctx, errType, err)
 	datadog.SendError(ctx, errType, err)
+	otel.SendError(ctx, errType, err)
 }
 
 func ObserveBufferSize(t string, size int) {

+ 409 - 0
metrics/otel/otel.go

@@ -0,0 +1,409 @@
+package otel
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/felixge/httpsnoop"
+	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/contrib/detectors/aws/ec2"
+	"go.opentelemetry.io/contrib/detectors/aws/ecs"
+	"go.opentelemetry.io/contrib/detectors/aws/eks"
+	"go.opentelemetry.io/contrib/propagators/autoprop"
+	"go.opentelemetry.io/contrib/propagators/aws/xray"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/metric"
+	"go.opentelemetry.io/otel/metric/instrument"
+	"go.opentelemetry.io/otel/metric/unit"
+	"go.opentelemetry.io/otel/propagation"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
+	"go.opentelemetry.io/otel/trace"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+
+	"github.com/imgproxy/imgproxy/v3/config"
+	"github.com/imgproxy/imgproxy/v3/ierrors"
+	"github.com/imgproxy/imgproxy/v3/metrics/errformat"
+	"github.com/imgproxy/imgproxy/v3/metrics/stats"
+)
+
+// hasSpanCtxKey is the context key StartRootSpan sets to mark a context as
+// carrying a root span; StartSpan checks it before opening a child span.
+type hasSpanCtxKey struct{}
+
+// GaugeFunc is a callback that returns the current value of a gauge.
+type GaugeFunc func() float64
+
+// Package-level OpenTelemetry state. It is populated by Init and read by the
+// other functions of this package.
+var (
+	// Set to true by Init once the tracer (enabled) and the meter
+	// (enabledMetrics) have been set up.
+	enabled        bool
+	enabledMetrics bool
+
+	// Tracing pipeline created by Init.
+	tracerProvider *sdktrace.TracerProvider
+	tracer         trace.Tracer
+
+	// Metrics pipeline created by Init; stays nil when no metric exporter was built.
+	meterProvider *sdkmetric.MeterProvider
+	meter         metric.Meter
+
+	// Optional text map propagator; nil unless config.OpenTelemetryPropagators is non-empty.
+	propagator propagation.TextMapPropagator
+)
+
+// Init sets up OpenTelemetry tracing and, optionally, metrics.
+//
+// It does nothing when config.OpenTelemetryEndpoint is empty. Otherwise it
+// builds the exporters for the configured protocol, creates the tracer
+// provider, configures the propagators and, if a metric exporter was built,
+// creates the meter provider and registers the request/image gauges.
+//
+// It returns an error when the protocol is unknown, the exporters can't be
+// built, or the propagators list can't be turned into a propagator.
+func Init() error {
+	if len(config.OpenTelemetryEndpoint) == 0 {
+		return nil
+	}
+
+	otel.SetErrorHandler(&errorHandler{entry: logrus.WithField("from", "opentelemetry")})
+
+	var (
+		traceExporter  *otlptrace.Exporter
+		metricExporter sdkmetric.Exporter
+		err            error
+	)
+
+	switch config.OpenTelemetryProtocol {
+	case "grpc":
+		traceExporter, metricExporter, err = buildGRPCExporters()
+	case "https":
+		traceExporter, metricExporter, err = buildHTTPExporters(false)
+	case "http":
+		traceExporter, metricExporter, err = buildHTTPExporters(true)
+	default:
+		return fmt.Errorf("Unknown OpenTelemetry protocol: %s", config.OpenTelemetryProtocol)
+	}
+
+	if err != nil {
+		return err
+	}
+
+	res := resource.NewWithAttributes(
+		semconv.SchemaURL,
+		semconv.ServiceNameKey.String(config.OpenTelemetryServiceName),
+	)
+
+	// AWS detection is best-effort: detection errors are deliberately ignored.
+	awsRes, _ := resource.Detect(
+		context.Background(),
+		ec2.NewResourceDetector(),
+		ecs.NewResourceDetector(),
+		eks.NewResourceDetector(),
+	)
+
+	if awsRes != nil {
+		// resource.Merge can fail (for example on conflicting schema URLs).
+		// Keep the base resource in that case so the service name isn't lost.
+		if merged, merr := resource.Merge(res, awsRes); merr == nil {
+			res = merged
+		} else {
+			logrus.Warnf("Can't add AWS attributes to OpenTelemetry: %s", merr)
+		}
+	}
+
+	idg := xray.NewIDGenerator()
+
+	tracerProvider = sdktrace.NewTracerProvider(
+		sdktrace.WithResource(res),
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+		sdktrace.WithBatcher(traceExporter),
+		sdktrace.WithIDGenerator(idg),
+	)
+
+	tracer = tracerProvider.Tracer("imgproxy")
+
+	if len(config.OpenTelemetryPropagators) > 0 {
+		propagator, err = autoprop.TextMapPropagator(config.OpenTelemetryPropagators...)
+		if err != nil {
+			return err
+		}
+	}
+
+	enabled = true
+
+	// No metric exporter means metrics are disabled; tracing alone is set up.
+	if metricExporter == nil {
+		return nil
+	}
+
+	metricReader := sdkmetric.NewPeriodicReader(
+		metricExporter,
+		sdkmetric.WithInterval(5*time.Second),
+	)
+
+	meterProvider = sdkmetric.NewMeterProvider(
+		sdkmetric.WithResource(res),
+		sdkmetric.WithReader(metricReader),
+	)
+
+	meter = meterProvider.Meter("imgproxy")
+
+	// Must be set before the AddGaugeFunc calls below, which check it.
+	enabledMetrics = true
+
+	AddGaugeFunc(
+		"requests_in_progress",
+		"A gauge of the number of requests currently being in progress.",
+		"1",
+		stats.RequestsInProgress,
+	)
+	AddGaugeFunc(
+		"images_in_progress",
+		"A gauge of the number of images currently being in progress.",
+		"1",
+		stats.ImagesInProgress,
+	)
+
+	return nil
+}
+
+// buildGRPCExporters creates the OTLP trace exporter and, when
+// config.OpenTelemetryEnableMetrics is set, the OTLP metric exporter, both
+// talking gRPC to config.OpenTelemetryEndpoint.
+//
+// TLS credentials are used when buildTLSConfig returns a config; otherwise the
+// connection is insecure. Each exporter gets
+// config.OpenTelemetryConnectionTimeout seconds to be created. The metric
+// exporter is nil when metrics are disabled. On error no exporter is returned.
+func buildGRPCExporters() (*otlptrace.Exporter, sdkmetric.Exporter, error) {
+	tracerOpts := []otlptracegrpc.Option{
+		otlptracegrpc.WithEndpoint(config.OpenTelemetryEndpoint),
+		otlptracegrpc.WithDialOption(grpc.WithBlock()),
+	}
+
+	meterOpts := []otlpmetricgrpc.Option{
+		otlpmetricgrpc.WithEndpoint(config.OpenTelemetryEndpoint),
+		otlpmetricgrpc.WithDialOption(grpc.WithBlock()),
+	}
+
+	tlsConf, err := buildTLSConfig()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if tlsConf != nil {
+		creds := credentials.NewTLS(tlsConf)
+		tracerOpts = append(tracerOpts, otlptracegrpc.WithTLSCredentials(creds))
+		meterOpts = append(meterOpts, otlpmetricgrpc.WithTLSCredentials(creds))
+	} else {
+		tracerOpts = append(tracerOpts, otlptracegrpc.WithInsecure())
+		meterOpts = append(meterOpts, otlpmetricgrpc.WithInsecure())
+	}
+
+	timeout := time.Duration(config.OpenTelemetryConnectionTimeout) * time.Second
+
+	trctx, trcancel := context.WithTimeout(context.Background(), timeout)
+	defer trcancel()
+
+	traceExporter, err := otlptracegrpc.New(trctx, tracerOpts...)
+	if err != nil {
+		// Fail right away so this error can't be overwritten by the metric
+		// exporter setup below.
+		return nil, nil, fmt.Errorf("Can't connect to OpenTelemetry collector: %w", err)
+	}
+
+	if !config.OpenTelemetryEnableMetrics {
+		return traceExporter, nil, nil
+	}
+
+	mtctx, mtcancel := context.WithTimeout(context.Background(), timeout)
+	defer mtcancel()
+
+	metricExporter, err := otlpmetricgrpc.New(mtctx, meterOpts...)
+	if err != nil {
+		return nil, nil, fmt.Errorf("Can't connect to OpenTelemetry collector: %w", err)
+	}
+
+	return traceExporter, metricExporter, nil
+}
+
+// buildHTTPExporters creates the OTLP trace exporter and, when
+// config.OpenTelemetryEnableMetrics is set, the OTLP metric exporter, both
+// talking HTTP to config.OpenTelemetryEndpoint.
+//
+// When insecure is true the exporters are created with the WithInsecure
+// option; otherwise the TLS config returned by buildTLSConfig (if any) is
+// applied. Each exporter gets config.OpenTelemetryConnectionTimeout seconds to
+// be created. The metric exporter is nil when metrics are disabled. On error
+// no exporter is returned.
+func buildHTTPExporters(insecure bool) (*otlptrace.Exporter, sdkmetric.Exporter, error) {
+	tracerOpts := []otlptracehttp.Option{
+		otlptracehttp.WithEndpoint(config.OpenTelemetryEndpoint),
+	}
+
+	meterOpts := []otlpmetrichttp.Option{
+		otlpmetrichttp.WithEndpoint(config.OpenTelemetryEndpoint),
+	}
+
+	if insecure {
+		tracerOpts = append(tracerOpts, otlptracehttp.WithInsecure())
+		meterOpts = append(meterOpts, otlpmetrichttp.WithInsecure())
+	} else {
+		tlsConf, err := buildTLSConfig()
+		if err != nil {
+			return nil, nil, err
+		}
+
+		if tlsConf != nil {
+			tracerOpts = append(tracerOpts, otlptracehttp.WithTLSClientConfig(tlsConf))
+			meterOpts = append(meterOpts, otlpmetrichttp.WithTLSClientConfig(tlsConf))
+		}
+	}
+
+	timeout := time.Duration(config.OpenTelemetryConnectionTimeout) * time.Second
+
+	trctx, trcancel := context.WithTimeout(context.Background(), timeout)
+	defer trcancel()
+
+	traceExporter, err := otlptracehttp.New(trctx, tracerOpts...)
+	if err != nil {
+		// Fail right away so this error can't be overwritten by the metric
+		// exporter setup below.
+		return nil, nil, fmt.Errorf("Can't connect to OpenTelemetry collector: %w", err)
+	}
+
+	if !config.OpenTelemetryEnableMetrics {
+		return traceExporter, nil, nil
+	}
+
+	mtctx, mtcancel := context.WithTimeout(context.Background(), timeout)
+	defer mtcancel()
+
+	metricExporter, err := otlpmetrichttp.New(mtctx, meterOpts...)
+	if err != nil {
+		return nil, nil, fmt.Errorf("Can't connect to OpenTelemetry collector: %w", err)
+	}
+
+	return traceExporter, metricExporter, nil
+}
+
+// buildTLSConfig builds the TLS configuration for talking to the collector.
+//
+// It returns (nil, nil) when config.OpenTelemetryServerCert is empty. Otherwise
+// the PEM-encoded server certificate becomes the root CA pool and, when both
+// config.OpenTelemetryClientCert and config.OpenTelemetryClientKey are set,
+// the client key pair is attached to the config.
+func buildTLSConfig() (*tls.Config, error) {
+	serverCert := config.OpenTelemetryServerCert
+	if len(serverCert) == 0 {
+		return nil, nil
+	}
+
+	roots := x509.NewCertPool()
+	if ok := roots.AppendCertsFromPEM([]byte(serverCert)); !ok {
+		return nil, fmt.Errorf("Can't load OpenTelemetry server cert")
+	}
+
+	conf := &tls.Config{RootCAs: roots}
+
+	clientCert, clientKey := config.OpenTelemetryClientCert, config.OpenTelemetryClientKey
+	if len(clientCert) == 0 || len(clientKey) == 0 {
+		// No complete client pair configured: server verification only.
+		return conf, nil
+	}
+
+	pair, err := tls.X509KeyPair([]byte(clientCert), []byte(clientKey))
+	if err != nil {
+		return nil, fmt.Errorf("Can't load OpenTelemetry client cert/key pair: %s", err)
+	}
+
+	conf.Certificates = []tls.Certificate{pair}
+
+	return conf, nil
+}
+
+// Stop shuts down the tracer provider and, if it was created, the meter
+// provider. Each shutdown gets up to 5 seconds. Shutdown errors are logged as
+// warnings. Stop does nothing when OpenTelemetry is not enabled.
+func Stop() {
+	if !enabled {
+		return
+	}
+
+	trctx, trcancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer trcancel()
+
+	if err := tracerProvider.Shutdown(trctx); err != nil {
+		logrus.Warnf("Can't shut down OpenTelemetry tracer provider: %s", err)
+	}
+
+	if meterProvider == nil {
+		return
+	}
+
+	mtctx, mtcancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer mtcancel()
+
+	if err := meterProvider.Shutdown(mtctx); err != nil {
+		logrus.Warnf("Can't shut down OpenTelemetry meter provider: %s", err)
+	}
+}
+
+// Enabled reports whether Init has set up OpenTelemetry tracing.
+func Enabled() bool {
+	return enabled
+}
+
+// StartRootSpan starts the server span for an incoming request.
+//
+// When OpenTelemetry is disabled it returns its arguments unchanged together
+// with a no-op cancel function. Otherwise it extracts the remote trace context
+// from the request headers (if a propagator is configured), starts the
+// "/request" span, marks the context so that StartSpan creates child spans,
+// and wraps rw so that the response status code is recorded on the span.
+// The returned cancel function ends the span.
+func StartRootSpan(ctx context.Context, rw http.ResponseWriter, r *http.Request) (context.Context, context.CancelFunc, http.ResponseWriter) {
+	if !enabled {
+		return ctx, func() {}, rw
+	}
+
+	if propagator != nil {
+		ctx = propagator.Extract(ctx, propagation.HeaderCarrier(r.Header))
+	}
+
+	ctx, span := tracer.Start(
+		ctx, "/request",
+		trace.WithSpanKind(trace.SpanKindServer),
+		trace.WithAttributes(semconv.NetAttributesFromHTTPRequest("tcp", r)...),
+		trace.WithAttributes(semconv.EndUserAttributesFromHTTPRequest(r)...),
+		trace.WithAttributes(semconv.HTTPServerAttributesFromHTTPRequest("imgproxy", "/", r)...),
+	)
+	ctx = context.WithValue(ctx, hasSpanCtxKey{}, struct{}{})
+
+	newRw := httpsnoop.Wrap(rw, httpsnoop.Hooks{
+		WriteHeader: func(next httpsnoop.WriteHeaderFunc) httpsnoop.WriteHeaderFunc {
+			return func(statusCode int) {
+				attrs := semconv.HTTPAttributesFromHTTPStatusCode(statusCode)
+				spanStatus, spanMessage := semconv.SpanStatusFromHTTPStatusCodeAndSpanKind(statusCode, trace.SpanKindServer)
+				span.SetAttributes(attrs...)
+				span.SetStatus(spanStatus, spanMessage)
+
+				// Pass the call on to the wrapped writer; without it the
+				// status code would never reach the underlying response.
+				next(statusCode)
+			}
+		},
+	})
+
+	cancel := func() { span.End() }
+	return ctx, cancel, newRw
+}
+
+// StartSpan opens an internal span called name under the span carried by ctx
+// and returns the function that ends it. A no-op function is returned when
+// OpenTelemetry is disabled or ctx was not marked by StartRootSpan.
+func StartSpan(ctx context.Context, name string) context.CancelFunc {
+	noop := func() {}
+
+	if !enabled || ctx.Value(hasSpanCtxKey{}) == nil {
+		return noop
+	}
+
+	_, child := tracer.Start(ctx, name, trace.WithSpanKind(trace.SpanKindInternal))
+
+	return func() { child.End() }
+}
+
+// SendError attaches err to the span found in ctx as an exception event.
+// The event carries the formatted error type, the error message and, for
+// *ierrors.Error values with a non-empty stack, the formatted stack trace.
+// It does nothing when OpenTelemetry is disabled.
+func SendError(ctx context.Context, errType string, err error) {
+	if !enabled {
+		return
+	}
+
+	attrs := make([]attribute.KeyValue, 0, 3)
+	attrs = append(
+		attrs,
+		semconv.ExceptionTypeKey.String(errformat.FormatErrType(errType, err)),
+		semconv.ExceptionMessageKey.String(err.Error()),
+	)
+
+	if ierr, ok := err.(*ierrors.Error); ok {
+		stack := ierr.FormatStack()
+		if len(stack) > 0 {
+			attrs = append(attrs, semconv.ExceptionStacktraceKey.String(stack))
+		}
+	}
+
+	trace.SpanFromContext(ctx).AddEvent(semconv.ExceptionEventName, trace.WithAttributes(attrs...))
+}
+
+// AddGaugeFunc registers an asynchronous float64 gauge whose value is read
+// from f whenever the registered callback runs.
+//
+// name, desc and u are the instrument's name, description and unit.
+// It does nothing when OpenTelemetry metrics are not enabled. Failures to
+// create the gauge or to register its callback are logged as warnings.
+func AddGaugeFunc(name, desc, u string, f GaugeFunc) {
+	if !enabledMetrics {
+		return
+	}
+
+	gauge, err := meter.AsyncFloat64().Gauge(
+		name,
+		instrument.WithUnit(unit.Unit(u)),
+		instrument.WithDescription(desc),
+	)
+	if err != nil {
+		// Don't register a callback for a gauge that could not be created.
+		logrus.Warnf("Can't add %s gauge to OpenTelemetry: %s", name, err)
+		return
+	}
+
+	err = meter.RegisterCallback(
+		[]instrument.Asynchronous{
+			gauge,
+		},
+		func(ctx context.Context) {
+			gauge.Observe(ctx, f())
+		},
+	)
+	if err != nil {
+		logrus.Warnf("Can't add %s gauge to OpenTelemetry: %s", name, err)
+	}
+}
+
+// errorHandler logs errors through a logrus entry. Init installs it with
+// otel.SetErrorHandler.
+type errorHandler struct {
+	entry *logrus.Entry
+}
+
+// Handle logs err at the warning level.
+func (h *errorHandler) Handle(err error) {
+	h.entry.Warn(err.Error())
+}

+ 20 - 0
vips/vips.go

@@ -23,6 +23,7 @@ import (
 	"github.com/imgproxy/imgproxy/v3/imagetype"
 	"github.com/imgproxy/imgproxy/v3/metrics/datadog"
 	"github.com/imgproxy/imgproxy/v3/metrics/newrelic"
+	"github.com/imgproxy/imgproxy/v3/metrics/otel"
 	"github.com/imgproxy/imgproxy/v3/metrics/prometheus"
 )
 
@@ -105,6 +106,25 @@ func Init() error {
 	newrelic.AddGaugeFunc("vips.max_memory", GetMemHighwater)
 	newrelic.AddGaugeFunc("vips.allocs", GetAllocs)
 
+	// Memory gauges are reported in bytes ("By").
+	otel.AddGaugeFunc(
+		"vips_memory_bytes",
+		"A gauge of the vips tracked memory usage in bytes.",
+		"By",
+		GetMem,
+	)
+	otel.AddGaugeFunc(
+		"vips_max_memory_bytes",
+		"A gauge of the max vips tracked memory usage in bytes.",
+		"By",
+		GetMemHighwater,
+	)
+	// The number of allocations is a dimensionless count, hence the "1" unit.
+	otel.AddGaugeFunc(
+		"vips_allocs",
+		"A gauge of the number of active vips allocations.",
+		"1",
+		GetAllocs,
+	)
+
 	return nil
 }
 

Some files were not shown because too many files changed in this diff