Browse Source

feat: Add OpenTelemetry Metrics Support via OTLP Exporter

Jason Kidd 1 month ago
parent
commit
210dc746f0

+ 1 - 0
backend/open_webui/env.py

@@ -539,6 +539,7 @@ AUDIT_EXCLUDED_PATHS = [path.lstrip("/") for path in AUDIT_EXCLUDED_PATHS]
 ####################################
 
 ENABLE_OTEL = os.environ.get("ENABLE_OTEL", "False").lower() == "true"
+ENABLE_OTEL_METRICS = os.environ.get("ENABLE_OTEL_METRICS", "False").lower() == "true"
 OTEL_EXPORTER_OTLP_ENDPOINT = os.environ.get(
     "OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"
 )

+ 110 - 0
backend/open_webui/utils/telemetry/metrics.py

@@ -0,0 +1,110 @@
+"""OpenTelemetry metrics bootstrap for Open WebUI.
+
+This module initialises a MeterProvider that sends metrics to an OTLP
+collector. The collector is responsible for exposing a Prometheus
+`/metrics` endpoint – WebUI does **not** expose it directly.
+
+Metrics collected:
+
+* http.server.requests (counter)
+* http.server.duration (histogram, milliseconds)
+
+Attributes used: http.method, http.route, http.status_code
+
+If you wish to add more attributes (e.g. user-agent) you can, but beware of
+high-cardinality label sets.
+"""
+
+from __future__ import annotations
+
+import time
+from typing import Dict, List, Sequence, Any
+
+from fastapi import FastAPI, Request
+from opentelemetry import metrics
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
+    OTLPMetricExporter,
+)
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.view import View
+from opentelemetry.sdk.metrics.export import (
+    PeriodicExportingMetricReader,
+)
+from opentelemetry.sdk.resources import SERVICE_NAME, Resource
+
+from open_webui.env import OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT
+
+
+_EXPORT_INTERVAL_MILLIS = 10_000  # 10 seconds
+
+
+def _build_meter_provider() -> MeterProvider:
+    """Create the MeterProvider used for Open WebUI's HTTP metrics.
+
+    The provider is wired with one periodic reader that exports over
+    OTLP/gRPC to ``OTEL_EXPORTER_OTLP_ENDPOINT`` every
+    ``_EXPORT_INTERVAL_MILLIS`` milliseconds, and with one view per HTTP
+    instrument that restricts the attribute keys kept on that instrument.
+    The service name is taken from ``OTEL_SERVICE_NAME``.
+    """
+
+    # Push-based export: the reader periodically ships metrics to the collector
+    otlp_reader = PeriodicExportingMetricReader(
+        OTLPMetricExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT),
+        export_interval_millis=_EXPORT_INTERVAL_MILLIS,
+    )
+
+    # Limit cardinality: both instruments keep the same three attribute keys
+    cardinality_views: List[View] = [
+        View(
+            instrument_name=instrument,
+            attribute_keys=["http.method", "http.route", "http.status_code"],
+        )
+        for instrument in ("http.server.duration", "http.server.requests")
+    ]
+
+    return MeterProvider(
+        resource=Resource.create({SERVICE_NAME: OTEL_SERVICE_NAME}),
+        metric_readers=[otlp_reader],
+        views=cardinality_views,
+    )
+
+
+def setup_metrics(app: FastAPI) -> None:
+    """Attach OTel metrics middleware to *app* and initialise provider.
+
+    Installs the global MeterProvider built by ``_build_meter_provider`` and
+    registers an HTTP middleware on *app* that, for every request, increments
+    the ``http.server.requests`` counter and records the elapsed time in
+    milliseconds on the ``http.server.duration`` histogram.
+
+    Requests whose handler raises are recorded as well (with status code
+    500) before the exception is re-raised, so failures are not missing from
+    the metrics.
+
+    Args:
+        app: The FastAPI application to instrument.
+    """
+
+    metrics.set_meter_provider(_build_meter_provider())
+    meter = metrics.get_meter(__name__)
+
+    # Instruments
+    request_counter = meter.create_counter(
+        name="http.server.requests",
+        description="Total HTTP requests",
+        unit="1",
+    )
+    duration_histogram = meter.create_histogram(
+        name="http.server.duration",
+        description="HTTP request duration",
+        unit="ms",
+    )
+
+    def _record(request: Request, status_code: int, start_time: float) -> None:
+        """Record one finished request on both instruments."""
+        elapsed_ms = (time.perf_counter() - start_time) * 1000.0
+
+        # Route template e.g. "/items/{item_id}" instead of real path.
+        # Falls back to the raw URL path when no route object is in scope.
+        route = request.scope.get("route")
+        route_path = getattr(route, "path", request.url.path)
+
+        attrs: Dict[str, str | int] = {
+            "http.method": request.method,
+            "http.route": route_path,
+            "http.status_code": status_code,
+        }
+
+        request_counter.add(1, attrs)
+        duration_histogram.record(elapsed_ms, attrs)
+
+    # FastAPI middleware
+    @app.middleware("http")
+    async def _metrics_middleware(request: Request, call_next):
+        start_time = time.perf_counter()
+        try:
+            response = await call_next(request)
+        except Exception:
+            # An unhandled error never yields a response object here; count it
+            # as a 500 (what the client is normally sent - NOTE(review):
+            # confirm against the app's error handlers) and let it propagate.
+            _record(request, 500, start_time)
+            raise
+
+        _record(request, response.status_code, start_time)
+        return response

+ 10 - 1
backend/open_webui/utils/telemetry/setup.py

@@ -7,7 +7,12 @@ from sqlalchemy import Engine
 
 from open_webui.utils.telemetry.exporters import LazyBatchSpanProcessor
 from open_webui.utils.telemetry.instrumentors import Instrumentor
-from open_webui.env import OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT
+from open_webui.utils.telemetry.metrics import setup_metrics
+from open_webui.env import (
+    OTEL_SERVICE_NAME,
+    OTEL_EXPORTER_OTLP_ENDPOINT,
+    ENABLE_OTEL_METRICS,
+)
 
 
 def setup(app: FastAPI, db_engine: Engine):
@@ -21,3 +26,7 @@ def setup(app: FastAPI, db_engine: Engine):
     exporter = OTLPSpanExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT)
     trace.get_tracer_provider().add_span_processor(LazyBatchSpanProcessor(exporter))
     Instrumentor(app=app, db_engine=db_engine).instrument()
+
+    # set up metrics only if enabled
+    if ENABLE_OTEL_METRICS:
+        setup_metrics(app)