metrics.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
"""OpenTelemetry metrics bootstrap for Open WebUI.

This module initialises a MeterProvider that sends metrics to an OTLP
collector. The collector is responsible for exposing a Prometheus
`/metrics` endpoint – WebUI does **not** expose it directly.

Metrics collected:

* http.server.requests (counter)
* http.server.duration (histogram, milliseconds)
* webui.users.total (observable gauge)
* webui.users.active (observable gauge)

Attributes used on the HTTP metrics: http.method, http.route,
http.status_code

If you wish to add more attributes (e.g. user-agent) you can, but beware of
high-cardinality label sets.
"""
  12. from __future__ import annotations
  13. import time
  14. from typing import Dict, List, Sequence, Any
  15. from fastapi import FastAPI, Request
  16. from opentelemetry import metrics
  17. from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
  18. OTLPMetricExporter,
  19. )
  20. from opentelemetry.sdk.metrics import MeterProvider
  21. from opentelemetry.sdk.metrics.view import View
  22. from opentelemetry.sdk.metrics.export import (
  23. PeriodicExportingMetricReader,
  24. )
  25. from opentelemetry.sdk.resources import SERVICE_NAME, Resource
  26. from open_webui.env import OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT
  27. from open_webui.socket.main import get_active_user_ids
  28. from open_webui.models.users import Users
# Interval, in milliseconds, passed to the periodic metric reader as its
# export interval.
_EXPORT_INTERVAL_MILLIS = 10_000  # 10 seconds
  30. def _build_meter_provider() -> MeterProvider:
  31. """Return a configured MeterProvider."""
  32. # Periodic reader pushes metrics over OTLP/gRPC to collector
  33. readers: List[PeriodicExportingMetricReader] = [
  34. PeriodicExportingMetricReader(
  35. OTLPMetricExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT),
  36. export_interval_millis=_EXPORT_INTERVAL_MILLIS,
  37. )
  38. ]
  39. # Optional view to limit cardinality: drop user-agent etc.
  40. views: List[View] = [
  41. View(
  42. instrument_name="http.server.duration",
  43. attribute_keys=["http.method", "http.route", "http.status_code"],
  44. ),
  45. View(
  46. instrument_name="http.server.requests",
  47. attribute_keys=["http.method", "http.route", "http.status_code"],
  48. ),
  49. View(
  50. instrument_name="webui.users.total",
  51. ),
  52. View(
  53. instrument_name="webui.users.active",
  54. ),
  55. ]
  56. provider = MeterProvider(
  57. resource=Resource.create({SERVICE_NAME: OTEL_SERVICE_NAME}),
  58. metric_readers=list(readers),
  59. views=views,
  60. )
  61. return provider
  62. def setup_metrics(app: FastAPI) -> None:
  63. """Attach OTel metrics middleware to *app* and initialise provider."""
  64. metrics.set_meter_provider(_build_meter_provider())
  65. meter = metrics.get_meter(__name__)
  66. # Instruments
  67. request_counter = meter.create_counter(
  68. name="http.server.requests",
  69. description="Total HTTP requests",
  70. unit="1",
  71. )
  72. duration_histogram = meter.create_histogram(
  73. name="http.server.duration",
  74. description="HTTP request duration",
  75. unit="ms",
  76. )
  77. def observe_active_users(
  78. options: metrics.CallbackOptions,
  79. ) -> Sequence[metrics.Observation]:
  80. return [
  81. metrics.Observation(
  82. value=len(get_active_user_ids()),
  83. )
  84. ]
  85. def observe_total_registered_users(
  86. options: metrics.CallbackOptions,
  87. ) -> Sequence[metrics.Observation]:
  88. return [
  89. metrics.Observation(
  90. value=len(Users.get_users()["users"]),
  91. )
  92. ]
  93. meter.create_observable_gauge(
  94. name="webui.users.total",
  95. description="Total number of registered users",
  96. unit="users",
  97. callbacks=[observe_total_registered_users],
  98. )
  99. meter.create_observable_gauge(
  100. name="webui.users.active",
  101. description="Number of currently active users",
  102. unit="users",
  103. callbacks=[observe_active_users],
  104. )
  105. # FastAPI middleware
  106. @app.middleware("http")
  107. async def _metrics_middleware(request: Request, call_next):
  108. start_time = time.perf_counter()
  109. response = await call_next(request)
  110. elapsed_ms = (time.perf_counter() - start_time) * 1000.0
  111. # Route template e.g. "/items/{item_id}" instead of real path.
  112. route = request.scope.get("route")
  113. route_path = getattr(route, "path", request.url.path)
  114. attrs: Dict[str, str | int] = {
  115. "http.method": request.method,
  116. "http.route": route_path,
  117. "http.status_code": response.status_code,
  118. }
  119. request_counter.add(1, attrs)
  120. duration_histogram.record(elapsed_ms, attrs)
  121. return response