1
0

metrics.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
"""OpenTelemetry metrics bootstrap for Open WebUI.

This module initialises a MeterProvider that sends metrics to an OTLP
collector. The collector is responsible for exposing a Prometheus
`/metrics` endpoint – WebUI does **not** expose it directly.

Metrics collected:

* http.server.requests (counter)
* http.server.duration (histogram, milliseconds)
* webui.users.total (observable gauge)
* webui.users.active (observable gauge)

Attributes used by the HTTP metrics: http.method, http.route, http.status_code

If you wish to add more attributes (e.g. user-agent) you can, but beware of
high-cardinality label sets.
"""
  12. from __future__ import annotations
  13. import time
  14. from typing import Dict, List, Sequence, Any
  15. from base64 import b64encode
  16. from fastapi import FastAPI, Request
  17. from opentelemetry import metrics
  18. from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
  19. OTLPMetricExporter,
  20. )
  21. from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
  22. OTLPMetricExporter as OTLPHttpMetricExporter,
  23. )
  24. from opentelemetry.sdk.metrics import MeterProvider
  25. from opentelemetry.sdk.metrics.view import View
  26. from opentelemetry.sdk.metrics.export import (
  27. PeriodicExportingMetricReader,
  28. )
  29. from opentelemetry.sdk.resources import Resource
  30. from open_webui.env import (
  31. OTEL_SERVICE_NAME,
  32. OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
  33. OTEL_METRICS_BASIC_AUTH_USERNAME,
  34. OTEL_METRICS_BASIC_AUTH_PASSWORD,
  35. OTEL_METRICS_OTLP_SPAN_EXPORTER,
  36. OTEL_METRICS_EXPORTER_OTLP_INSECURE,
  37. )
  38. from open_webui.socket.main import get_active_user_ids
  39. from open_webui.models.users import Users
  40. _EXPORT_INTERVAL_MILLIS = 10_000 # 10 seconds
  41. def _build_meter_provider(resource: Resource) -> MeterProvider:
  42. """Return a configured MeterProvider."""
  43. headers = []
  44. if OTEL_METRICS_BASIC_AUTH_USERNAME and OTEL_METRICS_BASIC_AUTH_PASSWORD:
  45. auth_string = (
  46. f"{OTEL_METRICS_BASIC_AUTH_USERNAME}:{OTEL_METRICS_BASIC_AUTH_PASSWORD}"
  47. )
  48. auth_header = b64encode(auth_string.encode()).decode()
  49. headers = [("authorization", f"Basic {auth_header}")]
  50. # Periodic reader pushes metrics over OTLP/gRPC to collector
  51. if OTEL_METRICS_OTLP_SPAN_EXPORTER == "http":
  52. readers: List[PeriodicExportingMetricReader] = [
  53. PeriodicExportingMetricReader(
  54. OTLPHttpMetricExporter(
  55. endpoint=OTEL_METRICS_EXPORTER_OTLP_ENDPOINT, headers=headers
  56. ),
  57. export_interval_millis=_EXPORT_INTERVAL_MILLIS,
  58. )
  59. ]
  60. else:
  61. readers: List[PeriodicExportingMetricReader] = [
  62. PeriodicExportingMetricReader(
  63. OTLPMetricExporter(
  64. endpoint=OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
  65. insecure=OTEL_METRICS_EXPORTER_OTLP_INSECURE,
  66. headers=headers,
  67. ),
  68. export_interval_millis=_EXPORT_INTERVAL_MILLIS,
  69. )
  70. ]
  71. # Optional view to limit cardinality: drop user-agent etc.
  72. views: List[View] = [
  73. View(
  74. instrument_name="http.server.duration",
  75. attribute_keys=["http.method", "http.route", "http.status_code"],
  76. ),
  77. View(
  78. instrument_name="http.server.requests",
  79. attribute_keys=["http.method", "http.route", "http.status_code"],
  80. ),
  81. View(
  82. instrument_name="webui.users.total",
  83. ),
  84. View(
  85. instrument_name="webui.users.active",
  86. ),
  87. ]
  88. provider = MeterProvider(
  89. resource=resource,
  90. metric_readers=list(readers),
  91. views=views,
  92. )
  93. return provider
  94. def setup_metrics(app: FastAPI, resource: Resource) -> None:
  95. """Attach OTel metrics middleware to *app* and initialise provider."""
  96. metrics.set_meter_provider(_build_meter_provider(resource))
  97. meter = metrics.get_meter(__name__)
  98. # Instruments
  99. request_counter = meter.create_counter(
  100. name="http.server.requests",
  101. description="Total HTTP requests",
  102. unit="1",
  103. )
  104. duration_histogram = meter.create_histogram(
  105. name="http.server.duration",
  106. description="HTTP request duration",
  107. unit="ms",
  108. )
  109. def observe_active_users(
  110. options: metrics.CallbackOptions,
  111. ) -> Sequence[metrics.Observation]:
  112. return [
  113. metrics.Observation(
  114. value=len(get_active_user_ids()),
  115. )
  116. ]
  117. def observe_total_registered_users(
  118. options: metrics.CallbackOptions,
  119. ) -> Sequence[metrics.Observation]:
  120. return [
  121. metrics.Observation(
  122. value=len(Users.get_users()["users"]),
  123. )
  124. ]
  125. meter.create_observable_gauge(
  126. name="webui.users.total",
  127. description="Total number of registered users",
  128. unit="users",
  129. callbacks=[observe_total_registered_users],
  130. )
  131. meter.create_observable_gauge(
  132. name="webui.users.active",
  133. description="Number of currently active users",
  134. unit="users",
  135. callbacks=[observe_active_users],
  136. )
  137. # FastAPI middleware
  138. @app.middleware("http")
  139. async def _metrics_middleware(request: Request, call_next):
  140. start_time = time.perf_counter()
  141. status_code = None
  142. try:
  143. response = await call_next(request)
  144. status_code = getattr(response, "status_code", 500)
  145. return response
  146. except Exception:
  147. status_code = 500
  148. raise
  149. finally:
  150. elapsed_ms = (time.perf_counter() - start_time) * 1000.0
  151. # Route template e.g. "/items/{item_id}" instead of real path.
  152. route = request.scope.get("route")
  153. route_path = getattr(route, "path", request.url.path)
  154. attrs: Dict[str, str | int] = {
  155. "http.method": request.method,
  156. "http.route": route_path,
  157. "http.status_code": status_code,
  158. }
  159. request_counter.add(1, attrs)
  160. duration_histogram.record(elapsed_ms, attrs)