prometheus.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. package prometheus
  2. import (
  3. "context"
  4. "fmt"
  5. "log/slog"
  6. "net"
  7. "net/http"
  8. "strconv"
  9. "time"
  10. "github.com/felixge/httpsnoop"
  11. "github.com/prometheus/client_golang/prometheus"
  12. "github.com/prometheus/client_golang/prometheus/promhttp"
  13. "github.com/imgproxy/imgproxy/v3/monitoring/stats"
  14. "github.com/imgproxy/imgproxy/v3/vips"
  15. )
  16. // Prometheus holds Prometheus metrics and configuration
  17. type Prometheus struct {
  18. config *Config
  19. stats *stats.Stats
  20. requestsTotal prometheus.Counter
  21. statusCodesTotal *prometheus.CounterVec
  22. errorsTotal *prometheus.CounterVec
  23. requestDuration prometheus.Histogram
  24. requestSpanDuration *prometheus.HistogramVec
  25. downloadDuration prometheus.Histogram
  26. processingDuration prometheus.Histogram
  27. workers prometheus.Gauge
  28. }
  29. // New creates a new Prometheus instance
  30. func New(config *Config, stats *stats.Stats) (*Prometheus, error) {
  31. p := &Prometheus{
  32. config: config,
  33. stats: stats,
  34. }
  35. if !config.Enabled() {
  36. return p, nil
  37. }
  38. if err := config.Validate(); err != nil {
  39. return nil, err
  40. }
  41. p.requestsTotal = prometheus.NewCounter(prometheus.CounterOpts{
  42. Namespace: config.Namespace,
  43. Name: "requests_total",
  44. Help: "A counter of the total number of HTTP requests imgproxy processed.",
  45. })
  46. p.statusCodesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
  47. Namespace: config.Namespace,
  48. Name: "status_codes_total",
  49. Help: "A counter of the response status codes.",
  50. }, []string{"status"})
  51. p.errorsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
  52. Namespace: config.Namespace,
  53. Name: "errors_total",
  54. Help: "A counter of the occurred errors separated by type.",
  55. }, []string{"type"})
  56. p.requestDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
  57. Namespace: config.Namespace,
  58. Name: "request_duration_seconds",
  59. Help: "A histogram of the response latency.",
  60. })
  61. p.requestSpanDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
  62. Namespace: config.Namespace,
  63. Name: "request_span_duration_seconds",
  64. Help: "A histogram of the queue latency.",
  65. }, []string{"span"})
  66. p.downloadDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
  67. Namespace: config.Namespace,
  68. Name: "download_duration_seconds",
  69. Help: "A histogram of the source image downloading latency.",
  70. })
  71. p.processingDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
  72. Namespace: config.Namespace,
  73. Name: "processing_duration_seconds",
  74. Help: "A histogram of the image processing latency.",
  75. })
  76. p.workers = prometheus.NewGauge(prometheus.GaugeOpts{
  77. Namespace: config.Namespace,
  78. Name: "workers",
  79. Help: "A gauge of the number of running workers.",
  80. })
  81. p.workers.Set(float64(stats.WorkersNumber))
  82. requestsInProgress := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
  83. Namespace: config.Namespace,
  84. Name: "requests_in_progress",
  85. Help: "A gauge of the number of requests currently being in progress.",
  86. }, stats.RequestsInProgress)
  87. imagesInProgress := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
  88. Namespace: config.Namespace,
  89. Name: "images_in_progress",
  90. Help: "A gauge of the number of images currently being in progress.",
  91. }, stats.ImagesInProgress)
  92. workersUtilization := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
  93. Namespace: config.Namespace,
  94. Name: "workers_utilization",
  95. Help: "A gauge of the workers utilization in percents.",
  96. }, stats.WorkersUtilization)
  97. vipsMemoryBytes := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
  98. Namespace: config.Namespace,
  99. Name: "vips_memory_bytes",
  100. Help: "A gauge of the vips tracked memory usage in bytes.",
  101. }, vips.GetMem)
  102. vipsMaxMemoryBytes := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
  103. Namespace: config.Namespace,
  104. Name: "vips_max_memory_bytes",
  105. Help: "A gauge of the max vips tracked memory usage in bytes.",
  106. }, vips.GetMemHighwater)
  107. vipsAllocs := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
  108. Namespace: config.Namespace,
  109. Name: "vips_allocs",
  110. Help: "A gauge of the number of active vips allocations.",
  111. }, vips.GetAllocs)
  112. prometheus.MustRegister(
  113. p.requestsTotal,
  114. p.statusCodesTotal,
  115. p.errorsTotal,
  116. p.requestDuration,
  117. p.requestSpanDuration,
  118. p.downloadDuration,
  119. p.processingDuration,
  120. p.workers,
  121. requestsInProgress,
  122. imagesInProgress,
  123. workersUtilization,
  124. vipsMemoryBytes,
  125. vipsMaxMemoryBytes,
  126. vipsAllocs,
  127. )
  128. return p, nil
  129. }
  130. // Enabled returns true if Prometheus monitoring is enabled
  131. func (p *Prometheus) Enabled() bool {
  132. return p.config.Enabled()
  133. }
  134. // StartServer starts the Prometheus metrics server
  135. func (p *Prometheus) StartServer(cancel context.CancelFunc) error {
  136. // If not enabled, do nothing
  137. if !p.Enabled() {
  138. return nil
  139. }
  140. s := http.Server{Handler: promhttp.Handler()}
  141. l, err := net.Listen("tcp", p.config.Bind)
  142. if err != nil {
  143. return fmt.Errorf("can't start Prometheus metrics server: %s", err)
  144. }
  145. go func() {
  146. slog.Info(fmt.Sprintf("Starting Prometheus server at %s", p.config.Bind))
  147. if err := s.Serve(l); err != nil && err != http.ErrServerClosed {
  148. slog.Error(err.Error())
  149. }
  150. cancel()
  151. }()
  152. return nil
  153. }
  154. func (p *Prometheus) StartRequest(rw http.ResponseWriter) (context.CancelFunc, http.ResponseWriter) {
  155. if !p.Enabled() {
  156. return func() {}, rw
  157. }
  158. p.requestsTotal.Inc()
  159. newRw := httpsnoop.Wrap(rw, httpsnoop.Hooks{
  160. WriteHeader: func(next httpsnoop.WriteHeaderFunc) httpsnoop.WriteHeaderFunc {
  161. return func(statusCode int) {
  162. p.statusCodesTotal.With(prometheus.Labels{"status": strconv.Itoa(statusCode)}).Inc()
  163. next(statusCode)
  164. }
  165. },
  166. })
  167. return p.startDuration(p.requestDuration), newRw
  168. }
  169. func (p *Prometheus) StartQueueSegment() context.CancelFunc {
  170. if !p.Enabled() {
  171. return func() {}
  172. }
  173. return p.startDuration(p.requestSpanDuration.With(prometheus.Labels{"span": "queue"}))
  174. }
  175. func (p *Prometheus) StartDownloadingSegment() context.CancelFunc {
  176. if !p.Enabled() {
  177. return func() {}
  178. }
  179. cancel := p.startDuration(p.requestSpanDuration.With(prometheus.Labels{"span": "downloading"}))
  180. cancelLegacy := p.startDuration(p.downloadDuration)
  181. return func() {
  182. cancel()
  183. cancelLegacy()
  184. }
  185. }
  186. func (p *Prometheus) StartProcessingSegment() context.CancelFunc {
  187. if !p.Enabled() {
  188. return func() {}
  189. }
  190. cancel := p.startDuration(p.requestSpanDuration.With(prometheus.Labels{"span": "processing"}))
  191. cancelLegacy := p.startDuration(p.processingDuration)
  192. return func() {
  193. cancel()
  194. cancelLegacy()
  195. }
  196. }
  197. func (p *Prometheus) StartStreamingSegment() context.CancelFunc {
  198. if !p.Enabled() {
  199. return func() {}
  200. }
  201. return p.startDuration(p.requestSpanDuration.With(prometheus.Labels{"span": "streaming"}))
  202. }
  203. func (p *Prometheus) startDuration(m prometheus.Observer) context.CancelFunc {
  204. t := time.Now()
  205. return func() {
  206. m.Observe(time.Since(t).Seconds())
  207. }
  208. }
  209. func (p *Prometheus) IncrementErrorsTotal(t string) {
  210. if !p.Enabled() {
  211. return
  212. }
  213. p.errorsTotal.With(prometheus.Labels{"type": t}).Inc()
  214. }