Ver código fonte

rip out stats bloat

Alex Cheema 4 meses atrás
pai
commit
06c2e236b8

+ 0 - 5
exo/main.py

@@ -52,7 +52,6 @@ parser.add_argument("--models-seed-dir", type=str, default=None, help="Model see
 parser.add_argument("--listen-port", type=int, default=5678, help="Listening port for discovery")
 parser.add_argument("--download-quick-check", action="store_true", help="Quick check local path for model shards download")
 parser.add_argument("--max-parallel-downloads", type=int, default=4, help="Max parallel downloads for model shards download")
-parser.add_argument("--prometheus-client-port", type=int, default=None, help="Prometheus client port")
 parser.add_argument("--broadcast-port", type=int, default=5678, help="Broadcast port for discovery")
 parser.add_argument("--discovery-module", type=str, choices=["udp", "tailscale", "manual"], default="udp", help="Discovery module to use")
 parser.add_argument("--discovery-timeout", type=int, default=30, help="Discovery timeout in seconds")
@@ -170,10 +169,6 @@ def preemptively_start_download(request_id: str, opaque_status: str):
 
 node.on_opaque_status.register("start_download").on_next(preemptively_start_download)
 
-if args.prometheus_client_port:
-  from exo.stats.metrics import start_metrics_server
-  start_metrics_server(node, args.prometheus_client_port)
-
 last_broadcast_time = 0
 
 

+ 0 - 0
exo/stats/__init__.py


+ 0 - 27
exo/stats/docker-compose-stats.yml

@@ -1,27 +0,0 @@
-version: '3.8'
-
-services:
-  prometheus:
-    image: prom/prometheus:latest
-    container_name: prometheus
-    volumes:
-      - ./prometheus.yml:/etc/prometheus/prometheus.yml
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-    ports:
-      - "9090:9090"
-    networks:
-      - monitoring
-
-  grafana:
-    image: grafana/grafana:latest
-    container_name: grafana
-    ports:
-      - "3000:3000"
-    networks:
-      - monitoring
-    depends_on:
-      - prometheus
-
-networks:
-  monitoring:

+ 0 - 29
exo/stats/metrics.py

@@ -1,29 +0,0 @@
-from exo.orchestration import Node
-from prometheus_client import start_http_server, Counter, Histogram
-import json
-
-# Create metrics to track time spent and requests made.
-PROCESS_PROMPT_COUNTER = Counter("process_prompt_total", "Total number of prompts processed", ["node_id"])
-PROCESS_TENSOR_COUNTER = Counter("process_tensor_total", "Total number of tensors processed", ["node_id"])
-PROCESS_TENSOR_TIME = Histogram("process_tensor_seconds", "Time spent processing tensor", ["node_id"])
-
-
-def start_metrics_server(node: Node, port: int):
-  start_http_server(port)
-
-  def _on_opaque_status(request_id, opaque_status: str):
-    status_data = json.loads(opaque_status)
-    _type = status_data.get("type", "")
-    node_id = status_data.get("node_id", "")
-    if _type != "node_status":
-      return
-    status = status_data.get("status", "")
-
-    if status == "end_process_prompt":
-      PROCESS_PROMPT_COUNTER.labels(node_id=node_id).inc()
-    elif status == "end_process_tensor":
-      elapsed_time_ns = status_data.get("elapsed_time_ns", 0)
-      PROCESS_TENSOR_COUNTER.labels(node_id=node_id).inc()
-      PROCESS_TENSOR_TIME.labels(node_id=node_id).observe(elapsed_time_ns/1e9)  # Convert ns to seconds
-
-  node.on_opaque_status.register("stats").on_next(_on_opaque_status)

+ 0 - 7
exo/stats/prometheus.yml

@@ -1,7 +0,0 @@
-global:
-  scrape_interval: 15s
-
-scrape_configs:
-  - job_name: 'exo-node'
-    static_configs:
-      - targets: ['host.docker.internal:8005']