Pranav Veldurthi 9 months ago
parent
commit
497756f7c8

+ 106 - 19
.circleci/config.yml

@@ -27,12 +27,23 @@ commands:
             fi
 
             # Start first instance
-            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 2>&1 | tee output1.log &
+            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> \
+              --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \
+              --chatgpt-api-response-timeout 900 --disable-tui > output1.log &
             PID1=$!
+            tail -f output1.log &
+            TAIL1=$!
 
             # Start second instance
-            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 2>&1 | tee output2.log &
+            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> \
+              --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \
+              --chatgpt-api-response-timeout 900 --disable-tui > output2.log &
             PID2=$!
+            tail -f output2.log &
+            TAIL2=$!
+
+            # Remember to kill the tail processes at the end
+            trap 'kill $TAIL1 $TAIL2' EXIT
 
             # Wait for discovery
             sleep 10
@@ -84,18 +95,22 @@ commands:
             kill $PID1 $PID2
 
             echo ""
-            if ! echo "$response_1" | grep -q "<<parameters.expected_output>>" || ! echo "$response_2" | grep -q "<<parameters.expected_output>>"; then
-              echo "Test failed: Response does not contain '<<parameters.expected_output>>'"
-              echo "Response 1: $response_1"
+            # Extract content using jq and check if it contains expected output
+            content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
+            content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
+
+            if [[ "$content1" != *"<<parameters.expected_output>>"* ]] || [[ "$content2" != *"<<parameters.expected_output>>"* ]]; then
+              echo "Test failed: Response does not match '<<parameters.expected_output>>'"
+              echo "Response 1 content: $content1"
               echo ""
-              echo "Response 2: $response_2"
+              echo "Response 2 content: $content2"
               echo "Output of first instance:"
               cat output1.log
               echo "Output of second instance:"
               cat output2.log
               exit 1
             else
-              echo "Test passed: Response from both nodes contains '<<parameters.expected_output>>'"
+              echo "Test passed: Response from both nodes matches '<<parameters.expected_output>>'"
             fi
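
            A minimal standalone sketch of the jq extraction and substring check used above; the sample JSON is illustrative, not taken from a real pipeline run:

                response='{"choices":[{"message":{"content":"Michael Jackson"}}]}'
                content=$(echo "$response" | jq -r '.choices[0].message.content')
                if [[ "$content" == *"Michael Jackson"* ]]; then
                  echo "match"
                else
                  echo "no match"
                fi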
 
 jobs:
@@ -149,9 +164,9 @@ jobs:
           name: Run discovery integration test
           command: |
             source env/bin/activate
-            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
+            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
             PID1=$!
-            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
+            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
             PID2=$!
             sleep 10
             kill $PID1 $PID2
@@ -211,18 +226,10 @@ jobs:
             pip install .
       - run_chatgpt_api_test:
           inference_engine: dummy
-          model_id: dummy-model
+          model_id: dummy
           prompt: "Dummy prompt."
           expected_output: "dummy"
 
-  test_macos_m1:
-    macos:
-      xcode: "16.0.0"
-    resource_class: m2pro.large
-    steps:
-      - checkout
-      - run: system_profiler SPHardwareDataType
-
   chatgpt_api_integration_test_tinygrad:
     macos:
       xcode: "16.0.0"
@@ -247,13 +254,93 @@ jobs:
           prompt: "Keep responses concise. Who was the king of pop?"
           expected_output: "Michael Jackson"
 
+  measure_pip_sizes:
+    macos:
+      xcode: "16.0.0"
+    steps:
+      - checkout
+      - run:
+          name: Set up Python
+          command: |
+            brew install python@3.12
+            python3.12 -m venv env
+            source env/bin/activate
+      - run:
+          name: Install dependencies and measure sizes
+          command: |
+            source env/bin/activate
+            pip install --upgrade pip
+            pip install .
+            python ./extra/pipsize.py --json ./pipsize.json
+      - store_artifacts:
+          path: ./pipsize.json
+          destination: pip-sizes.json
+
+  check_line_count:
+    docker:
+      - image: cimg/python:3.10
+    steps:
+      - checkout
+
+      - run:
+          name: Setup git for PR comparison
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
+              BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
+                "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
+                | jq -r '.target_branch')
+
+              git clone -b $BASE_BRANCH --single-branch \
+                https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
+                base_branch
+            fi
+
+      - run:
+          name: Install dependencies
+          command: |
+            python -m pip install --upgrade pip
+            pip install tabulate
+
+      - run:
+          name: Run line count check
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              python extra/line_counter.py base_branch .
+            else
+              python extra/line_counter.py .
+            fi
+
+      - store_artifacts:
+          path: line-count-snapshot.json
+          destination: line-count-snapshot.json
+
+      - store_artifacts:
+          path: line-count-diff.json
+          destination: line-count-diff.json
+
+      - run:
+          name: Create test results directory
+          command: |
+            mkdir -p test-results/line-count
+            cp line-count-*.json test-results/line-count/
+
+      - store_test_results:
+          path: test-results
+
 workflows:
   version: 2
   build_and_test:
     jobs:
+      - check_line_count:
+          filters:
+            branches:
+              only: /.*/
+            tags:
+              only: /.*/
       - unit_test
       - discovery_integration_test
       - chatgpt_api_integration_test_mlx
       - chatgpt_api_integration_test_tinygrad
       - chatgpt_api_integration_test_dummy
-      - test_macos_m1
+      - measure_pip_sizes

+ 4 - 4
README.md

@@ -67,10 +67,10 @@ The current recommended way to install exo is from source.
 ### Prerequisites
 
 - Python>=3.12.0 is required because of [issues with asyncio](https://github.com/exo-explore/exo/issues/5) in previous versions.
-- Linux (with NVIDIA card):
-  - NVIDIA driver (test with `nvidia-smi`)
-  - CUDA (https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#cuda-cross-platform-installation) (test with `nvcc --version`)
-  - cuDNN (https://developer.nvidia.com/cudnn-downloads) (test with [link](https://docs.nvidia.com/deeplearning/cudnn/latest/installation/linux.html#verifying-the-install-on-linux:~:text=at%20a%20time.-,Verifying%20the%20Install%20on%20Linux,Test%20passed!,-Upgrading%20From%20Older))
+- For Linux with NVIDIA GPU support (Linux-only, skip if not using Linux or NVIDIA):
+  - NVIDIA driver - verify with `nvidia-smi`
+  - CUDA toolkit - install from [NVIDIA CUDA guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#cuda-cross-platform-installation), verify with `nvcc --version`
+  - cuDNN library - download from [NVIDIA cuDNN page](https://developer.nvidia.com/cudnn-downloads), verify installation by following [these steps](https://docs.nvidia.com/deeplearning/cudnn/latest/installation/linux.html#verifying-the-install-on-linux:~:text=at%20a%20time.-,Verifying%20the%20Install%20on%20Linux,Test%20passed!,-Upgrading%20From%20Older)
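
  As a quick sanity check of the stack above, the verification commands named in these bullets can be run in order (a minimal sketch; assumes a Linux shell with the NVIDIA tools on PATH):

      nvidia-smi        # confirms the driver is loaded and the GPU is visible
      nvcc --version    # confirms the CUDA toolkit is installed
      # cuDNN has no one-line check; follow the linked NVIDIA verification steps,
      # which end with a "Test passed!" message on success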
 
 ### Hardware Requirements
 

+ 2 - 1
exo/api/chatgpt_api.py

@@ -9,6 +9,7 @@ from aiohttp import web
 import aiohttp_cors
 import traceback
 import os
+import signal
 import sys
 from exo import DEBUG, VERSION
 from exo.download.download_progress import RepoProgressEvent
@@ -193,7 +194,7 @@ class ChatGPTAPI:
     response = web.json_response({"detail": "Quit signal received"}, status=200)
     await response.prepare(request)
     await response.write_eof()
-    await shutdown(signal.SIGINT, asyncio.get_event_loop())
+    await shutdown(signal.SIGINT, asyncio.get_event_loop(), self.node.server)
 
   async def timeout_middleware(self, app, handler):
     async def middleware(request):

+ 12 - 9
exo/download/hf/hf_helpers.py

@@ -17,7 +17,6 @@ from exo.helpers import DEBUG, is_frozen
 from exo.download.download_progress import RepoProgressEvent, RepoFileProgressEvent, RepoProgressCallback, RepoFileProgressCallback
 from exo.inference.shard import Shard
 import aiofiles
-from aiofiles import os as aios
 
 T = TypeVar("T")
 
@@ -109,16 +108,20 @@ async def move_models_to_hf(seed_dir: Union[str, Path]):
   """Move model in resources folder of app to .cache/huggingface/hub"""
   source_dir = Path(seed_dir)
   dest_dir = get_hf_home()/"hub"
-  await aios.makedirs(dest_dir, exist_ok=True)
-  async for path in source_dir.iterdir():
-    if path.is_dir() and path.startswith("models--"):
+  await aios.makedirs(dest_dir, exist_ok=True)  
+  for path in source_dir.iterdir():
+    if path.is_dir() and path.name.startswith("models--"):
       dest_path = dest_dir / path.name
-      if dest_path.exists():
-        if DEBUG>=1: print(f"skipping moving {dest_path}. File already exists")
+      if await aios.path.exists(dest_path):
+        print('Skipping moving model to .cache directory')
       else:
-        await aios.rename(str(path), str(dest_path))
-        
-
+        try:
+          await aios.rename(str(path), str(dest_path))
+        except Exception as e:
+          print(f'Error moving model to .cache: {e}')
+    
+    
+    
 async def fetch_file_list(session, repo_id, revision, path=""):
   api_url = f"{get_hf_endpoint()}/api/models/{repo_id}/tree/{revision}"
   url = f"{api_url}/{path}" if path else api_url

+ 1 - 2
exo/helpers.py

@@ -237,7 +237,7 @@ def get_all_ip_addresses():
     return ["localhost"]
 
 
-async def shutdown(signal, loop):
+async def shutdown(signal, loop, server):
   """Gracefully shutdown the server and close the asyncio loop."""
   print(f"Received exit signal {signal.name}...")
   print("Thank you for using exo.")
@@ -247,7 +247,6 @@ async def shutdown(signal, loop):
   print(f"Cancelling {len(server_tasks)} outstanding tasks")
   await asyncio.gather(*server_tasks, return_exceptions=True)
   await server.stop()
-  loop.stop()
 
 
 def is_frozen():

+ 9 - 18
exo/inference/dummy_inference_engine.py

@@ -1,14 +1,8 @@
 from typing import Optional, Tuple, TYPE_CHECKING
 import numpy as np
-import random
-import string
-import asyncio
-import json
 from exo.inference.inference_engine import InferenceEngine
 from exo.inference.shard import Shard
-def random_string(length: int):
-  return ''.join([random.choice(string.ascii_lowercase) for i in range(length)])
-  
+from exo.inference.tokenizers import DummyTokenizer
 
 class DummyInferenceEngine(InferenceEngine):
   def __init__(self):
@@ -18,26 +12,23 @@ class DummyInferenceEngine(InferenceEngine):
     self.eos_token_id = 0
     self.latency_mean = 0.1
     self.latency_stddev = 0.02
+    self.num_generate_dummy_tokens = 10
+    self.tokenizer = DummyTokenizer()
 
   async def encode(self, shard: Shard, prompt: str) -> np.ndarray:
-    return np.random.randint(1, self.vocab_size, size=(1, len(prompt.split())))
+    return np.array(self.tokenizer.encode(prompt))
   
   async def sample(self, x: np.ndarray) -> np.ndarray:
-    return np.random.randint(1, self.vocab_size)
+    if x[0] > self.num_generate_dummy_tokens: return np.array([self.tokenizer.eos_token_id])
+    return x
 
   async def decode(self, shard: Shard, tokens: np.ndarray) -> str:
-    return ' '.join([random_string(np.random.randint(1, 34)) for token in tokens])
+    return self.tokenizer.decode(tokens)
 
   async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray) -> np.ndarray:
     await self.ensure_shard(shard)
-    sequence_length = input_data.shape[0 if self.shard.is_first_layer() else 1]
-    output = np.random.random(size=(1, sequence_length, self.vocab_size if self.shard.is_last_layer() else self.hidden_size))
-    return output
+    return input_data + 1 if self.shard.is_last_layer() else input_data
 
   async def ensure_shard(self, shard: Shard):
-    if self.shard == shard:
-      return
-    # Simulate shard loading without making any API calls
-    await asyncio.sleep(0.1)  # Simulate a short delay
+    if self.shard == shard: return
     self.shard = shard
-    print(f"DummyInferenceEngine: Simulated loading of shard {shard.model_id}")

+ 5 - 1
exo/inference/inference_engine.py

@@ -26,7 +26,11 @@ class InferenceEngine(ABC):
   
   async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, inference_state: Optional[dict] = None) -> np.ndarray:
     tokens = await self.encode(shard, prompt)
-    output_data, inference_state = await self.infer_tensor(request_id, shard, tokens, inference_state)
+    if shard.model_id != 'stable-diffusion-2-1-base':
+      x = tokens.reshape(1, -1)
+    else:
+      x = tokens
+    output_data, inference_state = await self.infer_tensor(request_id, shard, x, inference_state)
     return output_data, inference_state
 
 inference_engine_classes = {

+ 0 - 2
exo/inference/mlx/models/StableDiffusionPipeline.py

@@ -178,8 +178,6 @@ class Model(nn.Module):
             self.first_stage_model = nn.Identity()            
 
     def __call__(self,x, step= 0, cfg_weight: float = 7.5,total_steps=50,conditioning=None,mask=None,residual=None,x_t_prev=None,is_finished=False,is_step_finished=False):
-        if self.shard.is_first_layer():
-            x = x.squeeze(0)
         t, t_prev = self.sampler.current_timestep(step=step, total_steps=total_steps)
         is_finished = False
         is_step_finished = False

+ 1 - 1
exo/inference/mlx/sharded_utils.py

@@ -126,7 +126,7 @@ def load_model_shard(
       self.shard = Shard(args.shard.model_id, args.shard.start_layer, args.shard.end_layer, args.shard.n_layers)
 
     def __call__(self, x, *args, **kwargs):
-      y = super().__call__(x[None] if self.shard.is_first_layer() else x, *args, **kwargs)
+      y = super().__call__(x, *args, **kwargs)
       return y
 
   model_args = model_args_class.from_dict(config)

+ 2 - 0
exo/inference/test_inference_engine.py

@@ -13,6 +13,7 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e
   prompt = "In a single word only, what is the last name of the current president of the USA?"
   resp_full = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=n_layers - 1, n_layers=n_layers), prompt=prompt)
   token_full = await inference_engine_1.sample(resp_full)
+  token_full = token_full.reshape(1, -1)
   next_resp_full = await inference_engine_1.infer_tensor(
     "A",
     shard=Shard(model_id=model_id, start_layer=0, end_layer=n_layers - 1, n_layers=n_layers),
@@ -27,6 +28,7 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e
     input_data=resp1,
   )
   tokens2 = await inference_engine_1.sample(resp2)
+  tokens2 = tokens2.reshape(1, -1)
   resp3 = await inference_engine_1.infer_tensor(
     "B",
     shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=n_layers),

+ 1 - 2
exo/inference/tinygrad/inference.py

@@ -40,8 +40,7 @@ MODEL_PARAMS = {
 def build_transformer(model_path: Path, shard: Shard, model_size="8B", device=None):
   # build model
   linear = nn.Linear
-  with Context(THREEFRY=0):
-    model = Transformer(**MODEL_PARAMS[model_size]["args"], linear=linear, max_context=8192, jit=True, shard=shard)
+  model = Transformer(**MODEL_PARAMS[model_size]["args"], linear=linear, max_context=8192, jit=True, shard=shard)
 
   # load weights
   if model_path.is_dir():

+ 3 - 3
exo/inference/tinygrad/models/llama.py

@@ -225,9 +225,9 @@ class Transformer:
       h = inputs
     return h
 
-  def forward(self, x: Tensor, start_pos: Variable, cache: Optional[List[Tensor]] = None):
-    if x.shape[0:2] == (1, 1) and self.forward_jit is not None:
-      return self.forward_jit(x, Variable("start_pos", 0, self.max_context).bind(start_pos), cache=cache)
+  def forward(self, x: Tensor, start_pos: int, cache: Optional[List[Tensor]] = None):
+    if x.shape[0:2] == (1, 1) and self.forward_jit is not None and start_pos != 0:
+      return self.forward_jit(x, Variable("start_pos", 1, self.max_context).bind(start_pos), cache=cache)
     return self.forward_base(x, start_pos, cache=cache)
 
   def __call__(self, tokens: Tensor, start_pos: Variable, cache: Optional[List[Tensor]] = None):

+ 8 - 3
exo/inference/tokenizers.py

@@ -4,19 +4,24 @@ from os import PathLike
 from pathlib import Path
 from typing import Union
 from transformers import AutoTokenizer, AutoProcessor
+import numpy as np
 from exo.download.hf.hf_helpers import get_local_snapshot_dir
 from exo.helpers import DEBUG
 
 
 class DummyTokenizer:
   def __init__(self):
-    self.eos_token_id = 0
+    self.eos_token_id = 69
+    self.vocab_size = 1000
 
   def apply_chat_template(self, messages, tokenize=True, add_generation_prompt=True):
-    return [1, 2, 3]
+    return "dummy_tokenized_prompt"
+
+  def encode(self, text):
+    return np.array([1])
 
   def decode(self, tokens):
-    return "dummy"
+    return "dummy" * len(tokens)
 
 
 async def resolve_tokenizer(model_id: str):

+ 17 - 3
exo/main.py

@@ -1,5 +1,6 @@
 import argparse
 import asyncio
+import atexit
 import signal
 import json
 import logging
@@ -193,6 +194,11 @@ async def run_model_cli(node: Node, inference_engine: InferenceEngine, model_nam
   finally:
     node.on_token.deregister(callback_id)
 
+def clean_path(path):
+    """Clean and resolve path"""
+    if path.startswith("Optional("):
+        path = path.strip('Optional("').rstrip('")')
+    return os.path.expanduser(path)
 
 async def main():
   loop = asyncio.get_running_loop()
@@ -211,13 +217,21 @@ async def main():
     
   if not args.models_seed_dir is None:
     try:
-      await move_models_to_hf(args.models_seed_dir)
+      models_seed_dir = clean_path(args.models_seed_dir)
+      await move_models_to_hf(models_seed_dir)
     except Exception as e:
       print(f"Error moving models to .cache/huggingface: {e}")
 
+  def restore_cursor():
+    if platform.system() != "Windows":
+        os.system("tput cnorm")  # Show cursor
+
+  # Restore the cursor when the program exits
+  atexit.register(restore_cursor)
+
   # Use a more direct approach to handle signals
   def handle_exit():
-    asyncio.ensure_future(shutdown(signal.SIGTERM, loop))
+    asyncio.ensure_future(shutdown(signal.SIGTERM, loop, node.server))
 
   if platform.system() != "Windows":
     for s in [signal.SIGINT, signal.SIGTERM]:
@@ -244,7 +258,7 @@ def run():
   except KeyboardInterrupt:
     print("Received keyboard interrupt. Shutting down...")
   finally:
-    loop.run_until_complete(shutdown(signal.SIGTERM, loop))
+    loop.run_until_complete(shutdown(signal.SIGTERM, loop, node.server))
     loop.close()
 
 

+ 63 - 61
exo/orchestration/standard_node.py

@@ -102,11 +102,7 @@ class StandardNode(Node):
   def get_topology_inference_engines(self) -> List[List[str]]:
     return self.topology_inference_engines_pool
   
-  async def encode_prompt(self, shard: Shard, prompt):
-    toks = await self.inference_engine.encode(shard, prompt)
-    return toks
-  
-  async def process_result(
+  async def process_inference_result(
     self,
     shard,
     result: np.ndarray,
@@ -116,39 +112,33 @@ class StandardNode(Node):
     if shard.model_id != 'stable-diffusion-2-1-base':
       if request_id not in self.buffered_token_output:
         self.buffered_token_output[request_id] = ([], False)
-      
-      if request_id not in self.buffered_logits:
-        self.buffered_logits[request_id] = []
-
-      self.buffered_logits[request_id] += [i for i in np.reshape(result, (-1, 1, result.shape[-1]))]
-      intermediate_result = self.buffered_token_output[request_id][0]
-
-      if shard.is_last_layer():
-        result = await self.inference_engine.sample(result)
+      is_finished = len(self.buffered_token_output[request_id][0]) >= self.max_generate_tokens
+      if shard.is_last_layer() and not is_finished:
+        token = await self.inference_engine.sample(result)
+        self.buffered_token_output[request_id][0].append(token.item())
+        intermediate_result = self.buffered_token_output[request_id][0]
+        if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(self.buffered_token_output[request_id][0])}")
+        is_finished = token.item() == self.inference_engine.tokenizer.eos_token_id
+        forward = token.reshape(1, -1)
+      else:
+        forward = result
     else:
+      await self.inference_engine.ensure_shard(shard)
+      is_finished = inference_state.get('is_finished', False)
       intermediate_result, inference_state = self.handle_stable_diffusion(inference_state, result)
-    
-    await self.inference_engine.ensure_shard(shard)
-    is_finished = inference_state.get('is_finished', False) if inference_state else (result.size == 1 and result.item() == self.inference_engine.tokenizer.eos_token_id) or len(self.buffered_token_output[request_id][0]) >= self.max_generate_tokens
-    
-
-    asyncio.create_task(self.broadcast_result(request_id, intermediate_result, is_finished))  # TODO: this is n^2 communication complexity
-    if shard.model_id != 'stable-diffusion-2-1-base':
-      if result.size == 1:  # we got a new token out
-        self.buffered_token_output[request_id][0].append(result.item())
-        self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished)
-    else:
+      forward = result
+    if shard.is_last_layer():
+      asyncio.create_task(self.broadcast_result(request_id, intermediate_result, is_finished))
       self.trigger_on_token_callbacks(request_id, intermediate_result, is_finished)
-
-    if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(intermediate_result)}")
     if is_finished:
       if shard.model_id != 'stable-diffusion-2-1-base':
           self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], True)
+          intermediate_result = self.buffered_token_output[request_id][0]
     else:
-      asyncio.create_task(self.forward_to_next_shard(shard, result, request_id, inference_state))
-
-    return np.array(intermediate_result) if len(intermediate_result) > 0 else None
+      asyncio.create_task(self.forward_tensor(shard, forward, request_id, self.get_partition_index(offset = 1), inference_state))
 
+    return np.array(intermediate_result)
+  
   async def process_prompt(
     self,
     base_shard: Shard,
@@ -198,13 +188,13 @@ class StandardNode(Node):
       request_id = str(uuid.uuid4())
     shard = self.get_current_shard(base_shard)
     if DEBUG >= 2: print(f"[{request_id}] process prompt: {base_shard=} {shard=} {prompt=}")
-    if shard.start_layer != 0:
+    if not shard.is_first_layer():
       if DEBUG >= 2: print(f"[{request_id}] forwarding to next shard: {base_shard=} {shard=} {prompt=}")
-      await self.forward_to_next_shard(shard, prompt, request_id)
+      resp = await self.forward_prompt(shard, prompt, request_id, 0)
       return None
     else:
       result,inference_state = await self.inference_engine.infer_prompt(request_id, shard, prompt, inference_state)
-      ret = await self.process_result(shard, result, request_id, inference_state) 
+      ret = await self.process_inference_result(shard, result, request_id, inference_state) 
       return result
 
   async def process_tensor(
@@ -265,47 +255,58 @@ class StandardNode(Node):
     if DEBUG >= 1: print(f"[{request_id}] process_tensor: {tensor.size=} {tensor.shape=}")
     try:
       result, inference_state = await self.inference_engine.infer_tensor(request_id, shard, tensor, inference_state)
-      ret = await self.process_result(shard, result, request_id, inference_state) 
+      ret = await self.process_inference_result(shard, result, request_id, inference_state) 
       return ret
     except Exception as e:
       print(f"Error processing tensor for shard {shard}: {e}")
       traceback.print_exc()
       return None
 
-  async def forward_to_next_shard(
+  async def forward_prompt(
     self,
     base_shard: Shard,
-    tensor_or_prompt: Union[np.ndarray, str],
+    prompt: str,
     request_id: str,
+    target_index: int,
+  ) -> None:
+    if DEBUG >= 1: print(f"target partition index: {target_index}")
+    target_id = self.partitioning_strategy.partition(self.topology)[target_index].node_id
+    next_shard = self.get_current_shard(base_shard, target_index)
+    if DEBUG >= 2: print(f"Computed target from: {base_shard} {target_index}, {self.topology}. next shard: {next_shard}")
+    if target_id == self.id:
+      await self.process_prompt(next_shard, prompt, request_id)
+    else:
+      target_peer = next((p for p in self.peers if p.id() == target_id), None)
+      if not target_peer:
+        raise ValueError(f"Peer for {target_index} not found")
+      if DEBUG >= 1: print(f"Sending prompt to {target_peer.id()}: {prompt}")
+      await target_peer.send_prompt(next_shard, prompt, request_id=request_id)
+  
+  async def forward_tensor(
+    self,
+    base_shard: Shard,
+    tensor: np.ndarray,
+    request_id: str,
+    target_index: int,
     inference_state: Optional[dict] = None,
   ) -> None:
-    if not self.partitioning_strategy:
-      if DEBUG >= 1: print("No partitioning strategy found. Skipping forward.")
-      return
-
-    next_partition_index = self.get_partition_index(offset = 1)
-    if DEBUG >= 1: print(f"Next partition index: {next_partition_index}")
-    if next_partition_index is not None:
-      target_id = self.partitioning_strategy.partition(self.topology)[next_partition_index].node_id
-      next_shard = self.get_current_shard(base_shard, next_partition_index)
-      if DEBUG >= 2: print(f"Computed next from: {base_shard} {next_partition_index}, {self.topology}. Next shard: {next_shard}")
-      is_tensor = isinstance(tensor_or_prompt, np.ndarray)
-      if target_id == self.id:
-        if is_tensor:
-          await self.process_tensor(next_shard, tensor_or_prompt, request_id, inference_state)
-        else:
-          await self.process_prompt(next_shard, tensor_or_prompt, request_id, inference_state)
-      else:
-        target_peer = next((p for p in self.peers if p.id() == target_id), None)
-        if not target_peer:
-          raise ValueError(f"Peer for {next_partition_index} not found")
-        if is_tensor:
-          if DEBUG >= 1: print(f"Sending tensor to {target_peer.id()}: {tensor_or_prompt}")
-          await target_peer.send_tensor(next_shard, tensor_or_prompt, inference_state, request_id=request_id)
-        else:
-          await target_peer.send_prompt(next_shard, tensor_or_prompt, request_id=request_id)
+    if DEBUG >= 1: print(f"target partition index: {target_index}")
+    target_id = self.partitioning_strategy.partition(self.topology)[target_index].node_id
+    next_shard = self.get_current_shard(base_shard, target_index)
+    if DEBUG >= 2: print(f"Computed target from: {base_shard} {target_index}, {self.topology}. target shard: {next_shard}")
+    if target_id == self.id:
+      await self.process_tensor(next_shard, tensor, request_id, inference_state)
+    else:
+      target_peer = next((p for p in self.peers if p.id() == target_id), None)
+      if not target_peer:
+        raise ValueError(f"Peer for {target_index} not found")
+      if DEBUG >= 1: print(f"Sending tensor to {target_peer.id()}: {tensor}")
+      await target_peer.send_tensor(next_shard, tensor, request_id=request_id, inference_state=inference_state)
 
   def get_partition_index(self, offset: int = 0):
+    if not self.partitioning_strategy:
+      if DEBUG >= 1: print("No partitioning strategy found. Skipping forward.")
+      return None
     partitions = self.partitioning_strategy.partition(self.topology)
     current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None)
     if current_partition_index is None:
@@ -371,6 +372,7 @@ class StandardNode(Node):
     return len(peers_added) > 0 or len(peers_removed) > 0 or len(peers_updated) > 0
 
   async def select_best_inference_engine(self):
+    if self.inference_engine.__class__.__name__ == 'DummyInferenceEngine': return
     supported_engines = self.get_supported_inference_engines()
     await self.broadcast_supported_engines(supported_engines)
     if len(self.get_topology_inference_engines()):

+ 40 - 51
exo/tinychat/index.css

@@ -1,31 +1,11 @@
 /* define colors */
 :root {
-  --primary-color: #a52e4d;
-  --primary-color-transparent: #a52e4d66;
-  --secondary-color: #228039;
-  --secondary-color-transparent: #22803966;
-
+  --primary-color: #fff;
+  --secondary-color: #2a2a2a;
+  --secondary-color-transparent: #ffffff66;
+  --primary-bg-color: #1a1a1a;
+  --foreground-color: #f0f0f0;
   --red-color: #a52e4d;
-  --green-color: #228039;
-  --silver-color: #88808e;
-}
-@media(prefers-color-scheme: light) {
-  :root {
-    --primary-bg-color: #f0f0f0;
-    --secondary-bg-color: #eeeeee;
-    --tertiary-bg-color: #dddddd;
-    --foreground-color: #111111;
-    --accent-color: #000000;
-  }
-}
-@media(prefers-color-scheme: dark) {
-  :root {
-    --primary-bg-color: #111111;
-    --secondary-bg-color: #131313;
-    --tertiary-bg-color: #232323;
-    --foreground-color: #f0f0f0;
-    --accent-color: #aaaaaa;
-  }
 }
 
 main {
@@ -81,7 +61,11 @@ main {
   top: 0;
   position: absolute;
 
-  background: linear-gradient(180deg, var(--primary-bg-color) 0%, transparent 100%);
+  background: linear-gradient(
+    180deg,
+    var(--primary-bg-color) 0%,
+    transparent 100%
+  );
 }
 .histories-end {
   height: 3rem;
@@ -91,7 +75,11 @@ main {
   bottom: 0;
   position: absolute;
 
-  background: linear-gradient(0deg, var(--primary-bg-color) 0%, transparent 100%);
+  background: linear-gradient(
+    0deg,
+    var(--primary-bg-color) 0%,
+    transparent 100%
+  );
 }
 
 .history {
@@ -99,7 +87,7 @@ main {
   width: 100%;
   max-width: 40rem;
 
-  background-color: var(--tertiary-bg-color);
+  background-color: var(--secondary-color);
   border-radius: 10px;
   border-left: 2px solid var(--primary-color);
 
@@ -109,7 +97,7 @@ main {
   opacity: var(--opacity, 1);
 }
 .history:hover {
-  background-color: var(--secondary-bg-color);
+  background-color: var(--secondary-color);
 }
 
 .history-delete-button {
@@ -120,14 +108,14 @@ main {
   margin: 0;
   outline: none;
   border: none;
-  background-color: var(--secondary-bg-color);
+  background-color: var(--secondary-color);
   color: var(--foreground-color);
   border-radius: 0 0 0 10px;
   cursor: pointer;
   transition: 0.2s;
 }
 .history-delete-button:hover {
-  background-color: var(--tertiary-bg-color);
+  background-color: var(--secondary-color);
   padding: 0.75rem;
 }
 
@@ -135,6 +123,7 @@ main {
   overflow-y: auto;
   height: 100%;
   width: 100%;
+  max-width: 1200px;
 
   display: flex;
   flex-direction: column;
@@ -145,24 +134,19 @@ main {
 }
 
 .message {
-  width: 96%;
-  max-width: 80rem;
-
-  display: grid;
-
-  background-color: var(--secondary-bg-color);
+  max-width: 75%;
   padding: 0.5rem 1rem;
-  border-radius: 10px;
+  border-radius: 20px;
 }
 .message-role-assistant {
-  border-bottom: 2px solid var(--primary-color);
-  border-left: 2px solid var(--primary-color);
-  box-shadow: -10px 10px 20px 2px var(--primary-color-transparent);
+  background-color: var(--secondary-color);
+  margin-right: auto;
+  color: #fff;
 }
 .message-role-user {
-  border-bottom: 2px solid var(--secondary-color);
-  border-right: 2px solid var(--secondary-color);
-  box-shadow: 10px 10px 20px 2px var(--secondary-color-transparent);
+  margin-left: auto;
+  background-color: var(--primary-color);
+  color: #000;
 }
 .download-progress {
   margin-bottom: 12em;
@@ -275,14 +259,14 @@ main {
   margin: 0;
   outline: none;
   border: none;
-  background-color: var(--secondary-bg-color);
+  background-color: var(--secondary-color);
   color: var(--foreground-color);
   border-radius: 0 0 0 10px;
   cursor: pointer;
   transition: 0.2s;
 }
 .clipboard-button:hover {
-  background-color: var(--tertiary-bg-color);
+  background-color: var(--secondary-color);
   padding: 0.75rem;
 }
 
@@ -291,9 +275,14 @@ main {
   bottom: 0;
 
   /* linear gradient from background-color to transparent on the top */
-  background: linear-gradient(0deg, var(--primary-bg-color) 55%, transparent 100%);
+  background: linear-gradient(
+    0deg,
+    var(--primary-bg-color) 55%,
+    transparent 100%
+  );
 
   width: 100%;
+  max-width: 1200px;
   display: flex;
   flex-direction: column;
   justify-content: center;
@@ -340,7 +329,7 @@ main {
   min-height: 3rem;
   max-height: 8rem;
 
-  background-color: var(--tertiary-bg-color);
+  background-color: var(--secondary-color);
   color: var(--foreground-color);
   border-radius: 10px;
   border: none;
@@ -352,8 +341,8 @@ main {
   height: 3rem;
   width: 4rem;
 
-  background-color: var(--secondary-color);
-  color: var(--foreground-color);
+  background-color: var(--primary-color);
+  color: var(--secondary-color);
   border-radius: 10px;
   padding: 0.5rem;
   cursor: pointer;
@@ -362,7 +351,7 @@ main {
   background-color: var(--secondary-color-transparent);
 }
 .input-button:disabled {
-  background-color: var(--secondary-bg-color);
+  background-color: var(--secondary-color);
   cursor: not-allowed;
 }
 

+ 3 - 0
exo/topology/device_capabilities.py

@@ -96,6 +96,9 @@ CHIP_FLOPS = {
   "NVIDIA TITAN RTX": DeviceFlops(fp32=16.31*TFLOPS, fp16=32.62*TFLOPS, int8=65.24*TFLOPS),
   # GTX 10 series
   "NVIDIA GEFORCE GTX 1050 TI": DeviceFlops(fp32=2.0*TFLOPS, fp16=4.0*TFLOPS, int8=8.0*TFLOPS),
+  "NVIDIA GEFORCE GTX 1070": DeviceFlops(fp32=6.463*TFLOPS, fp16=0.101*TFLOPS, int8=25.852*TFLOPS),
+  "NVIDIA GEFORCE GTX 1080": DeviceFlops(fp32=8.873*TFLOPS, fp16=0.138*TFLOPS, int8=35.492*TFLOPS),
+  "NVIDIA GEFORCE GTX 1080 TI": DeviceFlops(fp32=11.34*TFLOPS, fp16=0.177*TFLOPS, int8=45.36*TFLOPS),
   # GTX 16 series
   "NVIDIA GeForce GTX 1660 TI": DeviceFlops(fp32=4.8*TFLOPS, fp16=9.6*TFLOPS, int8=19.2*TFLOPS),
   # QUADRO RTX Ampere series

+ 1065 - 0
extra/dashboard/dashboard.py

@@ -0,0 +1,1065 @@
+import os
+import json
+import logging
+import asyncio
+import aiohttp
+import pandas as pd
+import plotly.express as px
+from typing import List, Dict, Optional
+from pathlib import Path
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+import time
+import simpleaudio as sa
+from datetime import datetime
+
+class AsyncCircleCIClient:
+    def __init__(self, token: str, project_slug: str):
+        self.token = token
+        self.project_slug = project_slug
+        self.base_url = "https://circleci.com/api/v2"
+        self.headers = {
+            "Circle-Token": token,
+            "Accept": "application/json"
+        }
+        self.logger = logging.getLogger("CircleCI")
+
+    async def get_json(self, session: aiohttp.ClientSession, url: str, params: Dict = None) -> Dict:
+        async with session.get(url, params=params) as response:
+            response.raise_for_status()
+            return await response.json()
+
+    async def get_recent_pipelines(
+        self,
+        session: aiohttp.ClientSession,
+        org_slug: str = None,
+        page_token: str = None,
+        limit: int = None,
+        branch: str = None
+    ):
+        """
+        Get recent pipelines for a project with pagination support
+
+        Args:
+            session: aiohttp client session
+            org_slug: Organization slug
+            page_token: Token for pagination
+            limit: Maximum number of pipelines to return
+            branch: Specific branch to fetch pipelines from
+        """
+
+        params = {
+            "branch": branch,
+            "page-token": page_token
+        }
+
+        # Remove None values
+        params = {k: v for k, v in params.items() if v is not None}
+
+        url = f"{self.base_url}/project/{self.project_slug}/pipeline"
+        data = await self.get_json(session, url, params)
+        pipelines = data["items"]
+
+        next_page_token = data.get("next_page_token")
+
+        # If there are more pages and we haven't hit the limit, recursively get them
+        if next_page_token and (limit is None or len(pipelines) < limit):
+            next_pipelines = await self.get_recent_pipelines(
+                session,
+                org_slug,
+                page_token=next_page_token,
+                limit=limit,
+                branch=branch
+            )
+            pipelines.extend(next_pipelines)
+
+        return pipelines
+
+    async def get_workflow_jobs(self, session: aiohttp.ClientSession, pipeline_id: str) -> List[Dict]:
+        self.logger.debug(f"Fetching workflows for pipeline {pipeline_id}")
+        url = f"{self.base_url}/pipeline/{pipeline_id}/workflow"
+        workflows_data = await self.get_json(session, url)
+        workflows = workflows_data["items"]
+
+        # Fetch all jobs for all workflows in parallel
+        jobs_tasks = []
+        for workflow in workflows:
+            url = f"{self.base_url}/workflow/{workflow['id']}/job"
+            jobs_tasks.append(self.get_json(session, url))
+
+        jobs_responses = await asyncio.gather(*jobs_tasks, return_exceptions=True)
+
+        all_jobs = []
+        for jobs_data in jobs_responses:
+            if isinstance(jobs_data, Exception):
+                continue
+            all_jobs.extend(jobs_data["items"])
+
+        return all_jobs
+
+    async def get_artifacts(self, session: aiohttp.ClientSession, job_number: str) -> List[Dict]:
+        url = f"{self.base_url}/project/{self.project_slug}/{job_number}/artifacts"
+        data = await self.get_json(session, url)
+        return data["items"]
+
+class PackageSizeTracker:
+    def __init__(self, token: str, project_slug: str, debug: bool = False):
+        self.setup_logging(debug)
+        self.client = AsyncCircleCIClient(token, project_slug)
+        self.logger = logging.getLogger("PackageSizeTracker")
+        self.last_data_hash = None
+        self.notification_sound_path = Path(__file__).parent / "notification.wav"
+        self.debug = debug
+
+        # Sound file paths - replace these with your actual sound files
+        sounds_dir = Path(__file__).parent / "sounds"
+        self.sounds = {
+            'lines_up': sounds_dir / "lines_increased.wav",
+            'lines_down': sounds_dir / "lines_decreased.wav",
+            'tokens_up': sounds_dir / "tokens_increased.wav",
+            'tokens_down': sounds_dir / "tokens_decreased.wav",
+            'size_up': sounds_dir / "size_increased.wav",
+            'size_down': sounds_dir / "size_decreased.wav"
+        }
+
+    def setup_logging(self, debug: bool):
+        level = logging.DEBUG if debug else logging.INFO
+        logging.basicConfig(
+            level=level,
+            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+            datefmt='%H:%M:%S'
+        )
+
+    def extract_commit_info(self, pipeline: Dict) -> Optional[Dict]:
+        try:
+            # Extract from github_app first (preferred)
+            if 'trigger_parameters' in pipeline and 'github_app' in pipeline['trigger_parameters']:
+                github_app = pipeline['trigger_parameters']['github_app']
+                return {
+                    'commit_hash': github_app.get('checkout_sha'),
+                    'web_url': f"{github_app.get('repo_url')}/commit/{github_app.get('checkout_sha')}",
+                    'branch': github_app.get('branch', 'unknown'),
+                    'author': {
+                        'name': github_app.get('commit_author_name'),
+                        'email': github_app.get('commit_author_email'),
+                        'username': github_app.get('user_username')
+                    },
+                    'message': github_app.get('commit_message')
+                }
+
+            # Fallback to git parameters
+            if 'trigger_parameters' in pipeline and 'git' in pipeline['trigger_parameters']:
+                git = pipeline['trigger_parameters']['git']
+                return {
+                    'commit_hash': git.get('checkout_sha'),
+                    'web_url': f"{git.get('repo_url')}/commit/{git.get('checkout_sha')}",
+                    'branch': git.get('branch', 'unknown'),
+                    'author': {
+                        'name': git.get('commit_author_name'),
+                        'email': git.get('commit_author_email'),
+                        'username': git.get('author_login')
+                    },
+                    'message': git.get('commit_message')
+                }
+
+            self.logger.warning(f"Could not find commit info in pipeline {pipeline['id']}")
+            return None
+
+        except Exception as e:
+            self.logger.error(f"Error extracting commit info: {str(e)}")
+            return None
+
+    async def process_pipeline(self, session: aiohttp.ClientSession, pipeline: Dict) -> Optional[Dict]:
+        try:
+            commit_info = self.extract_commit_info(pipeline)
+            if not commit_info:
+                return None
+
+            data_point = {
+                "commit_hash": commit_info['commit_hash'],
+                "commit_url": commit_info['web_url'],
+                "timestamp": pipeline.get("created_at", pipeline.get("updated_at")),
+                "pipeline_status": pipeline.get("state", "unknown"),
+                "branch": commit_info['branch'],
+                "author": commit_info['author'],
+                "commit_message": commit_info['message']
+            }
+
+            jobs = await self.client.get_workflow_jobs(session, pipeline["id"])
+
+            # Get package size data
+            size_job = next(
+                (j for j in jobs if j["name"] == "measure_pip_sizes" and j["status"] == "success"),
+                None
+            )
+
+            # Get line count data
+            linecount_job = next(
+                (j for j in jobs if j["name"] == "check_line_count" and j["status"] == "success"),
+                None
+            )
+
+            # Get benchmark data from runner job
+            benchmark_job = next(
+                (j for j in jobs if j["name"] == "runner" and j["status"] == "success"),
+                None
+            )
+
+            # Return None if no relevant jobs found
+            if not size_job and not linecount_job and not benchmark_job:
+                self.logger.debug(f"No relevant jobs found for pipeline {pipeline['id']}")
+                return None
+
+            # Process benchmark data if available
+            if benchmark_job:
+                benchmark_artifacts = await self.client.get_artifacts(session, benchmark_job["job_number"])
+                benchmark_report = next(
+                    (a for a in benchmark_artifacts if a["path"].endswith("benchmark.json")),
+                    None
+                )
+                if benchmark_report:
+                    benchmark_data = await self.client.get_json(session, benchmark_report["url"])
+                    data_point.update({
+                        "tokens_per_second": benchmark_data["tokens_per_second"],
+                        "time_to_first_token": benchmark_data.get("time_to_first_token", 0)
+                    })
+                    self.logger.info(
+                        f"Processed benchmark data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"tokens/s {benchmark_data['tokens_per_second']:.2f}"
+                    )
+
+            # Process size data if available
+            if size_job:
+                size_artifacts = await self.client.get_artifacts(session, size_job["job_number"])
+                size_report = next(
+                    (a for a in size_artifacts if a["path"].endswith("pip-sizes.json")),
+                    None
+                )
+                if size_report:
+                    size_data = await self.client.get_json(session, size_report["url"])
+                    data_point.update({
+                        "total_size_mb": size_data["total_size_mb"],
+                        "packages": size_data["packages"]
+                    })
+                    self.logger.info(
+                        f"Processed size data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"size {size_data['total_size_mb']:.2f}MB"
+                    )
+
+            # Process linecount data if available
+            if linecount_job:
+                linecount_artifacts = await self.client.get_artifacts(session, linecount_job["job_number"])
+                linecount_report = next(
+                    (a for a in linecount_artifacts if a["path"].endswith("line-count-snapshot.json")),
+                    None
+                )
+                if linecount_report:
+                    linecount_data = await self.client.get_json(session, linecount_report["url"])
+                    data_point.update({
+                        "total_lines": linecount_data["total_lines"],
+                        "total_files": linecount_data["total_files"],
+                        "files": linecount_data["files"]
+                    })
+                    self.logger.info(
+                        f"Processed line count data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"lines {linecount_data['total_lines']:,}"
+                    )
+
+            return data_point
+
+        except Exception as e:
+            self.logger.error(f"Error processing pipeline {pipeline['id']}: {str(e)}")
+            return None
+
+    async def collect_data(self) -> List[Dict]:
+        self.logger.info("Starting data collection...")
+        async with aiohttp.ClientSession(headers=self.client.headers) as session:
+            # Get pipelines from both main and circleci branches
+            main_pipelines = await self.client.get_recent_pipelines(
+                session,
+                org_slug=self.client.project_slug,
+                limit=20,
+                branch="main"
+            )
+            circleci_pipelines = await self.client.get_recent_pipelines(
+                session,
+                org_slug=self.client.project_slug,
+                limit=20,
+                branch="circleci"
+            )
+
+            pipelines = main_pipelines + circleci_pipelines
+            # Sort pipelines by created_at date
+            pipelines.sort(key=lambda x: x.get("created_at", x.get("updated_at")), reverse=True)
+
+            self.logger.info(f"Found {len(pipelines)} recent pipelines")
+
+            # Process all pipelines in parallel
+            tasks = [self.process_pipeline(session, pipeline) for pipeline in pipelines]
+            results = await asyncio.gather(*tasks)
+
+            # Filter out None results
+            data_points = [r for r in results if r is not None]
+
+        return data_points
+
+    def generate_report(self, data: List[Dict], output_dir: str = "reports") -> Optional[str]:
+        self.logger.info("Generating report...")
+        if not data:
+            self.logger.error("No data to generate report from!")
+            return None
+
+        # Get latest pipeline status based on errors
+        latest_main_pipeline = next((d for d in data if d.get('branch') == 'main'), None)
+        latest_pipeline_status = 'success' if latest_main_pipeline and not latest_main_pipeline.get('errors') else 'failure'
+
+        # Log the pipeline status
+        if latest_main_pipeline:
+            self.logger.info(
+                f"Latest main branch pipeline status: {latest_pipeline_status} "
+                f"(commit: {latest_main_pipeline['commit_hash'][:7]})"
+            )
+        else:
+            self.logger.warning("No pipeline data found for main branch")
+
+        # Convert output_dir to Path object
+        output_dir = Path(output_dir)
+
+        # Create output directory if it doesn't exist
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create separate dataframes for each metric
+        df_size = pd.DataFrame([d for d in data if 'total_size_mb' in d])
+        df_lines = pd.DataFrame([d for d in data if 'total_lines' in d])
+        df_benchmark = pd.DataFrame([d for d in data if 'tokens_per_second' in d])
+
+        # Create a single figure with subplots
+        fig = make_subplots(
+            rows=3, cols=2,
+            subplot_titles=('', 'Package Size', '', 'Line Count', '', 'Tokens per Second'),
+            vertical_spacing=0.2,
+            column_widths=[0.2, 0.8],
+            specs=[[{"type": "indicator"}, {"type": "scatter"}],
+                   [None, {"type": "scatter"}],
+                   [None, {"type": "scatter"}]]
+        )
+
+        # Add package size trace if we have data
+        if not df_size.empty:
+            df_size['timestamp'] = pd.to_datetime(df_size['timestamp'])
+            df_size = df_size.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_size['timestamp'],
+                    y=df_size['total_size_mb'],
+                    mode='lines+markers',
+                    name='Package Size',
+                    customdata=df_size[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Size: %{y:.2f}MB",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=1, col=2
+            )
+            fig.update_yaxes(title_text="Size (MB)", row=1, col=2)
+
+        # Add line count trace if we have data
+        if not df_lines.empty:
+            df_lines['timestamp'] = pd.to_datetime(df_lines['timestamp'])
+            df_lines = df_lines.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_lines['timestamp'],
+                    y=df_lines['total_lines'],
+                    mode='lines+markers',
+                    name='Line Count',
+                    customdata=df_lines[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Lines: %{y:,.0f}",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=2, col=2
+            )
+            fig.update_yaxes(title_text="Total Lines", row=2, col=2)
+
+        # Add tokens per second trace if we have data
+        if not df_benchmark.empty:
+            df_benchmark['timestamp'] = pd.to_datetime(df_benchmark['timestamp'])
+            df_benchmark = df_benchmark.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_benchmark['timestamp'],
+                    y=df_benchmark['tokens_per_second'],
+                    mode='lines+markers',
+                    name='Tokens/Second',
+                    customdata=df_benchmark[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Tokens/s: %{y:.2f}",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=3, col=2
+            )
+            fig.update_yaxes(title_text="Tokens per Second", row=3, col=2)
+
+        # Update layout
+        fig.update_layout(
+            height=800,
+            showlegend=False,
+            title_text="Package Metrics Dashboard",
+            title_x=0.5,
+            plot_bgcolor='white',
+            paper_bgcolor='white',
+            font=dict(size=12),
+            hovermode='x unified'
+        )
+
+        # Update the dashboard HTML with date range picker
+        dashboard_html = f"""
+        <html>
+        <head>
+            <title>Package Metrics Dashboard</title>
+            <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/daterangepicker/daterangepicker.css" />
+            <style>
+                body {{
+                    background-color: #f5f6fa;
+                    margin: 0;
+                    padding: 20px;
+                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                }}
+
+                .date-picker-container {{
+                    background: white;
+                    padding: 15px;
+                    border-radius: 12px;
+                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+                    margin: 20px auto;
+                    width: fit-content;
+                }}
+
+                #daterange {{
+                    padding: 8px 12px;
+                    border: 1px solid #ddd;
+                    border-radius: 8px;
+                    font-size: 14px;
+                    width: 300px;
+                    cursor: pointer;
+                }}
+
+                .quick-ranges {{
+                    margin-top: 10px;
+                    display: flex;
+                    gap: 8px;
+                    justify-content: center;
+                }}
+
+                .quick-ranges button {{
+                    padding: 8px 16px;
+                    border: 1px solid #e1e4e8;
+                    border-radius: 8px;
+                    background: white;
+                    cursor: pointer;
+                    font-size: 13px;
+                    transition: all 0.2s ease;
+                }}
+
+                .quick-ranges button:hover {{
+                    background: #f0f0f0;
+                    transform: translateY(-1px);
+                }}
+
+                .dashboard-grid {{
+                    display: grid;
+                    grid-template-columns: 300px 1fr;
+                    gap: 20px;
+                    margin-top: 20px;
+                }}
+
+                .chart-container {{
+                    background: white;
+                    border-radius: 12px;
+                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+                    padding: 20px;
+                    height: 350px;
+                }}
+
+                .chart-row {{
+                    display: grid;
+                    grid-template-columns: repeat(2, 1fr);
+                    gap: 20px;
+                }}
+
+                .chart-row-full {{
+                    grid-column: 2 / -1;
+                }}
+
+                .chart-box {{
+                    background: white;
+                    border-radius: 12px;
+                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+                    padding: 20px;
+                    display: flex;
+                    flex-direction: column;
+                }}
+
+                .chart-title {{
+                    font-size: 16px;
+                    font-weight: 600;
+                    color: #2c3e50;
+                    margin-bottom: 15px;
+                    padding-bottom: 10px;
+                    border-bottom: 1px solid #eee;
+                }}
+
+                .status-container {{
+                    background: white;
+                    border-radius: 12px;
+                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+                    padding: 20px;
+                    height: 350px;
+                    display: flex;
+                    flex-direction: column;
+                    align-items: center;
+                    justify-content: center;
+                }}
+
+                .traffic-light {{
+                    width: 150px;
+                    height: 150px;
+                    border-radius: 50%;
+                    margin: 20px;
+                    box-shadow: 0 0 20px rgba(0,0,0,0.2);
+                    position: relative;
+                }}
+
+                .traffic-light.success {{
+                    background: #2ecc71;  /* Bright green */
+                    border: 8px solid #27ae60;  /* Darker green border */
+                }}
+
+                .traffic-light.failure {{
+                    background: #e74c3c;  /* Bright red */
+                    border: 8px solid #c0392b;  /* Darker red border */
+                }}
+
+                .status-text {{
+                    font-size: 24px;
+                    font-weight: bold;
+                    margin-top: 20px;
+                    color: #2c3e50;
+                }}
+
+                /* Override Plotly's default margins */
+                .js-plotly-plot .plotly {{
+                    margin: 0 !important;
+                }}
+            </style>
+        </head>
+        <body>
+            <div class="date-picker-container">
+                <input type="text" id="daterange" />
+                <div class="quick-ranges">
+                    <button onclick="setQuickRange('1h')">Last Hour</button>
+                    <button onclick="setQuickRange('6h')">Last 6 Hours</button>
+                    <button onclick="setQuickRange('1d')">Last 24 Hours</button>
+                    <button onclick="setQuickRange('7d')">Last 7 Days</button>
+                    <button onclick="setQuickRange('30d')">Last 30 Days</button>
+                    <button onclick="setQuickRange('all')">All Time</button>
+                </div>
+            </div>
+
+            <div class="dashboard-grid">
+                <div class="status-container">
+                    <div class="chart-title">Pipeline Status</div>
+                    <div class="traffic-light {'success' if latest_pipeline_status == 'success' else 'failure'}"></div>
+                    <div class="status-text">
+                        {'✓ Pipeline Passing' if latest_pipeline_status == 'success' else '✗ Pipeline Failing'}
+                    </div>
+                </div>
+                <div class="chart-row">
+                    <div class="chart-box">
+                        <div class="chart-title">Package Size</div>
+                        <div id="size-chart"></div>
+                    </div>
+                    <div class="chart-box">
+                        <div class="chart-title">Line Count</div>
+                        <div id="lines-chart"></div>
+                    </div>
+                </div>
+                <div class="chart-row chart-row-full">
+                    <div class="chart-box">
+                        <div class="chart-title">Tokens per Second</div>
+                        <div id="tokens-chart"></div>
+                    </div>
+                </div>
+            </div>
+
+            <script type="text/javascript" src="https://cdn.jsdelivr.net/jquery/latest/jquery.min.js"></script>
+            <script type="text/javascript" src="https://cdn.jsdelivr.net/momentjs/latest/moment.min.js"></script>
+            <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/daterangepicker/daterangepicker.min.js"></script>
+            <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+            <script>
+                let globalMinDate = null;
+                let globalMaxDate = null;
+
+                // Split the original figure into separate charts
+                const originalData = {fig.to_json()};
+
+                function initializeCharts() {{
+                    // Create the size trend chart
+                    const sizeTrace = originalData.data.find(trace => trace.name === 'Package Size');
+                    if (sizeTrace) {{
+                        Plotly.newPlot('size-chart',
+                            [sizeTrace],
+                            {{
+                                showlegend: false,
+                                height: 280,
+                                margin: {{ t: 10, b: 40, l: 50, r: 20 }},
+                                yaxis: {{ title: 'Size (MB)' }},
+                                xaxis: {{
+                                    type: 'date',
+                                    title: null,
+                                    range: [sizeTrace.x[0], sizeTrace.x[sizeTrace.x.length - 1]]
+                                }}
+                            }}
+                        );
+                    }}
+
+                    // Create the line count chart
+                    const lineTrace = originalData.data.find(trace => trace.name === 'Line Count');
+                    if (lineTrace) {{
+                        Plotly.newPlot('lines-chart',
+                            [lineTrace],
+                            {{
+                                showlegend: false,
+                                height: 280,
+                                margin: {{ t: 10, b: 40, l: 50, r: 20 }},
+                                yaxis: {{ title: 'Total Lines' }},
+                                xaxis: {{
+                                    type: 'date',
+                                    title: null,
+                                    range: [lineTrace.x[0], lineTrace.x[lineTrace.x.length - 1]]
+                                }}
+                            }}
+                        );
+                    }}
+
+                    // Create the tokens per second chart
+                    const tokensTrace = originalData.data.find(trace => trace.name === 'Tokens/Second');
+                    if (tokensTrace) {{
+                        Plotly.newPlot('tokens-chart',
+                            [tokensTrace],
+                            {{
+                                showlegend: false,
+                                height: 280,
+                                margin: {{ t: 10, b: 40, l: 50, r: 20 }},
+                                yaxis: {{ title: 'Tokens/Second' }},
+                                xaxis: {{
+                                    type: 'date',
+                                    title: null,
+                                    range: [tokensTrace.x[0], tokensTrace.x[tokensTrace.x.length - 1]]
+                                }}
+                            }}
+                        );
+                    }}
+
+                    // Add debug logs to check axis names
+                    console.log('Size Chart Layout:', document.getElementById('size-chart').layout);
+                    console.log('Lines Chart Layout:', document.getElementById('lines-chart').layout);
+                    console.log('Tokens Chart Layout:', document.getElementById('tokens-chart').layout);
+                }}
+
+                function setQuickRange(range) {{
+                    let start;
+                    let end = moment();
+
+                    switch(range) {{
+                        case '1h':
+                            start = moment().subtract(1, 'hours');
+                            break;
+                        case '6h':
+                            start = moment().subtract(6, 'hours');
+                            break;
+                        case '1d':
+                            start = moment().subtract(1, 'days');
+                            break;
+                        case '7d':
+                            start = moment().subtract(7, 'days');
+                            break;
+                        case '30d':
+                            start = moment().subtract(30, 'days');
+                            break;
+                        case 'all':
+                            start = moment(globalMinDate);
+                            end = moment(globalMaxDate);
+                            break;
+                    }}
+
+                    $('#daterange').data('daterangepicker').setStartDate(start);
+                    $('#daterange').data('daterangepicker').setEndDate(end);
+                    updatePlotRange(start.toISOString(), end.toISOString());
+                }}
+
+                function updatePlotRange(startDate, endDate) {{
+                    console.log('Updating range:', startDate, endDate);
+
+                    // Get the actual x-axis names from the chart layouts
+                    const sizeChartLayout = document.getElementById('size-chart').layout;
+                    const sizeXAxisName = Object.keys(sizeChartLayout).find(key => key.startsWith('xaxis'));
+
+                    const linesChartLayout = document.getElementById('lines-chart').layout;
+                    const linesXAxisName = Object.keys(linesChartLayout).find(key => key.startsWith('xaxis'));
+
+                    const tokensChartLayout = document.getElementById('tokens-chart').layout;
+                    const tokensXAxisName = Object.keys(tokensChartLayout).find(key => key.startsWith('xaxis'));
+
+                    // Update the ranges
+                    const sizeUpdateLayout = {{}};
+                    sizeUpdateLayout[`${{sizeXAxisName}}.range`] = [startDate, endDate];
+
+                    const linesUpdateLayout = {{}};
+                    linesUpdateLayout[`${{linesXAxisName}}.range`] = [startDate, endDate];
+
+                    const tokensUpdateLayout = {{}};
+                    tokensUpdateLayout[`${{tokensXAxisName}}.range`] = [startDate, endDate];
+
+                    // Update all three charts
+                    Plotly.relayout('size-chart', sizeUpdateLayout)
+                        .catch(err => console.error('Error updating size chart:', err));
+
+                    Plotly.relayout('lines-chart', linesUpdateLayout)
+                        .catch(err => console.error('Error updating lines chart:', err));
+
+                    Plotly.relayout('tokens-chart', tokensUpdateLayout)
+                        .catch(err => console.error('Error updating tokens chart:', err));
+                }}
+
+                function findDateRange(data) {{
+                    let minDate = null;
+                    let maxDate = null;
+
+                    data.forEach(trace => {{
+                        if (trace.x && trace.x.length > 0) {{
+                            const dates = trace.x.map(d => new Date(d));
+                            const traceMin = new Date(Math.min(...dates));
+                            const traceMax = new Date(Math.max(...dates));
+
+                            if (!minDate || traceMin < minDate) minDate = traceMin;
+                            if (!maxDate || traceMax > maxDate) maxDate = traceMax;
+                        }}
+                    }});
+
+                    return {{ minDate, maxDate }};
+                }}
+
+                // Initialize everything when document is ready
+                $(document).ready(function() {{
+                    // Initialize charts
+                    initializeCharts();
+
+                    // Find date range from data
+                    const {{ minDate, maxDate }} = findDateRange(originalData.data);
+                    globalMinDate = minDate;
+                    globalMaxDate = maxDate;
+
+                    // Initialize daterangepicker
+                    $('#daterange').daterangepicker({{
+                        startDate: minDate,
+                        endDate: maxDate,
+                        minDate: minDate,
+                        maxDate: maxDate,
+                        timePicker: true,
+                        timePicker24Hour: true,
+                        timePickerIncrement: 1,
+                        opens: 'center',
+                        locale: {{
+                            format: 'YYYY-MM-DD HH:mm',
+                            applyLabel: "Apply",
+                            cancelLabel: "Cancel",
+                            customRangeLabel: "Custom Range"
+                        }},
+                        ranges: {{
+                            'Last Hour': [moment().subtract(1, 'hours'), moment()],
+                            'Last 6 Hours': [moment().subtract(6, 'hours'), moment()],
+                            'Last 24 Hours': [moment().subtract(1, 'days'), moment()],
+                            'Last 7 Days': [moment().subtract(7, 'days'), moment()],
+                            'Last 30 Days': [moment().subtract(30, 'days'), moment()],
+                            'All Time': [moment(minDate), moment(maxDate)]
+                        }}
+                    }});
+
+                    // Update plots when date range changes
+                    $('#daterange').on('apply.daterangepicker', function(ev, picker) {{
+                        console.log('Date range changed:', picker.startDate.toISOString(), picker.endDate.toISOString());
+                        updatePlotRange(picker.startDate.toISOString(), picker.endDate.toISOString());
+                    }});
+
+                    // Add click handlers for charts
+                    ['size-chart', 'lines-chart', 'tokens-chart'].forEach(chartId => {{
+                        const chart = document.getElementById(chartId);
+                        if (chart) {{
+                            chart.on('plotly_click', function(data) {{
+                                const point = data.points[0];
+                                if (point.customdata && point.customdata[1]) {{
+                                    window.open(point.customdata[1], '_blank');
+                                }}
+                            }});
+                        }}
+                    }});
+
+                    // Add debug logging for chart initialization
+                    console.log('Size Chart:', document.getElementById('size-chart'));
+                    console.log('Lines Chart:', document.getElementById('lines-chart'));
+                    console.log('Tokens Chart:', document.getElementById('tokens-chart'));
+                }});
+            </script>
+        </body>
+        </html>
+        """
+
+        # Write the dashboard
+        dashboard_path = output_dir / "dashboard.html"
+        with open(dashboard_path, "w") as f:
+            f.write(dashboard_html)
+
+        # Generate summary with available metrics
+        latest_data = {}
+
+        if not df_size.empty:
+            latest = df_size.iloc[-1]
+            previous = df_size.iloc[-2] if len(df_size) > 1 else latest
+            size_change = float(latest['total_size_mb'] - previous['total_size_mb'])
+            latest_data.update({
+                'timestamp': latest['timestamp'].isoformat(),
+                'commit_hash': latest['commit_hash'],
+                'commit_url': latest['commit_url'],
+                'total_size_mb': float(latest['total_size_mb']),
+                'size_change_mb': size_change,
+                'packages': latest.get('packages', [])
+            })
+
+        if not df_lines.empty:
+            latest = df_lines.iloc[-1]
+            previous = df_lines.iloc[-2] if len(df_lines) > 1 else latest
+            linecount_change = int(latest['total_lines'] - previous['total_lines'])
+            if not latest_data:  # Only add timestamp and commit info if not already added
+                latest_data.update({
+                    'timestamp': latest['timestamp'].isoformat(),
+                    'commit_hash': latest['commit_hash'],
+                    'commit_url': latest['commit_url'],
+                })
+            latest_data.update({
+                'total_lines': int(latest['total_lines']),
+                'linecount_change': linecount_change
+            })
+
+        if not df_benchmark.empty:
+            latest = df_benchmark.iloc[-1]
+            previous = df_benchmark.iloc[-2] if len(df_benchmark) > 1 else latest
+            tokens_change = float(latest['tokens_per_second'] - previous['tokens_per_second'])
+            if not latest_data:  # Only add timestamp and commit info if not already added
+                latest_data.update({
+                    'timestamp': latest['timestamp'].isoformat(),
+                    'commit_hash': latest['commit_hash'],
+                    'commit_url': latest['commit_url'],
+                })
+            latest_data.update({
+                'tokens_per_second': float(latest['tokens_per_second']),
+                'tokens_change': tokens_change
+            })
+
+        if latest_data:
+            with open(output_dir / 'latest_data.json', 'w') as f:
+                json.dump(latest_data, f, indent=2)
+
+            self._print_summary(latest_data)
+            self.logger.info(f"Report generated in {output_dir}")
+            return str(output_dir)
+
+        return None
+
+    def _print_summary(self, latest_data: Dict):
+        print("\n=== Package Size Summary ===")
+        print(f"Timestamp: {latest_data['timestamp']}")
+        print(f"Commit: {latest_data['commit_hash'][:7]}")
+
+        if 'total_size_mb' in latest_data:
+            print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")
+            change = latest_data['size_change_mb']
+            change_symbol = "↓" if change <= 0 else "↑"
+            print(f"Change: {change_symbol} {abs(change):.2f}MB")
+
+            if latest_data.get('packages'):
+                print("\nTop 5 Largest Packages:")
+                sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
+                for pkg in sorted_packages[:5]:
+                    print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")
+
+        if 'total_lines' in latest_data:
+            print("\nLine Count Stats:")
+            print(f"Total Lines: {latest_data['total_lines']:,}")
+            change = latest_data['linecount_change']
+            change_symbol = "↓" if change <= 0 else "↑"
+            print(f"Change: {change_symbol} {abs(change):,}")
+
+        if 'tokens_per_second' in latest_data:
+            print("\nBenchmark Stats:")
+            print(f"Tokens per Second: {latest_data['tokens_per_second']:.2f}")
+            if 'time_to_first_token' in latest_data:
+                print(f"Time to First Token: {latest_data['time_to_first_token']:.3f}s")
+
+        print("\n")
+
+    def _calculate_data_hash(self, data: List[Dict]) -> str:
+        """Calculate a hash of the data to detect changes"""
+        # hash() returns an int; wrap in str() so the value matches the declared return type
+        return str(hash(str(sorted([
+            (d.get('commit_hash'), d.get('timestamp'))
+            for d in data
+        ]))))
+
+    def _play_sound(self, sound_key: str):
+        """Play a specific notification sound"""
+        try:
+            sound_path = self.sounds.get(sound_key)
+            if sound_path and sound_path.exists():
+                wave_obj = sa.WaveObject.from_wave_file(str(sound_path))
+                wave_obj.play()
+            else:
+                self.logger.warning(f"Sound file not found: {sound_key} at {sound_path}")
+        except Exception as e:
+            self.logger.error(f"Failed to play sound {sound_key}: {e}")
+
+    def _check_metrics_changes(self, current_data: List[Dict], previous_data: List[Dict]):
+        """Check for specific metric changes and play appropriate sounds"""
+        if not previous_data:
+            return
+
+        # Get latest data points
+        current = current_data[-1]
+        previous = previous_data[-1]
+
+        # Check line count changes
+        if 'total_lines' in current and 'total_lines' in previous:
+            diff = current['total_lines'] - previous['total_lines']
+            if diff > 0:
+                self.logger.info(f"Lines of code increased by {diff:,}")
+                self._play_sound('lines_up')
+            elif diff < 0:
+                self.logger.info(f"Lines of code decreased by {abs(diff):,}")
+                self._play_sound('lines_down')
+
+        # Check tokens per second changes
+        if 'tokens_per_second' in current and 'tokens_per_second' in previous:
+            diff = current['tokens_per_second'] - previous['tokens_per_second']
+            if diff > 0:
+                self.logger.info(f"Tokens per second increased by {diff:.2f}")
+                self._play_sound('tokens_up')
+            elif diff < 0:
+                self.logger.info(f"Tokens per second decreased by {abs(diff):.2f}")
+                self._play_sound('tokens_down')
+
+        # Check package size changes
+        if 'total_size_mb' in current and 'total_size_mb' in previous:
+            diff = current['total_size_mb'] - previous['total_size_mb']
+            if diff > 0:
+                self.logger.info(f"Package size increased by {diff:.2f}MB")
+                self._play_sound('size_up')
+            elif diff < 0:
+                self.logger.info(f"Package size decreased by {abs(diff):.2f}MB")
+                self._play_sound('size_down')
+
+    async def run_dashboard(self, update_interval: int = 30):
+        """Run the dashboard with periodic updates"""
+        try:
+            # Force convert update_interval to float and log its type
+            update_interval = float(update_interval)
+            self.logger.debug(f"Update interval type: {type(update_interval)}, value: {update_interval}")
+        except ValueError as e:
+            self.logger.error(f"Failed to convert update_interval to float: {update_interval}")
+            raise
+
+        self.logger.info(f"Starting real-time dashboard with {update_interval}s updates")
+        previous_data = None
+
+        while True:
+            try:
+                start_time = time.time()
+                self.logger.debug(f"Start time type: {type(start_time)}, value: {start_time}")
+
+                # Collect new data
+                current_data = await self.collect_data()
+                if not current_data:
+                    self.logger.warning("No data collected")
+                    await asyncio.sleep(update_interval)
+                    continue
+
+                # Generate report
+                report_path = self.generate_report(current_data)
+                if report_path:
+                    self.logger.info(
+                        f"Dashboard updated at {datetime.now().strftime('%H:%M:%S')}"
+                    )
+
+                    # Check for metric changes and play appropriate sounds
+                    self._check_metrics_changes(current_data, previous_data)
+
+                # Update previous data
+                previous_data = current_data
+
+                # Calculate sleep time with explicit type conversion and logging
+                elapsed = float(time.time() - start_time)
+                self.logger.debug(f"Elapsed time type: {type(elapsed)}, value: {elapsed}")
+                sleep_time = max(0.0, float(update_interval) - elapsed)
+                self.logger.debug(f"Sleep time type: {type(sleep_time)}, value: {sleep_time}")
+
+                await asyncio.sleep(sleep_time)
+
+            except Exception as e:
+                self.logger.error(f"Error in dashboard update loop: {e}", exc_info=True)
+                if self.debug:
+                    raise
+                await asyncio.sleep(float(update_interval))
+
+async def main():
+    token = os.getenv("CIRCLECI_TOKEN")
+    project_slug = os.getenv("CIRCLECI_PROJECT_SLUG")
+    debug = os.getenv("DEBUG", "").lower() in ("true", "1", "yes")
+
+    try:
+        # Get update interval from environment or use default
+        update_interval = float(os.getenv("UPDATE_INTERVAL", "30"))
+        print(f"Update interval type: {type(update_interval)}, value: {update_interval}")  # Debug print
+    except ValueError as e:
+        print(f"Error converting UPDATE_INTERVAL to float: {os.getenv('UPDATE_INTERVAL')}")
+        update_interval = 30.0
+
+    if not token or not project_slug:
+        print("Error: Please set CIRCLECI_TOKEN and CIRCLECI_PROJECT_SLUG environment variables")
+        return
+
+    tracker = PackageSizeTracker(token, project_slug, debug)
+
+    try:
+        await tracker.run_dashboard(update_interval)
+    except KeyboardInterrupt:
+        print("\nDashboard stopped by user")
+    except Exception as e:
+        logging.error(f"Error: {str(e)}", exc_info=True)
+        if debug:
+            raise
+
+if __name__ == "__main__":
+    asyncio.run(main())
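
A minimal sketch of launching this dashboard locally, assuming the script is saved as extra/dashboard/dashboard.py (the path and the project slug below are placeholders; the environment variable names match what main() reads):

    # dependencies for the dashboard live in extra/dashboard/requirements.txt
    pip install -r extra/dashboard/requirements.txt

    export CIRCLECI_TOKEN="your-circleci-api-token"        # personal API token (placeholder)
    export CIRCLECI_PROJECT_SLUG="gh/your-org/your-repo"   # hypothetical project slug
    export UPDATE_INTERVAL=30                              # optional, seconds between refreshes
    export DEBUG=true                                      # optional, re-raise errors in the update loop
    python extra/dashboard/dashboard.py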

+ 5 - 0
extra/dashboard/requirements.txt

@@ -0,0 +1,5 @@
+plotly
+pandas
+requests
+aiohttp
+simpleaudio

+ 210 - 0
extra/line_counter.py

@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import token
+import tokenize
+from datetime import datetime, timezone
+
+TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
+
+def is_docstring(t):
+    return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
+def gen_stats(base_path="."):
+    table = []
+    exo_path = os.path.join(base_path, "exo")
+    if not os.path.exists(exo_path):
+        print(f"Warning: {exo_path} directory not found")
+        return table
+
+    for path, _, files in os.walk(exo_path):
+        for name in files:
+            if not name.endswith(".py"):
+                continue
+
+            filepath = os.path.join(path, name)
+            relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
+
+            try:
+                with tokenize.open(filepath) as file_:
+                    tokens = [t for t in tokenize.generate_tokens(file_.readline)
+                            if t.type in TOKEN_WHITELIST and not is_docstring(t)]
+                    token_count = len(tokens)
+                    line_count = len(set([x for t in tokens
+                                        for x in range(t.start[0], t.end[0]+1)]))
+                    if line_count > 0:
+                        table.append([relfilepath, line_count, token_count/line_count])
+            except Exception as e:
+                print(f"Error processing {filepath}: {e}")
+                continue
+
+    return table
+
+def gen_diff(table_old, table_new):
+    table = []
+    files_new = set([x[0] for x in table_new])
+    files_old = set([x[0] for x in table_old])
+
+    added = files_new - files_old
+    deleted = files_old - files_new
+    unchanged = files_new & files_old
+
+    for file in added:
+        file_stat = [stats for stats in table_new if file in stats][0]
+        table.append([file_stat[0], file_stat[1], file_stat[1], file_stat[2], file_stat[2]])
+
+    for file in deleted:
+        file_stat = [stats for stats in table_old if file in stats][0]
+        table.append([file_stat[0], 0, -file_stat[1], 0, -file_stat[2]])
+
+    for file in unchanged:
+        file_stat_old = [stats for stats in table_old if file in stats][0]
+        file_stat_new = [stats for stats in table_new if file in stats][0]
+        if file_stat_new[1] != file_stat_old[1] or file_stat_new[2] != file_stat_old[2]:
+            table.append([
+                file_stat_new[0],
+                file_stat_new[1],
+                file_stat_new[1] - file_stat_old[1],
+                file_stat_new[2],
+                file_stat_new[2] - file_stat_old[2]
+            ])
+
+    return table
+
+def create_json_report(table, is_diff=False):
+    timestamp = datetime.now(timezone.utc).isoformat()
+    commit_sha = os.environ.get('CIRCLE_SHA1', 'unknown')
+    branch = os.environ.get('CIRCLE_BRANCH', 'unknown')
+    pr_number = os.environ.get('CIRCLE_PR_NUMBER', '')
+
+    if is_diff:
+        files = [{
+            'name': row[0],
+            'current_lines': row[1],
+            'line_diff': row[2],
+            'current_tokens_per_line': row[3],
+            'tokens_per_line_diff': row[4]
+        } for row in table]
+
+        report = {
+            'type': 'diff',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'pr_number': pr_number,
+            'files': files,
+            'total_line_changes': sum(row[2] for row in table),
+            'total_files_changed': len(files)
+        }
+    else:
+        files = [{
+            'name': row[0],
+            'lines': row[1],
+            'tokens_per_line': row[2]
+        } for row in table]
+
+        report = {
+            'type': 'snapshot',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'files': files,
+            'total_lines': sum(row[1] for row in table),
+            'total_files': len(files)
+        }
+
+    return report
+
+def display_diff(diff):
+    return "+" + str(diff) if diff > 0 else str(diff)
+
+def format_table(rows, headers, floatfmt):
+    if not rows:
+        return ""
+
+    # Add headers as first row
+    all_rows = [headers] + rows
+
+    # Calculate column widths
+    col_widths = []
+    for col in range(len(headers)):
+        col_width = max(len(str(row[col])) for row in all_rows)
+        col_widths.append(col_width)
+
+    # Format rows
+    output = []
+    for row_idx, row in enumerate(all_rows):
+        formatted_cols = []
+        for col_idx, (value, width) in enumerate(zip(row, col_widths)):
+            if isinstance(value, float):
+                # Handle float formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+.1f}"
+                else:
+                    value = f"{value:.1f}"
+            elif isinstance(value, int) and col_idx > 0:  # Skip filename column
+                # Handle integer formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+d}"
+                else:
+                    value = f"{value:d}"
+            formatted_cols.append(str(value).ljust(width))
+        output.append("  ".join(formatted_cols))
+
+        # Add separator line after headers
+        if row_idx == 0:
+            separator = []
+            for width in col_widths:
+                separator.append("-" * width)
+            output.append("  ".join(separator))
+
+    return "\n".join(output)
+
+if __name__ == "__main__":
+    if len(sys.argv) == 3:
+        # Comparing two directories
+        headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
+        table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
+
+        if table:
+            # Print table output
+            print("### Code Changes in 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
+                headers,
+                (".1f", "d", "+d", ".1f", "+.1f")
+            ))
+            total_changes = sum(row[2] for row in table)
+            print(f"\nTotal line changes: {display_diff(total_changes)}")
+            print("```")
+
+            # Generate JSON report
+            report = create_json_report(table, is_diff=True)
+            with open('line-count-diff.json', 'w') as f:
+                json.dump(report, f, indent=2)
+    else:
+        # Single directory analysis
+        headers = ["File", "Lines", "Tokens/Line"]
+        table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
+
+        if table:
+            # Print table output
+            print("### Code Statistics for 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: x[1], reverse=True),
+                headers,
+                (".1f", "d", ".1f")
+            ))
+            total_lines = sum(row[1] for row in table)
+            print(f"\nTotal lines: {total_lines}")
+            print("```")
+
+            # Generate JSON report
+            report = create_json_report(table, is_diff=False)
+            with open('line-count-snapshot.json', 'w') as f:
+                json.dump(report, f, indent=2)
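
A short usage sketch for the line counter, based on the argument handling in the __main__ block above (the checkout paths are placeholders):

    # snapshot of a single tree (defaults to "."); writes line-count-snapshot.json
    python extra/line_counter.py .

    # diff between two checkouts, e.g. base branch vs. PR branch; writes line-count-diff.json
    python extra/line_counter.py /path/to/base-checkout /path/to/pr-checkout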

+ 113 - 0
extra/pipsize.py

@@ -0,0 +1,113 @@
+import os
+import importlib.metadata
+import importlib.util
+import json
+import sys
+
+
+def calc_container(path):
+  """Calculate total size of a directory or file."""
+  if os.path.isfile(path):
+    try:
+      return os.path.getsize(path)
+    except (OSError, FileNotFoundError):
+      return 0
+
+  total_size = 0
+  for dirpath, dirnames, filenames in os.walk(path):
+    for f in filenames:
+      fp = os.path.join(dirpath, f)
+      try:
+        total_size += os.path.getsize(fp)
+      except (OSError, FileNotFoundError):
+        continue
+  return total_size
+
+
+def get_package_location(package_name):
+  """Get the actual location of a package's files."""
+  try:
+    spec = importlib.util.find_spec(package_name)
+    if spec is None:
+      return None
+
+    if spec.submodule_search_locations:
+      # Return the first location for namespace packages
+      return spec.submodule_search_locations[0]
+    elif spec.origin:
+      # For single-file modules, return the file path itself
+      return spec.origin
+  except ImportError:
+    return None
+
+
+def get_package_sizes(min_size_mb=0.1):
+  """Get sizes of installed packages above minimum size threshold."""
+  package_sizes = []
+
+  # Get all installed distributions
+  for dist in importlib.metadata.distributions():
+    try:
+      package_name = dist.metadata["Name"]
+      location = get_package_location(package_name.replace("-", "_"))
+
+      if location and os.path.exists(location):
+        size = calc_container(location)
+        size_mb = size / (1024 * 1024)
+
+        if size_mb > min_size_mb:
+          package_sizes.append((package_name, size))
+    except Exception as e:
+      print(
+        f"Error processing {dist.metadata.get('Name', 'Unknown package')}: {e}"
+      )
+
+  return package_sizes
+
+
+def main():
+  # Get and sort package sizes
+  package_sizes = get_package_sizes()
+  package_sizes.sort(key=lambda x: x[1], reverse=True)
+
+  # Convert sizes to MB and prepare data
+  table_data = [(name, size/(1024*1024)) for name, size in package_sizes]
+  total_size = sum(size for _, size in package_sizes)/(1024*1024)
+
+  # Check if --json flag is present
+  if "--json" in sys.argv:
+    try:
+      output_file = sys.argv[sys.argv.index("--json") + 1]
+      json_data = {
+        "packages": [{
+          "name": name,
+          "size_mb": round(size, 2)
+        } for name, size in table_data],
+        "total_size_mb": round(total_size, 2)
+      }
+
+      with open(output_file, 'w') as f:
+        json.dump(json_data, f, indent=2)
+      print(f"JSON data written to {output_file}")
+      return
+    except IndexError:
+      print("Error: Please provide a filename after --json")
+      sys.exit(1)
+    except Exception as e:
+      print(f"Error writing JSON file: {e}")
+      sys.exit(1)
+
+  # Original table output code
+  max_name_width = max(len(name) for name, _ in table_data)
+  max_name_width = max(max_name_width, len("Package"))
+
+  print(f"\n{'Package':<{max_name_width}} | Size (MB)")
+  print("-" * max_name_width + "-+-" + "-" * 10)
+
+  for name, size in table_data:
+    print(f"{name:<{max_name_width}} | {size:>8.2f}")
+
+  print(f"\nTotal size: {total_size:.2f} MB\n")
+
+if __name__ == "__main__":
+  main()
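
A quick usage sketch for the package-size script above, mirroring its --json handling (the output filename is arbitrary):

    # print a per-package size table for the active environment
    python extra/pipsize.py

    # write the same data as JSON, e.g. for the dashboard or CI artifacts to consume
    python extra/pipsize.py --json package-size.json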

+ 3 - 5
scripts/build_exo.py

@@ -14,8 +14,8 @@ def run():
         "--follow-imports",
         "--standalone",
         "--output-filename=exo",
-        "--onefile",
-        "--python-flag=no_site"
+        "--python-flag=no_site",
+        "--onefile"
     ]
 
     if sys.platform == "darwin": 
@@ -24,8 +24,6 @@ def run():
             "--macos-app-mode=gui",
             "--macos-app-version=0.0.1",
             "--macos-signed-app-name=com.exolabs.exo",
-            "--macos-sign-identity=auto",
-            "--macos-sign-notarization",
             "--include-distribution-meta=mlx",
             "--include-module=mlx._reprlib_fix",
             "--include-module=mlx._os_warning",
@@ -57,4 +55,4 @@ def run():
         print(f"An error occurred: {e}")
 
 if __name__ == "__main__":
-    run()
+    run()

+ 2 - 2
scripts/compile_grpc.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 source ./install.sh
 pushd exo/networking/grpc
-python3.12 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. node_service.proto
-sed -i "s/import node_service_pb2/from . &/" node_service_pb2_grpc.py
+python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. node_service.proto
+sed -i "s/import\ node_service_pb2/from . &/" node_service_pb2_grpc.py
 popd
 

+ 3 - 3
setup.py

@@ -13,11 +13,11 @@ install_requires = [
   "Jinja2==3.1.4",
   "netifaces==0.11.0",
   "numpy==2.0.0",
-  "nuitka==2.4.10",
+  "nuitka==2.5.1",
   "nvidia-ml-py==12.560.30",
   "pillow==10.4.0",
   "prometheus-client==0.20.0",
-  "protobuf==5.27.1",
+  "protobuf==5.28.1",
   "psutil==6.0.0",
   "pydantic==2.9.2",
   "requests==2.32.3",
@@ -26,7 +26,7 @@ install_requires = [
   "tqdm==4.66.4",
   "transformers==4.46.3",
   "uuid==1.30",
-  "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad",
+  "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@3b26e51fcebfc6576f4e0f99693e6f1406d61d79",
 ]
 
 extras_require = {