
Merge pull request #495 from exo-explore/dummyintegration

dummy integration test fix
Alex Cheema · 5 months ago
parent · commit 97a8d2e573
4 changed files with 27 additions and 22 deletions
  1. .circleci/config.yml (+10 -15)
  2. exo/inference/dummy_inference_engine.py (+8 -4)
  3. exo/inference/tokenizers.py (+8 -3)
  4. exo/orchestration/standard_node.py (+1 -0)

+ 10 - 15
.circleci/config.yml

@@ -84,18 +84,22 @@ commands:
             kill $PID1 $PID2
 
             echo ""
-            if ! echo "$response_1" | grep -q "<<parameters.expected_output>>" || ! echo "$response_2" | grep -q "<<parameters.expected_output>>"; then
-              echo "Test failed: Response does not contain '<<parameters.expected_output>>'"
-              echo "Response 1: $response_1"
+            # Extract content using jq and check if it contains expected output
+            content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
+            content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
+
+            if [[ "$content1" != *"<<parameters.expected_output>>"* ]] || [[ "$content2" != *"<<parameters.expected_output>>"* ]]; then
+              echo "Test failed: Response does not match '<<parameters.expected_output>>'"
+              echo "Response 1 content: $content1"
               echo ""
-              echo "Response 2: $response_2"
+              echo "Response 2 content: $content2"
               echo "Output of first instance:"
               cat output1.log
               echo "Output of second instance:"
               cat output2.log
               exit 1
             else
-              echo "Test passed: Response from both nodes contains '<<parameters.expected_output>>'"
+              echo "Test passed: Response from both nodes matches '<<parameters.expected_output>>'"
             fi
 
 jobs:
@@ -211,18 +215,10 @@ jobs:
             pip install .
       - run_chatgpt_api_test:
           inference_engine: dummy
-          model_id: dummy-model
+          model_id: dummy
           prompt: "Dummy prompt."
           expected_output: "dummy"
 
-  test_macos_m1:
-    macos:
-      xcode: "16.0.0"
-    resource_class: m2pro.large
-    steps:
-      - checkout
-      - run: system_profiler SPHardwareDataType
-
   chatgpt_api_integration_test_tinygrad:
     macos:
       xcode: "16.0.0"
@@ -336,5 +332,4 @@ workflows:
       - chatgpt_api_integration_test_mlx
       - chatgpt_api_integration_test_tinygrad
       - chatgpt_api_integration_test_dummy
-      - test_macos_m1
       - measure_pip_sizes
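
A note on the assertion change above: instead of grepping the raw HTTP response body, the test now pulls .choices[0].message.content out of the JSON with jq and does a bash substring match against the expected output. A minimal Python sketch of the equivalent check (response_contains is a hypothetical helper for illustration, not part of this PR):

    import json

    def response_contains(raw_response: str, expected_output: str) -> bool:
      # Parse the ChatGPT-compatible JSON body and pull out the message text,
      # like `jq -r '.choices[0].message.content'` in the CI script above.
      content = json.loads(raw_response)["choices"][0]["message"]["content"]
      # Plain substring match, mirroring [[ "$content" == *"expected"* ]] in bash.
      return expected_output in content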

+ 8 - 4
exo/inference/dummy_inference_engine.py

@@ -3,9 +3,10 @@ import numpy as np
 import random
 import string
 import asyncio
-import json
 from exo.inference.inference_engine import InferenceEngine
 from exo.inference.shard import Shard
+from exo.inference.tokenizers import DummyTokenizer
+
 def random_string(length: int):
   return ''.join([random.choice(string.ascii_lowercase) for i in range(length)])
   
@@ -18,15 +19,18 @@ class DummyInferenceEngine(InferenceEngine):
     self.eos_token_id = 0
     self.latency_mean = 0.1
     self.latency_stddev = 0.02
+    self.tokenizer = DummyTokenizer()
 
   async def encode(self, shard: Shard, prompt: str) -> np.ndarray:
-    return np.random.randint(1, self.vocab_size, size=(1, len(prompt.split())))
+    return np.array(self.tokenizer.encode(prompt))
   
   async def sample(self, x: np.ndarray) -> np.ndarray:
-    return np.random.randint(1, self.vocab_size)
+    if random.random() < 0.1:
+      return np.array([self.tokenizer.eos_token_id])
+    return np.array([np.random.randint(1, self.vocab_size)])
 
   async def decode(self, shard: Shard, tokens: np.ndarray) -> str:
-    return ' '.join([random_string(np.random.randint(1, 34)) for token in tokens])
+    return self.tokenizer.decode(tokens)
 
   async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray) -> np.ndarray:
     await self.ensure_shard(shard)
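
The behavioural change worth noting here is in sample(): each step now has a roughly 10% chance of returning the tokenizer's EOS token, so dummy generations terminate on their own instead of producing tokens indefinitely. A standalone sketch of that sampling loop (illustrative only, using the constants from this PR):

    import random
    import numpy as np

    EOS_TOKEN_ID = 69   # DummyTokenizer.eos_token_id in this PR
    VOCAB_SIZE = 1000   # DummyTokenizer.vocab_size in this PR

    def sample_dummy_token() -> int:
      # ~10% chance per step of emitting EOS, mirroring DummyInferenceEngine.sample
      if random.random() < 0.1:
        return EOS_TOKEN_ID
      return int(np.random.randint(1, VOCAB_SIZE))

    tokens = []
    while True:
      tok = sample_dummy_token()
      tokens.append(tok)
      if tok == EOS_TOKEN_ID:
        break
    # On average the loop produces about 10 tokens before hitting EOS.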

+ 8 - 3
exo/inference/tokenizers.py

@@ -4,19 +4,24 @@ from os import PathLike
 from pathlib import Path
 from typing import Union
 from transformers import AutoTokenizer, AutoProcessor
+import numpy as np
 from exo.download.hf.hf_helpers import get_local_snapshot_dir
 from exo.helpers import DEBUG
 
 
 class DummyTokenizer:
   def __init__(self):
-    self.eos_token_id = 0
+    self.eos_token_id = 69
+    self.vocab_size = 1000
 
   def apply_chat_template(self, messages, tokenize=True, add_generation_prompt=True):
-    return [1, 2, 3]
+    return "dummy_tokenized_prompt"
+
+  def encode(self, text):
+    return np.random.randint(1, self.vocab_size, size=(1, len(text.split())))
 
   def decode(self, tokens):
-    return "dummy"
+    return "dummy" * len(tokens)
 
 
 async def resolve_tokenizer(model_id: str):
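
A quick illustration of what the updated DummyTokenizer produces, assuming the class above is imported from exo.inference.tokenizers as in dummy_inference_engine.py:

    import numpy as np
    from exo.inference.tokenizers import DummyTokenizer

    tok = DummyTokenizer()
    ids = tok.encode("Dummy prompt.")       # shape (1, 2): one random id per whitespace-split word
    text = tok.decode(np.array([1, 2, 3]))  # "dummydummydummy", so it contains "dummy"
    assert ids.shape == (1, 2)
    assert "dummy" in text                  # this is what the CI expected_output check relies on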

+ 1 - 0
exo/orchestration/standard_node.py

@@ -360,6 +360,7 @@ class StandardNode(Node):
     return len(peers_added) > 0 or len(peers_removed) > 0 or len(peers_updated) > 0
 
   async def select_best_inference_engine(self):
+    if self.inference_engine.__class__.__name__ == 'DummyInferenceEngine': return
     supported_engines = self.get_supported_inference_engines()
     await self.broadcast_supported_engines(supported_engines)
     if len(self.get_topology_inference_engines()):
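
The one-line guard above keeps a node started with the dummy engine from being switched to a real engine during topology-based engine selection, so the integration test exercises DummyInferenceEngine end to end. Conceptually (a sketch of the guard, not the full method):

    async def select_best_inference_engine(self):
      # Dummy nodes opt out of engine negotiation entirely.
      if self.inference_engine.__class__.__name__ == 'DummyInferenceEngine':
        return
      # ...otherwise broadcast supported engines and pick the best one.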