version: 2.1
orbs:
  python: circleci/python@2
commands:
  run_chatgpt_api_test:
    parameters:
      inference_engine:
        type: string
      model_id:
        type: string
      expected_output:
        type: string
      prompt:
        type: string
    steps:
      - run:
          name: Run chatgpt api integration test (<<parameters.inference_engine>>, <<parameters.model_id>>)
          command: |
            source env/bin/activate
            # Set CLANG=1 for tinygrad only
            if [ "<<parameters.inference_engine>>" = "tinygrad" ]; then
              pip install llvmlite
              export TOKENIZERS_PARALLELISM=true SUPPORT_BF16=0 CLANG=1
            fi
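            # Assumed intent of the flags above: CLANG=1 selects a tinygrad CPU
            # backend, SUPPORT_BF16=0 disables bfloat16 code paths, and setting
            # TOKENIZERS_PARALLELISM explicitly silences the HF tokenizers
            # fork warning.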
            # Start first instance
            HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout 900 --disable-tui 2>&1 | tee output1.log &
            PID1=$!
            # Start second instance
            HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine <<parameters.inference_engine>> --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout 900 --disable-tui 2>&1 | tee output2.log &
            PID2=$!
            # Wait for discovery
            sleep 10
            # Function to check if processes are still running
            check_processes() {
              if ! kill -0 $PID1 2>/dev/null; then
                echo "First instance (PID $PID1) died unexpectedly. Log output:"
                cat output1.log
                exit 1
              fi
              if ! kill -0 $PID2 2>/dev/null; then
                echo "Second instance (PID $PID2) died unexpectedly. Log output:"
                cat output2.log
                exit 1
              fi
            }
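            # kill -0 sends no signal; it only tests that the PID is still alive,
            # so check_processes fails fast if either node crashed mid-test.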
            # Check processes before proceeding
            check_processes
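            # Both nodes serve an OpenAI-style /v1/chat/completions endpoint on
            # their --chatgpt-api-port; the same prompt goes to each in turn.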
            echo "Sending request to first instance..."
            response_1=$(curl -s http://localhost:8000/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "<<parameters.model_id>>",
                "messages": [{"role": "user", "content": "<<parameters.prompt>>"}],
                "temperature": 0.7
              }')
            echo "Response 1: $response_1"
            # Check processes after first response
            check_processes
            echo "Sending request to second instance..."
            response_2=$(curl -s http://localhost:8001/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "<<parameters.model_id>>",
                "messages": [{"role": "user", "content": "<<parameters.prompt>>"}],
                "temperature": 0.7
              }')
            echo "Response 2: $response_2"
            # Check processes after second response
            check_processes
            # Stop both instances
            kill $PID1 $PID2
            echo ""
            if ! echo "$response_1" | grep -q "<<parameters.expected_output>>" || ! echo "$response_2" | grep -q "<<parameters.expected_output>>"; then
              echo "Test failed: Response does not contain '<<parameters.expected_output>>'"
              echo "Response 1: $response_1"
              echo ""
              echo "Response 2: $response_2"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            else
              echo "Test passed: Response from both nodes contains '<<parameters.expected_output>>'"
            fi
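# The jobs below run the same run_chatgpt_api_test command once per inference
# engine (mlx, tinygrad, dummy); expected_output is matched as a plain grep
# substring of the raw JSON response.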
jobs:
  unit_test:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
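            # Note: activation does not persist across steps; each CircleCI step
            # starts a fresh shell, so later steps re-source env/bin/activate.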
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run tests
          command: |
            source env/bin/activate
            # set TEMPERATURE to 0 for deterministic sampling
            echo "Running inference engine tests..."
            METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
            echo "Running tokenizer tests..."
            python3 ./test/test_tokenizers.py
            python3 ./test/test_model_helpers.py
  discovery_integration_test:
    macos:
      xcode: "16.0.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run discovery integration test
          command: |
            source env/bin/activate
            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
            PID1=$!
            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
            PID2=$!
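            # The listen/broadcast ports are cross-wired (5678 <-> 5679) so each
            # node's discovery announcements land on the other's listen port
            # (assuming exo's default broadcast-based discovery).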
            sleep 10
            kill $PID1 $PID2
            if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then
              echo "Test passed: Both instances discovered each other"
              exit 0
            else
              echo "Test failed: Devices did not discover each other"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            fi
  chatgpt_api_integration_test_mlx:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: mlx
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"
  chatgpt_api_integration_test_dummy:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: dummy
          model_id: dummy-model
          prompt: "Dummy prompt."
          expected_output: "dummy"
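  # The dummy engine presumably stubs out real inference, letting the API and
  # orchestration path be tested without downloading model weights.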
  test_macos_m1:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run: system_profiler SPHardwareDataType
  chatgpt_api_integration_test_tinygrad:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: tinygrad
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"
  measure_pip_sizes:
    macos:
      xcode: "16.0.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies and measure sizes
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
            python ./extra/pipsize.py --json ./pipsize.json
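            # pipsize.json (written via the --json flag above, with sizes assumed
            # per installed package) is uploaded as a build artifact next.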
      - store_artifacts:
          path: ./pipsize.json
          destination: pip-sizes.json
workflows:
  version: 2
  build_and_test:
    jobs:
      - unit_test
      - discovery_integration_test
      - chatgpt_api_integration_test_mlx
      - chatgpt_api_integration_test_tinygrad
      - chatgpt_api_integration_test_dummy
      - test_macos_m1
      - measure_pip_sizes
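      # No job declares `requires:`, so all seven run in parallel on each build.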