Преглед изворног кода

require exact match on response from llms in integration tests

Alex Cheema пре 8 месеци
родитељ
комит
37056403e7
1 измењен фајл са 7 додатих и 7 уклоњених редова
  1. 7 7
      .circleci/config.yml

+ 7 - 7
.circleci/config.yml

@@ -84,8 +84,8 @@ commands:
             kill $PID1 $PID2
 
             echo ""
-            if ! echo "$response_1" | grep -q "<<parameters.expected_output>>" || ! echo "$response_2" | grep -q "<<parameters.expected_output>>"; then
-              echo "Test failed: Response does not contain '<<parameters.expected_output>>'"
+            if ! echo "$response_1" | grep -q "^<<parameters.expected_output>>$" || ! echo "$response_2" | grep -q "^<<parameters.expected_output>>$"; then
+              echo "Test failed: Response does not match '<<parameters.expected_output>>'"
               echo "Response 1: $response_1"
               echo ""
               echo "Response 2: $response_2"
@@ -95,7 +95,7 @@ commands:
               cat output2.log
               exit 1
             else
-              echo "Test passed: Response from both nodes contains '<<parameters.expected_output>>'"
+              echo "Test passed: Response from both nodes matches '<<parameters.expected_output>>'"
             fi
 
 jobs:
@@ -188,8 +188,8 @@ jobs:
       - run_chatgpt_api_test:
           inference_engine: mlx
           model_id: llama-3.2-1b
-          prompt: "Keep responses concise. Who was the king of pop?"
-          expected_output: "Michael Jackson"
+          prompt: "Keep responses concise. Who was the king of pop? Properly capitalize and end your response with a period."
+          expected_output: "Michael Jackson."
 
   chatgpt_api_integration_test_dummy:
     macos:
@@ -244,8 +244,8 @@ jobs:
       - run_chatgpt_api_test:
           inference_engine: tinygrad
           model_id: llama-3.2-1b
-          prompt: "Keep responses concise. Who was the king of pop?"
-          expected_output: "Michael Jackson"
+          prompt: "Keep responses concise. Who was the king of pop? Properly capitalize and end your response with a period."
+          expected_output: "Michael Jackson."
 
   measure_pip_sizes:
     macos: