7 months ago · 0c6ab35333
--- a/.github/bench.py
+++ b/.github/bench.py
@@ -226,7 +226,7 @@ async def measure_performance(api_endpoint: str, prompt: str, model: str) -> Dic
 
				     }
			
 
				 
			
 
				     # Get token count
			
 
				-    session = aiohttp.ClientSession()
			
 
				+    session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=600, connect=10, sock_read=600, sock_connect=10))
			
 
				     try:
			
 
				         response = await session.post(
			
 
				             "http://localhost:52415/v1/chat/token/encode",
			
@@ -397,4 +397,4 @@ if __name__ == "__main__":
 
				     check_system_state()
			
 
				     check_gpu_access()
			
 
				     optimize_system_performance()
			
 
				-    asyncio.run(main())
			
 
				+    asyncio.run(main())