@@ -268,9 +268,10 @@ class ChatGPTAPI:
     callback = self.node.on_token.register(callback_id)
 
     if DEBUG >= 2: print(f"Sending prompt from ChatGPT api {request_id=} {shard=} {prompt=} {image_str=}")
-    asyncio.create_task(self.node.process_prompt(shard, prompt, image_str, request_id=request_id))
 
     try:
+      await asyncio.wait_for(self.node.process_prompt(shard, prompt, image_str, request_id=request_id), timeout=self.response_timeout)
+
       if DEBUG >= 2: print(f"Waiting for response to finish. timeout={self.response_timeout}s")
 
       if stream:
@@ -345,6 +346,9 @@ class ChatGPTAPI:
         return web.json_response(generate_completion(chat_request, tokenizer, prompt, request_id, tokens, stream, finish_reason, "chat.completion"))
     except asyncio.TimeoutError:
       return web.json_response({"detail": "Response generation timed out"}, status=408)
+    except Exception as e:
+      if DEBUG >= 2: traceback.print_exc()
+      return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500)
     finally:
       deregistered_callback = self.node.on_token.deregister(callback_id)
       if DEBUG >= 2: print(f"Deregister {callback_id=} {deregistered_callback=}")
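
A minimal standalone sketch (not part of the patch, and using hypothetical names rather than the exo API) of the pattern the change relies on: asyncio.wait_for cancels the awaited coroutine and raises asyncio.TimeoutError once the timeout elapses, which the handler maps to a 408 response, while any other failure falls through to the generic except block and becomes a 500.

# sketch.py -- illustrative only; process_prompt, handle_request and
# RESPONSE_TIMEOUT are stand-ins, not names from the patched module.
import asyncio
import traceback

RESPONSE_TIMEOUT = 5  # seconds, stand-in for self.response_timeout

async def process_prompt(prompt: str) -> str:
  await asyncio.sleep(10)  # simulate a slow inference call
  return f"echo: {prompt}"

async def handle_request(prompt: str) -> dict:
  try:
    result = await asyncio.wait_for(process_prompt(prompt), timeout=RESPONSE_TIMEOUT)
    return {"status": 200, "body": result}
  except asyncio.TimeoutError:
    # wait_for cancelled process_prompt after RESPONSE_TIMEOUT seconds
    return {"status": 408, "detail": "Response generation timed out"}
  except Exception as e:
    traceback.print_exc()
    return {"status": 500, "detail": f"Error processing prompt: {e}"}

if __name__ == "__main__":
  # Prints the 408 dict after ~5 seconds, since the simulated call takes 10s.
  print(asyncio.run(handle_request("hello")))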