Browse Source

refac: stream chunk max buffer size

Timothy Jaeryang Baek 3 months ago
parent
commit
e76f77bcb7

+ 6 - 4
backend/open_webui/env.py

@@ -570,16 +570,18 @@ else:
 
 
 CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = os.environ.get(
-    "CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE", "10485760" # 10MB
+    "CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE", ""
 )
 
 if CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE == "":
-    CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = 1024 * 1024 * 10
+    CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = None
 else:
     try:
-        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = int(CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE)
+        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = int(
+            CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE
+        )
     except Exception:
-        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = 1024 * 1024 * 10
+        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = None
 
 
 ####################################

+ 2 - 2
backend/open_webui/routers/openai.py

@@ -45,7 +45,7 @@ from open_webui.utils.payload import (
 )
 from open_webui.utils.misc import (
     convert_logit_bias_input_to_json,
-    handle_large_stream_chunks,
+    stream_chunks_handler,
 )
 
 from open_webui.utils.auth import get_admin_user, get_verified_user
@@ -953,7 +953,7 @@ async def generate_chat_completion(
         if "text/event-stream" in r.headers.get("Content-Type", ""):
             streaming = True
             return StreamingResponse(
-                handle_large_stream_chunks(r.content),
+                stream_chunks_handler(r.content),
                 status_code=r.status,
                 headers=dict(r.headers),
                 background=BackgroundTask(

+ 6 - 6
backend/open_webui/utils/misc.py

@@ -542,21 +542,21 @@ def extract_urls(text: str) -> list[str]:
     return url_pattern.findall(text)
 
 
-def handle_large_stream_chunks(stream: aiohttp.StreamReader, max_buffer_size: int = CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE):
+def stream_chunks_handler(stream: aiohttp.StreamReader):
     """
     Handle stream response chunks, supporting large data chunks that exceed the original 16kb limit.
     When a single line exceeds max_buffer_size, returns an empty JSON string {} and skips subsequent data
     until encountering normally sized data.
 
     :param stream: The stream reader to handle.
-    :param max_buffer_size: The maximum buffer size in bytes, -1 means not handle large chunks, default is 10MB.
     :return: An async generator that yields the stream data.
     """
 
-    if max_buffer_size <= 0:
+    max_buffer_size = CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE
+    if max_buffer_size is None or max_buffer_size <= 0:
         return stream
 
-    async def handle_stream_chunks():
+    async def yield_safe_stream_chunks():
         buffer = b""
         skip_mode = False
 
@@ -603,5 +603,5 @@ def handle_large_stream_chunks(stream: aiohttp.StreamReader, max_buffer_size: in
         # Process remaining buffer data
         if buffer and not skip_mode:
             yield buffer
-    
-    return handle_stream_chunks()
+
+    return yield_safe_stream_chunks()