Ver código fonte

enh: CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE

Timothy Jaeryang Baek 1 mês atrás
pai
commit
4e9c75be50
2 arquivos alterados com 21 adições e 1 exclusão
  1. 19 0
      backend/open_webui/env.py
  2. 2 1
      backend/open_webui/utils/middleware.py

+ 19 - 0
backend/open_webui/env.py

@@ -487,6 +487,25 @@ else:
         MODELS_CACHE_TTL = 1
 
 
+####################################
+# CHAT
+####################################
+
+# Server-wide value for the chat response stream delta chunk size, read from
+# the CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE environment variable (default 1).
+# Consumers use it as the lower bound in max(<this>, <per-request value>),
+# so it is clamped to >= 1: a zero or negative setting must not remove the
+# floor of 1 that the per-request value previously had.
+try:
+    CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE = max(
+        1,
+        int(os.environ.get("CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE", "1")),
+    )
+except ValueError:
+    # int() raises ValueError for an empty or non-numeric string; use default.
+    CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE = 1
+
+
 ####################################
 # WEBSOCKET SUPPORT
 ####################################

+ 2 - 1
backend/open_webui/utils/middleware.py

@@ -95,6 +95,7 @@ from open_webui.config import (
 from open_webui.env import (
     SRC_LOG_LEVELS,
     GLOBAL_LOG_LEVEL,
+    CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE,
     BYPASS_MODEL_ACCESS_CONTROL,
     ENABLE_REALTIME_CHAT_SAVE,
 )
@@ -1819,7 +1820,7 @@ async def process_chat_response(
 
                     delta_count = 0
                     delta_chunk_size = max(
-                        1,
+                        CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE,
                         int(
                             metadata.get("params", {}).get("stream_delta_chunk_size")
                             or 1