Browse code

feat: stream delta chunk

Co-Authored-By: Jan Kessler <Ithanil@users.noreply.github.com>
Timothy Jaeryang Baek, 1 month ago
Parent
Current commit
1a93891d97

+ 25 - 10
backend/open_webui/main.py

@@ -1401,6 +1401,16 @@ async def chat_completion(
             request.state.direct = True
             request.state.model = model
 
+        # Chat Params
+        stream_delta_chunk_size = form_data.get("params", {}).get(
+            "stream_delta_chunk_size"
+        )
+        # Model Params
+        if model_info and model_info.params:
+            stream_delta_chunk_size = model_info.params.model_dump().get(
+                "stream_delta_chunk_size"
+            )
+
         metadata = {
             "user_id": user.id,
             "chat_id": form_data.pop("chat_id", None),
@@ -1414,16 +1424,21 @@ async def chat_completion(
             "variables": form_data.get("variables", {}),
             "model": model,
             "direct": model_item.get("direct", False),
-            **(
-                {"function_calling": "native"}
-                if form_data.get("params", {}).get("function_calling") == "native"
-                or (
-                    model_info
-                    and model_info.params.model_dump().get("function_calling")
-                    == "native"
-                )
-                else {}
-            ),
+            "params": {
+                "stream_delta_chunk_size": stream_delta_chunk_size,
+                "function_calling": (
+                    "native"
+                    if (
+                        form_data.get("params", {}).get("function_calling") == "native"
+                        or (
+                            model_info
+                            and model_info.params.model_dump().get("function_calling")
+                            == "native"
+                        )
+                    )
+                    else "default"
+                ),
+            },
         }
 
         if metadata.get("chat_id") and (user and user.role != "admin"):

+ 28 - 7
backend/open_webui/utils/middleware.py

@@ -684,6 +684,7 @@ def apply_params_to_form_data(form_data, model):
 
     open_webui_params = {
         "stream_response": bool,
+        "stream_delta_chunk_size": int,
         "function_calling": str,
         "system": str,
     }
@@ -930,7 +931,7 @@ async def process_chat_payload(request, form_data, user, metadata, model):
                 }
 
     if tools_dict:
-        if metadata.get("function_calling") == "native":
+        if metadata.get("params", {}).get("function_calling") == "native":
             # If the function calling is native, then call the tools function calling handler
             metadata["tools"] = tools_dict
             form_data["tools"] = [
@@ -1816,6 +1817,15 @@ async def process_chat_response(
 
                     response_tool_calls = []
 
+                    delta_count = 0
+                    delta_chunk_size = max(
+                        1,
+                        int(
+                            metadata.get("params", {}).get("stream_delta_chunk_size")
+                            or 1
+                        ),
+                    )
+
                     async for line in response.body_iterator:
                         line = line.decode("utf-8") if isinstance(line, bytes) else line
                         data = line
@@ -2063,12 +2073,23 @@ async def process_chat_response(
                                                 ),
                                             }
 
-                                await event_emitter(
-                                    {
-                                        "type": "chat:completion",
-                                        "data": data,
-                                    }
-                                )
+                                if delta:
+                                    delta_count += 1
+                                    if delta_count >= delta_chunk_size:
+                                        await event_emitter(
+                                            {
+                                                "type": "chat:completion",
+                                                "data": data,
+                                            }
+                                        )
+                                        delta_count = 0
+                                else:
+                                    await event_emitter(
+                                        {
+                                            "type": "chat:completion",
+                                            "data": data,
+                                        }
+                                    )
                         except Exception as e:
                             done = "data: [DONE]" in line
                             if done:

+ 1 - 0
backend/open_webui/utils/payload.py

@@ -69,6 +69,7 @@ def remove_open_webui_params(params: dict) -> dict:
     """
     open_webui_params = {
         "stream_response": bool,
+        "stream_delta_chunk_size": int,
         "function_calling": str,
         "system": str,
     }

+ 58 - 0
src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte

@@ -15,6 +15,7 @@
 	const defaultParams = {
 		// Advanced
 		stream_response: null, // Set stream responses for this model individually
+		stream_delta_chunk_size: null, // Set the chunk size for streaming responses
 		function_calling: null,
 		seed: null,
 		stop: null,
@@ -88,6 +89,63 @@
 		</Tooltip>
 	</div>
 
+	{#if admin}
+		<div>
+			<Tooltip
+				content={$i18n.t(
+					'The stream delta chunk size for the model. Increasing the chunk size will make the model respond with larger pieces of text at once.'
+				)}
+				placement="top-start"
+				className="inline-tooltip"
+			>
+				<div class="flex w-full justify-between">
+					<div class=" self-center text-xs font-medium">
+						{$i18n.t('Stream Delta Chunk Size')}
+					</div>
+					<button
+						class="p-1 px-3 text-xs flex rounded-sm transition shrink-0 outline-hidden"
+						type="button"
+						on:click={() => {
+							params.stream_delta_chunk_size =
+								(params?.stream_delta_chunk_size ?? null) === null ? 1 : null;
+						}}
+					>
+						{#if (params?.stream_delta_chunk_size ?? null) === null}
+							<span class="ml-2 self-center"> {$i18n.t('Default')} </span>
+						{:else}
+							<span class="ml-2 self-center"> {$i18n.t('Custom')} </span>
+						{/if}
+					</button>
+				</div>
+			</Tooltip>
+
+			{#if (params?.stream_delta_chunk_size ?? null) !== null}
+				<div class="flex mt-0.5 space-x-2">
+					<div class=" flex-1">
+						<input
+							id="steps-range"
+							type="range"
+							min="1"
+							max="128"
+							step="1"
+							bind:value={params.stream_delta_chunk_size}
+							class="w-full h-2 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
+						/>
+					</div>
+					<div>
+						<input
+							bind:value={params.stream_delta_chunk_size}
+							type="number"
+							class=" bg-transparent text-center w-14"
+							min="1"
+							step="any"
+						/>
+					</div>
+				</div>
+			{/if}
+		</div>
+	{/if}
+
 	<div>
 		<Tooltip
 			content={$i18n.t(

+ 3 - 0
src/lib/components/chat/Settings/General.svelte

@@ -42,6 +42,7 @@
 	let params = {
 		// Advanced
 		stream_response: null,
+		stream_delta_chunk_size: null,
 		function_calling: null,
 		seed: null,
 		temperature: null,
@@ -71,6 +72,8 @@
 			system: system !== '' ? system : undefined,
 			params: {
 				stream_response: params.stream_response !== null ? params.stream_response : undefined,
+				stream_delta_chunk_size:
+					params.stream_delta_chunk_size !== null ? params.stream_delta_chunk_size : undefined,
 				function_calling: params.function_calling !== null ? params.function_calling : undefined,
 				seed: (params.seed !== null ? params.seed : undefined) ?? undefined,
 				stop: params.stop ? params.stop.split(',').filter((e) => e) : undefined,