Timothy Jaeryang Baek 2 dagar sedan
förälder
incheckning
d40c710354
3 ändrade filer med 268 tillägg och 46 borttagningar
  1. 22 44
      backend/open_webui/routers/retrieval.py
  2. 2 2
      backend/open_webui/utils/middleware.py
  3. 244 0
      package-lock.json

+ 22 - 44
backend/open_webui/routers/retrieval.py

@@ -5,6 +5,7 @@ import os
 import shutil
 import asyncio
 
+import re
 import uuid
 from datetime import datetime
 from pathlib import Path
@@ -1690,50 +1691,34 @@ def process_text(
         )
 
 
-@router.post("/process/youtube")
-def process_youtube_video(
-    request: Request, form_data: ProcessUrlForm, user=Depends(get_verified_user)
-):
-    try:
-        collection_name = form_data.collection_name
-        if not collection_name:
-            collection_name = calculate_sha256_string(form_data.url)[:63]
+def is_youtube_url(url: str) -> bool:
+    """Return True when ``url`` matches the YouTube URL pattern below.
+
+    The pattern is anchored at the start of the string. The scheme
+    (``http://`` or ``https://``) and a ``www.`` prefix are both optional,
+    the host must be ``youtube.com`` or ``youtu.be``, and at least one
+    character must follow the slash after the host.
+
+    Matching is case-sensitive, and other subdomains (for example
+    ``m.youtube.com``) do not match.
+    """
+    youtube_regex = r"^(https?://)?(www\.)?(youtube\.com|youtu\.be)/.+$"
+    return re.match(youtube_regex, url) is not None
+
 
-        loader = YoutubeLoader(
-            form_data.url,
+def get_loader(request, url: str):
+    """Return the loader object to use for ``url``.
+
+    When ``is_youtube_url(url)`` is true, a ``YoutubeLoader`` is built with
+    ``YOUTUBE_LOADER_LANGUAGE`` and ``YOUTUBE_LOADER_PROXY_URL``; otherwise
+    ``get_web_loader`` is called with ``ENABLE_WEB_LOADER_SSL_VERIFICATION``
+    and ``WEB_LOADER_CONCURRENT_REQUESTS``. All settings are read from
+    ``request.app.state.config``.
+
+    NOTE(review): a setting named ``WEB_LOADER_CONCURRENT_REQUESTS`` is
+    passed as ``requests_per_second`` -- confirm that this is intended.
+    """
+    if is_youtube_url(url):
+        return YoutubeLoader(
+            url,
             language=request.app.state.config.YOUTUBE_LOADER_LANGUAGE,
             proxy_url=request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
         )
-
-        docs = loader.load()
-        content = " ".join([doc.page_content for doc in docs])
-        log.debug(f"text_content: {content}")
-
-        save_docs_to_vector_db(
-            request, docs, collection_name, overwrite=True, user=user
+    else:
+        return get_web_loader(
+            url,
+            verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
+            requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
         )
 
-        return {
-            "status": True,
-            "collection_name": collection_name,
-            "filename": form_data.url,
-            "file": {
-                "data": {
-                    "content": content,
-                },
-                "meta": {
-                    "name": form_data.url,
-                },
-            },
-        }
-    except Exception as e:
-        log.exception(e)
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=ERROR_MESSAGES.DEFAULT(e),
-        )
 
+def get_content_from_url(request, url: str) -> tuple:
+    """Load ``url`` and return ``(content, docs)``.
+
+    ``docs`` is the value returned by ``get_loader(request, url).load()``.
+    ``content`` is the ``page_content`` of every item in ``docs`` joined
+    with single spaces (an empty string when ``docs`` is empty).
+
+    The return value is a two-item tuple, not a bare string: callers are
+    expected to unpack it as ``content, docs = get_content_from_url(...)``.
+    """
+    loader = get_loader(request, url)
+    docs = loader.load()
+    content = " ".join([doc.page_content for doc in docs])
+    return content, docs
 
+
+@router.post("/process/youtube")
 @router.post("/process/web")
 def process_web(
     request: Request, form_data: ProcessUrlForm, user=Depends(get_verified_user)
@@ -1743,14 +1728,7 @@ def process_web(
         if not collection_name:
             collection_name = calculate_sha256_string(form_data.url)[:63]
 
-        loader = get_web_loader(
-            form_data.url,
-            verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
-            requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
-        )
-        docs = loader.load()
-        content = " ".join([doc.page_content for doc in docs])
-
+        content, docs = get_content_from_url(request, form_data.url)
         log.debug(f"text_content: {content}")
 
         if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:

+ 2 - 2
backend/open_webui/utils/middleware.py

@@ -1001,11 +1001,11 @@ async def process_chat_payload(request, form_data, user, metadata, model):
     log.debug(f"form_data: {form_data}")
 
     system_message = get_system_message(form_data.get("messages", []))
-    if system_message:
+    if system_message:  # Chat Controls/User Settings
         try:
             form_data = apply_system_prompt_to_body(
                 system_message.get("content"), form_data, metadata, user, replace=True
-            )
+            )  # Required to handle system prompt variables
         except:
             pass
 

+ 244 - 0
package-lock.json

@@ -94,6 +94,7 @@
 				"undici": "^7.3.0",
 				"uuid": "^9.0.1",
 				"vega": "^6.2.0",
+				"vega-lite": "^6.4.1",
 				"vite-plugin-static-copy": "^2.2.0",
 				"y-prosemirror": "^1.3.7",
 				"yaml": "^2.7.1",
@@ -5593,6 +5594,99 @@
 				"node": ">=8"
 			}
 		},
+		"node_modules/cliui": {
+			"version": "9.0.1",
+			"resolved": "https://registry.npmjs.org/cliui/-/cliui-9.0.1.tgz",
+			"integrity": "sha512-k7ndgKhwoQveBL+/1tqGJYNz097I7WOvwbmmU2AR5+magtbjPWQTS1C5vzGkBC8Ym8UWRzfKUzUUqFLypY4Q+w==",
+			"license": "ISC",
+			"dependencies": {
+				"string-width": "^7.2.0",
+				"strip-ansi": "^7.1.0",
+				"wrap-ansi": "^9.0.0"
+			},
+			"engines": {
+				"node": ">=20"
+			}
+		},
+		"node_modules/cliui/node_modules/ansi-regex": {
+			"version": "6.2.2",
+			"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz",
+			"integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==",
+			"license": "MIT",
+			"engines": {
+				"node": ">=12"
+			},
+			"funding": {
+				"url": "https://github.com/chalk/ansi-regex?sponsor=1"
+			}
+		},
+		"node_modules/cliui/node_modules/ansi-styles": {
+			"version": "6.2.3",
+			"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.3.tgz",
+			"integrity": "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==",
+			"license": "MIT",
+			"engines": {
+				"node": ">=12"
+			},
+			"funding": {
+				"url": "https://github.com/chalk/ansi-styles?sponsor=1"
+			}
+		},
+		"node_modules/cliui/node_modules/emoji-regex": {
+			"version": "10.5.0",
+			"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.5.0.tgz",
+			"integrity": "sha512-lb49vf1Xzfx080OKA0o6l8DQQpV+6Vg95zyCJX9VB/BqKYlhG7N4wgROUUHRA+ZPUefLnteQOad7z1kT2bV7bg==",
+			"license": "MIT"
+		},
+		"node_modules/cliui/node_modules/string-width": {
+			"version": "7.2.0",
+			"resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
+			"integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==",
+			"license": "MIT",
+			"dependencies": {
+				"emoji-regex": "^10.3.0",
+				"get-east-asian-width": "^1.0.0",
+				"strip-ansi": "^7.1.0"
+			},
+			"engines": {
+				"node": ">=18"
+			},
+			"funding": {
+				"url": "https://github.com/sponsors/sindresorhus"
+			}
+		},
+		"node_modules/cliui/node_modules/strip-ansi": {
+			"version": "7.1.2",
+			"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz",
+			"integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==",
+			"license": "MIT",
+			"dependencies": {
+				"ansi-regex": "^6.0.1"
+			},
+			"engines": {
+				"node": ">=12"
+			},
+			"funding": {
+				"url": "https://github.com/chalk/strip-ansi?sponsor=1"
+			}
+		},
+		"node_modules/cliui/node_modules/wrap-ansi": {
+			"version": "9.0.2",
+			"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-9.0.2.tgz",
+			"integrity": "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww==",
+			"license": "MIT",
+			"dependencies": {
+				"ansi-styles": "^6.2.1",
+				"string-width": "^7.0.0",
+				"strip-ansi": "^7.1.0"
+			},
+			"engines": {
+				"node": ">=18"
+			},
+			"funding": {
+				"url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+			}
+		},
 		"node_modules/clone": {
 			"version": "2.1.2",
 			"resolved": "https://registry.npmjs.org/clone/-/clone-2.1.2.tgz",
@@ -7069,6 +7163,15 @@
 				"@esbuild/win32-x64": "0.25.1"
 			}
 		},
+		"node_modules/escalade": {
+			"version": "3.2.0",
+			"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
+			"integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
+			"license": "MIT",
+			"engines": {
+				"node": ">=6"
+			}
+		},
 		"node_modules/escape-string-regexp": {
 			"version": "4.0.0",
 			"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@@ -7781,6 +7884,27 @@
 			"resolved": "https://registry.npmjs.org/gc-hook/-/gc-hook-0.3.1.tgz",
 			"integrity": "sha512-E5M+O/h2o7eZzGhzRZGex6hbB3k4NWqO0eA+OzLRLXxhdbYPajZnynPwAtphnh+cRHPwsj5Z80dqZlfI4eK55A=="
 		},
+		"node_modules/get-caller-file": {
+			"version": "2.0.5",
+			"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
+			"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
+			"license": "ISC",
+			"engines": {
+				"node": "6.* || 8.* || >= 10.*"
+			}
+		},
+		"node_modules/get-east-asian-width": {
+			"version": "1.4.0",
+			"resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.4.0.tgz",
+			"integrity": "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==",
+			"license": "MIT",
+			"engines": {
+				"node": ">=18"
+			},
+			"funding": {
+				"url": "https://github.com/sponsors/sindresorhus"
+			}
+		},
 		"node_modules/get-func-name": {
 			"version": "2.0.2",
 			"resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.2.tgz",
@@ -8794,6 +8918,12 @@
 			"integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==",
 			"dev": true
 		},
+		"node_modules/json-stringify-pretty-compact": {
+			"version": "4.0.0",
+			"resolved": "https://registry.npmjs.org/json-stringify-pretty-compact/-/json-stringify-pretty-compact-4.0.0.tgz",
+			"integrity": "sha512-3CNZ2DnrpByG9Nqj6Xo8vqbjT4F6N+tb4Gb28ESAZjYZ5yqvmc56J+/kuIwkaAMOyblTQhUW7PxMkUb8Q36N3Q==",
+			"license": "MIT"
+		},
 		"node_modules/json-stringify-safe": {
 			"version": "5.0.1",
 			"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
@@ -13281,6 +13411,35 @@
 				"vega-util": "^2.1.0"
 			}
 		},
+		"node_modules/vega-lite": {
+			"version": "6.4.1",
+			"resolved": "https://registry.npmjs.org/vega-lite/-/vega-lite-6.4.1.tgz",
+			"integrity": "sha512-KO3ybHNouRK4A0al/+2fN9UqgTEfxrd/ntGLY933Hg5UOYotDVQdshR3zn7OfXwQ7uj0W96Vfa5R+QxO8am3IQ==",
+			"license": "BSD-3-Clause",
+			"dependencies": {
+				"json-stringify-pretty-compact": "~4.0.0",
+				"tslib": "~2.8.1",
+				"vega-event-selector": "~4.0.0",
+				"vega-expression": "~6.1.0",
+				"vega-util": "~2.1.0",
+				"yargs": "~18.0.0"
+			},
+			"bin": {
+				"vl2pdf": "bin/vl2pdf",
+				"vl2png": "bin/vl2png",
+				"vl2svg": "bin/vl2svg",
+				"vl2vg": "bin/vl2vg"
+			},
+			"engines": {
+				"node": ">=18"
+			},
+			"funding": {
+				"url": "https://app.hubspot.com/payments/GyPC972GD9Rt"
+			},
+			"peerDependencies": {
+				"vega": "^6.0.0"
+			}
+		},
 		"node_modules/vega-loader": {
 			"version": "5.1.0",
 			"resolved": "https://registry.npmjs.org/vega-loader/-/vega-loader-5.1.0.tgz",
@@ -14642,6 +14801,15 @@
 				"yjs": "^13.0.0"
 			}
 		},
+		"node_modules/y18n": {
+			"version": "5.0.8",
+			"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
+			"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
+			"license": "ISC",
+			"engines": {
+				"node": ">=10"
+			}
+		},
 		"node_modules/yallist": {
 			"version": "5.0.0",
 			"resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
@@ -14663,6 +14831,82 @@
 				"node": ">= 14"
 			}
 		},
+		"node_modules/yargs": {
+			"version": "18.0.0",
+			"resolved": "https://registry.npmjs.org/yargs/-/yargs-18.0.0.tgz",
+			"integrity": "sha512-4UEqdc2RYGHZc7Doyqkrqiln3p9X2DZVxaGbwhn2pi7MrRagKaOcIKe8L3OxYcbhXLgLFUS3zAYuQjKBQgmuNg==",
+			"license": "MIT",
+			"dependencies": {
+				"cliui": "^9.0.1",
+				"escalade": "^3.1.1",
+				"get-caller-file": "^2.0.5",
+				"string-width": "^7.2.0",
+				"y18n": "^5.0.5",
+				"yargs-parser": "^22.0.0"
+			},
+			"engines": {
+				"node": "^20.19.0 || ^22.12.0 || >=23"
+			}
+		},
+		"node_modules/yargs-parser": {
+			"version": "22.0.0",
+			"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-22.0.0.tgz",
+			"integrity": "sha512-rwu/ClNdSMpkSrUb+d6BRsSkLUq1fmfsY6TOpYzTwvwkg1/NRG85KBy3kq++A8LKQwX6lsu+aWad+2khvuXrqw==",
+			"license": "ISC",
+			"engines": {
+				"node": "^20.19.0 || ^22.12.0 || >=23"
+			}
+		},
+		"node_modules/yargs/node_modules/ansi-regex": {
+			"version": "6.2.2",
+			"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.2.tgz",
+			"integrity": "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==",
+			"license": "MIT",
+			"engines": {
+				"node": ">=12"
+			},
+			"funding": {
+				"url": "https://github.com/chalk/ansi-regex?sponsor=1"
+			}
+		},
+		"node_modules/yargs/node_modules/emoji-regex": {
+			"version": "10.5.0",
+			"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.5.0.tgz",
+			"integrity": "sha512-lb49vf1Xzfx080OKA0o6l8DQQpV+6Vg95zyCJX9VB/BqKYlhG7N4wgROUUHRA+ZPUefLnteQOad7z1kT2bV7bg==",
+			"license": "MIT"
+		},
+		"node_modules/yargs/node_modules/string-width": {
+			"version": "7.2.0",
+			"resolved": "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz",
+			"integrity": "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ==",
+			"license": "MIT",
+			"dependencies": {
+				"emoji-regex": "^10.3.0",
+				"get-east-asian-width": "^1.0.0",
+				"strip-ansi": "^7.1.0"
+			},
+			"engines": {
+				"node": ">=18"
+			},
+			"funding": {
+				"url": "https://github.com/sponsors/sindresorhus"
+			}
+		},
+		"node_modules/yargs/node_modules/strip-ansi": {
+			"version": "7.1.2",
+			"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.2.tgz",
+			"integrity": "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==",
+			"license": "MIT",
+			"dependencies": {
+				"ansi-regex": "^6.0.1"
+			},
+			"engines": {
+				"node": ">=12"
+			},
+			"funding": {
+				"url": "https://github.com/chalk/strip-ansi?sponsor=1"
+			}
+		},
 		"node_modules/yauzl": {
 			"version": "2.10.0",
 			"resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",