Explorar el Código

refac/enh: web search domain allow/block filter

Timothy Jaeryang Baek hace 3 meses
padre
commit
ee10f372a0
Se han modificado 2 ficheros con 21 adiciones y 2 borrados
  1. 1 0
      backend/open_webui/config.py
  2. 20 2
      backend/open_webui/retrieval/web/main.py

+ 1 - 0
backend/open_webui/config.py

@@ -2840,6 +2840,7 @@ WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
         # "wikipedia.com",
         # "wikimedia.org",
         # "wikidata.org",
+        # "!stackoverflow.com",
     ],
 )
 

+ 20 - 2
backend/open_webui/retrieval/web/main.py

@@ -9,14 +9,32 @@ from pydantic import BaseModel
 def get_filtered_results(results, filter_list):
     """Filter web search results by an allow/block list of domains.

     Entries in ``filter_list`` that start with ``"!"`` are blocked domains;
     every other entry is an allowed domain. A domain entry matches a result
     when the result's host is that domain or one of its subdomains
     (``"wikipedia.org"`` matches ``en.wikipedia.org`` but not
     ``notwikipedia.org``).

     Args:
         results: Iterable of result dicts; the URL is read from the first
             truthy value among the ``"url"``, ``"link"`` and ``"href"`` keys.
         filter_list: List of domain strings, optionally prefixed with ``"!"``.

     Returns:
         ``results`` unchanged when ``filter_list`` is empty; otherwise a new
         list with the results whose URL is valid, matches the allow list
         (when there is one) and does not match the block list.
     """
     if not filter_list:
         return results

     allow_list = []
     block_list = []
     for entry in filter_list:
         entry = entry.strip().lower()
         is_blocked = entry.startswith("!")
         # Drop the "!" marker and any leading dot so ".example.com" and
         # "example.com" behave the same.
         name = (entry[1:] if is_blocked else entry).strip().lstrip(".")
         if not name:
             # An empty entry would otherwise match every host.
             continue
         (block_list if is_blocked else allow_list).append(name)

     def host_matches(host, domains):
         # Match on label boundaries, not raw string suffixes.
         return any(host == d or host.endswith("." + d) for d in domains)

     filtered_results = []

     for result in results:
         url = result.get("url") or result.get("link", "") or result.get("href", "")
         if not validators.url(url):
             continue

         # hostname is lower-cased and excludes port and userinfo, unlike netloc.
         host = (urlparse(url).hostname or "").rstrip(".")

         # If the allow list is non-empty, the host must match one of its entries.
         if allow_list and not host_matches(host, allow_list):
             continue

         # The block list always removes matches.
         if host_matches(host, block_list):
             continue

         filtered_results.append(result)

     return filtered_results