Forráskód Böngészése

Merge pull request #13191 from tth37/feat_firecrawl_search_engine

feat: Add Firecrawl search engine
Tim Jaeryang Baek 3 hónapja
szülő
commit
e87f2669fa

+ 49 - 0
backend/open_webui/retrieval/web/firecrawl.py

@@ -0,0 +1,49 @@
+import logging
+from typing import Optional, List
+from urllib.parse import urljoin
+
+import requests
+from open_webui.retrieval.web.main import SearchResult, get_filtered_results
+from open_webui.env import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+
+def search_firecrawl(
+    firecrawl_url: str,
+    firecrawl_api_key: str,
+    query: str,
+    count: int,
+    filter_list: Optional[List[str]] = None,
+) -> List[SearchResult]:
+    """Run a web search through a Firecrawl instance's ``/v1/search`` endpoint.
+
+    Args:
+        firecrawl_url: Base URL of the Firecrawl API.
+        firecrawl_api_key: API key, sent as a ``Bearer`` token.
+        query: Search query string.
+        count: Maximum number of results to request and to return.
+        filter_list: Optional list handed to ``get_filtered_results`` to
+            filter the raw results before they are converted.
+
+    Returns:
+        Up to ``count`` ``SearchResult`` items. Any failure (network error,
+        non-2xx status, malformed payload) is logged and yields ``[]``
+        instead of raising, so a broken search backend never aborts the caller.
+    """
+    try:
+        # NOTE: "/v1/search" is an absolute path, so urljoin replaces any path
+        # component already present on firecrawl_url rather than appending to it.
+        firecrawl_search_url = urljoin(firecrawl_url, "/v1/search")
+        response = requests.post(
+            firecrawl_search_url,
+            headers={
+                "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
+                "Authorization": f"Bearer {firecrawl_api_key}",
+            },
+            json={
+                "query": query,
+                "limit": count,
+            },
+            # Without a timeout requests waits forever on a stalled server.
+            timeout=60,
+        )
+        response.raise_for_status()
+        # `or []` also covers an explicit `"data": null` in the payload.
+        results = response.json().get("data") or []
+        if filter_list:
+            results = get_filtered_results(results, filter_list)
+        results = [
+            SearchResult(
+                link=result.get("url"),
+                title=result.get("title"),
+                snippet=result.get("description"),
+            )
+            for result in results[:count]
+        ]
+        log.info(f"Firecrawl search results: {results}")
+        return results
+    except Exception as e:
+        # Deliberate best-effort boundary: report the failure, return nothing.
+        log.error(f"Error in Firecrawl search: {e}")
+        return []

+ 9 - 0
backend/open_webui/routers/retrieval.py

@@ -62,6 +62,7 @@ from open_webui.retrieval.web.bing import search_bing
 from open_webui.retrieval.web.exa import search_exa
 from open_webui.retrieval.web.perplexity import search_perplexity
 from open_webui.retrieval.web.sougou import search_sougou
+from open_webui.retrieval.web.firecrawl import search_firecrawl
 from open_webui.retrieval.web.external import search_external
 
 from open_webui.retrieval.utils import (
@@ -1525,6 +1526,14 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
             raise Exception(
                 "No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables"
             )
+    elif engine == "firecrawl":
+        return search_firecrawl(
+            request.app.state.config.FIRECRAWL_API_BASE_URL,
+            request.app.state.config.FIRECRAWL_API_KEY,
+            query,
+            request.app.state.config.WEB_SEARCH_RESULT_COUNT,
+            request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
+        )
     elif engine == "external":
         return search_external(
             request.app.state.config.EXTERNAL_WEB_SEARCH_URL,

+ 33 - 1
src/lib/components/admin/Settings/WebSearch.svelte

@@ -32,6 +32,7 @@
 		'exa',
 		'perplexity',
 		'sougou',
+		'firecrawl',
 		'external'
 	];
 	let webLoaderEngines = ['playwright', 'firecrawl', 'tavily', 'external'];
@@ -480,6 +481,37 @@
 									/>
 								</div>
 							</div>
+						{:else if webConfig.WEB_SEARCH_ENGINE === 'firecrawl'}
+							<div class="mb-2.5 flex w-full flex-col">
+								<div>
+									<div class=" self-center text-xs font-medium mb-1">
+										{$i18n.t('Firecrawl API Base URL')}
+									</div>
+
+									<div class="flex w-full">
+										<div class="flex-1">
+											<input
+												class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
+												type="text"
+												placeholder={$i18n.t('Enter Firecrawl API Base URL')}
+												bind:value={webConfig.FIRECRAWL_API_BASE_URL}
+												autocomplete="off"
+											/>
+										</div>
+									</div>
+								</div>
+
+								<div class="mt-2">
+									<div class=" self-center text-xs font-medium mb-1">
+										{$i18n.t('Firecrawl API Key')}
+									</div>
+
+									<SensitiveInput
+										placeholder={$i18n.t('Enter Firecrawl API Key')}
+										bind:value={webConfig.FIRECRAWL_API_KEY}
+									/>
+								</div>
+							</div>
 						{:else if webConfig.WEB_SEARCH_ENGINE === 'external'}
 							<div class="mb-2.5 flex w-full flex-col">
 								<div>
@@ -668,7 +700,7 @@
 								</div>
 							</div>
 						</div>
-					{:else if webConfig.WEB_LOADER_ENGINE === 'firecrawl'}
+					{:else if webConfig.WEB_LOADER_ENGINE === 'firecrawl' && webConfig.WEB_SEARCH_ENGINE !== 'firecrawl'}
 						<div class="mb-2.5 flex w-full flex-col">
 							<div>
 								<div class=" self-center text-xs font-medium mb-1">