Browse Source

fix: FireCrawlLoader

Timothy Jaeryang Baek 1 week ago
parent
commit
09874ab83d

+ 6 - 1
backend/open_webui/retrieval/web/utils.py

@@ -228,7 +228,10 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
                     mode=self.mode,
                     params=self.params,
                 )
-                yield from loader.lazy_load()
+                for document in loader.lazy_load():
+                    if not document.metadata.get("source"):
+                        document.metadata["source"] = document.metadata.get("sourceURL")
+                    yield document
             except Exception as e:
                 if self.continue_on_failure:
                     log.exception(f"Error loading {url}: {e}")
@@ -248,6 +251,8 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
                     params=self.params,
                 )
                 async for document in loader.alazy_load():
+                    if not document.metadata.get("source"):
+                        document.metadata["source"] = document.metadata.get("sourceURL")
                     yield document
             except Exception as e:
                 if self.continue_on_failure:

+ 2 - 2
backend/open_webui/routers/retrieval.py

@@ -1536,8 +1536,8 @@ async def process_web_search(
         )
         docs = await loader.aload()
         urls = [
-            doc.metadata["source"] for doc in docs
-        ]  # only keep URLs which could be retrieved
+            doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
+        ]  # only keep URLs
 
         if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
             return {