Browse Source

Only keep URLs as sources if their content could actually be retrieved

Jan Kessler 3 months ago
parent
commit
a506a1a61e
1 changed file with 1 addition and 0 deletions
  1. 1 0
      backend/open_webui/routers/retrieval.py

+ 1 - 0
backend/open_webui/routers/retrieval.py

@@ -1478,6 +1478,7 @@ async def process_web_search(
             trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
             trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
         )
         )
         docs = await loader.aload()
         docs = await loader.aload()
+        urls = [doc.metadata["source"] for doc in docs] # only keep URLs which could be retrieved
 
 
         if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
         if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
             return {
             return {