|
@@ -387,6 +387,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
|
|
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
|
|
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
|
|
+ "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
|
|
|
"SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
|
|
|
"YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
|
|
|
"YACY_USERNAME": request.app.state.config.YACY_USERNAME,
|
|
@@ -439,6 +440,7 @@ class WebConfig(BaseModel):
|
|
|
WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None
|
|
|
WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = []
|
|
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
|
|
|
+ BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None
|
|
|
SEARXNG_QUERY_URL: Optional[str] = None
|
|
|
YACY_QUERY_URL: Optional[str] = None
|
|
|
YACY_USERNAME: Optional[str] = None
|
|
@@ -751,6 +753,9 @@ async def update_rag_config(
|
|
|
request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
|
|
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
|
|
)
|
|
|
+ request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = (
|
|
|
+ form_data.web.BYPASS_WEB_SEARCH_WEB_LOADER
|
|
|
+ )
|
|
|
request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL
|
|
|
request.app.state.config.YACY_QUERY_URL = form_data.web.YACY_QUERY_URL
|
|
|
request.app.state.config.YACY_USERNAME = form_data.web.YACY_USERNAME
|
|
@@ -875,6 +880,7 @@ async def update_rag_config(
|
|
|
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
|
|
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
|
|
+ "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
|
|
|
"SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
|
|
|
"YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
|
|
|
"YACY_USERNAME": request.app.state.config.YACY_USERNAME,
|
|
@@ -1678,13 +1684,29 @@ async def process_web_search(
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
- loader = get_web_loader(
|
|
|
- urls,
|
|
|
- verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
|
|
|
- requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
- trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
|
|
|
- )
|
|
|
- docs = await loader.aload()
|
|
|
+ if request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER:
|
|
|
+ docs = [
|
|
|
+ Document(
|
|
|
+ page_content=result.snippet,
|
|
|
+ metadata={
|
|
|
+ "source": result.link,
|
|
|
+ "title": result.title,
|
|
|
+ "snippet": result.snippet,
|
|
|
+ "link": result.link,
|
|
|
+ },
|
|
|
+ )
|
|
|
+ for result in search_results
|
|
|
+ if hasattr(result, "snippet")
|
|
|
+ ]
|
|
|
+ else:
|
|
|
+ loader = get_web_loader(
|
|
|
+ urls,
|
|
|
+ verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
|
|
|
+ requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
+ trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
|
|
|
+ )
|
|
|
+ docs = await loader.aload()
|
|
|
+
|
|
|
urls = [
|
|
|
doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
|
|
|
] # only keep the urls returned by the loader
|