|
@@ -1,140 +1,118 @@
|
|
|
-from fastapi import (
|
|
|
- FastAPI,
|
|
|
- Depends,
|
|
|
- HTTPException,
|
|
|
- status,
|
|
|
- UploadFile,
|
|
|
- File,
|
|
|
- Form,
|
|
|
-)
|
|
|
-from fastapi.middleware.cors import CORSMiddleware
|
|
|
-import requests
|
|
|
-import os, shutil, logging, re
|
|
|
+import json
|
|
|
+import logging
|
|
|
+import mimetypes
|
|
|
+import os
|
|
|
+import shutil
|
|
|
+import socket
|
|
|
+import urllib.parse
|
|
|
+import uuid
|
|
|
from datetime import datetime
|
|
|
-
|
|
|
from pathlib import Path
|
|
|
-from typing import Union, Sequence, Iterator, Any
|
|
|
-
|
|
|
-from chromadb.utils.batch_utils import create_batches
|
|
|
-from langchain_core.documents import Document
|
|
|
-
|
|
|
-from langchain_community.document_loaders import (
|
|
|
- WebBaseLoader,
|
|
|
- TextLoader,
|
|
|
- PyPDFLoader,
|
|
|
- CSVLoader,
|
|
|
- BSHTMLLoader,
|
|
|
- Docx2txtLoader,
|
|
|
- UnstructuredEPubLoader,
|
|
|
- UnstructuredWordDocumentLoader,
|
|
|
- UnstructuredMarkdownLoader,
|
|
|
- UnstructuredXMLLoader,
|
|
|
- UnstructuredRSTLoader,
|
|
|
- UnstructuredExcelLoader,
|
|
|
- UnstructuredPowerPointLoader,
|
|
|
- YoutubeLoader,
|
|
|
- OutlookMessageLoader,
|
|
|
-)
|
|
|
-from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
+from typing import Iterator, Optional, Sequence, Union
|
|
|
|
|
|
+import requests
|
|
|
import validators
|
|
|
-import urllib.parse
|
|
|
-import socket
|
|
|
-
|
|
|
-
|
|
|
-from pydantic import BaseModel
|
|
|
-from typing import Optional
|
|
|
-import mimetypes
|
|
|
-import uuid
|
|
|
-import json
|
|
|
-
|
|
|
-from apps.webui.models.documents import (
|
|
|
- Documents,
|
|
|
- DocumentForm,
|
|
|
- DocumentResponse,
|
|
|
-)
|
|
|
-from apps.webui.models.files import (
|
|
|
- Files,
|
|
|
-)
|
|
|
-
|
|
|
-from apps.rag.utils import (
|
|
|
- get_model_path,
|
|
|
+from open_webui.apps.rag.search.brave import search_brave
|
|
|
+from open_webui.apps.rag.search.duckduckgo import search_duckduckgo
|
|
|
+from open_webui.apps.rag.search.google_pse import search_google_pse
|
|
|
+from open_webui.apps.rag.search.jina_search import search_jina
|
|
|
+from open_webui.apps.rag.search.main import SearchResult
|
|
|
+from open_webui.apps.rag.search.searchapi import search_searchapi
|
|
|
+from open_webui.apps.rag.search.searxng import search_searxng
|
|
|
+from open_webui.apps.rag.search.serper import search_serper
|
|
|
+from open_webui.apps.rag.search.serply import search_serply
|
|
|
+from open_webui.apps.rag.search.serpstack import search_serpstack
|
|
|
+from open_webui.apps.rag.search.tavily import search_tavily
|
|
|
+from open_webui.apps.rag.utils import (
|
|
|
get_embedding_function,
|
|
|
- query_doc,
|
|
|
- query_doc_with_hybrid_search,
|
|
|
+ get_model_path,
|
|
|
query_collection,
|
|
|
query_collection_with_hybrid_search,
|
|
|
+ query_doc,
|
|
|
+ query_doc_with_hybrid_search,
|
|
|
)
|
|
|
-
|
|
|
-from apps.rag.search.brave import search_brave
|
|
|
-from apps.rag.search.google_pse import search_google_pse
|
|
|
-from apps.rag.search.main import SearchResult
|
|
|
-from apps.rag.search.searxng import search_searxng
|
|
|
-from apps.rag.search.serper import search_serper
|
|
|
-from apps.rag.search.serpstack import search_serpstack
|
|
|
-from apps.rag.search.serply import search_serply
|
|
|
-from apps.rag.search.duckduckgo import search_duckduckgo
|
|
|
-from apps.rag.search.tavily import search_tavily
|
|
|
-from apps.rag.search.jina_search import search_jina
|
|
|
-
|
|
|
-from utils.misc import (
|
|
|
- calculate_sha256,
|
|
|
- calculate_sha256_string,
|
|
|
- sanitize_filename,
|
|
|
- extract_folders_after_data_docs,
|
|
|
-)
|
|
|
-from utils.utils import get_verified_user, get_admin_user
|
|
|
-
|
|
|
-from config import (
|
|
|
- AppConfig,
|
|
|
- ENV,
|
|
|
- SRC_LOG_LEVELS,
|
|
|
- UPLOAD_DIR,
|
|
|
- DOCS_DIR,
|
|
|
+from open_webui.apps.webui.models.documents import DocumentForm, Documents
|
|
|
+from open_webui.apps.webui.models.files import Files
|
|
|
+from chromadb.utils.batch_utils import create_batches
|
|
|
+from open_webui.config import (
|
|
|
+ BRAVE_SEARCH_API_KEY,
|
|
|
+ CHROMA_CLIENT,
|
|
|
+ CHUNK_OVERLAP,
|
|
|
+ CHUNK_SIZE,
|
|
|
CONTENT_EXTRACTION_ENGINE,
|
|
|
- TIKA_SERVER_URL,
|
|
|
- RAG_TOP_K,
|
|
|
- RAG_RELEVANCE_THRESHOLD,
|
|
|
- RAG_FILE_MAX_SIZE,
|
|
|
- RAG_FILE_MAX_COUNT,
|
|
|
+ CORS_ALLOW_ORIGIN,
|
|
|
+ DEVICE_TYPE,
|
|
|
+ DOCS_DIR,
|
|
|
+ ENABLE_RAG_HYBRID_SEARCH,
|
|
|
+ ENABLE_RAG_LOCAL_WEB_FETCH,
|
|
|
+ ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
|
|
+ ENABLE_RAG_WEB_SEARCH,
|
|
|
+ ENV,
|
|
|
+ GOOGLE_PSE_API_KEY,
|
|
|
+ GOOGLE_PSE_ENGINE_ID,
|
|
|
+ PDF_EXTRACT_IMAGES,
|
|
|
RAG_EMBEDDING_ENGINE,
|
|
|
RAG_EMBEDDING_MODEL,
|
|
|
RAG_EMBEDDING_MODEL_AUTO_UPDATE,
|
|
|
RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
|
|
|
- ENABLE_RAG_HYBRID_SEARCH,
|
|
|
- ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
|
|
+ RAG_EMBEDDING_OPENAI_BATCH_SIZE,
|
|
|
+ RAG_FILE_MAX_COUNT,
|
|
|
+ RAG_FILE_MAX_SIZE,
|
|
|
+ RAG_OPENAI_API_BASE_URL,
|
|
|
+ RAG_OPENAI_API_KEY,
|
|
|
+ RAG_RELEVANCE_THRESHOLD,
|
|
|
RAG_RERANKING_MODEL,
|
|
|
- PDF_EXTRACT_IMAGES,
|
|
|
RAG_RERANKING_MODEL_AUTO_UPDATE,
|
|
|
RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
|
|
|
- RAG_OPENAI_API_BASE_URL,
|
|
|
- RAG_OPENAI_API_KEY,
|
|
|
- DEVICE_TYPE,
|
|
|
- CHROMA_CLIENT,
|
|
|
- CHUNK_SIZE,
|
|
|
- CHUNK_OVERLAP,
|
|
|
RAG_TEMPLATE,
|
|
|
- ENABLE_RAG_LOCAL_WEB_FETCH,
|
|
|
- YOUTUBE_LOADER_LANGUAGE,
|
|
|
- ENABLE_RAG_WEB_SEARCH,
|
|
|
- RAG_WEB_SEARCH_ENGINE,
|
|
|
+ RAG_TOP_K,
|
|
|
+ RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
|
|
+ RAG_WEB_SEARCH_ENGINE,
|
|
|
+ RAG_WEB_SEARCH_RESULT_COUNT,
|
|
|
+ SEARCHAPI_API_KEY,
|
|
|
+ SEARCHAPI_ENGINE,
|
|
|
SEARXNG_QUERY_URL,
|
|
|
- GOOGLE_PSE_API_KEY,
|
|
|
- GOOGLE_PSE_ENGINE_ID,
|
|
|
- BRAVE_SEARCH_API_KEY,
|
|
|
- SERPSTACK_API_KEY,
|
|
|
- SERPSTACK_HTTPS,
|
|
|
SERPER_API_KEY,
|
|
|
SERPLY_API_KEY,
|
|
|
+ SERPSTACK_API_KEY,
|
|
|
+ SERPSTACK_HTTPS,
|
|
|
TAVILY_API_KEY,
|
|
|
- RAG_WEB_SEARCH_RESULT_COUNT,
|
|
|
- RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
- RAG_EMBEDDING_OPENAI_BATCH_SIZE,
|
|
|
- CORS_ALLOW_ORIGIN,
|
|
|
+ TIKA_SERVER_URL,
|
|
|
+ UPLOAD_DIR,
|
|
|
+ YOUTUBE_LOADER_LANGUAGE,
|
|
|
+ AppConfig,
|
|
|
)
|
|
|
-
|
|
|
-from constants import ERROR_MESSAGES
|
|
|
+from open_webui.constants import ERROR_MESSAGES
|
|
|
+from open_webui.env import SRC_LOG_LEVELS
|
|
|
+from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile, status
|
|
|
+from fastapi.middleware.cors import CORSMiddleware
|
|
|
+from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
+from langchain_community.document_loaders import (
|
|
|
+ BSHTMLLoader,
|
|
|
+ CSVLoader,
|
|
|
+ Docx2txtLoader,
|
|
|
+ OutlookMessageLoader,
|
|
|
+ PyPDFLoader,
|
|
|
+ TextLoader,
|
|
|
+ UnstructuredEPubLoader,
|
|
|
+ UnstructuredExcelLoader,
|
|
|
+ UnstructuredMarkdownLoader,
|
|
|
+ UnstructuredPowerPointLoader,
|
|
|
+ UnstructuredRSTLoader,
|
|
|
+ UnstructuredXMLLoader,
|
|
|
+ WebBaseLoader,
|
|
|
+ YoutubeLoader,
|
|
|
+)
|
|
|
+from langchain_core.documents import Document
|
|
|
+from pydantic import BaseModel
|
|
|
+from open_webui.utils.misc import (
|
|
|
+ calculate_sha256,
|
|
|
+ calculate_sha256_string,
|
|
|
+ extract_folders_after_data_docs,
|
|
|
+ sanitize_filename,
|
|
|
+)
|
|
|
+from open_webui.utils.utils import get_admin_user, get_verified_user
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
|
@@ -189,6 +167,8 @@ app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
|
|
|
app.state.config.SERPER_API_KEY = SERPER_API_KEY
|
|
|
app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
|
|
|
app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
|
|
|
+app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY
|
|
|
+app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE
|
|
|
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
|
|
|
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
|
|
|
|
|
@@ -427,6 +407,8 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
|
|
"serper_api_key": app.state.config.SERPER_API_KEY,
|
|
|
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
|
|
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
|
|
+ "searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
|
|
|
+ "searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
|
|
|
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
|
|
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
|
},
|
|
@@ -466,6 +448,8 @@ class WebSearchConfig(BaseModel):
|
|
|
serper_api_key: Optional[str] = None
|
|
|
serply_api_key: Optional[str] = None
|
|
|
tavily_api_key: Optional[str] = None
|
|
|
+ searchapi_api_key: Optional[str] = None
|
|
|
+ searchapi_engine: Optional[str] = None
|
|
|
result_count: Optional[int] = None
|
|
|
concurrent_requests: Optional[int] = None
|
|
|
|
|
@@ -529,6 +513,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
|
|
app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
|
|
|
app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
|
|
|
app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
|
|
|
+ app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key
|
|
|
+ app.state.config.SEARCHAPI_ENGINE = form_data.web.search.searchapi_engine
|
|
|
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
|
|
|
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
|
|
|
form_data.web.search.concurrent_requests
|
|
@@ -566,6 +552,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
|
|
"serpstack_https": app.state.config.SERPSTACK_HTTPS,
|
|
|
"serper_api_key": app.state.config.SERPER_API_KEY,
|
|
|
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
|
|
+ "searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
|
|
|
+ "searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
|
|
|
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
|
|
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
|
|
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
|
@@ -817,6 +805,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
|
|
|
- SERPER_API_KEY
|
|
|
- SERPLY_API_KEY
|
|
|
- TAVILY_API_KEY
|
|
|
+ - SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
|
|
|
Args:
|
|
|
query (str): The query to search for
|
|
|
"""
|
|
@@ -904,6 +893,17 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
|
|
|
)
|
|
|
else:
|
|
|
raise Exception("No TAVILY_API_KEY found in environment variables")
|
|
|
+ elif engine == "searchapi":
|
|
|
+ if app.state.config.SEARCHAPI_API_KEY:
|
|
|
+ return search_searchapi(
|
|
|
+ app.state.config.SEARCHAPI_API_KEY,
|
|
|
+ app.state.config.SEARCHAPI_ENGINE,
|
|
|
+ query,
|
|
|
+ app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
|
|
+ app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
|
|
+ )
|
|
|
+ else:
|
|
|
+ raise Exception("No SEARCHAPI_API_KEY found in environment variables")
|
|
|
elif engine == "jina":
|
|
|
return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
|
|
|
else:
|
|
@@ -954,7 +954,6 @@ def store_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
|
|
def store_data_in_vector_db(
|
|
|
data, collection_name, metadata: Optional[dict] = None, overwrite: bool = False
|
|
|
) -> bool:
|
|
|
-
|
|
|
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
chunk_size=app.state.config.CHUNK_SIZE,
|
|
|
chunk_overlap=app.state.config.CHUNK_OVERLAP,
|
|
@@ -1315,7 +1314,6 @@ def store_text(
|
|
|
form_data: TextRAGForm,
|
|
|
user=Depends(get_verified_user),
|
|
|
):
|
|
|
-
|
|
|
collection_name = form_data.collection_name
|
|
|
if collection_name is None:
|
|
|
collection_name = calculate_sha256_string(form_data.content)
|