Browse Source

Rename BM25_WEIGHT -> HYBRID_BM25_WEIGHT (env var, persisted config key, API field, and keyword argument)

Jan Kessler 1 month ago
parent
commit
e70dd33233

+ 4 - 4
backend/open_webui/config.py

@@ -1928,10 +1928,10 @@ RAG_RELEVANCE_THRESHOLD = PersistentConfig(
     "rag.relevance_threshold",
     float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0")),
 )
-RAG_BM25_WEIGHT = PersistentConfig(
-    "RAG_BM25_WEIGHT",
-    "rag.bm25_weight",
-    float(os.environ.get("RAG_BM25_WEIGHT", "0.5")),
+RAG_HYBRID_BM25_WEIGHT = PersistentConfig(
+    "RAG_HYBRID_BM25_WEIGHT",
+    "rag.hybrid_bm25_weight",
+    float(os.environ.get("RAG_HYBRID_BM25_WEIGHT", "0.5")),
 )
 
 ENABLE_RAG_HYBRID_SEARCH = PersistentConfig(

+ 2 - 2
backend/open_webui/main.py

@@ -199,7 +199,7 @@ from open_webui.config import (
     RAG_TOP_K,
     RAG_TOP_K_RERANKER,
     RAG_RELEVANCE_THRESHOLD,
-    RAG_BM25_WEIGHT,
+    RAG_HYBRID_BM25_WEIGHT,
     RAG_ALLOWED_FILE_EXTENSIONS,
     RAG_FILE_MAX_COUNT,
     RAG_FILE_MAX_SIZE,
@@ -647,7 +647,7 @@ app.state.FUNCTIONS = {}
 app.state.config.TOP_K = RAG_TOP_K
 app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER
 app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
-app.state.config.BM25_WEIGHT = RAG_BM25_WEIGHT
+app.state.config.HYBRID_BM25_WEIGHT = RAG_HYBRID_BM25_WEIGHT
 app.state.config.ALLOWED_FILE_EXTENSIONS = RAG_ALLOWED_FILE_EXTENSIONS
 app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE
 app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT

+ 8 - 8
backend/open_webui/retrieval/utils.py

@@ -116,7 +116,7 @@ def query_doc_with_hybrid_search(
     reranking_function,
     k_reranker: int,
     r: float,
-    bm25_weight: float,
+    hybrid_bm25_weight: float,
 ) -> dict:
     try:
         log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
@@ -132,18 +132,18 @@ def query_doc_with_hybrid_search(
             top_k=k,
         )
 
-        if bm25_weight <= 0:
+        if hybrid_bm25_weight <= 0:
             ensemble_retriever = EnsembleRetriever(
                 retrievers=[vector_search_retriever], weights=[1.]
             )
-        elif bm25_weight >= 1:
+        elif hybrid_bm25_weight >= 1:
             ensemble_retriever = EnsembleRetriever(
                 retrievers=[bm25_retriever], weights=[1.]
             )
         else:
             ensemble_retriever = EnsembleRetriever(
                 retrievers=[bm25_retriever, vector_search_retriever],
-                weights=[bm25_weight, 1. - bm25_weight]
+                weights=[hybrid_bm25_weight, 1. - hybrid_bm25_weight]
             )
 
         compressor = RerankCompressor(
@@ -325,7 +325,7 @@ def query_collection_with_hybrid_search(
     reranking_function,
     k_reranker: int,
     r: float,
-    bm25_weight: float,
+    hybrid_bm25_weight: float,
 ) -> dict:
     results = []
     error = False
@@ -359,7 +359,7 @@ def query_collection_with_hybrid_search(
                 reranking_function=reranking_function,
                 k_reranker=k_reranker,
                 r=r,
-                bm25_weight=bm25_weight,
+                hybrid_bm25_weight=hybrid_bm25_weight,
             )
             return result, None
         except Exception as e:
@@ -447,7 +447,7 @@ def get_sources_from_files(
     reranking_function,
     k_reranker,
     r,
-    bm25_weight,
+    hybrid_bm25_weight,
     hybrid_search,
     full_context=False,
 ):
@@ -565,7 +565,7 @@ def get_sources_from_files(
                                     reranking_function=reranking_function,
                                     k_reranker=k_reranker,
                                     r=r,
-                                    bm25_weight=bm25_weight,
+                                    hybrid_bm25_weight=hybrid_bm25_weight,
                                 )
                             except Exception as e:
                                 log.debug(

+ 15 - 15
backend/open_webui/routers/retrieval.py

@@ -349,7 +349,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
         "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
         "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER,
         "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD,
-        "BM25_WEIGHT": request.app.state.config.BM25_WEIGHT,
+        "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT,
         # Content extraction settings
         "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
         "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
@@ -493,7 +493,7 @@ class ConfigForm(BaseModel):
     ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None
     TOP_K_RERANKER: Optional[int] = None
     RELEVANCE_THRESHOLD: Optional[float] = None
-    BM25_WEIGHT: Optional[float] = None
+    HYBRID_BM25_WEIGHT: Optional[float] = None
 
     # Content extraction settings
     CONTENT_EXTRACTION_ENGINE: Optional[str] = None
@@ -580,10 +580,10 @@ async def update_rag_config(
         if form_data.RELEVANCE_THRESHOLD is not None
         else request.app.state.config.RELEVANCE_THRESHOLD
     )
-    request.app.state.config.BM25_WEIGHT = (
-        form_data.BM25_WEIGHT
-        if form_data.BM25_WEIGHT is not None
-        else request.app.state.config.BM25_WEIGHT
+    request.app.state.config.HYBRID_BM25_WEIGHT = (
+        form_data.HYBRID_BM25_WEIGHT
+        if form_data.HYBRID_BM25_WEIGHT is not None
+        else request.app.state.config.HYBRID_BM25_WEIGHT
     )
 
     # Content extraction settings
@@ -844,7 +844,7 @@ async def update_rag_config(
         "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
         "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER,
         "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD,
-        "BM25_WEIGHT": request.app.state.config.BM25_WEIGHT,
+        "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT,
         # Content extraction settings
         "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
         "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
@@ -1782,10 +1782,10 @@ def query_doc_handler(
                     if form_data.r
                     else request.app.state.config.RELEVANCE_THRESHOLD
                 ),
-                bm25_weight=(
-                    form_data.bm25_weight
-                    if form_data.bm25_weight
-                    else request.app.state.config.BM25_WEIGHT
+                hybrid_bm25_weight=(
+                    form_data.hybrid_bm25_weight
+                    if form_data.hybrid_bm25_weight
+                    else request.app.state.config.HYBRID_BM25_WEIGHT
                 ),
                 user=user,
             )
@@ -1838,10 +1838,10 @@ def query_collection_handler(
                     if form_data.r
                     else request.app.state.config.RELEVANCE_THRESHOLD
                 ),
-                bm25_weight=(
-                    form_data.bm25_weight
-                    if form_data.bm25_weight
-                    else request.app.state.config.BM25_WEIGHT
+                hybrid_bm25_weight=(
+                    form_data.hybrid_bm25_weight
+                    if form_data.hybrid_bm25_weight
+                    else request.app.state.config.HYBRID_BM25_WEIGHT
                 ),
             )
         else:

+ 1 - 1
backend/open_webui/utils/middleware.py

@@ -603,7 +603,7 @@ async def chat_completion_files_handler(
                         reranking_function=request.app.state.rf,
                         k_reranker=request.app.state.config.TOP_K_RERANKER,
                         r=request.app.state.config.RELEVANCE_THRESHOLD,
-                        bm25_weight=request.app.state.config.BM25_WEIGHT,
+                        hybrid_bm25_weight=request.app.state.config.HYBRID_BM25_WEIGHT,
                         hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
                         full_context=request.app.state.config.RAG_FULL_CONTEXT,
                     ),

+ 2 - 2
src/lib/components/admin/Settings/Documents.svelte

@@ -773,14 +773,14 @@
 
 							{#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true}
 								<div class="mb-2.5 flex w-full justify-between">
-									<div class="self-center text-xs font-medium">{$i18n.t('BM25 Weight')}</div>
+									<div class="self-center text-xs font-medium">{$i18n.t('Weight of BM25 Retrieval')}</div>
 									<div class="flex items-center relative">
 										<input
 											class="flex-1 w-full text-sm bg-transparent outline-hidden"
 											type="number"
 											step="0.01"
 											placeholder={$i18n.t('Enter BM25 Weight')}
-											bind:value={RAGConfig.BM25_WEIGHT}
+											bind:value={RAGConfig.HYBRID_BM25_WEIGHT}
 											autocomplete="off"
 											min="0.0"
 											max="1.0"