Просмотр исходного кода

Address edge case with k < k_reranker, sort results for cutting off

Marko Henning 1 год назад
Родитель
Коммит
c877b59cbc
Изменён 1 файл: 13 добавлений и 3 удаления
  1. 13 3
      backend/open_webui/retrieval/utils.py

+ 13 - 3
backend/open_webui/retrieval/utils.py

@@ -139,10 +139,20 @@ def query_doc_with_hybrid_search(
         )
 
         result = compression_retriever.invoke(query)
+
+        distances = [d.metadata.get("score") for d in result]
+        documents = [d.page_content for d in result]
+        metadatas = [d.metadata for d in result]
+
+        # retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
+        if k < k_reranker:
+            sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
+            sorted_items = sorted_items[:k]
+            distances, documents, metadatas = map(list, zip(*sorted_items))
         result = {
-            "distances": [[d.metadata.get("score") for d in result]],
-            "documents": [[d.page_content for d in result]],
-            "metadatas": [[d.metadata for d in result]],
+            "distances": [distances],
+            "documents": [documents]
+            "metadatas": [metadatas],
         }
 
         log.info(