瀏覽代碼

feat: add qdrant indices for metadata fields

All field names that are used in a query should
have an index for performance reasons. This is
even enforced on some Qdrant clusters, such as those
on qdrant.io, where queries using an unindexed field
fail with an error.
guenhter 3 月之前
父節點
當前提交
5c2e0e4beb

+ 19 - 0
backend/open_webui/retrieval/vector/dbs/qdrant.py

@@ -87,6 +87,25 @@ class QdrantClient(VectorDBBase):
             ),
         )
 
+        # Create payload indexes for efficient filtering
+        self.client.create_payload_index(
+            collection_name=collection_name_with_prefix,
+            field_name="metadata.hash",
+            field_schema=models.KeywordIndexParams(
+                type=models.KeywordIndexType.KEYWORD,
+                is_tenant=False,
+                on_disk=self.QDRANT_ON_DISK,
+            ),
+        )
+        self.client.create_payload_index(
+            collection_name=collection_name_with_prefix,
+            field_name="metadata.file_id",
+            field_schema=models.KeywordIndexParams(
+                type=models.KeywordIndexType.KEYWORD,
+                is_tenant=False,
+                on_disk=self.QDRANT_ON_DISK,
+            ),
+        )
         log.info(f"collection {collection_name_with_prefix} successfully created!")
 
     def _create_collection_if_not_exists(self, collection_name, dimension):

+ 19 - 0
backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py

@@ -229,6 +229,25 @@ class QdrantClient(VectorDBBase):
                 ),
                 wait=True,
             )
+            # Create payload indexes for efficient filtering on metadata.hash and metadata.file_id
+            self.client.create_payload_index(
+                collection_name=mt_collection_name,
+                field_name="metadata.hash",
+                field_schema=models.KeywordIndexParams(
+                    type=models.KeywordIndexType.KEYWORD,
+                    is_tenant=False,
+                    on_disk=self.QDRANT_ON_DISK,
+                ),
+            )
+            self.client.create_payload_index(
+                collection_name=mt_collection_name,
+                field_name="metadata.file_id",
+                field_schema=models.KeywordIndexParams(
+                    type=models.KeywordIndexType.KEYWORD,
+                    is_tenant=False,
+                    on_disk=self.QDRANT_ON_DISK,
+                ),
+            )
 
             log.info(
                 f"Multi-tenant collection {mt_collection_name} created with dimension {dimension}!"