Browse Source

Merge pull request #15250 from dlamoris/dev

fix: opensearch race condition, use keyword search instead of full text search for filter query
Tim Jaeryang Baek 3 days ago
parent
commit
baa5920b7a
1 changed file with 6 additions and 3 deletions
  1. +6 -3
      backend/open_webui/retrieval/vector/dbs/opensearch.py

+ 6 - 3
backend/open_webui/retrieval/vector/dbs/opensearch.py

@@ -157,10 +157,10 @@ class OpenSearchClient(VectorDBBase):
 
         for field, value in filter.items():
             query_body["query"]["bool"]["filter"].append(
-                {"match": {"metadata." + str(field): value}}
+                {"term": {"metadata." + str(field) + ".keyword": value}}
             )
 
-        size = limit if limit else 10
+        size = limit if limit else 10000
 
         try:
             result = self.client.search(
@@ -206,6 +206,7 @@ class OpenSearchClient(VectorDBBase):
                 for item in batch
             ]
             bulk(self.client, actions)
+        self.client.indices.refresh(self._get_index_name(collection_name))
 
     def upsert(self, collection_name: str, items: list[VectorItem]):
         self._create_index_if_not_exists(
@@ -228,6 +229,7 @@ class OpenSearchClient(VectorDBBase):
                 for item in batch
             ]
             bulk(self.client, actions)
+        self.client.indices.refresh(self._get_index_name(collection_name))
 
     def delete(
         self,
@@ -251,11 +253,12 @@ class OpenSearchClient(VectorDBBase):
             }
             for field, value in filter.items():
                 query_body["query"]["bool"]["filter"].append(
-                    {"match": {"metadata." + str(field): value}}
+                    {"term": {"metadata." + str(field) + ".keyword": value}}
                 )
             self.client.delete_by_query(
                 index=self._get_index_name(collection_name), body=query_body
             )
+        self.client.indices.refresh(self._get_index_name(collection_name))
 
     def reset(self):
         indices = self.client.indices.get(index=f"{self.index_prefix}_*")