浏览代码

Knn vector rescoring to sort score docs (#122653)

RescoreKnnVectorQuery rewrites to KnnScoreDocQuery, which takes a sorted array of
doc ids and a corresponding array including scores for such docs. A binary search is
performed on top of the docs array, and such global ids are converted back to
segment level ids (subtracting the context docbase) when scoring docs.

RescoreKnnVectorQuery did not sort the array of docs, which caused binary search
to return non-deterministic results, which in turn made us look up wrong docs,
sometimes using out-of-bounds ids. One symptom of this was observed in a DfsProfilerIT
test failure which triggered a Lucene assertion around doc id being outside of the
range of the bitset of live docs.

The fix is to simply sort the score docs array by doc id before extracting doc ids and scores
and providing them to KnnScoreDocQuery upon rewrite.

Relates to #116663

Closes #119711
Luca Cavanna 8 月之前
父节点
当前提交
9adb91d4fe

+ 6 - 0
docs/changelog/122653.yaml

@@ -0,0 +1,6 @@
+pr: 122653
+summary: Knn vector rescoring to sort score docs
+area: Vector Search
+type: bug
+issues:
+ - 119711

+ 0 - 3
muted-tests.yml

@@ -150,9 +150,6 @@ tests:
   issue: https://github.com/elastic/elasticsearch/issues/117740
   issue: https://github.com/elastic/elasticsearch/issues/117740
 - class: org.elasticsearch.xpack.security.authc.ldap.MultiGroupMappingIT
 - class: org.elasticsearch.xpack.security.authc.ldap.MultiGroupMappingIT
   issue: https://github.com/elastic/elasticsearch/issues/119599
   issue: https://github.com/elastic/elasticsearch/issues/119599
-- class: org.elasticsearch.search.profile.dfs.DfsProfilerIT
-  method: testProfileDfs
-  issue: https://github.com/elastic/elasticsearch/issues/119711
 - class: org.elasticsearch.multi_cluster.MultiClusterYamlTestSuiteIT
 - class: org.elasticsearch.multi_cluster.MultiClusterYamlTestSuiteIT
   issue: https://github.com/elastic/elasticsearch/issues/119983
   issue: https://github.com/elastic/elasticsearch/issues/119983
 - class: org.elasticsearch.xpack.test.rest.XPackRestIT
 - class: org.elasticsearch.xpack.test.rest.XPackRestIT

+ 2 - 0
server/src/main/java/org/elasticsearch/search/vectors/RescoreKnnVectorQuery.java

@@ -23,6 +23,7 @@ import org.elasticsearch.search.profile.query.QueryProfiler;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Arrays;
+import java.util.Comparator;
 import java.util.Objects;
 import java.util.Objects;
 
 
 /**
 /**
@@ -60,6 +61,7 @@ public class RescoreKnnVectorQuery extends Query implements QueryProfilerProvide
         TopDocs topDocs = searcher.search(query, k);
         TopDocs topDocs = searcher.search(query, k);
         vectorOperations = topDocs.totalHits.value();
         vectorOperations = topDocs.totalHits.value();
         ScoreDoc[] scoreDocs = topDocs.scoreDocs;
         ScoreDoc[] scoreDocs = topDocs.scoreDocs;
+        Arrays.sort(scoreDocs, Comparator.comparingInt(scoreDoc -> scoreDoc.doc));
         int[] docIds = new int[scoreDocs.length];
         int[] docIds = new int[scoreDocs.length];
         float[] scores = new float[scoreDocs.length];
         float[] scores = new float[scoreDocs.length];
         for (int i = 0; i < scoreDocs.length; i++) {
         for (int i = 0; i < scoreDocs.length; i++) {