浏览代码

ESQL: Fix single value query (#102317)

This fixes the lucene query that we use to push "is this a single valued
field?" into lucene. In particular, it was erroniously turning itself
into a noop any time it saw a field that could have either 0 or 1 value
per document. It's right and good for the query to noop itself only when
a field can only have 1 value per document. This adds logic to check for
that.

Closes #102298
Nik Everett 1 年之前
父节点
当前提交
6eac8df0c2

+ 6 - 0
docs/changelog/102317.yaml

@@ -0,0 +1,6 @@
+pr: 102317
+summary: "ESQL: Fix single value query"
+area: ES|QL
+type: bug
+issues:
+ - 102298

+ 32 - 10
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java

@@ -10,8 +10,10 @@ package org.elasticsearch.xpack.esql.querydsl.query;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
@@ -321,20 +323,30 @@ public class SingleValueQuery extends Query {
              * can't do that because we need the check the number of fields.
              */
             if (lfd instanceof LeafNumericFieldData n) {
-                return scorer(nextScorer, n);
+                return scorer(context, nextScorer, n);
             }
             if (lfd instanceof LeafOrdinalsFieldData o) {
-                return scorer(nextScorer, o);
+                return scorer(context, nextScorer, o);
             }
             return scorer(nextScorer, lfd);
         }
 
-        private Scorer scorer(Scorer nextScorer, LeafNumericFieldData lfd) {
+        private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafNumericFieldData lfd) throws IOException {
             SortedNumericDocValues sortedNumerics = lfd.getLongValues();
             if (DocValues.unwrapSingleton(sortedNumerics) != null) {
-                // Segment contains only single valued fields.
-                stats.numericSingle++;
-                return nextScorer;
+                /*
+                 * Segment contains only single valued fields. But it's possible
+                 * that some fields have 0 values. The most surefire way to check
+                 * is to look at the index for the data. If there isn't an index
+                 * this isn't going to work - but if there is we can compare the
+                 * number of documents in the index to the number of values in it -
+                 * if they are the same we've got a dense singleton.
+                 */
+                PointValues points = context.reader().getPointValues(fieldData.getFieldName());
+                if (points != null && points.getDocCount() == context.reader().maxDoc()) {
+                    stats.numericSingle++;
+                    return nextScorer;
+                }
             }
             TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator();
             if (nextIterator == null) {
@@ -353,12 +365,22 @@ public class SingleValueQuery extends Query {
             );
         }
 
-        private Scorer scorer(Scorer nextScorer, LeafOrdinalsFieldData lfd) {
+        private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafOrdinalsFieldData lfd) throws IOException {
             SortedSetDocValues sortedSet = lfd.getOrdinalsValues();
             if (DocValues.unwrapSingleton(sortedSet) != null) {
-                // Segment contains only single valued fields.
-                stats.ordinalsSingle++;
-                return nextScorer;
+                /*
+                 * Segment contains only single valued fields. But it's possible
+                 * that some fields have 0 values. The most surefire way to check
+                 * is to look at the index for the data. If there isn't an index
+                 * this isn't going to work - but if there is we can compare the
+                 * number of documents in the index to the number of values in it -
+                 * if they are the same we've got a dense singleton.
+                 */
+                Terms terms = context.reader().terms(fieldData.getFieldName());
+                if (terms != null && terms.getDocCount() == context.reader().maxDoc()) {
+                    stats.ordinalsSingle++;
+                    return nextScorer;
+                }
             }
             TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator();
             if (nextIterator == null) {

+ 28 - 20
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java

@@ -57,8 +57,11 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
     public static List<Object[]> params() {
         List<Object[]> params = new ArrayList<>();
         for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) {
-            params.add(new Object[] { new StandardSetup(fieldType, false) });
-            params.add(new Object[] { new StandardSetup(fieldType, true) });
+            for (boolean multivaluedField : new boolean[] { true, false }) {
+                for (boolean allowEmpty : new boolean[] { true, false }) {
+                    params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
+                }
+            }
         }
         params.add(new Object[] { new FieldMissingSetup() });
         return params;
@@ -196,7 +199,7 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
         }
     }
 
-    private record StandardSetup(String fieldType, boolean multivaluedField) implements Setup {
+    private record StandardSetup(String fieldType, boolean multivaluedField, boolean empty, int count) implements Setup {
         @Override
         public XContentBuilder mapping(XContentBuilder builder) throws IOException {
             builder.startObject("i").field("type", "long").endObject();
@@ -207,27 +210,32 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
         @Override
         public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
             List<List<Object>> fieldValues = new ArrayList<>(100);
-            for (int i = 0; i < 100; i++) {
-                // i == 10 forces at least one multivalued field when we're configured for multivalued fields
-                boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean());
-                List<Object> values;
-                if (makeMultivalued) {
-                    int count = between(2, 10);
-                    Set<Object> set = new HashSet<>(count);
-                    while (set.size() < count) {
-                        set.add(randomValue());
-                    }
-                    values = List.copyOf(set);
-                } else {
-                    values = List.of(randomValue());
-                }
+            for (int i = 0; i < count; i++) {
+                List<Object> values = values(i);
                 fieldValues.add(values);
                 iw.addDocument(docFor(i, values));
             }
-
             return fieldValues;
         }
 
+        private List<Object> values(int i) {
+            // i == 10 forces at least one multivalued field when we're configured for multivalued fields
+            boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean());
+            if (makeMultivalued) {
+                int count = between(2, 10);
+                Set<Object> set = new HashSet<>(count);
+                while (set.size() < count) {
+                    set.add(randomValue());
+                }
+                return List.copyOf(set);
+            }
+            // i == 0 forces at least one empty field when we're configured for empty fields
+            if (empty && (i == 0 || randomBoolean())) {
+                return List.of();
+            }
+            return List.of(randomValue());
+        }
+
         private Object randomValue() {
             return switch (fieldType) {
                 case "long" -> randomLong();
@@ -279,7 +287,7 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
                     assertThat(builder.stats().bytesApprox(), equalTo(0));
                     assertThat(builder.stats().bytesNoApprox(), equalTo(0));
 
-                    if (multivaluedField) {
+                    if (multivaluedField || empty) {
                         assertThat(builder.stats().numericSingle(), greaterThanOrEqualTo(0));
                         if (subHasTwoPhase) {
                             assertThat(builder.stats().numericMultiNoApprox(), equalTo(0));
@@ -300,7 +308,7 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
                     assertThat(builder.stats().numericMultiApprox(), equalTo(0));
                     assertThat(builder.stats().bytesApprox(), equalTo(0));
                     assertThat(builder.stats().bytesNoApprox(), equalTo(0));
-                    if (multivaluedField) {
+                    if (multivaluedField || empty) {
                         assertThat(builder.stats().ordinalsSingle(), greaterThanOrEqualTo(0));
                         if (subHasTwoPhase) {
                             assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0));