Переглянути джерело

ESQL: Use a must boolean statement when pushing down to Lucene when scoring is also needed (#124001)

* Use a "must" instead of "filter" when building the pushed down filter
AND when scoring is needed
Andrei Stefan 7 місяців тому
батько
коміт
ecb3d21b29

+ 7 - 0
docs/changelog/124001.yaml

@@ -0,0 +1,7 @@
+pr: 124001
+summary: Use a must boolean statement when pushing down to Lucene when scoring is
+  also needed
+area: ES|QL
+type: bug
+issues:
+ - 123967

+ 1 - 1
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java

@@ -29,7 +29,7 @@ import java.util.Objects;
 import static org.elasticsearch.core.Tuple.tuple;
 
 public class MetadataAttribute extends TypedAttribute {
-    public static final String TIMESTAMP_FIELD = "@timestamp";
+    public static final String TIMESTAMP_FIELD = "@timestamp"; // this is not a true metadata attribute
     public static final String TSID_FIELD = "_tsid";
     public static final String SCORE = "_score";
     public static final String INDEX = "_index";

+ 117 - 0
x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java

@@ -11,12 +11,16 @@ import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.support.WriteRequest;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.xpack.esql.VerificationException;
 import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
 import org.junit.Before;
 
 import java.util.List;
 
+import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.CoreMatchers.containsString;
 
@@ -120,6 +124,119 @@ public class MatchOperatorIT extends AbstractEsqlIntegTestCase {
         }
     }
 
+    /**
+     * Test for https://github.com/elastic/elasticsearch/issues/123967
+     */
+    public void testWhereMatchWithScoring_AndRequestFilter() {
+        var query = """
+            FROM test METADATA _score
+            | WHERE content:"fox"
+            | SORT _score DESC
+            | KEEP content, _score
+            """;
+
+        QueryBuilder filter = boolQuery().must(matchQuery("content", "brown"));
+
+        try (var resp = run(query, randomPragmas(), filter)) {
+            assertColumnNames(resp.columns(), List.of("content", "_score"));
+            assertColumnTypes(resp.columns(), List.of("text", "double"));
+            assertValues(
+                resp.values(),
+                List.of(
+                    List.of("This is a brown fox", 1.4274532794952393),
+                    List.of("The quick brown fox jumps over the lazy dog", 1.1248724460601807)
+                )
+            );
+        }
+    }
+
+    public void testWhereMatchWithScoring_AndNoScoreRequestFilter() {
+        var query = """
+            FROM test METADATA _score
+            | WHERE content:"fox"
+            | SORT _score DESC
+            | KEEP content, _score
+            """;
+
+        QueryBuilder filter = boolQuery().filter(matchQuery("content", "brown"));
+
+        try (var resp = run(query, randomPragmas(), filter)) {
+            assertColumnNames(resp.columns(), List.of("content", "_score"));
+            assertColumnTypes(resp.columns(), List.of("text", "double"));
+            assertValues(
+                resp.values(),
+                List.of(
+                    List.of("This is a brown fox", 1.156558871269226),
+                    List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624)
+                )
+            );
+        }
+    }
+
+    public void testWhereMatchWithScoring_And_MatchAllRequestFilter() {
+        var query = """
+            FROM test METADATA _score
+            | WHERE content:"fox"
+            | SORT _score DESC
+            | KEEP content, _score
+            """;
+
+        QueryBuilder filter = QueryBuilders.matchAllQuery();
+
+        try (var resp = run(query, randomPragmas(), filter)) {
+            assertColumnNames(resp.columns(), List.of("content", "_score"));
+            assertColumnTypes(resp.columns(), List.of("text", "double"));
+            assertValues(
+                resp.values(),
+                List.of(
+                    List.of("This is a brown fox", 2.1565589904785156),
+                    List.of("The quick brown fox jumps over the lazy dog", 1.9114001989364624)
+                )
+            );
+        }
+    }
+
+    public void testScoringOutsideQuery() {
+        var query = """
+            FROM test METADATA _score
+            | SORT _score DESC
+            | KEEP content, _score
+            """;
+
+        QueryBuilder filter = boolQuery().must(matchQuery("content", "fox"));
+
+        try (var resp = run(query, randomPragmas(), filter)) {
+            assertColumnNames(resp.columns(), List.of("content", "_score"));
+            assertColumnTypes(resp.columns(), List.of("text", "double"));
+            assertValues(
+                resp.values(),
+                List.of(
+                    List.of("This is a brown fox", 1.156558871269226),
+                    List.of("The quick brown fox jumps over the lazy dog", 0.9114001989364624)
+                )
+            );
+        }
+    }
+
+    public void testScoring_Zero_OutsideQuery() {
+        var query = """
+            FROM test METADATA _score
+            | SORT _score DESC
+            | KEEP content, _score
+            """;
+
+        QueryBuilder filter = boolQuery().filter(matchQuery("content", "fox"));
+
+        try (var resp = run(query, randomPragmas(), filter)) {
+            assertColumnNames(resp.columns(), List.of("content", "_score"));
+            assertColumnTypes(resp.columns(), List.of("text", "double"));
+            assertValues(
+                resp.values(),
+                List.of(List.of("This is a brown fox", 0.0), List.of("The quick brown fox jumps over the lazy dog", 0.0))
+            );
+        }
+    }
+
     public void testWhereMatchWithScoringDifferentSort() {
         var query = """
             FROM test

+ 2 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java

@@ -101,7 +101,8 @@ public class PushFiltersToSource extends PhysicalOptimizerRules.ParameterizedOpt
         if (newPushable.size() > 0) { // update the executable with pushable conditions
             Query queryDSL = TRANSLATOR_HANDLER.asQuery(Predicates.combineAnd(newPushable));
             QueryBuilder planQuery = queryDSL.asBuilder();
-            var query = Queries.combine(Queries.Clause.FILTER, asList(queryExec.query(), planQuery));
+            Queries.Clause combiningQueryClauseType = queryExec.hasScoring() ? Queries.Clause.MUST : Queries.Clause.FILTER;
+            var query = Queries.combine(combiningQueryClauseType, asList(queryExec.query(), planQuery));
             queryExec = new EsQueryExec(
                 queryExec.source(),
                 queryExec.indexPattern(),

+ 21 - 12
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java

@@ -32,27 +32,36 @@ public class ReplaceSourceAttributes extends PhysicalOptimizerRules.OptimizerRul
         var docId = new FieldAttribute(plan.source(), EsQueryExec.DOC_ID_FIELD.getName(), EsQueryExec.DOC_ID_FIELD);
         final List<Attribute> attributes = new ArrayList<>();
         attributes.add(docId);
-        if (plan.indexMode() == IndexMode.TIME_SERIES) {
-            Attribute tsid = null, timestamp = null;
-            for (Attribute attr : plan.output()) {
-                String name = attr.name();
-                if (name.equals(MetadataAttribute.TSID_FIELD)) {
+
+        var outputIterator = plan.output().iterator();
+        var isTimeSeries = plan.indexMode() == IndexMode.TIME_SERIES;
+        var keepIterating = true;
+        Attribute tsid = null, timestamp = null, score = null;
+
+        while (keepIterating && outputIterator.hasNext()) {
+            Attribute attr = outputIterator.next();
+            if (attr instanceof MetadataAttribute ma) {
+                if (ma.name().equals(MetadataAttribute.SCORE)) {
+                    score = attr;
+                } else if (isTimeSeries && ma.name().equals(MetadataAttribute.TSID_FIELD)) {
                     tsid = attr;
-                } else if (name.equals(MetadataAttribute.TIMESTAMP_FIELD)) {
-                    timestamp = attr;
                 }
+            } else if (attr.name().equals(MetadataAttribute.TIMESTAMP_FIELD)) {
+                timestamp = attr;
             }
+            keepIterating = score == null || (isTimeSeries && (tsid == null || timestamp == null));
+        }
+        if (isTimeSeries) {
             if (tsid == null || timestamp == null) {
                 throw new IllegalStateException("_tsid or @timestamp are missing from the time-series source");
             }
             attributes.add(tsid);
             attributes.add(timestamp);
         }
-        plan.output().forEach(attr -> {
-            if (attr instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE)) {
-                attributes.add(ma);
-            }
-        });
+        if (score != null) {
+            attributes.add(score);
+        }
+
         return new EsQueryExec(plan.source(), plan.indexPattern(), plan.indexMode(), plan.indexNameWithModes(), attributes, plan.query());
     }
 }

+ 10 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java

@@ -21,6 +21,7 @@ import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.xpack.esql.core.expression.Attribute;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
 import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
 import org.elasticsearch.xpack.esql.core.tree.NodeUtils;
 import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -204,6 +205,15 @@ public class EsQueryExec extends LeafExec implements EstimatesRowSize {
         return DOC_ID_FIELD.getName().equals(attr.name());
     }
 
+    public boolean hasScoring() {
+        for (Attribute a : attrs()) {
+            if (a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     @Override
     protected NodeInfo<EsQueryExec> info() {
         return NodeInfo.create(

+ 0 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java

@@ -37,7 +37,6 @@ public class EsStatsQueryExec extends LeafExec implements EstimatesRowSize {
     }
 
     public record Stat(String name, StatsType type, QueryBuilder query) {
-
         public QueryBuilder filter(QueryBuilder sourceQuery) {
             return query == null ? sourceQuery : Queries.combine(Queries.Clause.FILTER, asList(sourceQuery, query));
         }

+ 1 - 4
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

@@ -49,7 +49,6 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
 import org.elasticsearch.xpack.esql.core.expression.FoldContext;
-import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
 import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.core.type.KeywordEsField;
 import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
@@ -186,9 +185,7 @@ public class EsPhysicalOperationProviders extends AbstractPhysicalOperationProvi
         assert esQueryExec.estimatedRowSize() != null : "estimated row size not initialized";
         int rowEstimatedSize = esQueryExec.estimatedRowSize();
         int limit = esQueryExec.limit() != null ? (Integer) esQueryExec.limit().fold(context.foldCtx()) : NO_LIMIT;
-        boolean scoring = esQueryExec.attrs()
-            .stream()
-            .anyMatch(a -> a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE));
+        boolean scoring = esQueryExec.hasScoring();
         if ((sorts != null && sorts.isEmpty() == false)) {
             List<SortBuilder<?>> sortBuilders = new ArrayList<>(sorts.size());
             for (Sort sort : sorts) {