浏览代码

Pushdown for LIKE (LIST) (#129557)

Improved performance of LIKE (LIST) by pushing an Automaton down to Lucene to do the evaluation.
Julian Kiryakov 3 月之前
父节点
当前提交
caae426cf7

+ 5 - 0
docs/changelog/129557.yaml

@@ -0,0 +1,5 @@
+pr: 129557
+summary: Pushdown for LIKE (LIST)
+area: ES|QL
+type: enhancement
+issues: []

+ 104 - 0
server/src/main/java/org/elasticsearch/index/query/AutomatonQueryBuilder.java

@@ -0,0 +1,104 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.query;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.automaton.Automaton;
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Objects;
+
+/**
+ * Query builder that matches documents based on a Lucene {@link Automaton} supplied by the caller.
+ * <p>
+ * The builder supports neither serialization nor an XContent representation: {@code getWriteableName},
+ * {@code doWriteTo}, {@code doXContent} and {@code getMinimalSupportedVersion} all throw.
+ */
+public class AutomatonQueryBuilder extends AbstractQueryBuilder<AutomatonQueryBuilder> implements MultiTermQueryBuilder {
+    private final String fieldName;
+    private final Automaton automaton;
+    private final String description;
+
+    /**
+     * @param fieldName   name of the field to query; must be neither {@code null} nor empty
+     * @param automaton   automaton handed to the Lucene query; must not be {@code null}
+     * @param description text used when the resulting Lucene query is rendered via {@code toString}
+     * @throws IllegalArgumentException if {@code fieldName} is empty or {@code automaton} is {@code null}
+     */
+    public AutomatonQueryBuilder(String fieldName, Automaton automaton, String description) {
+        // the field name is validated before the automaton
+        this.fieldName = requireFieldName(fieldName);
+        this.automaton = requireAutomaton(automaton);
+        this.description = description;
+    }
+
+    /** Returns {@code candidate} unchanged, rejecting a null or empty field name. */
+    private static String requireFieldName(String candidate) {
+        if (Strings.isEmpty(candidate)) {
+            throw new IllegalArgumentException("field name is null or empty");
+        }
+        return candidate;
+    }
+
+    /** Returns {@code candidate} unchanged, rejecting a null automaton. */
+    private static Automaton requireAutomaton(Automaton candidate) {
+        if (candidate == null) {
+            throw new IllegalArgumentException("automaton cannot be null");
+        }
+        return candidate;
+    }
+
+    @Override
+    public String fieldName() {
+        return fieldName;
+    }
+
+    /** Always throws: this builder has no writeable name. */
+    @Override
+    public String getWriteableName() {
+        throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getWriteableName");
+    }
+
+    /** Always throws: this builder cannot be written to a stream. */
+    @Override
+    protected void doWriteTo(StreamOutput out) throws IOException {
+        // NOTE(review): UnsupportedEncodingException is an IOException subtype, unlike the UnsupportedOperationException
+        // thrown by getWriteableName() - confirm the checked exception is intentional before changing it
+        throw new UnsupportedEncodingException("AutomatonQueryBuilder does not support doWriteTo");
+    }
+
+    /** Always throws: this builder has no XContent representation. */
+    @Override
+    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
+        // NOTE(review): same checked exception type as doWriteTo - confirm it is intentional
+        throw new UnsupportedEncodingException("AutomatonQueryBuilder does not support doXContent");
+    }
+
+    /** Creates the Lucene query for the configured field, automaton and description. */
+    @Override
+    protected Query doToQuery(SearchExecutionContext context) throws IOException {
+        // the term carries only the field name
+        final Term fieldOnlyTerm = new Term(fieldName);
+        return new AutomatonQueryWithDescription(fieldOnlyTerm, automaton, description);
+    }
+
+    @Override
+    protected int doHashCode() {
+        return Objects.hash(fieldName, automaton, description);
+    }
+
+    @Override
+    protected boolean doEquals(AutomatonQueryBuilder other) {
+        if (Objects.equals(fieldName, other.fieldName) == false) {
+            return false;
+        }
+        if (Objects.equals(automaton, other.automaton) == false) {
+            return false;
+        }
+        return Objects.equals(description, other.description);
+    }
+
+    /** Always throws: there is no transport version for this builder. */
+    @Override
+    public TransportVersion getMinimalSupportedVersion() {
+        throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getMinimalSupportedVersion");
+    }
+
+    /**
+     * Lucene {@link AutomatonQuery} whose string form is the caller-provided description.
+     */
+    static class AutomatonQueryWithDescription extends AutomatonQuery {
+        private final String description;
+
+        AutomatonQueryWithDescription(Term term, Automaton automaton, String description) {
+            super(term, automaton);
+            this.description = description;
+        }
+
+        /**
+         * Returns the bare description when {@code field} equals this query's own field, and the description prefixed
+         * with {@code "<field>:"} otherwise.
+         */
+        @Override
+        public String toString(String field) {
+            return this.field.equals(field) ? description : this.field + ":" + description;
+        }
+    }
+}

+ 66 - 0
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/querydsl/query/AutomatonQuery.java

@@ -0,0 +1,66 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.esql.core.querydsl.query;
+
+import org.apache.lucene.util.automaton.Automaton;
+import org.elasticsearch.index.query.AutomatonQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+
+import java.util.Objects;
+
+/**
+ * Query that matches documents based on a Lucene Automaton. It is turned into an {@link AutomatonQueryBuilder}
+ * built from the field name, the automaton and the automaton description.
+ */
+public class AutomatonQuery extends Query {
+
+    private final String field;
+    private final Automaton automaton;
+    private final String automatonDescription;
+
+    /**
+     * @param source               source location passed to the parent {@code Query}
+     * @param field                name of the field the automaton is matched against
+     * @param automaton            automaton passed on to the query builder
+     * @param automatonDescription description passed on to the query builder
+     */
+    public AutomatonQuery(Source source, String field, Automaton automaton, String automatonDescription) {
+        super(source);
+        this.field = field;
+        this.automaton = automaton;
+        this.automatonDescription = automatonDescription;
+    }
+
+    public String field() {
+        return field;
+    }
+
+    @Override
+    protected QueryBuilder asBuilder() {
+        return new AutomatonQueryBuilder(field, automaton, automatonDescription);
+    }
+
+    /** Hashes the field, the automaton and the description. */
+    @Override
+    public int hashCode() {
+        return Objects.hash(field, automaton, automatonDescription);
+    }
+
+    /** Two queries are equal when they have the exact same class and equal field, automaton and description. */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == this) {
+            return true;
+        }
+        if (obj == null) {
+            return false;
+        }
+        if (obj.getClass() != getClass()) {
+            return false;
+        }
+
+        final AutomatonQuery that = (AutomatonQuery) obj;
+        if (Objects.equals(field, that.field) == false) {
+            return false;
+        }
+        if (Objects.equals(automaton, that.automaton) == false) {
+            return false;
+        }
+        return Objects.equals(automatonDescription, that.automatonDescription);
+    }
+
+    /** Renders only the field name; the automaton and its description are not part of the output. */
+    @Override
+    protected String innerToString() {
+        return "AutomatonQuery{field='" + field + "'}";
+    }
+}

+ 4 - 4
x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java

@@ -264,13 +264,13 @@ public class PushQueriesIT extends ESRestTestCase {
             | WHERE test like ("%value*", "abc*")
             """;
         String luceneQuery = switch (type) {
-            case KEYWORD, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
+            case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
             case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
+            case KEYWORD -> "test:LIKE(\"%value*\", \"abc*\"), caseInsensitive=false";
         };
         ComputeSignature dataNodeSignature = switch (type) {
-            case CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
-            case AUTO, KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD ->
-                ComputeSignature.FILTER_IN_COMPUTE;
+            case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
+            case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
         };
         testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
     }

+ 10 - 8
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/WildcardLikeList.java

@@ -12,7 +12,9 @@ import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
 import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
+import org.elasticsearch.xpack.esql.core.querydsl.query.AutomatonQuery;
 import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
 import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
 import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -23,6 +25,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdow
 import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
 
 import java.io.IOException;
+import java.util.stream.Collectors;
 
 public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
     public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
@@ -89,10 +92,6 @@ public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
      */
     @Override
     public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
-        if (pattern().patternList().size() != 1) {
-            // we only support a single pattern in the list for pushdown for now
-            return Translatable.NO;
-        }
         return pushdownPredicates.isPushableAttribute(field()) ? Translatable.YES : Translatable.NO;
 
     }
@@ -113,9 +112,12 @@ public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
      * The whole pattern list is translated into a single automaton-based query, so any number of patterns is supported.
      */
     private Query translateField(String targetFieldName) {
-        if (pattern().patternList().size() != 1) {
-            throw new IllegalArgumentException("WildcardLikeList can only be translated when it has a single pattern");
-        }
-        return new WildcardQuery(source(), targetFieldName, pattern().patternList().getFirst().asLuceneWildcard(), caseInsensitive());
+        return new AutomatonQuery(source(), targetFieldName, pattern().createAutomaton(caseInsensitive()), getAutomatonDescription());
+    }
+
+    /**
+     * Builds the textual description of the automaton: the patterns, each double-quoted and separated by
+     * {@code ", "}, wrapped in {@code LIKE(...)} and followed by the case-insensitivity flag.
+     */
+    private String getAutomatonDescription() {
+        // the description is derived from the same inputs that are used to create the automaton
+        String closing = "\"), caseInsensitive=" + caseInsensitive();
+        return pattern().patternList().stream().map(WildcardPattern::pattern).collect(Collectors.joining("\", \"", "LIKE(\"", closing));
     }
 }