Browse Source

Search - make wildcard field use constant scoring queries for wildcard queries and caching fix (#70452)

* Make wildcard field use constant scoring queries for wildcard queries. Add a note about ignoring rewrite parameters on wildcard queries.

Also fixes caching issue where case sensitive and case insensitive results were cached as the same

Closes #69604
markharwood 4 years ago
parent
commit
2f9c7318c2

+ 2 - 1
docs/reference/mapping/types/wildcard.asciidoc

@@ -5,7 +5,7 @@
 === Wildcard field type
 
 The `wildcard` field type is a specialized keyword field for unstructured
-machine-generated content you plan to search using grep-like 
+machine-generated content you plan to search using grep-like
 <<query-dsl-wildcard-query,`wildcard`>> and <<query-dsl-regexp-query,`regexp`>>
 queries. The `wildcard` type is optimized for fields with large values or high
 cardinality.
@@ -130,4 +130,5 @@ The following parameters are accepted by `wildcard` fields:
 ==== Limitations
 
 * `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
+* When running `wildcard` queries any `rewrite` parameter is ignored. The scoring is always a constant score.
 

+ 6 - 10
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/AutomatonQueryOnBinaryDv.java

@@ -25,7 +25,6 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton;
 
 import java.io.IOException;
 import java.util.Objects;
-import java.util.function.Supplier;
 
 /**
  * Query that runs an Automaton across all binary doc values.
@@ -35,20 +34,16 @@ public class AutomatonQueryOnBinaryDv extends Query {
 
     private final String field;
     private final String matchPattern;
-    private final Supplier<Automaton> automatonSupplier;
+    private final ByteRunAutomaton bytesMatcher;
 
-    public AutomatonQueryOnBinaryDv(String field, String matchPattern, Supplier<Automaton> automatonSupplier) {
+    public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) {
         this.field = field;
         this.matchPattern = matchPattern;
-        this.automatonSupplier = automatonSupplier;
+        bytesMatcher = new ByteRunAutomaton(automaton);
     }
 
     @Override
     public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-
-
-        ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automatonSupplier.get());
-
         return new ConstantScoreWeight(this, boost) {
 
             @Override
@@ -99,12 +94,13 @@ public class AutomatonQueryOnBinaryDv extends Query {
             return false;
           }
         AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
-        return Objects.equals(field, other.field)  && Objects.equals(matchPattern, other.matchPattern);
+        return Objects.equals(field, other.field)  && Objects.equals(matchPattern, other.matchPattern)
+            && Objects.equals(bytesMatcher, other.bytesMatcher);
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(field, matchPattern);
+        return Objects.hash(field, matchPattern, bytesMatcher);
     }
 
 }

+ 19 - 30
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

@@ -322,21 +322,17 @@ public class WildcardFieldMapper extends FieldMapper {
                 addClause(string, rewritten, Occur.MUST);
                 clauseCount++;
             }
-            Supplier<Automaton> deferredAutomatonSupplier = () -> {
-                if(caseInsensitive) {
-                    return AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE);
-                } else {
-                    return WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
-                }
-            };
-            AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, deferredAutomatonSupplier);
+            Automaton automaton = caseInsensitive
+                ? AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE)
+                : WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
+            AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton);
             if (clauseCount > 0) {
                 // We can accelerate execution with the ngram query
                 BooleanQuery approxQuery = rewritten.build();
                 BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
                 verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST));
                 verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST));
-                return verifyingBuilder.build();
+                return new ConstantScoreQuery(verifyingBuilder.build());
             } else if (numWildcardChars == 0 || numWildcardStrings > 0) {
                 // We have no concrete characters and we're not a pure length query e.g. ???
                 return new DocValuesFieldExistsQuery(name());
@@ -362,12 +358,9 @@ public class WildcardFieldMapper extends FieldMapper {
             if (approxNgramQuery instanceof MatchAllDocsQuery) {
                 return existsQuery(context);
             }
-            Supplier<Automaton> deferredAutomatonSupplier = ()-> {
-                RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
-                return regex.toAutomaton(maxDeterminizedStates);
-            };
-
-            AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, deferredAutomatonSupplier);
+            RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
+            Automaton automaton = regex.toAutomaton(maxDeterminizedStates);
+            AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, automaton);
 
             // MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single
             // clause which we can't accelerate at all and needs verification. Example would be ".."
@@ -746,9 +739,8 @@ public class WildcardFieldMapper extends FieldMapper {
                     }
                 }
             }
-            Supplier <Automaton> deferredAutomatonSupplier
-                = () -> TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
-            AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, deferredAutomatonSupplier);
+            Automaton automaton =  TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
+            AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, automaton);
 
             if (accelerationQuery == null) {
                 return slowQuery;
@@ -831,18 +823,15 @@ public class WildcardFieldMapper extends FieldMapper {
                     bqBuilder.add(ngramQ, Occur.MUST);
                 }
 
-                Supplier <Automaton> deferredAutomatonSupplier = ()->{
-                    // Verification query
-                    FuzzyQuery fq = new FuzzyQuery(
-                        new Term(name(), searchTerm),
-                        fuzziness.asDistance(searchTerm),
-                        prefixLength,
-                        maxExpansions,
-                        transpositions
-                    );
-                    return fq.getAutomata().automaton;
-                };
-                bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, deferredAutomatonSupplier), Occur.MUST);
+                // Verification query
+                FuzzyQuery fq = new FuzzyQuery(
+                    new Term(name(), searchTerm),
+                    fuzziness.asDistance(searchTerm),
+                    prefixLength,
+                    maxExpansions,
+                    transpositions
+                );
+                bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton), Occur.MUST);
 
                 return bqBuilder.build();
             } catch (IOException ioe) {

+ 35 - 2
x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java

@@ -22,6 +22,7 @@ import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -42,6 +43,7 @@ import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.TriFunction;
+import org.elasticsearch.common.lucene.search.AutomatonQueries;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -548,13 +550,14 @@ public class WildcardFieldMapperTests extends MapperTestCase {
             String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
             Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
             testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString);
-            assertTrue(wildcardFieldQuery instanceof BooleanQuery);
+            assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BooleanQuery);
         }
 
         // TODO All these expressions have no acceleration at all and could be improved
         String slowPatterns[] = { "??" };
         for (String pattern : slowPatterns) {
             Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
+            wildcardFieldQuery = unwrapAnyConstantScore(wildcardFieldQuery);
             assertTrue(
                 pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery),
                 wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv
@@ -562,6 +565,26 @@ public class WildcardFieldMapperTests extends MapperTestCase {
         }
 
     }
+    
+    public void testQueryCachingEquality() throws IOException, ParseException {
+        String pattern = "A*b*B?a";
+        // Case sensitivity matters when it comes to caching
+        Automaton caseSensitiveAutomaton = WildcardQuery.toAutomaton(new Term("field", pattern));
+        Automaton caseInSensitiveAutomaton = AutomatonQueries.toCaseInsensitiveWildcardAutomaton(
+            new Term("field", pattern),
+            Integer.MAX_VALUE
+        );
+        AutomatonQueryOnBinaryDv csQ = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton);
+        AutomatonQueryOnBinaryDv ciQ = new AutomatonQueryOnBinaryDv("field", pattern, caseInSensitiveAutomaton);
+        assertNotEquals(csQ, ciQ);
+        assertNotEquals(csQ.hashCode(), ciQ.hashCode());
+
+        // Same query should be equal
+        Automaton caseSensitiveAutomaton2 = WildcardQuery.toAutomaton(new Term("field", pattern));
+        AutomatonQueryOnBinaryDv csQ2 = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton2);
+        assertEquals(csQ, csQ2);
+        assertEquals(csQ.hashCode(), csQ2.hashCode());
+    }
 
     @Override
     protected void minimalMapping(XContentBuilder b) throws IOException {
@@ -719,8 +742,18 @@ public class WildcardFieldMapperTests extends MapperTestCase {
         Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString);
         testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery);
     }
+    
+    private Query unwrapAnyConstantScore(Query q) {
+        if (q instanceof ConstantScoreQuery) {
+            ConstantScoreQuery csq = (ConstantScoreQuery) q;
+            return csq.getQuery();
+        } else {
+            return q;
+        }
+    }
+
     void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException {
-        BooleanQuery cq = (BooleanQuery) combinedQuery;
+        BooleanQuery cq = (BooleanQuery) unwrapAnyConstantScore(combinedQuery);
         assert cq.clauses().size() == 2;
         Query approximationQuery = null;
         boolean verifyQueryFound = false;