Browse Source

Fix blended terms for non-strings

It had some funky errors, like lenient:true not working and queries with
two integer fields blowing up if there was no analyzer defined on the
query. This throws a bunch more tests at it and rejiggers how non-strings
are handled so they don't wander off into scary QueryBuilder-land unless
they have a nice strong analyzer to protect them.

Closes #15860
Nik Everett 9 năm trước cách đây
mục cha
commit
6bb01984b6

+ 1 - 1
core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

@@ -390,7 +390,7 @@ public abstract class MappedFieldType extends FieldType {
     }
 
     /** Creates a term associated with the field of this mapper for the given value */
-    protected Term createTerm(Object value) {
+    public Term createTerm(Object value) {
         return new Term(name(), indexedValueForSearch(value));
     }
 

+ 21 - 6
core/src/main/java/org/elasticsearch/index/search/MatchQuery.java

@@ -212,10 +212,6 @@ public class MatchQuery {
         this.zeroTermsQuery = zeroTermsQuery;
     }
 
-    protected boolean forceAnalyzeQueryString() {
-        return false;
-    }
-
     protected Analyzer getAnalyzer(MappedFieldType fieldType) {
         if (this.analyzer == null) {
             if (fieldType != null) {
@@ -240,9 +236,18 @@ public class MatchQuery {
             field = fieldName;
         }
 
-        if (fieldType != null && fieldType.useTermQueryWithQueryString() && !forceAnalyzeQueryString()) {
+        /*
+         * If the user forced an analyzer we really don't care if they are
+         * searching a type that wants term queries to be used with query string
+         * because the QueryBuilder will take care of it. If they haven't forced
+         * an analyzer then types like NumberFieldType that want terms with
+         * query string will blow up because their analyzer isn't capable of
+         * passing through QueryBuilder.
+         */
+        boolean noForcedAnalyzer = this.analyzer == null;
+        if (fieldType != null && fieldType.useTermQueryWithQueryString() && noForcedAnalyzer) {
             try {
-                return fieldType.termQuery(value, context);
+                return termQuery(fieldType, value);
             } catch (RuntimeException e) {
                 if (lenient) {
                     return null;
@@ -251,6 +256,7 @@ public class MatchQuery {
             }
 
         }
+
         Analyzer analyzer = getAnalyzer(fieldType);
         assert analyzer != null;
         MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType);
@@ -282,6 +288,15 @@ public class MatchQuery {
         }
     }
 
+    /**
+     * Creates a TermQuery-like-query for MappedFieldTypes that don't support
+     * QueryBuilder which is very string-ish. Just delegates to the
+     * MappedFieldType for MatchQuery but gets more complex for blended queries.
+     */
+    protected Query termQuery(MappedFieldType fieldType, Object value) {
+        return fieldType.termQuery(value, context);
+    }
+
     protected Query zeroTermsQuery() {
         return zeroTermsQuery == DEFAULT_ZERO_TERMS_QUERY ? Queries.newMatchNoDocsQuery() : Queries.newMatchAllQuery();
     }

+ 44 - 4
core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java

@@ -149,6 +149,10 @@ public class MultiMatchQuery extends MatchQuery {
         public boolean forceAnalyzeQueryString() {
             return false;
         }
+
+        public Query termQuery(MappedFieldType fieldType, Object value) {
+            return fieldType.termQuery(value, context);
+        }
     }
 
     public class CrossFieldsQueryBuilder extends QueryBuilder {
@@ -196,8 +200,13 @@ public class MultiMatchQuery extends MatchQuery {
                 } else {
                     blendedFields = null;
                 }
-                final FieldAndFieldType fieldAndFieldType = group.get(0);
-                Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
+                /*
+                 * We have to pick some field to pass through the superclass so
+                 * we just pick the first field. It shouldn't matter because
+                 * fields are already grouped by their analyzers/types.
+                 */
+                String representativeField = group.get(0).field;
+                Query q = parseGroup(type.matchQueryType(), representativeField, 1f, value, minimumShouldMatch);
                 if (q != null) {
                     queries.add(q);
                 }
@@ -206,6 +215,28 @@ public class MultiMatchQuery extends MatchQuery {
             return queries.isEmpty() ? null : queries;
         }
 
+        /**
+         * Pick the field for parsing. If any of the fields in the group do
+         * *not* useTermQueryWithQueryString then we return that one to force
+         * analysis. If some of the fields would useTermQueryWithQueryString
+         * then we assume that that parsing field's parser is good enough for
+         * them and return it. Otherwise we just return the first field. You
+         * should only get mixed groups like this when you force a certain
+         * analyzer on a query and use string and integer fields because of the
+         * way that grouping is done. That means that the use *asked* for the
+         * integer fields to be searched using a string analyzer so this is
+         * technically doing exactly what they asked for even if it is a bit
+         * funky.
+         */
+        private String fieldForParsing(List<FieldAndFieldType> group) {
+            for (FieldAndFieldType field: group) {
+                if (field.fieldType.useTermQueryWithQueryString()) {
+                    return field.field;
+                }
+            }
+            return group.get(0).field;
+        }
+
         @Override
         public boolean forceAnalyzeQueryString() {
             return blendedFields != null;
@@ -231,6 +262,11 @@ public class MultiMatchQuery extends MatchQuery {
             }
             return BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker);
         }
+
+        @Override
+        public Query termQuery(MappedFieldType fieldType, Object value) {
+            return blendTerm(fieldType.createTerm(value), fieldType);
+        }
     }
 
     @Override
@@ -266,7 +302,11 @@ public class MultiMatchQuery extends MatchQuery {
     }
 
     @Override
-    protected boolean forceAnalyzeQueryString() {
-        return this.queryBuilder == null ? super.forceAnalyzeQueryString() : this.queryBuilder.forceAnalyzeQueryString();
+    protected Query termQuery(MappedFieldType fieldType, Object value) {
+        if (queryBuilder == null) {
+            // Can be null when the MultiMatchQuery collapses into a MatchQuery
+            return super.termQuery(fieldType, value);
+        }
+        return queryBuilder.termQuery(fieldType, value);
     }
 }

+ 76 - 0
core/src/test/java/org/elasticsearch/search/query/MultiMatchQueryIT.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.search.query;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
@@ -459,6 +460,23 @@ public class MultiMatchQueryIT extends ESIntegTestCase {
         assertHitCount(searchResponse, 1l);
         assertFirstHit(searchResponse, hasId("theone"));
 
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("captain america 15", "full_name", "first_name", "last_name", "category", "skill", "int-field")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .analyzer("category")
+                        .operator(Operator.AND))).get();
+        assertHitCount(searchResponse, 1l);
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("captain america 15", "skill", "full_name", "first_name", "last_name", "category", "int-field")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .analyzer("category")
+                        .operator(Operator.AND))).get();
+        assertHitCount(searchResponse, 1l);
+        assertFirstHit(searchResponse, hasId("theone"));
+
+
         searchResponse = client().prepareSearch("test")
                 .setQuery(randomizeType(multiMatchQuery("captain america 15", "first_name", "last_name", "skill")
                         .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
@@ -471,6 +489,24 @@ public class MultiMatchQueryIT extends ESIntegTestCase {
                         .analyzer("category"))).get();
         assertFirstHit(searchResponse, hasId("theone"));
 
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("25 15", "first_name", "int-field", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .analyzer("category"))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("25 15", "int-field", "skill", "first_name")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .analyzer("category"))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("25 15", "int-field", "first_name", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .analyzer("category"))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
         searchResponse = client().prepareSearch("test")
                 .setQuery(randomizeType(multiMatchQuery("captain america marvel hero", "first_name", "last_name", "category")
                         .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
@@ -529,6 +565,46 @@ public class MultiMatchQueryIT extends ESIntegTestCase {
         assertFirstHit(searchResponse, hasId("ultimate2"));
         assertSecondHit(searchResponse, hasId("ultimate1"));
         assertThat(searchResponse.getHits().hits()[0].getScore(), greaterThan(searchResponse.getHits().hits()[1].getScore()));
+
+        // Test group based on numeric fields
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("15", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("15", "skill", "first_name")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        // Two numeric fields together caused trouble at one point!
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("15", "int-field", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("15", "int-field", "first_name", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))).get();
+        assertFirstHit(searchResponse, hasId("theone"));
+
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("alpha 15", "first_name", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .lenient(true))).get();
+        assertFirstHit(searchResponse, hasId("ultimate1"));
+        /*
+         * Doesn't find theone because "alpha 15" isn't a number and we don't
+         * break on spaces.
+         */
+        assertHitCount(searchResponse, 1);
+
+        // Lenient wasn't always properly lenient with two numeric fields
+        searchResponse = client().prepareSearch("test")
+                .setQuery(randomizeType(multiMatchQuery("alpha 15", "int-field", "first_name", "skill")
+                        .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
+                        .lenient(true))).get();
+        assertFirstHit(searchResponse, hasId("ultimate1"));
     }
 
     private static final void assertEquivalent(String query, SearchResponse left, SearchResponse right) {