Browse Source

Add support for auto_generate_synonyms_phrase_query in match_query, multi_match_query, query_string and simple_query_string (#26097)

* Add support for auto_generate_synonyms_phrase_query in match_query, multi_match_query, query_string and simple_query_string

This change adds a new parameter called auto_generate_synonyms_phrase_query (defaults to true).
This option can be used in conjunction with the synonym_graph token filter to generate phrase queries
when multi-term synonyms are encountered.
For example, a synonym like "ny, new york" would produce the following boolean query when "ny city" is parsed:
((ny OR "new york") AND city)

Note how the multi-term synonym "new york" produces a phrase query.
Jim Ferenczi 8 years ago
parent
commit
a7e1610134

+ 33 - 2
core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
 
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -55,6 +56,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
     public static final ParseField ANALYZER_FIELD = new ParseField("analyzer");
     public static final ParseField TYPE_FIELD = new ParseField("type").withAllDeprecated("match_phrase and match_phrase_prefix query");
     public static final ParseField QUERY_FIELD = new ParseField("query");
+    public static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
 
     /** The name for the match query */
     public static final String NAME = "match";
@@ -98,6 +100,8 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
 
     private Float cutoffFrequency = null;
 
+    private boolean autoGenerateSynonymsPhraseQuery = true;
+
     /**
      * Constructs a new match query.
      */
@@ -133,6 +137,9 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
         fuzzyRewrite = in.readOptionalString();
         fuzziness = in.readOptionalWriteable(Fuzziness::new);
         cutoffFrequency = in.readOptionalFloat();
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            autoGenerateSynonymsPhraseQuery = in.readBoolean();
+        }
     }
 
     @Override
@@ -153,6 +160,9 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
         out.writeOptionalString(fuzzyRewrite);
         out.writeOptionalWriteable(fuzziness);
         out.writeOptionalFloat(cutoffFrequency);
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeBoolean(autoGenerateSynonymsPhraseQuery);
+        }
     }
 
     /** Returns the field name used in this query. */
@@ -395,6 +405,20 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
         return this.zeroTermsQuery;
     }
 
+
+    public MatchQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) {
+        this.autoGenerateSynonymsPhraseQuery = enable;
+        return this;
+    }
+
+    /**
+     * Returns whether phrase queries are automatically generated for multi-term synonyms.
+     * Defaults to {@code true}.
+     */
+    public boolean autoGenerateSynonymsPhraseQuery() {
+        return autoGenerateSynonymsPhraseQuery;
+    }
+
     @Override
     public void doXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(NAME);
@@ -431,6 +455,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
         if (cutoffFrequency != null) {
             builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
         }
+        builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery);
         printBoostAndQueryName(builder);
         builder.endObject();
         builder.endObject();
@@ -457,6 +482,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
         matchQuery.setLenient(lenient);
         matchQuery.setCommonTermsCutoff(cutoffFrequency);
         matchQuery.setZeroTermsQuery(zeroTermsQuery);
+        matchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
 
         Query query = matchQuery.parse(type, fieldName, value);
         return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch);
@@ -478,14 +504,15 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
                Objects.equals(lenient, other.lenient) &&
                Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions) &&
                Objects.equals(zeroTermsQuery, other.zeroTermsQuery) &&
-               Objects.equals(cutoffFrequency, other.cutoffFrequency);
+               Objects.equals(cutoffFrequency, other.cutoffFrequency) &&
+               Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
     }
 
     @Override
     protected int doHashCode() {
         return Objects.hash(fieldName, value, type, operator, analyzer, slop,
                 fuzziness, prefixLength, maxExpansions, minimumShouldMatch,
-                fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency);
+                fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency, autoGenerateSynonymsPhraseQuery);
     }
 
     @Override
@@ -510,6 +537,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
         boolean lenient = MatchQuery.DEFAULT_LENIENCY;
         Float cutOffFrequency = null;
         ZeroTermsQuery zeroTermsQuery = MatchQuery.DEFAULT_ZERO_TERMS_QUERY;
+        boolean autoGenerateSynonymsPhraseQuery = true;
         String queryName = null;
         String currentFieldName = null;
         XContentParser.Token token;
@@ -572,6 +600,8 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
                             }
                         } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
                             queryName = parser.text();
+                        } else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
+                            autoGenerateSynonymsPhraseQuery = parser.booleanValue();
                         } else {
                             throw new ParsingException(parser.getTokenLocation(),
                                     "[" + NAME + "] query does not support [" + currentFieldName + "]");
@@ -610,6 +640,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
             matchQuery.cutoffFrequency(cutOffFrequency);
         }
         matchQuery.zeroTermsQuery(zeroTermsQuery);
+        matchQuery.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
         matchQuery.queryName(queryName);
         matchQuery.boost(boost);
         return matchQuery;

+ 32 - 4
core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java

@@ -22,17 +22,16 @@ package org.elasticsearch.index.query;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
-import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.query.support.QueryParsers;
 import org.elasticsearch.index.search.MatchQuery;
 import org.elasticsearch.index.search.MultiMatchQuery;
@@ -74,6 +73,8 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
     private static final ParseField TYPE_FIELD = new ParseField("type");
     private static final ParseField QUERY_FIELD = new ParseField("query");
     private static final ParseField FIELDS_FIELD = new ParseField("fields");
+    private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
+
 
     private final Object value;
     private final Map<String, Float> fieldsBoosts;
@@ -91,6 +92,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
     private boolean lenient = DEFAULT_LENIENCY;
     private Float cutoffFrequency = null;
     private MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
+    private boolean autoGenerateSynonymsPhraseQuery = true;
 
     public enum Type implements Writeable {
 
@@ -221,6 +223,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         lenient = in.readBoolean();
         cutoffFrequency = in.readOptionalFloat();
         zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in);
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            autoGenerateSynonymsPhraseQuery = in.readBoolean();
+        }
     }
 
     @Override
@@ -245,6 +250,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         out.writeBoolean(lenient);
         out.writeOptionalFloat(cutoffFrequency);
         zeroTermsQuery.writeTo(out);
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeBoolean(autoGenerateSynonymsPhraseQuery);
+        }
     }
 
     public Object value() {
@@ -514,6 +522,19 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         return zeroTermsQuery;
     }
 
+    public MultiMatchQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) {
+        this.autoGenerateSynonymsPhraseQuery = enable;
+        return this;
+    }
+
+    /**
+     * Returns whether phrase queries are automatically generated for multi-term synonyms.
+     * Defaults to {@code true}.
+     */
+    public boolean autoGenerateSynonymsPhraseQuery() {
+        return autoGenerateSynonymsPhraseQuery;
+    }
+
     @Override
     public void doXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(NAME);
@@ -551,6 +572,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
             builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
         }
         builder.field(ZERO_TERMS_QUERY_FIELD.getPreferredName(), zeroTermsQuery.toString());
+        builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery);
         printBoostAndQueryName(builder);
         builder.endObject();
     }
@@ -572,6 +594,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         Float cutoffFrequency = null;
         boolean lenient = DEFAULT_LENIENCY;
         MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
+        boolean autoGenerateSynonymsPhraseQuery = true;
 
         float boost = AbstractQueryBuilder.DEFAULT_BOOST;
         String queryName = null;
@@ -634,6 +657,8 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
                     }
                 } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
                     queryName = parser.text();
+                } else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
+                    autoGenerateSynonymsPhraseQuery = parser.booleanValue();
                 } else {
                     throw new ParsingException(parser.getTokenLocation(),
                             "[" + NAME + "] query does not support [" + currentFieldName + "]");
@@ -673,6 +698,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
                 .slop(slop)
                 .tieBreaker(tieBreaker)
                 .zeroTermsQuery(zeroTermsQuery)
+                .autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery)
                 .boost(boost)
                 .queryName(queryName);
     }
@@ -728,6 +754,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         }
         multiMatchQuery.setLenient(lenient);
         multiMatchQuery.setZeroTermsQuery(zeroTermsQuery);
+        multiMatchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
 
         if (useDisMax != null) { // backwards foobar
             boolean typeUsesDismax = type.tieBreaker() != 1.0f;
@@ -748,7 +775,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
     protected int doHashCode() {
         return Objects.hash(value, fieldsBoosts, type, operator, analyzer, slop, fuzziness,
                 prefixLength, maxExpansions, minimumShouldMatch, fuzzyRewrite, useDisMax, tieBreaker, lenient,
-                cutoffFrequency, zeroTermsQuery);
+                cutoffFrequency, zeroTermsQuery, autoGenerateSynonymsPhraseQuery);
     }
 
     @Override
@@ -768,6 +795,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
                 Objects.equals(tieBreaker, other.tieBreaker) &&
                 Objects.equals(lenient, other.lenient) &&
                 Objects.equals(cutoffFrequency, other.cutoffFrequency) &&
-                Objects.equals(zeroTermsQuery, other.zeroTermsQuery);
+                Objects.equals(zeroTermsQuery, other.zeroTermsQuery) &&
+                Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
     }
 }

+ 31 - 2
core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java

@@ -102,6 +102,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
     private static final ParseField ALL_FIELDS_FIELD = new ParseField("all_fields")
             .withAllDeprecated("Set [default_field] to `*` instead");
     private static final ParseField TYPE_FIELD = new ParseField("type");
+    private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
 
     private final String queryString;
 
@@ -157,6 +158,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
     /** To limit effort spent determinizing regexp queries. */
     private int maxDeterminizedStates = DEFAULT_MAX_DETERMINED_STATES;
 
+    private boolean autoGenerateSynonymsPhraseQuery = true;
+
     public QueryStringQueryBuilder(String queryString) {
         if (queryString == null) {
             throw new IllegalArgumentException("query text missing");
@@ -219,6 +222,9 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
                 }
             }
         }
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            autoGenerateSynonymsPhraseQuery = in.readBoolean();
+        }
     }
 
     @Override
@@ -271,6 +277,9 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
                 out.writeOptionalBoolean(useAllFields);
             }
         }
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeBoolean(autoGenerateSynonymsPhraseQuery);
+        }
     }
 
     public String queryString() {
@@ -625,6 +634,19 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
         return false;
     }
 
+    public QueryStringQueryBuilder autoGenerateSynonymsPhraseQuery(boolean value) {
+        this.autoGenerateSynonymsPhraseQuery = value;
+        return this;
+    }
+
+    /**
+     * Returns whether phrase queries are automatically generated for multi-term synonyms.
+     * Defaults to {@code true}.
+     */
+    public boolean autoGenerateSynonymsPhraseQuery() {
+        return autoGenerateSynonymsPhraseQuery;
+    }
+
     @Override
     protected void doXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(NAME);
@@ -682,6 +704,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
             builder.field(TIME_ZONE_FIELD.getPreferredName(), this.timeZone.getID());
         }
         builder.field(ESCAPE_FIELD.getPreferredName(), this.escape);
+        builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery);
         printBoostAndQueryName(builder);
         builder.endObject();
     }
@@ -714,6 +737,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
         String fuzzyRewrite = null;
         String rewrite = null;
         Map<String, Float> fieldsAndWeights = new HashMap<>();
+        boolean autoGenerateSynonymsPhraseQuery = true;
         while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
             if (token == XContentParser.Token.FIELD_NAME) {
                 currentFieldName = parser.currentName();
@@ -799,6 +823,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
                     }
                 } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
                     queryName = parser.text();
+                } else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
+                    autoGenerateSynonymsPhraseQuery = parser.booleanValue();
                 } else if (AUTO_GENERATE_PHRASE_QUERIES_FIELD.match(currentFieldName)) {
                     // ignore, deprecated setting
                 } else if (LOWERCASE_EXPANDED_TERMS_FIELD.match(currentFieldName)) {
@@ -849,6 +875,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
         queryStringQuery.timeZone(timeZone);
         queryStringQuery.boost(boost);
         queryStringQuery.queryName(queryName);
+        queryStringQuery.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
         return queryStringQuery;
     }
 
@@ -882,7 +909,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
                 timeZone == null ? other.timeZone == null : other.timeZone != null &&
                 Objects.equals(timeZone.getID(), other.timeZone.getID()) &&
                 Objects.equals(escape, other.escape) &&
-                Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates);
+                Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) &&
+                Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
     }
 
     @Override
@@ -891,7 +919,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
                 quoteFieldSuffix, allowLeadingWildcard, analyzeWildcard,
                 enablePositionIncrements, fuzziness, fuzzyPrefixLength,
                 fuzzyMaxExpansions, fuzzyRewrite, phraseSlop, type, tieBreaker, rewrite, minimumShouldMatch, lenient,
-                timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates);
+                timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates, autoGenerateSynonymsPhraseQuery);
     }
 
     @Override
@@ -963,6 +991,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
         queryParser.setMultiTermRewriteMethod(QueryParsers.parseRewriteMethod(this.rewrite));
         queryParser.setTimeZone(timeZone);
         queryParser.setMaxDeterminizedStates(maxDeterminizedStates);
+        queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
 
         Query query;
         try {

+ 20 - 3
core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java

@@ -300,6 +300,8 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
         private boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD;
         /** Specifies a suffix, if any, to apply to field names for phrase matching. */
         private String quoteFieldSuffix = null;
+        /** Whether phrase queries should be automatically generated for multi-term synonyms. */
+        private boolean autoGenerateSynonymsPhraseQuery = true;
 
         /**
          * Generates default {@link Settings} object (uses ROOT locale, does
@@ -312,6 +314,7 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
             this.lenient = other.lenient;
             this.analyzeWildcard = other.analyzeWildcard;
             this.quoteFieldSuffix = other.quoteFieldSuffix;
+            this.autoGenerateSynonymsPhraseQuery = other.autoGenerateSynonymsPhraseQuery;
         }
 
         /** Specifies whether to use lenient parsing, defaults to false. */
@@ -349,9 +352,21 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
             return quoteFieldSuffix;
         }
 
+        public void autoGenerateSynonymsPhraseQuery(boolean value) {
+            this.autoGenerateSynonymsPhraseQuery = value;
+        }
+
+        /**
+         * Returns whether phrase queries are automatically generated for multi-term synonyms.
+         * Defaults to {@code true}.
+         */
+        public boolean autoGenerateSynonymsPhraseQuery() {
+            return autoGenerateSynonymsPhraseQuery;
+        }
+
         @Override
         public int hashCode() {
-            return Objects.hash(lenient, analyzeWildcard, quoteFieldSuffix);
+            return Objects.hash(lenient, analyzeWildcard, quoteFieldSuffix, autoGenerateSynonymsPhraseQuery);
         }
 
         @Override
@@ -363,8 +378,10 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
                 return false;
             }
             Settings other = (Settings) obj;
-            return Objects.equals(lenient, other.lenient) && Objects.equals(analyzeWildcard, other.analyzeWildcard)
-                    && Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix);
+            return Objects.equals(lenient, other.lenient) &&
+                Objects.equals(analyzeWildcard, other.analyzeWildcard) &&
+                Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix) &&
+                Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
         }
     }
 }

+ 26 - 1
core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java

@@ -105,6 +105,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
     private static final ParseField FIELDS_FIELD = new ParseField("fields");
     private static final ParseField QUOTE_FIELD_SUFFIX_FIELD = new ParseField("quote_field_suffix");
     private static final ParseField ALL_FIELDS_FIELD = new ParseField("all_fields");
+    private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
 
     /** Query text to parse. */
     private final String queryText;
@@ -174,6 +175,9 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
             settings.quoteFieldSuffix(in.readOptionalString());
             useAllFields = in.readOptionalBoolean();
         }
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            settings.autoGenerateSynonymsPhraseQuery(in.readBoolean());
+        }
     }
 
     @Override
@@ -203,6 +207,9 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
             out.writeOptionalString(settings.quoteFieldSuffix());
             out.writeOptionalBoolean(useAllFields);
         }
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeBoolean(settings.autoGenerateSynonymsPhraseQuery());
+        }
     }
 
     /** Returns the text to parse the query from. */
@@ -358,6 +365,20 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
         return minimumShouldMatch;
     }
 
+    public SimpleQueryStringBuilder autoGenerateSynonymsPhraseQuery(boolean value) {
+        this.settings.autoGenerateSynonymsPhraseQuery(value);
+        return this;
+    }
+
+    /**
+     * Returns the value reported by {@code settings.autoGenerateSynonymsPhraseQuery()}: whether phrase
+     * queries are automatically generated for multi-term synonyms.
+     */
+    public boolean autoGenerateSynonymsPhraseQuery() {
+        return settings.autoGenerateSynonymsPhraseQuery();
+    }
+
+
     @Override
     protected Query doToQuery(QueryShardContext context) throws IOException {
         // field names in builder can have wildcards etc, need to resolve them here
@@ -459,7 +480,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
         if (useAllFields != null) {
             builder.field(ALL_FIELDS_FIELD.getPreferredName(), useAllFields);
         }
-
+        builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), settings.autoGenerateSynonymsPhraseQuery());
         printBoostAndQueryName(builder);
         builder.endObject();
     }
@@ -478,6 +499,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
         boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD;
         String quoteFieldSuffix = null;
         Boolean useAllFields = null;
+        boolean autoGenerateSynonymsPhraseQuery = true;
 
         XContentParser.Token token;
         while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@@ -543,6 +565,8 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
                     quoteFieldSuffix = parser.textOrNull();
                 } else if (ALL_FIELDS_FIELD.match(currentFieldName)) {
                     useAllFields = parser.booleanValue();
+                } else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
+                    autoGenerateSynonymsPhraseQuery = parser.booleanValue();
                 } else {
                     throw new ParsingException(parser.getTokenLocation(), "[" + SimpleQueryStringBuilder.NAME +
                             "] unsupported field [" + parser.currentName() + "]");
@@ -571,6 +595,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
         }
         qb.analyzeWildcard(analyzeWildcard).boost(boost).quoteFieldSuffix(quoteFieldSuffix);
         qb.useAllFields(useAllFields);
+        qb.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
         return qb;
     }
 

+ 7 - 0
core/src/main/java/org/elasticsearch/index/search/MatchQuery.java

@@ -167,6 +167,8 @@ public class MatchQuery {
 
     protected Float commonTermsCutoff = null;
 
+    protected boolean autoGenerateSynonymsPhraseQuery = true;
+
     public MatchQuery(QueryShardContext context) {
         this.context = context;
     }
@@ -226,6 +228,10 @@ public class MatchQuery {
         this.zeroTermsQuery = zeroTermsQuery;
     }
 
+    public void setAutoGenerateSynonymsPhraseQuery(boolean enabled) {
+        this.autoGenerateSynonymsPhraseQuery = enabled;
+    }
+
     protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) {
         if (analyzer == null) {
             return quoted ? context.getSearchQuoteAnalyzer(fieldType) : context.getSearchAnalyzer(fieldType);
@@ -258,6 +264,7 @@ public class MatchQuery {
         assert analyzer != null;
         MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType);
         builder.setEnablePositionIncrements(this.enablePositionIncrements);
+        builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery);
 
         Query query = null;
         switch (type) {

+ 5 - 0
core/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java

@@ -321,6 +321,11 @@ public class QueryStringQueryParser extends XQueryParser {
         this.groupTieBreaker = groupTieBreaker;
     }
 
+    @Override
+    public void setAutoGenerateMultiTermSynonymsPhraseQuery(boolean enable) {
+        queryBuilder.setAutoGenerateSynonymsPhraseQuery(enable);
+    }
+
     private Query applyBoost(Query q, Float boost) {
         if (boost != null && boost != 1f) {
             return new BoostQuery(q, boost);

+ 7 - 0
core/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java

@@ -119,6 +119,10 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
         if (randomBoolean()) {
             matchQuery.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
         }
+
+        if (randomBoolean()) {
+            matchQuery.autoGenerateSynonymsPhraseQuery(randomBoolean());
+        }
         return matchQuery;
     }
 
@@ -274,6 +278,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
                 "      \"fuzzy_transpositions\" : true,\n" +
                 "      \"lenient\" : false,\n" +
                 "      \"zero_terms_query\" : \"ALL\",\n" +
+                "      \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "      \"boost\" : 1.0\n" +
                 "    }\n" +
                 "  }\n" +
@@ -302,6 +307,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
                 "      \"fuzzy_transpositions\" : true,\n" +
                 "      \"lenient\" : false,\n" +
                 "      \"zero_terms_query\" : \"NONE\",\n" +
+                "      \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "      \"boost\" : 1.0\n" +
                 "    }\n" +
                 "  }\n" +
@@ -333,6 +339,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
                 "      \"fuzzy_transpositions\" : true,\n" +
                 "      \"lenient\" : false,\n" +
                 "      \"zero_terms_query\" : \"NONE\",\n" +
+                "      \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "      \"boost\" : 1.0\n" +
                 "    }\n" +
                 "  }\n" +

+ 4 - 0
core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java

@@ -121,6 +121,9 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
         if (randomBoolean()) {
             query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
         }
+        if (randomBoolean()) {
+            query.autoGenerateSynonymsPhraseQuery(randomBoolean());
+        }
         // test with fields with boost and patterns delegated to the tests further below
         return query;
     }
@@ -238,6 +241,7 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
                 "    \"max_expansions\" : 50,\n" +
                 "    \"lenient\" : false,\n" +
                 "    \"zero_terms_query\" : \"NONE\",\n" +
+                "    \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "    \"boost\" : 1.0\n" +
                 "  }\n" +
                 "}";

+ 1 - 0
core/src/test/java/org/elasticsearch/index/query/NestedQueryBuilderTests.java

@@ -157,6 +157,7 @@ public class NestedQueryBuilderTests extends AbstractQueryTestCase<NestedQueryBu
                 "              \"fuzzy_transpositions\" : true,\n" +
                 "              \"lenient\" : false,\n" +
                 "              \"zero_terms_query\" : \"NONE\",\n" +
+                "              \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "              \"boost\" : 1.0\n" +
                 "            }\n" +
                 "          }\n" +

+ 20 - 0
core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java

@@ -160,6 +160,9 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
         if (randomBoolean()) {
             queryStringQueryBuilder.timeZone(randomDateTimeZone().getID());
         }
+        if (randomBoolean()) {
+            queryStringQueryBuilder.autoGenerateSynonymsPhraseQuery(randomBoolean());
+        }
         queryStringQueryBuilder.type(randomFrom(MultiMatchQueryBuilder.Type.values()));
         return queryStringQueryBuilder;
     }
@@ -375,6 +378,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
             queryParser.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
             queryParser.setDefaultOperator(op.toQueryParserOperator());
             queryParser.setForceAnalyzer(new MockSynonymAnalyzer());
+            queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(false);
 
             // simple multi-term
             Query query = queryParser.parse("guinea pig");
@@ -393,6 +397,21 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
                             defaultOp).build();
             assertThat(query, Matchers.equalTo(expectedQuery));
 
+            queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
+            // simple multi-term with phrase query
+            query = queryParser.parse("guinea pig");
+            expectedQuery = new BooleanQuery.Builder()
+                    .add(new BooleanQuery.Builder()
+                            .add(new PhraseQuery.Builder()
+                                .add(new Term(STRING_FIELD_NAME, "guinea"))
+                                .add(new Term(STRING_FIELD_NAME, "pig"))
+                                .build(), Occur.SHOULD)
+                            .add(new TermQuery(new Term(STRING_FIELD_NAME, "cavy")), Occur.SHOULD)
+                            .build(), defaultOp)
+                    .build();
+            assertThat(query, Matchers.equalTo(expectedQuery));
+            queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(false);
+
             // simple with additional tokens
             query = queryParser.parse("that guinea pig smells");
             expectedQuery = new BooleanQuery.Builder()
@@ -850,6 +869,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
                 "    \"fuzzy_max_expansions\" : 50,\n" +
                 "    \"phrase_slop\" : 0,\n" +
                 "    \"escape\" : false,\n" +
+                "    \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "    \"boost\" : 1.0\n" +
                 "  }\n" +
                 "}";

+ 4 - 1
core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java

@@ -95,7 +95,9 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
             }
         }
         result.fields(fields);
-
+        if (randomBoolean()) {
+            result.autoGenerateSynonymsPhraseQuery(randomBoolean());
+        }
         return result;
     }
 
@@ -340,6 +342,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
                 "    \"lenient\" : false,\n" +
                 "    \"analyze_wildcard\" : false,\n" +
                 "    \"quote_field_suffix\" : \".quote\",\n" +
+                "    \"auto_generate_synonyms_phrase_query\" : true,\n" +
                 "    \"boost\" : 1.0\n" +
                 "  }\n" +
                 "}";

+ 19 - 2
core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java

@@ -141,8 +141,12 @@ public class MatchQueryIT extends ESIntegTestCase {
         indexRandom(true, false, getDocs());
 
         // no min should match
-        SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
-            .operator(Operator.OR).analyzer("lower_graphsyns")).get();
+        SearchResponse searchResponse = client().prepareSearch(INDEX)
+            .setQuery(
+                QueryBuilders.matchQuery("field", "three what the fudge foo")
+                    .operator(Operator.OR).analyzer("lower_graphsyns").autoGenerateSynonymsPhraseQuery(false)
+            )
+            .get();
 
         assertHitCount(searchResponse, 6L);
         assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6");
@@ -159,6 +163,19 @@ public class MatchQueryIT extends ESIntegTestCase {
         assertSearchHits(searchResponse, "1", "2", "6");
     }
 
+    public void testMultiTermsSynonymsPhrase() throws ExecutionException, InterruptedException { // match query run without overriding auto_generate_synonyms_phrase_query
+        List<IndexRequestBuilder> builders = getDocs();
+        indexRandom(true, false, builders);
+        SearchResponse searchResponse = client().prepareSearch(INDEX)
+            .setQuery(
+                QueryBuilders.matchQuery("field", "wtf") // NOTE(review): "wtf" is presumably expanded to a multi-term synonym by the analyzer below - confirm
+                    .analyzer("lower_graphsyns")
+                    .operator(Operator.AND)) // autoGenerateSynonymsPhraseQuery is not set here, so the builder default (true) applies
+            .get();
+        assertHitCount(searchResponse, 3L); // exactly three documents are expected to match
+        assertSearchHits(searchResponse, "1", "2", "3");
+    }
+
     public void testPhrasePrefix() throws ExecutionException, InterruptedException {
         List<IndexRequestBuilder> builders = getDocs();
         builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));

+ 13 - 0
core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java

@@ -316,6 +316,7 @@ public class QueryStringIT extends ESIntegTestCase {
             QueryBuilders.queryStringQuery("say what the fudge")
                 .defaultField("field")
                 .defaultOperator(Operator.AND)
+                .autoGenerateSynonymsPhraseQuery(false)
                 .analyzer("lower_graphsyns")).get();
 
         assertHitCount(searchResponse, 1L);
@@ -326,6 +327,7 @@ public class QueryStringIT extends ESIntegTestCase {
             QueryBuilders.queryStringQuery("three what the fudge foo")
                 .defaultField("field")
                 .defaultOperator(Operator.OR)
+                .autoGenerateSynonymsPhraseQuery(false)
                 .analyzer("lower_graphsyns")).get();
 
         assertHitCount(searchResponse, 6L);
@@ -336,11 +338,22 @@ public class QueryStringIT extends ESIntegTestCase {
             QueryBuilders.queryStringQuery("three what the fudge foo")
                 .defaultField("field")
                 .defaultOperator(Operator.OR)
+                .autoGenerateSynonymsPhraseQuery(false)
                 .analyzer("lower_graphsyns")
                 .minimumShouldMatch("80%")).get();
 
         assertHitCount(searchResponse, 3L);
         assertSearchHits(searchResponse, "1", "2", "6");
+
+        // multi terms synonyms phrase
+        searchResponse = client().prepareSearch(index).setQuery(
+            QueryBuilders.queryStringQuery("what the fudge")
+                .defaultField("field")
+                .defaultOperator(Operator.AND)
+                .analyzer("lower_graphsyns"))
+            .get();
+        assertHitCount(searchResponse, 3L);
+        assertSearchHits(searchResponse,  "1", "2", "3");
     }
 
     private void assertHits(SearchHits hits, String... ids) {

+ 35 - 0
docs/reference/query-dsl/match-query.asciidoc

@@ -143,6 +143,41 @@ IMPORTANT: The `cutoff_frequency` option operates on a per-shard-level. This mea
 that when trying it out on test indexes with low document numbers you
 should follow the advice in {defguide}/relevance-is-broken.html[Relevance is broken].
 
+[[query-dsl-match-query-synonyms]]
+===== Synonyms
+
+The `match` query supports multi-terms synonym expansion with the <<analysis-synonym-graph-tokenfilter,
+synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-term synonym.
+For example, the following synonym: `"ny, new york"` would produce:
+
+`(ny OR ("new york"))`
+
+It is also possible to match multi terms synonyms with conjunctions instead:
+
+[source,js]
+--------------------------------------------------
+GET /_search
+{
+   "query": {
+       "match" : {
+           "message": {
+               "query" : "ny city",
+               "auto_generate_synonyms_phrase_query" : false
+           }
+       }
+   }
+}
+--------------------------------------------------
+// CONSOLE
+
+The example above creates a boolean query:
+
+`(ny OR (new AND york)) city`
+
+that matches documents with the term `ny` or the conjunction `new AND york`.
+By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`.
+
+
 .Comparison to query_string / field
 **************************************************
 

+ 2 - 2
docs/reference/query-dsl/multi-match-query.asciidoc

@@ -136,8 +136,8 @@ follows:
   * plus `tie_breaker * _score` for all other matching fields
 
 Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`,
-`fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query`
-and `cutoff_frequency`, as explained in <<query-dsl-match-query, match query>>.
+`fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query`,
+`cutoff_frequency` and `auto_generate_synonyms_phrase_query`, as explained in <<query-dsl-match-query, match query>>.
 
 [IMPORTANT]
 [[operator-min]]

+ 36 - 0
docs/reference/query-dsl/query-string-query.asciidoc

@@ -110,6 +110,9 @@ the query string. This allows to use a field that has a different analysis chain
 for exact matching. Look <<mixing-exact-search-with-stemming,here>> for a
 comprehensive example.
 
+|`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi terms synonyms.
+Defaults to `true`.
+
 |`all_fields` | deprecated[6.0.0, set `default_field` to `*` instead]
 Perform the query on all fields detected in the mapping that can
 be queried. Will be used by default when the `_all` field is disabled and no
@@ -273,4 +276,37 @@ GET /_search
 --------------------------------------------------
 // CONSOLE
 
+[float]
+==== Synonyms
+
+The `query_string` query supports multi-terms synonym expansion with the <<analysis-synonym-graph-tokenfilter,
+synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-term synonym.
+For example, the following synonym: `"ny, new york"` would produce:
+
+`(ny OR ("new york"))`
+
+It is also possible to match multi terms synonyms with conjunctions instead:
+
+[source,js]
+--------------------------------------------------
+GET /_search
+{
+   "query": {
+       "query_string" : {
+           "default_field": "title",
+           "query" : "ny city",
+           "auto_generate_synonyms_phrase_query" : false
+       }
+   }
+}
+--------------------------------------------------
+// CONSOLE
+
+The example above creates a boolean query:
+
+`(ny OR (new AND york)) city`
+
+that matches documents with the term `ny` or the conjunction `new AND york`.
+By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`.
+
 include::query-string-syntax.asciidoc[]

+ 36 - 0
docs/reference/query-dsl/simple-query-string-query.asciidoc

@@ -62,6 +62,9 @@ the query string. This allows to use a field that has a different analysis chain
 for exact matching. Look <<mixing-exact-search-with-stemming,here>> for a
 comprehensive example.
 
+|`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi terms synonyms.
+Defaults to `true`.
+
 |`all_fields` | Perform the query on all fields detected in the mapping that can
 be queried. Will be used by default when the `_all` field is disabled and no
 `default_field` is specified index settings, and no `fields` are specified.
@@ -160,3 +163,36 @@ GET /_search
 
 The available flags are: `ALL`, `NONE`, `AND`, `OR`, `NOT`, `PREFIX`, `PHRASE`,
 `PRECEDENCE`, `ESCAPE`, `WHITESPACE`, `FUZZY`, `NEAR`, and `SLOP`.
+
+[float]
+==== Synonyms
+
+The `simple_query_string` query supports multi-terms synonym expansion with the <<analysis-synonym-graph-tokenfilter,
+synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-term synonym.
+For example, the following synonym: `"ny, new york"` would produce:
+
+`(ny OR ("new york"))`
+
+It is also possible to match multi terms synonyms with conjunctions instead:
+
+[source,js]
+--------------------------------------------------
+GET /_search
+{
+   "query": {
+       "simple_query_string" : {
+           "query" : "ny city",
+           "auto_generate_synonyms_phrase_query" : false
+       }
+   }
+}
+--------------------------------------------------
+// CONSOLE
+
+The example above creates a boolean query:
+
+`(ny OR (new AND york)) city`
+
+that matches documents with the term `ny` or the conjunction `new AND york`.
+By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`.
+