Browse Source

Expose Lucene's new TopTermsBlendedFreqScoringRewrite.

This rewrite method is interesting because it computes scores as if all terms
had the same frequencies, which avoids disappointments with ranking when a fuzzy
query ranks typos first given that they are less frequent than the correct term.
Adrien Grand 10 years ago
parent
commit
8238f497d8

+ 1 - 1
core/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java

@@ -95,7 +95,7 @@ public class FuzzyQueryParser implements QueryParser {
                     } else if ("transpositions".equals(currentFieldName)) {
                       transpositions = parser.booleanValue();
                     } else if ("rewrite".equals(currentFieldName)) {
-                        rewriteMethod = QueryParsers.parseRewriteMethod(parser.textOrNull(), null);
+                        rewriteMethod = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null);
                     } else if ("_name".equals(currentFieldName)) {
                         queryName = parser.text();
                     } else {

+ 2 - 2
core/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java

@@ -124,9 +124,9 @@ public class MatchQueryParser implements QueryParser {
                     } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                         minimumShouldMatch = parser.textOrNull();
                     } else if ("rewrite".equals(currentFieldName)) {
-                        matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
+                        matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
                     } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
-                        matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
+                        matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
                     } else if ("fuzzy_transpositions".equals(currentFieldName)) {
                         matchQuery.setTranspositions(parser.booleanValue());
                     } else if ("lenient".equals(currentFieldName)) {

+ 2 - 2
core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java

@@ -114,9 +114,9 @@ public class MultiMatchQueryParser implements QueryParser {
                 } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                     minimumShouldMatch = parser.textOrNull();
                 } else if ("rewrite".equals(currentFieldName)) {
-                    multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
+                    multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
                 } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
-                    multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
+                    multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
                 } else if ("use_dis_max".equals(currentFieldName) || "useDisMax".equals(currentFieldName)) {
                     useDisMax = parser.booleanValue();
                 } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {

+ 1 - 1
core/src/main/java/org/elasticsearch/index/query/PrefixQueryParser.java

@@ -97,7 +97,7 @@ public class PrefixQueryParser implements QueryParser {
             throw new QueryParsingException(parseContext, "No value specified for prefix query");
         }
 
-        MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);
+        MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null);
 
         Query query = null;
         MappedFieldType fieldType = parseContext.fieldMapper(fieldName);

+ 2 - 2
core/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java

@@ -175,7 +175,7 @@ public class QueryStringQueryParser implements QueryParser {
                 } else if ("fuzzy_max_expansions".equals(currentFieldName) || "fuzzyMaxExpansions".equals(currentFieldName)) {
                     qpSettings.fuzzyMaxExpansions(parser.intValue());
                 } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
-                    qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
+                    qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull()));
                 } else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
                     qpSettings.phraseSlop(parser.intValue());
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, FUZZINESS)) {
@@ -187,7 +187,7 @@ public class QueryStringQueryParser implements QueryParser {
                 } else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
                     qpSettings.analyzeWildcard(parser.booleanValue());
                 } else if ("rewrite".equals(currentFieldName)) {
-                    qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
+                    qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull()));
                 } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                     qpSettings.minimumShouldMatch(parser.textOrNull());
                 } else if ("quote_field_suffix".equals(currentFieldName) || "quoteFieldSuffix".equals(currentFieldName)) {

+ 1 - 1
core/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java

@@ -109,7 +109,7 @@ public class RegexpQueryParser implements QueryParser {
             throw new QueryParsingException(parseContext, "No value specified for regexp query");
         }
 
-        MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);
+        MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null);
 
         Query query = null;
         MappedFieldType fieldType = parseContext.fieldMapper(fieldName);

+ 2 - 3
core/src/main/java/org/elasticsearch/index/query/WildcardQueryParser.java

@@ -25,7 +25,6 @@ import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.support.QueryParsers;
 
@@ -103,8 +102,8 @@ public class WildcardQueryParser implements QueryParser {
         }
 
         WildcardQuery wildcardQuery = new WildcardQuery(new Term(fieldName, valueBytes));
-        QueryParsers.setRewriteMethod(wildcardQuery, rewriteMethod);
-        wildcardQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(rewriteMethod));
+        QueryParsers.setRewriteMethod(wildcardQuery, parseContext.parseFieldMatcher(), rewriteMethod);
+        wildcardQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod));
         wildcardQuery.setBoost(boost);
         if (queryName != null) {
             parseContext.addNamedQuery(queryName, wildcardQuery);

+ 43 - 30
core/src/main/java/org/elasticsearch/index/query/support/QueryParsers.java

@@ -20,14 +20,22 @@
 package org.elasticsearch.index.query.support;
 
 import org.apache.lucene.search.MultiTermQuery;
-
 import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.ParseFieldMatcher;
 
 /**
  *
  */
 public final class QueryParsers {
 
+    private static final ParseField CONSTANT_SCORE = new ParseField("constant_score", "constant_score_auto", "constant_score_filter");
+    private static final ParseField SCORING_BOOLEAN = new ParseField("scoring_boolean");
+    private static final ParseField CONSTANT_SCORE_BOOLEAN = new ParseField("constant_score_boolean");
+    private static final ParseField TOP_TERMS = new ParseField("top_terms_");
+    private static final ParseField TOP_TERMS_BOOST = new ParseField("top_terms_boost_");
+    private static final ParseField TOP_TERMS_BLENDED_FREQS = new ParseField("top_terms_blended_freqs_");
+
     private QueryParsers() {
 
     }
@@ -39,50 +47,55 @@ public final class QueryParsers {
         query.setRewriteMethod(rewriteMethod);
     }
 
-    public static void setRewriteMethod(MultiTermQuery query, @Nullable String rewriteMethod) {
+    public static void setRewriteMethod(MultiTermQuery query, ParseFieldMatcher matcher, @Nullable String rewriteMethod) {
         if (rewriteMethod == null) {
             return;
         }
-        query.setRewriteMethod(parseRewriteMethod(rewriteMethod));
+        query.setRewriteMethod(parseRewriteMethod(matcher, rewriteMethod));
     }
 
-    public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod) {
-        return parseRewriteMethod(rewriteMethod, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    public static MultiTermQuery.RewriteMethod parseRewriteMethod(ParseFieldMatcher matcher, @Nullable String rewriteMethod) {
+        return parseRewriteMethod(matcher, rewriteMethod, MultiTermQuery.CONSTANT_SCORE_REWRITE);
     }
 
-    public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
+    public static MultiTermQuery.RewriteMethod parseRewriteMethod(ParseFieldMatcher matcher, @Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
         if (rewriteMethod == null) {
             return defaultRewriteMethod;
         }
-        if ("constant_score_auto".equals(rewriteMethod) || "constant_score_auto".equals(rewriteMethod)) {
-            return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
-        }
-        if ("scoring_boolean".equals(rewriteMethod) || "scoringBoolean".equals(rewriteMethod)) {
-            return MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
-        }
-        if ("constant_score_boolean".equals(rewriteMethod) || "constantScoreBoolean".equals(rewriteMethod)) {
-            return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
-        }
-        if ("constant_score_filter".equals(rewriteMethod) || "constantScoreFilter".equals(rewriteMethod)) {
-            return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
+        if (matcher.match(rewriteMethod, CONSTANT_SCORE)) {
+            return MultiTermQuery.CONSTANT_SCORE_REWRITE;
         }
-        if (rewriteMethod.startsWith("top_terms_boost_")) {
-            int size = Integer.parseInt(rewriteMethod.substring("top_terms_boost_".length()));
-            return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
+        if (matcher.match(rewriteMethod, SCORING_BOOLEAN)) {
+            return MultiTermQuery.SCORING_BOOLEAN_REWRITE;
         }
-        if (rewriteMethod.startsWith("topTermsBoost")) {
-            int size = Integer.parseInt(rewriteMethod.substring("topTermsBoost".length()));
-            return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
+        if (matcher.match(rewriteMethod, CONSTANT_SCORE_BOOLEAN)) {
+            return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE;
         }
-        if (rewriteMethod.startsWith("top_terms_")) {
-            int size = Integer.parseInt(rewriteMethod.substring("top_terms_".length()));
-            return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
+
+        int firstDigit = -1;
+        for (int i = 0; i < rewriteMethod.length(); ++i) {
+            if (Character.isDigit(rewriteMethod.charAt(i))) {
+                firstDigit = i;
+                break;
+            }
         }
-        if (rewriteMethod.startsWith("topTerms")) {
-            int size = Integer.parseInt(rewriteMethod.substring("topTerms".length()));
-            return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
+
+        if (firstDigit >= 0) {
+            final int size = Integer.parseInt(rewriteMethod.substring(firstDigit));
+            String rewriteMethodName = rewriteMethod.substring(0, firstDigit);
+
+            if (matcher.match(rewriteMethodName, TOP_TERMS)) {
+                return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
+            }
+            if (matcher.match(rewriteMethodName, TOP_TERMS_BOOST)) {
+                return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
+            }
+            if (matcher.match(rewriteMethodName, TOP_TERMS_BLENDED_FREQS)) {
+                return new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(size);
+            }
         }
+
         throw new IllegalArgumentException("Failed to parse rewrite_method [" + rewriteMethod + "]");
     }
-    
+
 }

+ 16 - 0
core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.memory.MemoryIndex;
@@ -29,6 +30,7 @@ import org.apache.lucene.queries.ExtendedCommonTermsQuery;
 import org.apache.lucene.queries.TermsQuery;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
 import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.lucene.search.spans.*;
 import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
@@ -68,6 +70,7 @@ import org.junit.Before;
 import org.junit.Test;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.List;
 
@@ -428,6 +431,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
         assertThat(parsedQuery, instanceOf(FuzzyQuery.class));
         FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery;
         assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh")));
+        assertThat(fuzzyQuery.getRewriteMethod(), instanceOf(MultiTermQuery.TopTermsBlendedFreqScoringRewrite.class));
     }
 
     @Test
@@ -2423,4 +2427,16 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
         q = csq.getQuery();
         assertThat(q, instanceOf(TermsQuery.class));
     }
+
+    @Test
+    public void testBlendedRewriteMethod() throws IOException {
+        IndexQueryParserService queryParser = queryParser();
+        for (String rewrite : Arrays.asList("top_terms_blended_freqs_10", "topTermsBlendedFreqs10")) {
+            Query parsedQuery = queryParser.parse(prefixQuery("field", "val").rewrite(rewrite)).query();
+            assertThat(parsedQuery, instanceOf(PrefixQuery.class));
+            PrefixQuery prefixQuery = (PrefixQuery) parsedQuery;
+            assertThat(prefixQuery.getPrefix(), equalTo(new Term("field", "val")));
+            assertThat(prefixQuery.getRewriteMethod(), instanceOf(MultiTermQuery.TopTermsBlendedFreqScoringRewrite.class));
+        }
+    }
 }

+ 11 - 8
docs/reference/query-dsl/multi-term-rewrite.asciidoc

@@ -10,9 +10,11 @@ also happens on the
 All of those queries allow to control how they will get rewritten using
 the `rewrite` parameter:
 
-* When not set, or set to `constant_score_auto`, defaults to
-automatically choosing either `constant_score_boolean` or
-`constant_score_filter` based on query characteristics.
+* `constant_score` (default): A rewrite method that performs like
+`constant_score_boolean` when there are few matching terms and otherwise
+visits all matching terms in sequence and marks documents for that term.
+Matching documents are assigned a constant score equal to the query's
+boost.
 * `scoring_boolean`: A rewrite method that first translates each term
 into a should clause in a boolean query, and keeps the scores as
 computed by the query. Note that typically such scores are meaningless
@@ -25,10 +27,6 @@ are not computed. Instead, each matching document receives a constant
 score equal to the query's boost. This rewrite method will hit too many
 clauses failure if it exceeds the boolean query limit (defaults to
 `1024`).
-* `constant_score_filter`: A rewrite method that first creates a private
-Filter by visiting each term in sequence and marking all docs for that
-term. Matching documents are assigned a constant score equal to the
-query's boost.
 * `top_terms_N`: A rewrite method that first translates each term into
 a should clause in a boolean query, and keeps the scores as computed by the
 query. This rewrite method only uses the top scoring terms so it will
@@ -39,4 +37,9 @@ into should clause in boolean query, but the scores are only computed as
 the boost. This rewrite method only uses the top scoring terms so it
 will not overflow the boolean max clause count. The `N` controls the
 size of the top scoring terms to use.
-
+* `top_terms_blended_freqs_N`: A rewrite method that first translates each
+term into a should clause in a boolean query, but all term queries compute
+scores as if they had the same frequency. In practice, the frequency used is
+the maximum frequency of all matching terms. This rewrite method only uses
+the top scoring terms so it will not overflow the boolean max clause count.
+The `N` controls the size of the top scoring terms to use.