Browse Source

Use FieldMapper to create the low level term queries in CommonTermQuery

Closes #5258
Simon Willnauer 11 years ago
parent
commit
fe9de7fba2

+ 106 - 5
src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java

@@ -19,8 +19,20 @@
 
 package org.apache.lucene.queries;
 
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.Version;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.Queries;
+import org.elasticsearch.index.mapper.FieldMapper;
+
+import java.io.IOException;
 
 /**
  * Extended version of {@link CommonTermsQuery} that allows to pass in a
@@ -29,12 +41,11 @@ import org.elasticsearch.common.lucene.search.Queries;
  */
 public class ExtendedCommonTermsQuery extends CommonTermsQuery {
 
-    public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
-        super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
-    }
+    private final FieldMapper<?> mapper;
 
-    public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
-        super(highFreqOccur, lowFreqOccur, maxTermFrequency);
+    public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord, FieldMapper<?> mapper) {
+        super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
+        this.mapper = mapper;
     }
 
     private String lowFreqMinNumShouldMatchSpec;
@@ -72,4 +83,94 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
     public String getLowFreqMinimumNumberShouldMatchSpec() {
         return lowFreqMinNumShouldMatchSpec;
     }
+
+    // LUCENE-UPGRADE: remove this method if on 4.8
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+        if (this.terms.isEmpty()) {
+            return new BooleanQuery();
+        } else if (this.terms.size() == 1) {
+            final Query tq = newTermQuery(this.terms.get(0), null);
+            tq.setBoost(getBoost());
+            return tq;
+        }
+        return super.rewrite(reader);
+    }
+
+    // LUCENE-UPGRADE: remove this method if on 4.8
+    @Override
+    protected Query buildQuery(final int maxDoc,
+                               final TermContext[] contextArray, final Term[] queryTerms) {
+        BooleanQuery lowFreq = new BooleanQuery(disableCoord);
+        BooleanQuery highFreq = new BooleanQuery(disableCoord);
+        highFreq.setBoost(highFreqBoost);
+        lowFreq.setBoost(lowFreqBoost);
+        BooleanQuery query = new BooleanQuery(true);
+        for (int i = 0; i < queryTerms.length; i++) {
+            TermContext termContext = contextArray[i];
+            if (termContext == null) {
+                lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
+            } else {
+                if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
+                        || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) {
+                    highFreq.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
+                } else {
+                    lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
+                }
+            }
+
+        }
+        final int numLowFreqClauses = lowFreq.clauses().size();
+        final int numHighFreqClauses = highFreq.clauses().size();
+        if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
+            int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
+            lowFreq.setMinimumNumberShouldMatch(minMustMatch);
+        }
+        if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
+            int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
+            highFreq.setMinimumNumberShouldMatch(minMustMatch);
+        }
+        if (lowFreq.clauses().isEmpty()) {
+          /*
+           * if lowFreq is empty we rewrite the high freq terms in a conjunction to
+           * prevent slow queries.
+           */
+            if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
+                for (BooleanClause booleanClause : highFreq) {
+                    booleanClause.setOccur(Occur.MUST);
+                }
+            }
+            highFreq.setBoost(getBoost());
+            return highFreq;
+        } else if (highFreq.clauses().isEmpty()) {
+            // only do low freq terms - we don't have high freq terms
+            lowFreq.setBoost(getBoost());
+            return lowFreq;
+        } else {
+            query.add(highFreq, Occur.SHOULD);
+            query.add(lowFreq, Occur.MUST);
+            query.setBoost(getBoost());
+            return query;
+        }
+    }
+
+    static {
+        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Remove obsolete code after upgrade to lucene 4.8";
+    }
+
+    //@Override
+    // LUCENE-UPGRADE: remove this method if on 4.8
+    protected Query newTermQuery(Term term, TermContext context) {
+        if (mapper == null) {
+            // this should be super.newTermQuery(term, context) once it's available in the super class
+            return context == null ? new TermQuery(term) : new TermQuery(term, context);
+        }
+        final Query query = mapper.queryStringTermQuery(term);
+        if (query == null) {
+            // this should be super.newTermQuery(term, context) once it's available in the super class
+            return context == null ? new TermQuery(term) : new TermQuery(term, context);
+        } else {
+            return query;
+        }
+    }
 }

+ 12 - 13
src/main/java/org/elasticsearch/index/query/CommonTermsQueryParser.java

@@ -166,19 +166,6 @@ public class CommonTermsQueryParser implements QueryParser {
         if (value == null) {
             throw new QueryParsingException(parseContext.index(), "No text specified for text query");
         }
-        ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
-        commonsQuery.setBoost(boost);
-        Query query = parseQueryString(commonsQuery, value.toString(), fieldName, parseContext, queryAnalyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
-        if (queryName != null) {
-            parseContext.addNamedQuery(queryName, query);
-        }
-        return query;
-    }
-
-
-    private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
-            String queryAnalyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
-
         FieldMapper<?> mapper = null;
         String field;
         MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
@@ -207,6 +194,18 @@ public class CommonTermsQueryParser implements QueryParser {
             }
         }
 
+        ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords, mapper);
+        commonsQuery.setBoost(boost);
+        Query query = parseQueryString(commonsQuery, value.toString(), field, parseContext, analyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch, smartNameFieldMappers);
+        if (queryName != null) {
+            parseContext.addNamedQuery(queryName, query);
+        }
+        return query;
+    }
+
+
+    private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
+            Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch, MapperService.SmartNameFieldMappers smartNameFieldMappers) throws IOException {
         // Logic similar to QueryParser#getFieldQuery
         TokenStream source = analyzer.tokenStream(field, queryString.toString());
         int count = 0;

+ 3 - 3
src/main/java/org/elasticsearch/index/search/MatchQuery.java

@@ -209,7 +209,7 @@ public class MatchQuery {
                 if (commonTermsCutoff == null) {
                     query = builder.createBooleanQuery(field, value.toString(), occur);
                 } else {
-                    query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff);
+                    query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff, mapper);
                 }
                 break;
             case PHRASE:
@@ -276,11 +276,11 @@ public class MatchQuery {
             return query;
         }
 
-        public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
+        public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, FieldMapper<?> mapper) {
             Query booleanQuery = createBooleanQuery(field, queryText, Occur.SHOULD);
             if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
                 BooleanQuery bq = (BooleanQuery) booleanQuery;
-                ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled());
+                ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled(), mapper);
                 for (BooleanClause clause : bq.clauses()) {
                     if (!(clause.getQuery() instanceof TermQuery)) {
                         return booleanQuery;

+ 15 - 0
src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java

@@ -252,6 +252,21 @@ public class SimpleQueryTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    public void testCommonTermsQueryOnAllField() throws Exception {
+        client().admin().indices().prepareCreate("test")
+                .addMapping("type1", "message", "type=string", "comment", "type=string,boost=5.0")
+                .setSettings(SETTING_NUMBER_OF_SHARDS, 1).get();
+        indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("message", "test message", "comment", "whatever"),
+                client().prepareIndex("test", "type1", "2").setSource("message", "hello world", "comment", "test comment"));
+
+        SearchResponse searchResponse = client().prepareSearch().setQuery(commonTerms("_all", "test")).get();
+        assertHitCount(searchResponse, 2l);
+        assertFirstHit(searchResponse, hasId("2"));
+        assertSecondHit(searchResponse, hasId("1"));
+        assertThat(searchResponse.getHits().getHits()[0].getScore(), greaterThan(searchResponse.getHits().getHits()[1].getScore()));
+    }
+
     @Test
     public void testCommonTermsQuery() throws Exception {
         client().admin().indices().prepareCreate("test")