Browse Source

Optimize query with types filter in the URL (t/t/_search) (#20979)

This change adds a TypesQuery that checks if the disjunction of types should be rewritten to a MatchAllDocs query. The check is done only if the number of terms is below a threshold (16 by default and configurable via max_boolean_clause).
Jim Ferenczi 9 years ago
parent
commit
d0bbe89c16

+ 55 - 19
core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java

@@ -25,6 +25,9 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
+import org.apache.lucene.queries.TermsQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
@@ -41,6 +44,7 @@ import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
 import org.elasticsearch.index.query.QueryShardContext;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -144,7 +148,7 @@ public class TypeFieldMapper extends MetadataFieldMapper {
             if (indexOptions() == IndexOptions.NONE) {
                 throw new AssertionError();
             }
-            return new TypeQuery(indexedValueForSearch(value));
+            return new TypesQuery(indexedValueForSearch(value));
         }
 
         @Override
@@ -161,26 +165,52 @@ public class TypeFieldMapper extends MetadataFieldMapper {
         }
     }
 
-    public static class TypeQuery extends Query {
+    /**
+     * Specialization for a disjunction over many _type
+     */
+    public static class TypesQuery extends Query {
+        // Same threshold as TermsQuery
+        private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
 
-        private final BytesRef type;
+        private final BytesRef[] types;
 
-        public TypeQuery(BytesRef type) {
-            this.type = Objects.requireNonNull(type);
+        public TypesQuery(BytesRef... types) {
+            if (types == null) {
+                throw new NullPointerException("types cannot be null.");
+            }
+            if (types.length == 0) {
+                throw new IllegalArgumentException("types must contains at least one value.");
+            }
+            this.types = types;
         }
 
         @Override
         public Query rewrite(IndexReader reader) throws IOException {
-            Term term = new Term(CONTENT_TYPE, type);
-            TermContext context = TermContext.build(reader.getContext(), term);
-            if (context.docFreq() == reader.maxDoc()) {
-                // All docs have the same type.
-                // Using a match_all query will help Lucene perform some optimizations
-                // For instance, match_all queries as filter clauses are automatically removed
-                return new MatchAllDocsQuery();
-            } else {
-                return new ConstantScoreQuery(new TermQuery(term, context));
+            final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
+            if (types.length <= threshold) {
+                BooleanQuery.Builder bq = new BooleanQuery.Builder();
+                int totalDocFreq = 0;
+                for (BytesRef type : types) {
+                    Term term = new Term(CONTENT_TYPE, type);
+                    TermContext context = TermContext.build(reader.getContext(), term);
+                    if (context.docFreq() == 0) {
+                        // this _type is not present in the reader
+                        continue;
+                    }
+                    totalDocFreq += context.docFreq();
+                    // strict equality should be enough ?
+                    if (totalDocFreq >= reader.maxDoc()) {
+                        assert totalDocFreq == reader.maxDoc();
+                        // Matches all docs since _type is a single value field
+                        // Using a match_all query will help Lucene perform some optimizations
+                        // For instance, match_all queries as filter clauses are automatically removed
+                        return new MatchAllDocsQuery();
+                    }
+                    bq.add(new TermQuery(term, context), BooleanClause.Occur.SHOULD);
+                }
+                return new ConstantScoreQuery(bq.build());
             }
+            return new TermsQuery(CONTENT_TYPE, types);
         }
 
         @Override
@@ -188,20 +218,26 @@ public class TypeFieldMapper extends MetadataFieldMapper {
             if (sameClassAs(obj) == false) {
                 return false;
             }
-            TypeQuery that = (TypeQuery) obj;
-            return type.equals(that.type);
+            TypesQuery that = (TypesQuery) obj;
+            return Arrays.equals(types, that.types);
         }
 
         @Override
         public int hashCode() {
-            return 31 * classHash() + type.hashCode();
+            return 31 * classHash() + Arrays.hashCode(types);
         }
 
         @Override
         public String toString(String field) {
-            return "_type:" + type;
+            StringBuilder builder = new StringBuilder();
+            for (BytesRef type : types) {
+                if (builder.length() > 0) {
+                    builder.append(' ');
+                }
+                builder.append(new Term(CONTENT_TYPE, type).toString());
+            }
+            return builder.toString();
         }
-
     }
 
     private TypeFieldMapper(Settings indexSettings, MappedFieldType existing) {

+ 1 - 3
core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java

@@ -19,7 +19,6 @@
 
 package org.elasticsearch.search;
 
-import org.apache.lucene.queries.TermsQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
@@ -65,7 +64,6 @@ import org.elasticsearch.search.internal.ContextIndexSearcher;
 import org.elasticsearch.search.internal.ScrollContext;
 import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.internal.ShardSearchRequest;
-import org.elasticsearch.search.lookup.SearchLookup;
 import org.elasticsearch.search.profile.Profilers;
 import org.elasticsearch.search.query.QueryPhaseExecutionException;
 import org.elasticsearch.search.query.QuerySearchResult;
@@ -295,7 +293,7 @@ final class DefaultSearchContext extends SearchContext {
             for (int i = 0; i < typesBytes.length; i++) {
                 typesBytes[i] = new BytesRef(types[i]);
             }
-            typesFilter = new TermsQuery(TypeFieldMapper.NAME, typesBytes);
+            typesFilter = new TypeFieldMapper.TypesQuery(typesBytes);
         }
 
         if (typesFilter == null && aliasFilter == null && hasNestedFields == false) {

+ 40 - 14
core/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java

@@ -34,11 +34,12 @@ import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.TypeFieldMapper;
 import org.junit.Before;
 
+import java.io.IOException;
+
 public class TypeFieldTypeTests extends FieldTypeTestCase {
     @Override
     protected MappedFieldType createDefaultFieldType() {
@@ -56,20 +57,14 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
         });
     }
 
-    public void testTermQuery() throws Exception {
+    public void testTermsQuery() throws Exception {
         Directory dir = newDirectory();
         IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
-        Document doc = new Document();
-        StringField type = new StringField(TypeFieldMapper.NAME, "my_type", Store.NO);
-        doc.add(type);
-        w.addDocument(doc);
-        w.addDocument(doc);
-        IndexReader reader = DirectoryReader.open(w);
+        IndexReader reader = openReaderWithNewType("my_type", w);
 
         TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType();
         ft.setName(TypeFieldMapper.NAME);
         Query query = ft.termQuery("my_type", null);
-
         assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
 
         // Make sure that Lucene actually simplifies the query when there is a single type
@@ -78,13 +73,44 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
         Query rewritten = new IndexSearcher(reader).rewrite(filteredQuery);
         assertEquals(userQuery, rewritten);
 
-        type.setStringValue("my_type2");
-        w.addDocument(doc);
+        // ... and does not rewrite it if there is more than one type
         reader.close();
-        reader = DirectoryReader.open(w);
+        reader = openReaderWithNewType("my_type2", w);
+        Query expected = new ConstantScoreQuery(
+            new BooleanQuery.Builder()
+                .add(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type")), Occur.SHOULD)
+            .build()
+        );
+        assertEquals(expected, query.rewrite(reader));
 
-        assertEquals(new ConstantScoreQuery(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type"))), query.rewrite(reader));
+        BytesRef[] types =
+            new BytesRef[] {new BytesRef("my_type"), new BytesRef("my_type2"), new BytesRef("my_type3")};
+        // the query should match all documents
+        query = new TypeFieldMapper.TypesQuery(types);
+        assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
+
+        reader.close();
+        reader = openReaderWithNewType("unknown_type", w);
+        // the query cannot rewrite to a match all docs sinc unknown_type is not queried.
+        query = new TypeFieldMapper.TypesQuery(types);
+        expected =
+            new ConstantScoreQuery(
+                new BooleanQuery.Builder()
+                    .add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[0])), Occur.SHOULD)
+                    .add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[1])), Occur.SHOULD)
+                .build()
+            );
+        rewritten = query.rewrite(reader);
+        assertEquals(expected, rewritten);
 
         IOUtils.close(reader, w, dir);
     }
+
+    static DirectoryReader openReaderWithNewType(String type, IndexWriter writer) throws IOException {
+        Document doc = new Document();
+        StringField typeField = new StringField(TypeFieldMapper.NAME, type, Store.NO);
+        doc.add(typeField);
+        writer.addDocument(doc);
+        return DirectoryReader.open(writer);
+    }
 }

+ 1 - 1
core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java

@@ -280,7 +280,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
         assertThat(termQuery.getTerm().bytes(), equalTo(ids[0]));
         //check the type filter
         assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.FILTER));
-        assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
+        assertEquals(new TypeFieldMapper.TypesQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
     }
 
     @Override

+ 1 - 1
core/src/test/java/org/elasticsearch/index/query/TypeQueryBuilderTests.java

@@ -40,7 +40,7 @@ public class TypeQueryBuilderTests extends AbstractQueryTestCase<TypeQueryBuilde
         if (createShardContext().getMapperService().documentMapper(queryBuilder.type()) == null) {
             assertEquals(new MatchNoDocsQuery(), query);
         } else {
-            assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(queryBuilder.type())), query);
+            assertEquals(new TypeFieldMapper.TypesQuery(new BytesRef(queryBuilder.type())), query);
         }
     }
 

+ 2 - 4
core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java

@@ -19,14 +19,12 @@
 
 package org.elasticsearch.search;
 
-import org.apache.lucene.queries.TermsQuery;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.index.mapper.TypeFieldMapper;
-import org.elasticsearch.search.DefaultSearchContext;
 import org.elasticsearch.test.ESTestCase;
 
 import static org.apache.lucene.search.BooleanClause.Occur.FILTER;
@@ -38,13 +36,13 @@ public class DefaultSearchContextTests extends ESTestCase {
     public void testCreateSearchFilter() {
         Query searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, null, randomBoolean());
         Query expectedQuery = new BooleanQuery.Builder()
-            .add(new TermsQuery(TypeFieldMapper.NAME, new BytesRef("type1"), new BytesRef("type2")), FILTER)
+            .add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER)
             .build();
         assertThat(searchFilter, equalTo(expectedQuery));
 
         searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, new MatchAllDocsQuery(), randomBoolean());
         expectedQuery = new BooleanQuery.Builder()
-            .add(new TermsQuery(TypeFieldMapper.NAME, new BytesRef("type1"), new BytesRef("type2")), FILTER)
+            .add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER)
             .add(new MatchAllDocsQuery(), FILTER)
             .build();
         assertThat(searchFilter, equalTo(expectedQuery));

+ 1 - 1
docs/reference/search/explain.asciidoc

@@ -68,7 +68,7 @@ This will yield the following result:
         "details" : [ ]
       }, {
         "value" : 1.0,
-        "description" : "_type:tweet, product of:",
+        "description" : "*:*, product of:",
         "details" : [
           { "value" : 1.0, "description" : "boost", "details" : [ ] },
           { "value" : 1.0, "description" : "queryNorm", "details" : [ ] }