Browse Source

percolator: Don't verify candidate matches with MemoryIndex that are verified matches

If we don't care about scoring, then for certain candidate matches we can be certain that they will always match.
Verifying these queries with the MemoryIndex can therefore be skipped.
Martijn van Groningen 9 years ago
parent
commit
599a548998

+ 44 - 3
core/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.common.lucene;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
@@ -27,6 +28,8 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoDeletionPolicy;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -35,9 +38,11 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MMapDirectory;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
@@ -49,9 +54,6 @@ import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-/**
- *
- */
 public class LuceneTests extends ESTestCase {
     public void testWaitForIndex() throws Exception {
         final MockDirectoryWrapper dir = newMockDirectory();
@@ -355,6 +357,45 @@ public class LuceneTests extends ESTestCase {
         dir.close();
     }
 
+    public void testAsSequentialAccessBits() throws Exception {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()));
+
+        Document doc = new Document();
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+
+        doc = new Document();
+        w.addDocument(doc);
+
+        doc = new Document();
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+
+
+        try (DirectoryReader reader = DirectoryReader.open(w)) {
+            IndexSearcher searcher = newSearcher(reader);
+            Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false);
+            assertEquals(1, reader.leaves().size());
+            LeafReaderContext leafReaderContext = reader.leaves().get(0);
+            Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext));
+
+            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1));
+            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc()));
+            assertTrue(bits.get(0));
+            assertTrue(bits.get(0));
+            assertFalse(bits.get(1));
+            assertFalse(bits.get(1));
+            expectThrows(IllegalArgumentException.class, () -> bits.get(0));
+            assertTrue(bits.get(2));
+            assertTrue(bits.get(2));
+            expectThrows(IllegalArgumentException.class, () -> bits.get(1));
+        }
+
+        w.close();
+        dir.close();
+    }
+
     /**
      * Test that the "unmap hack" is detected as supported by lucene.
      * This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038

+ 242 - 106
modules/percolator/src/main/java/org/elasticsearch/percolator/ExtractQueryTermsService.java

@@ -53,10 +53,13 @@ import org.elasticsearch.index.mapper.ParseContext;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.function.Function;
 
 /**
  * Utility to extract query terms from queries and create queries from documents.
@@ -64,64 +67,179 @@ import java.util.Set;
 public final class ExtractQueryTermsService {
 
     private static final byte FIELD_VALUE_SEPARATOR = 0;  // nul code point
+    public static final String EXTRACTION_COMPLETE = "complete";
+    public static final String EXTRACTION_PARTIAL = "partial";
+    public static final String EXTRACTION_FAILED = "failed";
+
+    static final Map<Class<? extends Query>, Function<Query, Result>> queryProcessors;
+
+    static {
+        Map<Class<? extends Query>, Function<Query, Result>> map = new HashMap<>(16);
+        map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
+        map.put(ConstantScoreQuery.class, constantScoreQuery());
+        map.put(BoostQuery.class, boostQuery());
+        map.put(TermQuery.class, termQuery());
+        map.put(TermsQuery.class, termsQuery());
+        map.put(CommonTermsQuery.class, commonTermsQuery());
+        map.put(BlendedTermQuery.class, blendedTermQuery());
+        map.put(PhraseQuery.class, phraseQuery());
+        map.put(SpanTermQuery.class, spanTermQuery());
+        map.put(SpanNearQuery.class, spanNearQuery());
+        map.put(SpanOrQuery.class, spanOrQuery());
+        map.put(SpanFirstQuery.class, spanFirstQuery());
+        map.put(SpanNotQuery.class, spanNotQuery());
+        map.put(BooleanQuery.class, booleanQuery());
+        map.put(DisjunctionMaxQuery.class, disjunctionMaxQuery());
+        queryProcessors = Collections.unmodifiableMap(map);
+    }
 
     private ExtractQueryTermsService() {
     }
 
     /**
      * Extracts all terms from the specified query and adds it to the specified document.
+     *
      * @param query                 The query to extract terms from
      * @param document              The document to add the extracted terms to
      * @param queryTermsFieldField  The field in the document holding the extracted terms
-     * @param unknownQueryField     The field used to mark a document that not all query terms could be extracted.
-     *                              For example the query contained an unsupported query (e.g. WildcardQuery).
-     * @param fieldType The field type for the query metadata field
+     * @param extractionResultField The field contains whether query term extraction was successful, partial or
+     *                              failed. (For example the query contained an unsupported query (e.g. WildcardQuery)
+     *                              then query extraction would fail)
+     * @param fieldType             The field type for the query metadata field
      */
     public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField,
-                                         String unknownQueryField, FieldType fieldType) {
-        Set<Term> queryTerms;
+                                         String extractionResultField, FieldType fieldType) {
+        Result result;
         try {
-            queryTerms = extractQueryTerms(query);
+            result = extractQueryTerms(query);
         } catch (UnsupportedQueryException e) {
-            document.add(new Field(unknownQueryField, new BytesRef(), fieldType));
+            document.add(new Field(extractionResultField, EXTRACTION_FAILED, fieldType));
             return;
         }
-        for (Term term : queryTerms) {
+        for (Term term : result.terms) {
             BytesRefBuilder builder = new BytesRefBuilder();
             builder.append(new BytesRef(term.field()));
             builder.append(FIELD_VALUE_SEPARATOR);
             builder.append(term.bytes());
             document.add(new Field(queryTermsFieldField, builder.toBytesRef(), fieldType));
         }
+        if (result.verified) {
+            document.add(new Field(extractionResultField, EXTRACTION_COMPLETE, fieldType));
+        } else {
+            document.add(new Field(extractionResultField, EXTRACTION_PARTIAL, fieldType));
+        }
+    }
+
+    /**
+     * Creates a terms query containing all terms from all fields of the specified index reader.
+     */
+    public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
+                                              Term... optionalTerms) throws IOException {
+        Objects.requireNonNull(queryMetadataField);
+
+        List<Term> extractedTerms = new ArrayList<>();
+        Collections.addAll(extractedTerms, optionalTerms);
+
+        Fields fields = MultiFields.getFields(indexReader);
+        for (String field : fields) {
+            Terms terms = fields.terms(field);
+            if (terms == null) {
+                continue;
+            }
+
+            BytesRef fieldBr = new BytesRef(field);
+            TermsEnum tenum = terms.iterator();
+            for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
+                BytesRefBuilder builder = new BytesRefBuilder();
+                builder.append(fieldBr);
+                builder.append(FIELD_VALUE_SEPARATOR);
+                builder.append(term);
+                extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
+            }
+        }
+        return new TermsQuery(extractedTerms);
     }
 
     /**
      * Extracts all query terms from the provided query and adds it to specified list.
-     *
+     * <p>
      * From boolean query with no should clauses or phrase queries only the longest term are selected,
      * since that those terms are likely to be the rarest. Boolean query's must_not clauses are always ignored.
-     *
+     * <p>
      * If from part of the query, no query terms can be extracted then term extraction is stopped and
      * an UnsupportedQueryException is thrown.
      */
-    static Set<Term> extractQueryTerms(Query query) {
-        if (query instanceof MatchNoDocsQuery) {
-            // no terms to extract as this query matches no docs
-            return Collections.emptySet();
-        } else if (query instanceof TermQuery) {
-            return Collections.singleton(((TermQuery) query).getTerm());
-        } else if (query instanceof TermsQuery) {
-            Set<Term> terms = new HashSet<>();
+    static Result extractQueryTerms(Query query) {
+        Class queryClass = query.getClass();
+        if (queryClass.isAnonymousClass()) {
+            // Sometimes queries have anonymous classes in that case we need the direct super class.
+            // (for example blended term query)
+            queryClass = queryClass.getSuperclass();
+        }
+        Function<Query, Result> queryProcessor = queryProcessors.get(queryClass);
+        if (queryProcessor != null) {
+            return queryProcessor.apply(query);
+        } else {
+            throw new UnsupportedQueryException(query);
+        }
+    }
+
+    static Function<Query, Result> matchNoDocsQuery() {
+        return (query -> new Result(true, Collections.emptySet()));
+    }
+
+    static Function<Query, Result> constantScoreQuery() {
+        return query -> {
+            Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
+            return extractQueryTerms(wrappedQuery);
+        };
+    }
+
+    static Function<Query, Result> boostQuery() {
+        return query -> {
+            Query wrappedQuery = ((BoostQuery) query).getQuery();
+            return extractQueryTerms(wrappedQuery);
+        };
+    }
+
+    static Function<Query, Result> termQuery() {
+        return (query -> {
+            TermQuery termQuery = (TermQuery) query;
+            return new Result(true, Collections.singleton(termQuery.getTerm()));
+        });
+    }
+
+    static Function<Query, Result> termsQuery() {
+        return query -> {
             TermsQuery termsQuery = (TermsQuery) query;
+            Set<Term> terms = new HashSet<>();
             PrefixCodedTerms.TermIterator iterator = termsQuery.getTermData().iterator();
             for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                 terms.add(new Term(iterator.field(), term));
             }
-            return terms;
-        } else if (query instanceof PhraseQuery) {
+            return new Result(true, terms);
+        };
+    }
+
+    static Function<Query, Result> commonTermsQuery() {
+        return query -> {
+            List<Term> terms = ((CommonTermsQuery) query).getTerms();
+            return new Result(false, new HashSet<>(terms));
+        };
+    }
+
+    static Function<Query, Result> blendedTermQuery() {
+        return query -> {
+            List<Term> terms = ((BlendedTermQuery) query).getTerms();
+            return new Result(true, new HashSet<>(terms));
+        };
+    }
+
+    static Function<Query, Result> phraseQuery() {
+        return query -> {
             Term[] terms = ((PhraseQuery) query).getTerms();
             if (terms.length == 0) {
-                return Collections.emptySet();
+                return new Result(true, Collections.emptySet());
             }
 
             // the longest term is likely to be the rarest,
@@ -132,19 +250,76 @@ public final class ExtractQueryTermsService {
                     longestTerm = term;
                 }
             }
-            return Collections.singleton(longestTerm);
-        } else if (query instanceof BooleanQuery) {
-            List<BooleanClause> clauses = ((BooleanQuery) query).clauses();
-            boolean hasRequiredClauses = false;
+            return new Result(false, Collections.singleton(longestTerm));
+        };
+    }
+
+    static Function<Query, Result> spanTermQuery() {
+        return query -> {
+            Term term = ((SpanTermQuery) query).getTerm();
+            return new Result(true, Collections.singleton(term));
+        };
+    }
+
+    static Function<Query, Result> spanNearQuery() {
+        return query -> {
+            Set<Term> bestClauses = null;
+            SpanNearQuery spanNearQuery = (SpanNearQuery) query;
+            for (SpanQuery clause : spanNearQuery.getClauses()) {
+                Result temp = extractQueryTerms(clause);
+                bestClauses = selectTermListWithTheLongestShortestTerm(temp.terms, bestClauses);
+            }
+            return new Result(false, bestClauses);
+        };
+    }
+
+    static Function<Query, Result> spanOrQuery() {
+        return query -> {
+            Set<Term> terms = new HashSet<>();
+            SpanOrQuery spanOrQuery = (SpanOrQuery) query;
+            for (SpanQuery clause : spanOrQuery.getClauses()) {
+                terms.addAll(extractQueryTerms(clause).terms);
+            }
+            return new Result(false, terms);
+        };
+    }
+
+    static Function<Query, Result> spanNotQuery() {
+        return query -> {
+            Result result = extractQueryTerms(((SpanNotQuery) query).getInclude());
+            return new Result(false, result.terms);
+        };
+    }
+
+    static Function<Query, Result> spanFirstQuery() {
+        return query -> {
+            Result result = extractQueryTerms(((SpanFirstQuery) query).getMatch());
+            return new Result(false, result.terms);
+        };
+    }
+
+    static Function<Query, Result> booleanQuery() {
+        return query -> {
+            BooleanQuery bq = (BooleanQuery) query;
+            List<BooleanClause> clauses = bq.clauses();
+            int minimumShouldMatch = bq.getMinimumNumberShouldMatch();
+            int numRequiredClauses = 0;
+            int numOptionalClauses = 0;
+            int numProhibitedClauses = 0;
             for (BooleanClause clause : clauses) {
                 if (clause.isRequired()) {
-                    hasRequiredClauses = true;
-                    break;
+                    numRequiredClauses++;
+                }
+                if (clause.isProhibited()) {
+                    numProhibitedClauses++;
+                }
+                if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
+                    numOptionalClauses++;
                 }
             }
-            if (hasRequiredClauses) {
-                UnsupportedQueryException uqe = null;
+            if (numRequiredClauses > 0) {
                 Set<Term> bestClause = null;
+                UnsupportedQueryException uqe = null;
                 for (BooleanClause clause : clauses) {
                     if (clause.isRequired() == false) {
                         // skip must_not clauses, we don't need to remember the things that do *not* match...
@@ -153,77 +328,56 @@ public final class ExtractQueryTermsService {
                         continue;
                     }
 
-                    Set<Term> temp;
+                    Result temp;
                     try {
                         temp = extractQueryTerms(clause.getQuery());
                     } catch (UnsupportedQueryException e) {
                         uqe = e;
                         continue;
                     }
-                    bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
+                    bestClause = selectTermListWithTheLongestShortestTerm(temp.terms, bestClause);
                 }
                 if (bestClause != null) {
-                    return bestClause;
+                    return new Result(false, bestClause);
                 } else {
                     if (uqe != null) {
+                        // we're unable to select the best clause and an exception occurred, so we bail
                         throw uqe;
+                    } else {
+                        // We didn't find a clause and no exception occurred, so this bq only contained MatchNoDocsQueries,
+                        return new Result(true, Collections.emptySet());
                     }
-                    return Collections.emptySet();
                 }
             } else {
-                Set<Term> terms = new HashSet<>();
+                List<Query> disjunctions = new ArrayList<>(numOptionalClauses);
                 for (BooleanClause clause : clauses) {
-                    if (clause.isProhibited()) {
-                        // we don't need to remember the things that do *not* match...
-                        continue;
+                    if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
+                        disjunctions.add(clause.getQuery());
                     }
-                    terms.addAll(extractQueryTerms(clause.getQuery()));
                 }
-                return terms;
+                return handleDisjunction(disjunctions, minimumShouldMatch, numProhibitedClauses > 0);
             }
-        } else if (query instanceof ConstantScoreQuery) {
-            Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
-            return extractQueryTerms(wrappedQuery);
-        } else if (query instanceof BoostQuery) {
-            Query wrappedQuery = ((BoostQuery) query).getQuery();
-            return extractQueryTerms(wrappedQuery);
-        } else if (query instanceof CommonTermsQuery) {
-            List<Term> terms = ((CommonTermsQuery) query).getTerms();
-            return new HashSet<>(terms);
-        } else if (query instanceof BlendedTermQuery) {
-            List<Term> terms = ((BlendedTermQuery) query).getTerms();
-            return new HashSet<>(terms);
-        } else if (query instanceof DisjunctionMaxQuery) {
+        };
+    }
+
+    static Function<Query, Result> disjunctionMaxQuery() {
+        return query -> {
             List<Query> disjuncts = ((DisjunctionMaxQuery) query).getDisjuncts();
-            Set<Term> terms = new HashSet<>();
-            for (Query disjunct : disjuncts) {
-                terms.addAll(extractQueryTerms(disjunct));
-            }
-            return terms;
-        } else if (query instanceof SpanTermQuery) {
-            return Collections.singleton(((SpanTermQuery) query).getTerm());
-        } else if (query instanceof SpanNearQuery) {
-            Set<Term> bestClause = null;
-            SpanNearQuery spanNearQuery = (SpanNearQuery) query;
-            for (SpanQuery clause : spanNearQuery.getClauses()) {
-                Set<Term> temp = extractQueryTerms(clause);
-                bestClause = selectTermListWithTheLongestShortestTerm(temp, bestClause);
-            }
-            return bestClause;
-        } else if (query instanceof SpanOrQuery) {
-            Set<Term> terms = new HashSet<>();
-            SpanOrQuery spanOrQuery = (SpanOrQuery) query;
-            for (SpanQuery clause : spanOrQuery.getClauses()) {
-                terms.addAll(extractQueryTerms(clause));
+            return handleDisjunction(disjuncts, 1, false);
+        };
+    }
+
+    static Result handleDisjunction(List<Query> disjunctions, int minimumShouldMatch, boolean otherClauses) {
+        boolean verified = minimumShouldMatch <= 1 && otherClauses == false;
+        Set<Term> terms = new HashSet<>();
+        for (Query disjunct : disjunctions) {
+            Result subResult = extractQueryTerms(disjunct);
+            if (subResult.verified == false) {
+                verified = false;
             }
-            return terms;
-        } else if (query instanceof SpanFirstQuery) {
-            return extractQueryTerms(((SpanFirstQuery)query).getMatch());
-        } else if (query instanceof SpanNotQuery) {
-            return extractQueryTerms(((SpanNotQuery) query).getInclude());
-        } else {
-            throw new UnsupportedQueryException(query);
+            terms.addAll(subResult.terms);
         }
+        return new Result(verified, terms);
     }
 
     static Set<Term> selectTermListWithTheLongestShortestTerm(Set<Term> terms1, Set<Term> terms2) {
@@ -243,7 +397,7 @@ public final class ExtractQueryTermsService {
         }
     }
 
-    private static int minTermLength(Set<Term> terms) {
+    static int minTermLength(Set<Term> terms) {
         int min = Integer.MAX_VALUE;
         for (Term term : terms) {
             min = Math.min(min, term.bytes().length);
@@ -251,40 +405,22 @@ public final class ExtractQueryTermsService {
         return min;
     }
 
-    /**
-     * Creates a boolean query with a should clause for each term on all fields of the specified index reader.
-     */
-    public static Query createQueryTermsQuery(IndexReader indexReader, String queryMetadataField,
-                                              String unknownQueryField) throws IOException {
-        Objects.requireNonNull(queryMetadataField);
-        Objects.requireNonNull(unknownQueryField);
+    static class Result {
 
-        List<Term> extractedTerms = new ArrayList<>();
-        extractedTerms.add(new Term(unknownQueryField));
-        Fields fields = MultiFields.getFields(indexReader);
-        for (String field : fields) {
-            Terms terms = fields.terms(field);
-            if (terms == null) {
-                continue;
-            }
+        final Set<Term> terms;
+        final boolean verified;
 
-            BytesRef fieldBr = new BytesRef(field);
-            TermsEnum tenum = terms.iterator();
-            for (BytesRef term = tenum.next(); term != null ; term = tenum.next()) {
-                BytesRefBuilder builder = new BytesRefBuilder();
-                builder.append(fieldBr);
-                builder.append(FIELD_VALUE_SEPARATOR);
-                builder.append(term);
-                extractedTerms.add(new Term(queryMetadataField, builder.toBytesRef()));
-            }
+        Result(boolean verified, Set<Term> terms) {
+            this.terms = terms;
+            this.verified = verified;
         }
-        return new TermsQuery(extractedTerms);
+
     }
 
     /**
      * Exception indicating that none or some query terms couldn't extracted from a percolator query.
      */
-    public static class UnsupportedQueryException extends RuntimeException {
+    static class UnsupportedQueryException extends RuntimeException {
 
         private final Query unsupportedQuery;
 

+ 42 - 12
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java

@@ -28,12 +28,15 @@ import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 
 import java.io.IOException;
 import java.util.Objects;
@@ -54,6 +57,7 @@ public final class PercolateQuery extends Query implements Accountable {
         private final IndexSearcher percolatorIndexSearcher;
 
         private Query queriesMetaDataQuery;
+        private Query verifiedQueriesQuery = new MatchNoDocsQuery("");
         private Query percolateTypeQuery;
 
         /**
@@ -64,21 +68,32 @@ public final class PercolateQuery extends Query implements Accountable {
          */
         public Builder(String docType, QueryStore queryStore, BytesReference documentSource, IndexSearcher percolatorIndexSearcher) {
             this.docType = Objects.requireNonNull(docType);
+            this.queryStore = Objects.requireNonNull(queryStore);
             this.documentSource = Objects.requireNonNull(documentSource);
             this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
-            this.queryStore = Objects.requireNonNull(queryStore);
         }
 
         /**
          * Optionally sets a query that reduces the number of queries to percolate based on extracted terms from
          * the document to be percolated.
-         *
-         * @param extractedTermsFieldName The name of the field to get the extracted terms from
-         * @param unknownQueryFieldname The field used to mark documents whose queries couldn't all get extracted
+         * @param extractedTermsFieldName   The name of the field to get the extracted terms from
+         * @param extractionResultField     The field to indicate for a document whether query term extraction was complete,
+         *                                  partial or failed. If query extraction was complete, the MemoryIndex verification can be skipped.
          */
-        public void extractQueryTermsQuery(String extractedTermsFieldName, String unknownQueryFieldname) throws IOException {
+        public void extractQueryTermsQuery(String extractedTermsFieldName, String extractionResultField) throws IOException {
+            // We can only skip the MemoryIndex verification when percolating a single document.
+            // When the document being percolated contains a nested object field then the MemoryIndex contains multiple
+            // documents. In this case the term query that indicates whether memory index verification can be skipped
+            // can incorrectly indicate that non nested queries would match, while their nested variants would not.
+            if (percolatorIndexSearcher.getIndexReader().maxDoc() == 1) {
+                this.verifiedQueriesQuery = new TermQuery(new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_COMPLETE));
+            }
             this.queriesMetaDataQuery = ExtractQueryTermsService.createQueryTermsQuery(
-                    percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName, unknownQueryFieldname
+                    percolatorIndexSearcher.getIndexReader(), extractedTermsFieldName,
+                    // include extractionResultField:failed, because docs with this term have no extractedTermsField
+                    // and otherwise we would fail to return these docs. Docs that failed query term extraction
+                    // always need to be verified by MemoryIndex:
+                    new Term(extractionResultField, ExtractQueryTermsService.EXTRACTION_FAILED)
             );
         }
 
@@ -94,14 +109,15 @@ public final class PercolateQuery extends Query implements Accountable {
                 throw new IllegalStateException("Either filter by deprecated percolator type or by query metadata");
             }
             // The query that selects which percolator queries will be evaluated by MemoryIndex:
-            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            BooleanQuery.Builder queriesQuery = new BooleanQuery.Builder();
             if (percolateTypeQuery != null) {
-                builder.add(percolateTypeQuery, FILTER);
+                queriesQuery.add(percolateTypeQuery, FILTER);
             }
             if (queriesMetaDataQuery != null) {
-                builder.add(queriesMetaDataQuery, FILTER);
+                queriesQuery.add(queriesMetaDataQuery, FILTER);
             }
-            return new PercolateQuery(docType, queryStore, documentSource, builder.build(), percolatorIndexSearcher);
+            return new PercolateQuery(docType, queryStore, documentSource, queriesQuery.build(), percolatorIndexSearcher,
+                    verifiedQueriesQuery);
         }
 
     }
@@ -110,22 +126,25 @@ public final class PercolateQuery extends Query implements Accountable {
     private final QueryStore queryStore;
     private final BytesReference documentSource;
     private final Query percolatorQueriesQuery;
+    private final Query verifiedQueriesQuery;
     private final IndexSearcher percolatorIndexSearcher;
 
     private PercolateQuery(String documentType, QueryStore queryStore, BytesReference documentSource,
-                           Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher) {
+                           Query percolatorQueriesQuery, IndexSearcher percolatorIndexSearcher, Query verifiedQueriesQuery) {
         this.documentType = documentType;
         this.documentSource = documentSource;
         this.percolatorQueriesQuery = percolatorQueriesQuery;
         this.queryStore = queryStore;
         this.percolatorIndexSearcher = percolatorIndexSearcher;
+        this.verifiedQueriesQuery = verifiedQueriesQuery;
     }
 
     @Override
     public Query rewrite(IndexReader reader) throws IOException {
         Query rewritten = percolatorQueriesQuery.rewrite(reader);
         if (rewritten != percolatorQueriesQuery) {
-            return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher);
+            return new PercolateQuery(documentType, queryStore, documentSource, rewritten, percolatorIndexSearcher,
+                    verifiedQueriesQuery);
         } else {
             return this;
         }
@@ -133,6 +152,7 @@ public final class PercolateQuery extends Query implements Accountable {
 
     @Override
     public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+        final Weight verifiedQueriesQueryWeight = verifiedQueriesQuery.createWeight(searcher, false);
         final Weight innerWeight = percolatorQueriesQuery.createWeight(searcher, needsScores);
         return new Weight(this) {
             @Override
@@ -206,6 +226,8 @@ public final class PercolateQuery extends Query implements Accountable {
                         }
                     };
                 } else {
+                    Scorer verifiedDocsScorer = verifiedQueriesQueryWeight.scorer(leafReaderContext);
+                    Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
                     return new BaseScorer(this, approximation, queries, percolatorIndexSearcher) {
 
                         @Override
@@ -214,6 +236,14 @@ public final class PercolateQuery extends Query implements Accountable {
                         }
 
                         boolean matchDocId(int docId) throws IOException {
+                            // We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
+                            // If docId also appears in the verifiedDocsBits then that means during indexing
+                            // we were able to extract all query terms for this candidate match,
+                            // and we determined, based on the nature of the query, that it is safe to skip
+                            // the MemoryIndex verification.
+                            if (verifiedDocsBits.get(docId)) {
+                                return true;
+                            }
                             Query query = percolatorQueries.getQuery(docId);
                             return query != null && Lucene.exists(percolatorIndexSearcher, query);
                         }

+ 3 - 2
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQueryBuilder.java

@@ -420,10 +420,11 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
                         "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
             }
             PercolatorFieldMapper.PercolatorFieldType pft = (PercolatorFieldMapper.PercolatorFieldType) fieldType;
+            PercolateQuery.QueryStore queryStore = createStore(pft, context, mapUnmappedFieldsAsString);
             PercolateQuery.Builder builder = new PercolateQuery.Builder(
-                    documentType, createStore(pft, context, mapUnmappedFieldsAsString), document, docSearcher
+                    documentType, queryStore, document, docSearcher
             );
-            builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getUnknownQueryFieldName());
+            builder.extractQueryTermsQuery(pft.getExtractedTermsField(), pft.getExtractionResultFieldName());
             return builder.build();
         } else {
             Query percolateTypeQuery = new TermQuery(new Term(TypeFieldMapper.NAME, MapperService.PERCOLATOR_LEGACY_TYPE_NAME));

+ 18 - 17
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java

@@ -58,7 +58,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     private static final PercolatorFieldType FIELD_TYPE = new PercolatorFieldType();
 
     public static final String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
-    public static final String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
+    public static final String EXTRACTION_RESULT_FIELD_NAME = "extraction_result";
     public static final String QUERY_BUILDER_FIELD_NAME = "query_builder_field";
 
     public static class Builder extends FieldMapper.Builder<Builder, PercolatorFieldMapper> {
@@ -75,15 +75,15 @@ public class PercolatorFieldMapper extends FieldMapper {
             context.path().add(name());
             KeywordFieldMapper extractedTermsField = createExtractQueryFieldBuilder(EXTRACTED_TERMS_FIELD_NAME, context);
             ((PercolatorFieldType) fieldType).queryTermsField = extractedTermsField.fieldType();
-            KeywordFieldMapper unknownQueryField = createExtractQueryFieldBuilder(UNKNOWN_QUERY_FIELD_NAME, context);
-            ((PercolatorFieldType) fieldType).unknownQueryField = unknownQueryField.fieldType();
+            KeywordFieldMapper extractionResultField = createExtractQueryFieldBuilder(EXTRACTION_RESULT_FIELD_NAME, context);
+            ((PercolatorFieldType) fieldType).extractionResultField = extractionResultField.fieldType();
             BinaryFieldMapper queryBuilderField = createQueryBuilderFieldBuilder(context);
             ((PercolatorFieldType) fieldType).queryBuilderField = queryBuilderField.fieldType();
             context.path().remove();
             setupFieldType(context);
             return new PercolatorFieldMapper(name(), fieldType, defaultFieldType, context.indexSettings(),
                     multiFieldsBuilder.build(this, context), copyTo, queryShardContext, extractedTermsField,
-                    unknownQueryField, queryBuilderField);
+                    extractionResultField, queryBuilderField);
         }
 
         static KeywordFieldMapper createExtractQueryFieldBuilder(String name, BuilderContext context) {
@@ -102,6 +102,7 @@ public class PercolatorFieldMapper extends FieldMapper {
             builder.fieldType().setDocValuesType(DocValuesType.BINARY);
             return builder.build(context);
         }
+
     }
 
     public static class TypeParser implements FieldMapper.TypeParser {
@@ -115,7 +116,7 @@ public class PercolatorFieldMapper extends FieldMapper {
     public static class PercolatorFieldType extends MappedFieldType {
 
         private MappedFieldType queryTermsField;
-        private MappedFieldType unknownQueryField;
+        private MappedFieldType extractionResultField;
         private MappedFieldType queryBuilderField;
 
         public PercolatorFieldType() {
@@ -127,7 +128,7 @@ public class PercolatorFieldMapper extends FieldMapper {
         public PercolatorFieldType(PercolatorFieldType ref) {
             super(ref);
             queryTermsField = ref.queryTermsField;
-            unknownQueryField = ref.unknownQueryField;
+            extractionResultField = ref.extractionResultField;
             queryBuilderField = ref.queryBuilderField;
         }
 
@@ -135,8 +136,8 @@ public class PercolatorFieldMapper extends FieldMapper {
             return queryTermsField.name();
         }
 
-        public String getUnknownQueryFieldName() {
-            return unknownQueryField.name();
+        public String getExtractionResultFieldName() {
+            return extractionResultField.name();
         }
 
         public String getQueryBuilderFieldName() {
@@ -162,17 +163,17 @@ public class PercolatorFieldMapper extends FieldMapper {
     private final boolean mapUnmappedFieldAsString;
     private final QueryShardContext queryShardContext;
     private KeywordFieldMapper queryTermsField;
-    private KeywordFieldMapper unknownQueryField;
+    private KeywordFieldMapper extractionResultField;
     private BinaryFieldMapper queryBuilderField;
 
     public PercolatorFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                  Settings indexSettings, MultiFields multiFields, CopyTo copyTo, QueryShardContext queryShardContext,
-                                 KeywordFieldMapper queryTermsField, KeywordFieldMapper unknownQueryField,
+                                 KeywordFieldMapper queryTermsField, KeywordFieldMapper extractionResultField,
                                  BinaryFieldMapper queryBuilderField) {
         super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
         this.queryShardContext = queryShardContext;
         this.queryTermsField = queryTermsField;
-        this.unknownQueryField = unknownQueryField;
+        this.extractionResultField = extractionResultField;
         this.queryBuilderField = queryBuilderField;
         this.mapUnmappedFieldAsString = INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING.get(indexSettings);
     }
@@ -181,18 +182,18 @@ public class PercolatorFieldMapper extends FieldMapper {
     public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
         PercolatorFieldMapper updated = (PercolatorFieldMapper) super.updateFieldType(fullNameToFieldType);
         KeywordFieldMapper queryTermsUpdated = (KeywordFieldMapper) queryTermsField.updateFieldType(fullNameToFieldType);
-        KeywordFieldMapper unknownQueryUpdated = (KeywordFieldMapper) unknownQueryField.updateFieldType(fullNameToFieldType);
+        KeywordFieldMapper extractionResultUpdated = (KeywordFieldMapper) extractionResultField.updateFieldType(fullNameToFieldType);
         BinaryFieldMapper queryBuilderUpdated = (BinaryFieldMapper) queryBuilderField.updateFieldType(fullNameToFieldType);
 
-        if (updated == this || queryTermsUpdated == queryTermsField || unknownQueryUpdated == unknownQueryField
-                || queryBuilderUpdated == queryBuilderField) {
+        if (updated == this && queryTermsUpdated == queryTermsField && extractionResultUpdated == extractionResultField
+                && queryBuilderUpdated == queryBuilderField) {
             return this;
         }
         if (updated == this) {
             updated = (PercolatorFieldMapper) updated.clone();
         }
         updated.queryTermsField = queryTermsUpdated;
-        updated.unknownQueryField = unknownQueryUpdated;
+        updated.extractionResultField = extractionResultUpdated;
         updated.queryBuilderField = queryBuilderUpdated;
         return updated;
     }
@@ -220,7 +221,7 @@ public class PercolatorFieldMapper extends FieldMapper {
         }
 
         Query query = toQuery(queryShardContext, mapUnmappedFieldAsString, queryBuilder);
-        ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), unknownQueryField.name(),
+        ExtractQueryTermsService.extractQueryTerms(query, context.doc(), queryTermsField.name(), extractionResultField.name(),
                 queryTermsField.fieldType());
         return null;
     }
@@ -258,7 +259,7 @@ public class PercolatorFieldMapper extends FieldMapper {
 
     @Override
     public Iterator<Mapper> iterator() {
-        return Arrays.<Mapper>asList(queryTermsField, unknownQueryField, queryBuilderField).iterator();
+        return Arrays.<Mapper>asList(queryTermsField, extractionResultField, queryBuilderField).iterator();
     }
 
     @Override

+ 233 - 60
modules/percolator/src/test/java/org/elasticsearch/percolator/ExtractQueryTermsServiceTests.java

@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.PrefixCodedTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.memory.MemoryIndex;
@@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
@@ -44,6 +46,7 @@ import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.ParseContext;
+import org.elasticsearch.percolator.ExtractQueryTermsService.Result;
 import org.elasticsearch.test.ESTestCase;
 
 
@@ -54,18 +57,22 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
+import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
+import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_PARTIAL;
 import static org.elasticsearch.percolator.ExtractQueryTermsService.UnsupportedQueryException;
 import static org.elasticsearch.percolator.ExtractQueryTermsService.extractQueryTerms;
 import static org.elasticsearch.percolator.ExtractQueryTermsService.createQueryTermsQuery;
 import static org.elasticsearch.percolator.ExtractQueryTermsService.selectTermListWithTheLongestShortestTerm;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.sameInstance;
 
 public class ExtractQueryTermsServiceTests extends ESTestCase {
 
     public final static String QUERY_TERMS_FIELD = "extracted_terms";
-    public final static String UNKNOWN_QUERY_FIELD = "unknown_query";
-    public static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
+    public final static String EXTRACTION_RESULT_FIELD = "extraction_result";
+    public final static FieldType QUERY_TERMS_FIELD_TYPE = new FieldType();
 
     static {
         QUERY_TERMS_FIELD_TYPE.setTokenized(false);
@@ -81,33 +88,41 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         bq.add(termQuery2, BooleanClause.Occur.SHOULD);
 
         ParseContext.Document document = new ParseContext.Document();
-        extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
-        Collections.sort(document.getFields(), (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
-        assertThat(document.getFields().size(), equalTo(2));
-        assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
-        assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
-        assertThat(document.getFields().get(1).name(), equalTo(QUERY_TERMS_FIELD));
-        assertThat(document.getFields().get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
+        extractQueryTerms(bq.build(), document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
+        assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_COMPLETE));
+        List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(QUERY_TERMS_FIELD)));
+        Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
+        assertThat(fields.size(), equalTo(2));
+        assertThat(fields.get(0).name(), equalTo(QUERY_TERMS_FIELD));
+        assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field1\u0000term1"));
+        assertThat(fields.get(1).name(), equalTo(QUERY_TERMS_FIELD));
+        assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field2\u0000term2"));
     }
 
     public void testExtractQueryMetadata_unsupported() {
-        BooleanQuery.Builder bq = new BooleanQuery.Builder();
-        TermQuery termQuery1 = new TermQuery(new Term("field1", "term1"));
-        bq.add(termQuery1, BooleanClause.Occur.SHOULD);
-        TermQuery termQuery2 = new TermQuery(new Term("field2", "term2"));
-        bq.add(termQuery2, BooleanClause.Occur.SHOULD);
-
         TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
         ParseContext.Document document = new ParseContext.Document();
-        extractQueryTerms(query, document, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD, QUERY_TERMS_FIELD_TYPE);
+        extractQueryTerms(query, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
         assertThat(document.getFields().size(), equalTo(1));
-        assertThat(document.getFields().get(0).name(), equalTo(UNKNOWN_QUERY_FIELD));
-        assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo(""));
+        assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_FAILED));
+    }
+
+    public void testExtractQueryMetadata_notVerified() {
+        PhraseQuery phraseQuery = new PhraseQuery("field", "term");
+
+        ParseContext.Document document = new ParseContext.Document();
+        extractQueryTerms(phraseQuery, document, QUERY_TERMS_FIELD, EXTRACTION_RESULT_FIELD, QUERY_TERMS_FIELD_TYPE);
+        assertThat(document.getFields().size(), equalTo(2));
+        assertThat(document.getFields().get(0).name(), equalTo(QUERY_TERMS_FIELD));
+        assertThat(document.getFields().get(0).binaryValue().utf8ToString(), equalTo("field\u0000term"));
+        assertThat(document.getField(EXTRACTION_RESULT_FIELD).stringValue(), equalTo(EXTRACTION_PARTIAL));
     }
 
     public void testExtractQueryMetadata_termQuery() {
         TermQuery termQuery = new TermQuery(new Term("_field", "_term"));
-        List<Term> terms = new ArrayList<>(extractQueryTerms(termQuery));
+        Result result = extractQueryTerms(termQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(termQuery.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery.getTerm().bytes()));
@@ -115,7 +130,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
     public void testExtractQueryMetadata_termsQuery() {
         TermsQuery termsQuery = new TermsQuery("_field", new BytesRef("_term1"), new BytesRef("_term2"));
-        List<Term> terms = new ArrayList<>(extractQueryTerms(termsQuery));
+        Result result = extractQueryTerms(termsQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field"));
@@ -125,7 +142,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
         // test with different fields
         termsQuery = new TermsQuery(new Term("_field1", "_term1"), new Term("_field2", "_term2"));
-        terms = new ArrayList<>(extractQueryTerms(termsQuery));
+        result = extractQueryTerms(termsQuery);
+        assertThat(result.verified, is(true));
+        terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field1"));
@@ -136,7 +155,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
     public void testExtractQueryMetadata_phraseQuery() {
         PhraseQuery phraseQuery = new PhraseQuery("_field", "_term1", "term2");
-        List<Term> terms = new ArrayList<>(extractQueryTerms(phraseQuery));
+        Result result = extractQueryTerms(phraseQuery);
+        assertThat(result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
         assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
@@ -157,7 +178,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
 
         BooleanQuery booleanQuery = builder.build();
-        List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
+        Result result = extractQueryTerms(booleanQuery);
+        assertThat("Should clause with phrase query isn't verified, so entire query can't be verified", result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(3));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -183,7 +206,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(subBuilder.build(), BooleanClause.Occur.SHOULD);
 
         BooleanQuery booleanQuery = builder.build();
-        List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
+        Result result = extractQueryTerms(booleanQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(4));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -204,16 +229,74 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(phraseQuery, BooleanClause.Occur.SHOULD);
 
         BooleanQuery booleanQuery = builder.build();
-        List<Term> terms = new ArrayList<>(extractQueryTerms(booleanQuery));
+        Result result = extractQueryTerms(booleanQuery);
+        assertThat(result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(phraseQuery.getTerms()[0].field()));
         assertThat(terms.get(0).bytes(), equalTo(phraseQuery.getTerms()[0].bytes()));
     }
 
+    public void testExactMatch_booleanQuery() {
+        BooleanQuery.Builder builder = new BooleanQuery.Builder();
+        TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1"));
+        builder.add(termQuery1, BooleanClause.Occur.SHOULD);
+        TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
+        builder.add(termQuery2, BooleanClause.Occur.SHOULD);
+        Result result = extractQueryTerms(builder.build());
+        assertThat("All clauses are exact, so candidate matches are verified", result.verified, is(true));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, BooleanClause.Occur.SHOULD);
+        PhraseQuery phraseQuery1 = new PhraseQuery("_field", "_term1", "_term2");
+        builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("Clause isn't exact, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(phraseQuery1, BooleanClause.Occur.SHOULD);
+        PhraseQuery phraseQuery2 = new PhraseQuery("_field", "_term3", "_term4");
+        builder.add(phraseQuery2, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("No clause is exact, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, BooleanClause.Occur.MUST_NOT);
+        builder.add(termQuery2, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("There is a must_not clause, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
+        builder.add(termQuery1, BooleanClause.Occur.SHOULD);
+        builder.add(termQuery2, BooleanClause.Occur.SHOULD);
+        result = extractQueryTerms(builder.build());
+        assertThat("Minimum match is > 1, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        result = extractQueryTerms(builder.build());
+        assertThat("Single required clause, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        builder.add(termQuery2, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        result = extractQueryTerms(builder.build());
+        assertThat("Two or more required clauses, so candidate matches are not verified", result.verified, is(false));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
+        builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
+        result = extractQueryTerms(builder.build());
+        assertThat("Required and prohibited clauses, so candidate matches are not verified", result.verified, is(false));
+    }
+
     public void testExtractQueryMetadata_constantScoreQuery() {
         TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
         ConstantScoreQuery constantScoreQuery = new ConstantScoreQuery(termQuery1);
-        List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
+        Result result = extractQueryTerms(constantScoreQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -222,7 +305,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
     public void testExtractQueryMetadata_boostQuery() {
         TermQuery termQuery1 = new TermQuery(new Term("_field", "_term"));
         BoostQuery constantScoreQuery = new BoostQuery(termQuery1, 1f);
-        List<Term> terms = new ArrayList<>(extractQueryTerms(constantScoreQuery));
+        Result result = extractQueryTerms(constantScoreQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
         assertThat(terms.size(), equalTo(1));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
         assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
@@ -232,7 +317,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
         commonTermsQuery.add(new Term("_field", "_term1"));
         commonTermsQuery.add(new Term("_field", "_term2"));
-        List<Term> terms = new ArrayList<>(extractQueryTerms(commonTermsQuery));
+        Result result = extractQueryTerms(commonTermsQuery);
+        assertThat(result.verified, is(false));
+        List<Term> terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(2));
         assertThat(terms.get(0).field(), equalTo("_field"));
@@ -242,15 +329,17 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
     }
 
     public void testExtractQueryMetadata_blendedTermQuery() {
-        Term[] terms = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
-        BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(terms, false);
-        List<Term> result = new ArrayList<>(extractQueryTerms(commonTermsQuery));
-        Collections.sort(result);
-        assertThat(result.size(), equalTo(2));
-        assertThat(result.get(0).field(), equalTo("_field"));
-        assertThat(result.get(0).text(), equalTo("_term1"));
-        assertThat(result.get(1).field(), equalTo("_field"));
-        assertThat(result.get(1).text(), equalTo("_term2"));
+        Term[] termsArr = new Term[]{new Term("_field", "_term1"), new Term("_field", "_term2")};
+        BlendedTermQuery commonTermsQuery = BlendedTermQuery.booleanBlendedQuery(termsArr, false);
+        Result result = extractQueryTerms(commonTermsQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
+        Collections.sort(terms);
+        assertThat(terms.size(), equalTo(2));
+        assertThat(terms.get(0).field(), equalTo("_field"));
+        assertThat(terms.get(0).text(), equalTo("_term1"));
+        assertThat(terms.get(1).field(), equalTo("_field"));
+        assertThat(terms.get(1).text(), equalTo("_term2"));
     }
 
     public void testExtractQueryMetadata_spanTermQuery() {
@@ -266,8 +355,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         // 4) FieldMaskingSpanQuery is a tricky query so we shouldn't optimize this
 
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
-        Set<Term> terms = extractQueryTerms(spanTermQuery1);
-        assertTermsEqual(terms, spanTermQuery1.getTerm());
+        Result result = extractQueryTerms(spanTermQuery1);
+        assertThat(result.verified, is(true));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_spanNearQuery() {
@@ -275,48 +365,109 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("_field", true)
                 .addClause(spanTermQuery1).addClause(spanTermQuery2).build();
-        Set<Term> terms = extractQueryTerms(spanNearQuery);
-        assertTermsEqual(terms, spanTermQuery2.getTerm());
+
+        Result result = extractQueryTerms(spanNearQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery2.getTerm());
     }
 
     public void testExtractQueryMetadata_spanOrQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanOrQuery spanOrQuery = new SpanOrQuery(spanTermQuery1, spanTermQuery2);
-        Set<Term> terms = extractQueryTerms(spanOrQuery);
-        assertTermsEqual(terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
+        Result result = extractQueryTerms(spanOrQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm(), spanTermQuery2.getTerm());
     }
 
     public void testExtractQueryMetadata_spanFirstQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanFirstQuery spanFirstQuery = new SpanFirstQuery(spanTermQuery1, 20);
-        Set<Term> terms = extractQueryTerms(spanFirstQuery);
-        assertTermsEqual(terms, spanTermQuery1.getTerm());
+        Result result = extractQueryTerms(spanFirstQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_spanNotQuery() {
         SpanTermQuery spanTermQuery1 = new SpanTermQuery(new Term("_field", "_short_term"));
         SpanTermQuery spanTermQuery2 = new SpanTermQuery(new Term("_field", "_very_long_term"));
         SpanNotQuery spanNotQuery = new SpanNotQuery(spanTermQuery1, spanTermQuery2);
-        Set<Term> terms = extractQueryTerms(spanNotQuery);
-        assertTermsEqual(terms, spanTermQuery1.getTerm());
+        Result result = extractQueryTerms(spanNotQuery);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, spanTermQuery1.getTerm());
     }
 
     public void testExtractQueryMetadata_matchNoDocsQuery() {
-        Set<Term> terms = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
-        assertEquals(0, terms.size());
+        Result result = extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
+        assertThat(result.verified, is(true));
+        assertEquals(0, result.terms.size());
 
         BooleanQuery.Builder bq = new BooleanQuery.Builder();
         bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
         bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
-        terms = extractQueryTerms(bq.build());
-        assertEquals(0, terms.size());
+        result = extractQueryTerms(bq.build());
+        assertThat(result.verified, is(false));
+        assertEquals(0, result.terms.size());
 
         bq = new BooleanQuery.Builder();
         bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
         bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
-        terms = extractQueryTerms(bq.build());
-        assertTermsEqual(terms, new Term("field", "value"));
+        result = extractQueryTerms(bq.build());
+        assertThat(result.verified, is(true));
+        assertTermsEqual(result.terms, new Term("field", "value"));
+
+        DisjunctionMaxQuery disjunctionMaxQuery = new DisjunctionMaxQuery(
+                Arrays.asList(new TermQuery(new Term("field", "value")), new MatchNoDocsQuery("sometimes there is no reason at all")),
+                1f
+        );
+        result = extractQueryTerms(disjunctionMaxQuery);
+        assertThat(result.verified, is(true));
+        assertTermsEqual(result.terms, new Term("field", "value"));
+    }
+
+    public void testExtractQueryMetadata_matchAllDocsQuery() {
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(new MatchAllDocsQuery()));
+
+        BooleanQuery.Builder builder = new BooleanQuery.Builder();
+        builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        Result result = extractQueryTerms(builder.build());
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, new Term("field", "value"));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        BooleanQuery bq1 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq1));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+        BooleanQuery bq2 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq2));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        BooleanQuery bq3 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq3));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        BooleanQuery bq4 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq4));
+
+        builder = new BooleanQuery.Builder();
+        builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
+        builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+        BooleanQuery bq5 = builder.build();
+        expectThrows(UnsupportedQueryException.class, () -> extractQueryTerms(bq5));
     }
 
     public void testExtractQueryMetadata_unsupportedQuery() {
@@ -343,8 +494,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
         BooleanQuery bq1 = builder.build();
 
-        Set<Term> terms = extractQueryTerms(bq1);
-        assertTermsEqual(terms, termQuery1.getTerm());
+        Result result = extractQueryTerms(bq1);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, termQuery1.getTerm());
 
         TermQuery termQuery2 = new TermQuery(new Term("_field", "_longer_term"));
         builder = new BooleanQuery.Builder();
@@ -352,8 +504,9 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         builder.add(termQuery2, BooleanClause.Occur.MUST);
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
         bq1 = builder.build();
-        terms = extractQueryTerms(bq1);
-        assertTermsEqual(terms, termQuery2.getTerm());
+        result = extractQueryTerms(bq1);
+        assertThat(result.verified, is(false));
+        assertTermsEqual(result.terms, termQuery2.getTerm());
 
         builder = new BooleanQuery.Builder();
         builder.add(unsupportedQuery, BooleanClause.Occur.MUST);
@@ -372,7 +525,27 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
                 Arrays.asList(termQuery1, termQuery2, termQuery3, termQuery4), 0.1f
         );
 
-        List<Term> terms = new ArrayList<>(extractQueryTerms(disjunctionMaxQuery));
+        Result result = extractQueryTerms(disjunctionMaxQuery);
+        assertThat(result.verified, is(true));
+        List<Term> terms = new ArrayList<>(result.terms);
+        Collections.sort(terms);
+        assertThat(terms.size(), equalTo(4));
+        assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
+        assertThat(terms.get(0).bytes(), equalTo(termQuery1.getTerm().bytes()));
+        assertThat(terms.get(1).field(), equalTo(termQuery2.getTerm().field()));
+        assertThat(terms.get(1).bytes(), equalTo(termQuery2.getTerm().bytes()));
+        assertThat(terms.get(2).field(), equalTo(termQuery3.getTerm().field()));
+        assertThat(terms.get(2).bytes(), equalTo(termQuery3.getTerm().bytes()));
+        assertThat(terms.get(3).field(), equalTo(termQuery4.getTerm().field()));
+        assertThat(terms.get(3).bytes(), equalTo(termQuery4.getTerm().bytes()));
+
+        disjunctionMaxQuery = new DisjunctionMaxQuery(
+                Arrays.asList(termQuery1, termQuery2, termQuery3, new PhraseQuery("_field", "_term4")), 0.1f
+        );
+
+        result = extractQueryTerms(disjunctionMaxQuery);
+        assertThat(result.verified, is(false));
+        terms = new ArrayList<>(result.terms);
         Collections.sort(terms);
         assertThat(terms.size(), equalTo(4));
         assertThat(terms.get(0).field(), equalTo(termQuery1.getTerm().field()));
@@ -394,7 +567,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
 
         IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
         TermsQuery query = (TermsQuery)
-                createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, UNKNOWN_QUERY_FIELD);
+                createQueryTermsQuery(indexReader, QUERY_TERMS_FIELD, new Term(EXTRACTION_RESULT_FIELD, EXTRACTION_FAILED));
 
         PrefixCodedTerms terms = query.getTermData();
         assertThat(terms.size(), equalTo(15L));
@@ -413,7 +586,7 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         assertTermIterator(termIterator, "field2\u0000some", QUERY_TERMS_FIELD);
         assertTermIterator(termIterator, "field2\u0000text", QUERY_TERMS_FIELD);
         assertTermIterator(termIterator, "field4\u0000123", QUERY_TERMS_FIELD);
-        assertTermIterator(termIterator, "", UNKNOWN_QUERY_FIELD);
+        assertTermIterator(termIterator, EXTRACTION_FAILED, EXTRACTION_RESULT_FIELD);
     }
 
     public void testSelectTermsListWithHighestSumOfTermLength() {

+ 192 - 36
modules/percolator/src/test/java/org/elasticsearch/percolator/PercolateQueryTests.java

@@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.memory.MemoryIndex;
@@ -37,14 +38,21 @@ import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterScorer;
+import org.apache.lucene.search.FilteredDocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanNotQuery;
@@ -52,6 +60,7 @@ import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.ParseContext;
 import org.elasticsearch.index.mapper.Uid;
@@ -61,8 +70,11 @@ import org.junit.After;
 import org.junit.Before;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.function.Function;
 
 import static org.hamcrest.Matchers.arrayWithSize;
 import static org.hamcrest.Matchers.equalTo;
@@ -72,7 +84,7 @@ public class PercolateQueryTests extends ESTestCase {
 
     public final static String EXTRACTED_TERMS_FIELD_NAME = "extracted_terms";
     public final static String UNKNOWN_QUERY_FIELD_NAME = "unknown_query";
-    public static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
+    public final static FieldType EXTRACTED_TERMS_FIELD_TYPE = new FieldType();
 
     static {
         EXTRACTED_TERMS_FIELD_TYPE.setTokenized(false);
@@ -247,34 +259,91 @@ public class PercolateQueryTests extends ESTestCase {
     }
 
     public void testDuel() throws Exception {
-        int numQueries = scaledRandomIntBetween(32, 256);
-        for (int i = 0; i < numQueries; i++) {
-            String id = Integer.toString(i);
-            Query query;
+        List<Function<String, Query>> queries = new ArrayList<>();
+        queries.add((id) -> new PrefixQuery(new Term("field", id)));
+        queries.add((id) -> new WildcardQuery(new Term("field", id + "*")));
+        queries.add((id) -> new CustomQuery(new Term("field", id)));
+        queries.add((id) -> new SpanTermQuery(new Term("field", id)));
+        queries.add((id) -> new TermQuery(new Term("field", id)));
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            if (randomBoolean()) {
+                builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
             if (randomBoolean()) {
-                query = new PrefixQuery(new Term("field", id));
-            } else if (randomBoolean()) {
-                query = new WildcardQuery(new Term("field", id + "*"));
-            } else if (randomBoolean()) {
-                query = new CustomQuery(new Term("field", id + "*"));
-            } else if (randomBoolean()) {
-                query = new SpanTermQuery(new Term("field", id));
-            } else {
-                query = new TermQuery(new Term("field", id));
+                builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
             }
-            addPercolatorQuery(id, query);
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+            if (randomBoolean()) {
+                builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
+            }
+            return builder.build();
+        });
+        queries.add((id) -> {
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
+            builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
+            return builder.build();
+        });
+        queries.add((id) -> new MatchAllDocsQuery());
+        queries.add((id) -> new MatchNoDocsQuery("no reason at all"));
+
+        int numDocs = randomIntBetween(queries.size(), queries.size() * 3);
+        for (int i = 0; i < numDocs; i++) {
+            String id = Integer.toString(i);
+            addPercolatorQuery(id, queries.get(i % queries.size()).apply(id));
         }
 
         indexWriter.close();
         directoryReader = DirectoryReader.open(directory);
         IndexSearcher shardSearcher = newSearcher(directoryReader);
+        // Disable query cache, because ControlQuery cannot be cached...
+        shardSearcher.setQueryCache(null);
 
-        for (int i = 0; i < numQueries; i++) {
-            MemoryIndex memoryIndex = new MemoryIndex();
+        for (int i = 0; i < numDocs; i++) {
             String id = Integer.toString(i);
+            MemoryIndex memoryIndex = new MemoryIndex();
             memoryIndex.addField("field", id, new WhitespaceAnalyzer());
             duelRun(memoryIndex, shardSearcher);
         }
+
+        MemoryIndex memoryIndex = new MemoryIndex();
+        memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
+        duelRun(memoryIndex, shardSearcher);
+        // Empty percolator doc:
+        memoryIndex = new MemoryIndex();
+        duelRun(memoryIndex, shardSearcher);
     }
 
     public void testDuelSpecificQueries() throws Exception {
@@ -312,6 +381,8 @@ public class PercolateQueryTests extends ESTestCase {
         indexWriter.close();
         directoryReader = DirectoryReader.open(directory);
         IndexSearcher shardSearcher = newSearcher(directoryReader);
+        // Disable query cache, because ControlQuery cannot be cached...
+        shardSearcher.setQueryCache(null);
 
         MemoryIndex memoryIndex = new MemoryIndex();
         memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
@@ -332,33 +403,33 @@ public class PercolateQueryTests extends ESTestCase {
     }
 
     private void duelRun(MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
+        boolean requireScore = randomBoolean();
         IndexSearcher percolateSearcher = memoryIndex.createSearcher();
-        PercolateQuery.Builder builder1 = new PercolateQuery.Builder(
+        PercolateQuery.Builder builder = new PercolateQuery.Builder(
                 "docType",
                 queryStore,
                 new BytesArray("{}"),
                 percolateSearcher
         );
         // enables the optimization that prevents queries from being evaluated that don't match
-        builder1.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
-        TopDocs topDocs1 = shardSearcher.search(builder1.build(), 10);
+        builder.extractQueryTermsQuery(EXTRACTED_TERMS_FIELD_NAME, UNKNOWN_QUERY_FIELD_NAME);
+        Query query = requireScore ? builder.build() : new ConstantScoreQuery(builder.build());
+        TopDocs topDocs = shardSearcher.search(query, 10);
 
-        PercolateQuery.Builder builder2 = new PercolateQuery.Builder(
-                "docType",
-                queryStore,
-                new BytesArray("{}"),
-                percolateSearcher
-        );
-        builder2.setPercolateTypeQuery(new MatchAllDocsQuery());
-        TopDocs topDocs2 = shardSearcher.search(builder2.build(), 10);
-        assertThat(topDocs1.totalHits, equalTo(topDocs2.totalHits));
-        assertThat(topDocs1.scoreDocs.length, equalTo(topDocs2.scoreDocs.length));
-        for (int j = 0; j < topDocs1.scoreDocs.length; j++) {
-            assertThat(topDocs1.scoreDocs[j].doc, equalTo(topDocs2.scoreDocs[j].doc));
-            assertThat(topDocs1.scoreDocs[j].score, equalTo(topDocs2.scoreDocs[j].score));
-            Explanation explain1 = shardSearcher.explain(builder1.build(), topDocs1.scoreDocs[j].doc);
-            Explanation explain2 = shardSearcher.explain(builder2.build(), topDocs2.scoreDocs[j].doc);
-            assertThat(explain1.toHtml(), equalTo(explain2.toHtml()));
+        Query controlQuery = new ControlQuery(memoryIndex, queryStore);
+        controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
+        TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
+        assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
+        assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
+        for (int j = 0; j < topDocs.scoreDocs.length; j++) {
+            assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
+            assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
+            if (requireScore) {
+                Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
+                Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
+                assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
+                assertThat(explain1.getValue(), equalTo(explain2.getValue()));
+            }
         }
     }
 
@@ -391,4 +462,89 @@ public class PercolateQueryTests extends ESTestCase {
         }
     }
 
+    private final static class ControlQuery extends Query {
+
+        private final MemoryIndex memoryIndex;
+        private final PercolateQuery.QueryStore queryStore;
+
+        private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
+            this.memoryIndex = memoryIndex;
+            this.queryStore = queryStore;
+        }
+
+        @Override
+        public Weight createWeight(IndexSearcher searcher, boolean needsScores) {
+            return new ConstantScoreWeight(this) {
+
+                float _score;
+
+                @Override
+                public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+                    Scorer scorer = scorer(context);
+                    if (scorer != null) {
+                        int result = scorer.iterator().advance(doc);
+                        if (result == doc) {
+                            return Explanation.match(scorer.score(), "ControlQuery");
+                        }
+                    }
+                    return Explanation.noMatch("ControlQuery");
+                }
+
+                @Override
+                public String toString() {
+                    return "weight(" + ControlQuery.this + ")";
+                }
+
+                @Override
+                public Scorer scorer(LeafReaderContext context) throws IOException {
+                    DocIdSetIterator allDocs = DocIdSetIterator.all(context.reader().maxDoc());
+                    PercolateQuery.QueryStore.Leaf leaf = queryStore.getQueries(context);
+                    FilteredDocIdSetIterator memoryIndexIterator = new FilteredDocIdSetIterator(allDocs) {
+
+                        @Override
+                        protected boolean match(int doc) {
+                            try {
+                                Query query = leaf.getQuery(doc);
+                                float score = memoryIndex.search(query);
+                                if (score != 0f) {
+                                    if (needsScores) {
+                                        _score = score;
+                                    }
+                                    return true;
+                                } else {
+                                    return false;
+                                }
+                            } catch (IOException e) {
+                                throw new RuntimeException(e);
+                            }
+                        }
+                    };
+                    return new FilterScorer(new ConstantScoreScorer(this, score(), memoryIndexIterator)) {
+
+                        @Override
+                        public float score() throws IOException {
+                            return _score;
+                        }
+                    };
+                }
+            };
+        }
+
+        @Override
+        public String toString(String field) {
+            return "control{" + field + "}";
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            return sameClassAs(obj);
+        }
+
+        @Override
+        public int hashCode() {
+            return classHash();
+        }
+
+    }
+
 }

+ 15 - 8
modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java

@@ -49,6 +49,8 @@ import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
 import static org.elasticsearch.index.query.QueryBuilders.termsLookupQuery;
 import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
+import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_COMPLETE;
+import static org.elasticsearch.percolator.ExtractQueryTermsService.EXTRACTION_FAILED;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
@@ -71,7 +73,9 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
         indexService = createIndex("test", Settings.EMPTY);
         mapperService = indexService.mapperService();
 
-        String mapper = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
+        String mapper = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("_field_names").field("enabled", false).endObject() // disable _field_names so the field-count assertions below stay stable
+            .startObject("properties")
                 .startObject("field").field("type", "text").endObject()
                 .startObject("number_field").field("type", "long").endObject()
                 .startObject("date_field").field("type", "date").endObject()
@@ -96,20 +100,21 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
             .field(fieldName, queryBuilder)
             .endObject().bytes());
 
-        assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(0));
         assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(1));
         assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
         assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
+        assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
+        assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
         BytesRef qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
         assertQueryBuilder(qbSource, queryBuilder);
 
         // add an query for which we don't extract terms from
-        queryBuilder = matchAllQuery();
+        queryBuilder = rangeQuery("field").from("a").to("z");
         doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject()
                 .field(fieldName, queryBuilder)
                 .endObject().bytes());
-        assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName()).length, equalTo(1));
-        assertThat(doc.rootDoc().getFields(fieldType.getUnknownQueryFieldName())[0].binaryValue(), equalTo(new BytesRef()));
+        assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName()).length, equalTo(1));
+        assertThat(doc.rootDoc().getFields(fieldType.getExtractionResultFieldName())[0].stringValue(), equalTo(EXTRACTION_FAILED));
         assertThat(doc.rootDoc().getFields(fieldType.getExtractedTermsField()).length, equalTo(0));
         assertThat(doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName()).length, equalTo(1));
         qbSource = doc.rootDoc().getFields(fieldType.getQueryBuilderFieldName())[0].binaryValue();
@@ -195,6 +200,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
     public void testMultiplePercolatorFields() throws Exception {
         String typeName = "another_type";
         String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
+                .startObject("_field_names").field("enabled", false).endObject() // disable _field_names so the field-count assertions below stay stable
                 .startObject("properties")
                     .startObject("query_field1").field("type", "percolator").endObject()
                     .startObject("query_field2").field("type", "percolator").endObject()
@@ -209,7 +215,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                         .field("query_field2", queryBuilder)
                         .endObject().bytes()
         );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(22)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(11)); // also includes _uid (1), type (2), source (1)
         BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
 
@@ -221,6 +227,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
     public void testNestedPercolatorField() throws Exception {
         String typeName = "another_type";
         String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName)
+                .startObject("_field_names").field("enabled", false).endObject() // disable _field_names so the field-count assertions below stay stable
                 .startObject("properties")
                 .startObject("object_field")
                     .field("type", "object")
@@ -238,7 +245,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                             .field("query_field", queryBuilder)
                         .endObject().endObject().bytes()
         );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
         BytesRef queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
 
@@ -249,7 +256,7 @@ public class PercolatorFieldMapperTests extends ESSingleNodeTestCase {
                             .endArray()
                         .endObject().bytes()
         );
-        assertThat(doc.rootDoc().getFields().size(), equalTo(18)); // also includes all other meta fields
+        assertThat(doc.rootDoc().getFields().size(), equalTo(8)); // also includes _uid (1), type (2), source (1)
         queryBuilderAsBytes = doc.rootDoc().getField("object_field.query_field.query_builder_field").binaryValue();
         assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
 

+ 10 - 3
modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java

@@ -397,12 +397,16 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
                 .addMapping("employee", mapping)
                 .addMapping("queries", "query", "type=percolator")
         );
-        client().prepareIndex("test", "queries", "q").setSource(jsonBuilder().startObject()
+        client().prepareIndex("test", "queries", "q1").setSource(jsonBuilder().startObject()
                 .field("query", QueryBuilders.nestedQuery("employee",
                         QueryBuilders.matchQuery("employee.name", "virginia potts").operator(Operator.AND), ScoreMode.Avg)
                 ).endObject())
-                .setRefreshPolicy(IMMEDIATE)
                 .get();
+        // this query should never match because it is not wrapped in a nested query:
+        client().prepareIndex("test", "queries", "q2").setSource(jsonBuilder().startObject()
+                .field("query", QueryBuilders.matchQuery("employee.name", "virginia")).endObject())
+                .get();
+        client().admin().indices().prepareRefresh().get();
 
         SearchResponse response = client().prepareSearch()
                 .setQuery(new PercolateQueryBuilder("query", "employee",
@@ -413,9 +417,10 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
                                     .startObject().field("name", "tony stark").endObject()
                                 .endArray()
                             .endObject().bytes()))
+                .addSort("_doc", SortOrder.ASC)
                 .get();
         assertHitCount(response, 1);
-        assertThat(response.getHits().getAt(0).getId(), equalTo("q"));
+        assertThat(response.getHits().getAt(0).getId(), equalTo("q1"));
 
         response = client().prepareSearch()
                 .setQuery(new PercolateQueryBuilder("query", "employee",
@@ -426,12 +431,14 @@ public class PercolatorQuerySearchIT extends ESSingleNodeTestCase {
                                     .startObject().field("name", "tony stark").endObject()
                                 .endArray()
                             .endObject().bytes()))
+                .addSort("_doc", SortOrder.ASC)
                 .get();
         assertHitCount(response, 0);
 
         response = client().prepareSearch()
                 .setQuery(new PercolateQueryBuilder("query", "employee",
                         XContentFactory.jsonBuilder().startObject().field("companyname", "notstark").endObject().bytes()))
+                .addSort("_doc", SortOrder.ASC)
                 .get();
         assertHitCount(response, 0);
     }