소스 검색

percolator: Add support dor MatchNoDocsQuery in query terms extract service

Before the query extraction would have been aborted and the percolator query would be marked as unknown.
This resulted in a situation that these queries always need to be evaluated by the memory index at search time.
By adding support for this query many more percolator query candidate hits can skip the expensive memory index verification step. For example the `match` query parser returns a MatchNoDocsQuery if the query terms are removed by text analysis (lets query text only contained stop words).
Martijn van Groningen 9 년 전
부모
커밋
c1a0929123

+ 10 - 10
core/src/main/java/org/elasticsearch/index/percolator/ExtractQueryTermsService.java

@@ -37,19 +37,16 @@ import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
-import org.apache.lucene.search.spans.SpanContainingQuery;
 import org.apache.lucene.search.spans.SpanFirstQuery;
-import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.SpanWithinQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.common.logging.LoggerMessageFormat;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.ParseContext;
 
 import java.io.IOException;
@@ -72,11 +69,11 @@ public final class ExtractQueryTermsService {
 
     /**
      * Extracts all terms from the specified query and adds it to the specified document.
-     *  @param query The query to extract terms from
-     * @param document The document to add the extracted terms to
-     * @param queryTermsFieldField The field in the document holding the extracted terms
-     * @param unknownQueryField The field used to mark a document that not all query terms could be extracted. For example
-     *                          the query contained an unsupported query (e.g. WildcardQuery).
+     * @param query                 The query to extract terms from
+     * @param document              The document to add the extracted terms to
+     * @param queryTermsFieldField  The field in the document holding the extracted terms
+     * @param unknownQueryField     The field used to mark a document that not all query terms could be extracted.
+     *                              For example the query contained an unsupported query (e.g. WildcardQuery).
      * @param fieldType The field type for the query metadata field
      */
     public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField, String unknownQueryField, FieldType fieldType) {
@@ -106,7 +103,10 @@ public final class ExtractQueryTermsService {
      * an UnsupportedQueryException is thrown.
      */
     static Set<Term> extractQueryTerms(Query query) {
-        if (query instanceof TermQuery) {
+        if (query instanceof MatchNoDocsQuery) {
+            // no terms to extract as this query matches no docs
+            return Collections.emptySet();
+        } else if (query instanceof TermQuery) {
             return Collections.singleton(((TermQuery) query).getTerm());
         } else if (query instanceof TermsQuery) {
             Set<Term> terms = new HashSet<>();

+ 18 - 0
core/src/test/java/org/elasticsearch/index/percolator/ExtractQueryTermsServiceTests.java

@@ -41,6 +41,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.ParseContext;
 import org.elasticsearch.test.ESTestCase;
 
@@ -296,6 +297,23 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
         assertTermsEqual(terms, spanTermQuery1.getTerm());
     }
 
+    public void testExtractQueryMetadata_matchNoDocsQuery() {
+        Set<Term> terms = ExtractQueryTermsService.extractQueryTerms(new MatchNoDocsQuery("sometimes there is no reason at all"));
+        assertEquals(0, terms.size());
+
+        BooleanQuery.Builder bq = new BooleanQuery.Builder();
+        bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
+        bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
+        terms = ExtractQueryTermsService.extractQueryTerms(bq.build());
+        assertEquals(0, terms.size());
+
+        bq = new BooleanQuery.Builder();
+        bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
+        bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.SHOULD);
+        terms = ExtractQueryTermsService.extractQueryTerms(bq.build());
+        assertTermsEqual(terms, new Term("field", "value"));
+    }
+
     public void testExtractQueryMetadata_unsupportedQuery() {
         TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false);
 

+ 24 - 1
docs/reference/query-dsl/percolate-query.asciidoc

@@ -333,4 +333,27 @@ of the query gets stored, but also the query's terms are analyzed and stored int
 At search time, the document specified in the request gets parsed into a Lucene document and is stored in a in-memory
 temporary Lucene index. This in-memory index can just hold this one document and it is optimized for that. After this
 a special query is build based on the terms in the in-memory index that select candidate percolator queries based on
-their indexed query terms. These queries are then evaluated by the in-memory index if they actually match.
+their indexed query terms. These queries are then evaluated by the in-memory index if they actually match.
+
+The selecting of candidate percolator queries matches is an important performance optimization during the execution
+of the `percolate` query as it can significantly reduce the number of candidate matches the in-memory index need to
+evaluate. The reason the `percolate` query can do this is because during indexing of the percolator queries the query
+terms are being extracted and indexed with the percolator query. Unfortunately the percolator cannot extract terms from
+all queries (for example the `wildcard` or `geo_shape` query) and as a result of that in certain cases the percolator
+can't do the selecting optimization (for example if an unsupported query is defined in a required clause of a boolean query
+or the unsupported query is the only query in the percolator document).  These queries are marked by the percolator and
+can be found by running the following search:
+
+[source,js]
+--------------------------------------------------
+curl -XGET "http://localhost:9200/_search" -d'
+{
+  "query": {
+    "term" : {
+      "query.unknown_query" : ""
+    }
+  }
+}'
+--------------------------------------------------
+
+NOTE: The above example assumes that there is a `query` field of type `percolator` in the mappings.