Переглянути джерело

Return matched_queries in Percolator (#103084)

Return matched_queries for named queries in Percolator.

In a response, each hit together with
a `_percolator_document_slot` field will contain
`_percolator_document_slot_<slotNumber>_matched_queries` fields that will show
which sub-queries matched each percolated document.

Closes #10163
Mayya Sharipova 1 рік тому
батько
коміт
b014843078

+ 6 - 0
docs/changelog/103084.yaml

@@ -0,0 +1,6 @@
+pr: 103084
+summary: Return `matched_queries` in Percolator
+area: Percolator
+type: enhancement
+issues:
+ - 10163

+ 131 - 0
docs/reference/query-dsl/percolate-query.asciidoc

@@ -326,6 +326,7 @@ case the search request would fail with a version conflict error.
 
 The search response returned is identical as in the previous example.
 
+
 ==== Percolate query and highlighting
 
 The `percolate` query is handled in a special way when it comes to highlighting. The queries hits are used
@@ -549,6 +550,136 @@ The slightly different response:
 <1> The highlight fields have been prefixed with the document slot they belong to,
     in order to know which highlight field belongs to what document.
 
+==== Named queries within percolator queries
+
+If a stored percolator query is a complex query, and you want to track which
+of its sub-queries matched a percolated document, then you can use the `\_name`
+parameter for its sub-queries. In this case, each hit in the response contains,
+together with a `_percolator_document_slot` field,
+`_percolator_document_slot_<slotNumber>_matched_queries` fields that show
+which sub-queries matched each percolated document.
+
+For example:
+
+[source,console]
+--------------------------------------------------
+PUT /my-index-000001/_doc/5?refresh
+{
+  "query": {
+    "bool": {
+      "should": [
+        {
+          "match": {
+            "message": {
+              "query": "Japanese art",
+              "_name": "query1"
+            }
+          }
+        },
+        {
+          "match": {
+            "message": {
+              "query": "Holand culture",
+              "_name": "query2"
+            }
+          }
+        }
+      ]
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+[source,console]
+--------------------------------------------------
+GET /my-index-000001/_search
+{
+  "query": {
+    "percolate": {
+      "field": "query",
+      "documents": [
+        {
+          "message": "Japanse art"
+        },
+        {
+          "message": "Holand culture"
+        },
+        {
+          "message": "Japanese art and Holand culture"
+        },
+        {
+          "message": "no-match"
+        }
+      ]
+    }
+  }
+}
+--------------------------------------------------
+// TEST[continued]
+
+[source,console-result]
+--------------------------------------------------
+{
+  "took": 55,
+  "timed_out": false,
+  "_shards": {
+    "total": 1,
+    "successful": 1,
+    "skipped" : 0,
+    "failed": 0
+  },
+  "hits": {
+    "total" : {
+        "value": 1,
+        "relation": "eq"
+    },
+    "max_score": 1.1181908,
+    "hits": [
+      {
+        "_index": "my-index-000001",
+        "_id": "5",
+        "_score": 1.1181908,
+        "_source": {
+          "query": {
+            "bool": {
+              "should": [
+                {
+                  "match": {
+                    "message": {
+                      "query": "Japanese art",
+                      "_name": "query1"
+                    }
+                  }
+                },
+                {
+                  "match": {
+                    "message": {
+                      "query": "Holand culture",
+                      "_name": "query2"
+                    }
+                  }
+                }
+              ]
+            }
+          }
+        },
+        "fields" : {
+          "_percolator_document_slot" : [0, 1, 2],
+          "_percolator_document_slot_0_matched_queries" : ["query1"], <1>
+          "_percolator_document_slot_1_matched_queries" : ["query2"], <2>
+          "_percolator_document_slot_2_matched_queries" : ["query1", "query2"] <3>
+        }
+      }
+    ]
+  }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/"took": 55,/"took": "$body.took",/]
+<1> The first document matched only the first sub-query.
+<2> The second document matched only the second sub-query.
+<3> The third document matched both sub-queries.
+
 ==== Specifying multiple percolate queries
 
 It is possible to specify multiple `percolate` queries in a single search request:

+ 1 - 1
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQuery.java

@@ -143,7 +143,7 @@ final class PercolateQuery extends Query implements Accountable {
                         }
 
                         @Override
-                        public float score() throws IOException {
+                        public float score() {
                             return score;
                         }
                     };

+ 5 - 0
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolateQueryBuilder.java

@@ -664,6 +664,11 @@ public class PercolateQueryBuilder extends AbstractQueryBuilder<PercolateQueryBu
                 CircuitBreakerService circuitBreaker = new NoneCircuitBreakerService();
                 return (IFD) builder.build(cache, circuitBreaker);
             }
+
+            @Override
+            public void addNamedQuery(String name, Query query) {
+                delegate.addNamedQuery(name, query);
+            }
         };
     }
 

+ 2 - 0
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java

@@ -530,6 +530,8 @@ public class PercolatorFieldMapper extends FieldMapper {
         // as an analyzed string.
         wrapped.setAllowUnmappedFields(false);
         wrapped.setMapUnmappedFieldAsString(mapUnmappedFieldsAsString);
+        // We need to rewrite queries with name to Lucene NamedQuery to find matched sub-queries of percolator query
+        wrapped.setRewriteToNamedQueries();
         return wrapped;
     }
 

+ 25 - 1
modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorMatchedSlotSubFetchPhase.java

@@ -11,6 +11,7 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.NamedMatches;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
@@ -96,7 +97,30 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
 
                     IntStream slots = convertTopDocsToSlots(topDocs, pc.rootDocsBySlot);
                     // _percolator_document_slot fields are document fields and should be under "fields" section in a hit
-                    hitContext.hit().setDocumentField(fieldName, new DocumentField(fieldName, slots.boxed().collect(Collectors.toList())));
+                    List<Object> docSlots = slots.boxed().collect(Collectors.toList());
+                    hitContext.hit().setDocumentField(fieldName, new DocumentField(fieldName, docSlots));
+
+                    // Add info about which sub-queries of the percolator query matched each percolated document
+                    if (fetchContext.getSearchExecutionContext().hasNamedQueries()) {
+                        List<LeafReaderContext> leafContexts = percolatorIndexSearcher.getLeafContexts();
+                        assert leafContexts.size() == 1 : "Expected single leaf, but got [" + leafContexts.size() + "]";
+                        LeafReaderContext memoryReaderContext = leafContexts.get(0);
+                        Weight weight = percolatorIndexSearcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1);
+                        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+                            List<NamedMatches> namedMatchesList = NamedMatches.findNamedMatches(
+                                weight.matches(memoryReaderContext, topDocs.scoreDocs[i].doc)
+                            );
+                            if (namedMatchesList.isEmpty()) {
+                                continue;
+                            }
+                            List<Object> matchedQueries = new ArrayList<>(namedMatchesList.size());
+                            for (NamedMatches match : namedMatchesList) {
+                                matchedQueries.add(match.getName());
+                            }
+                            String matchedFieldName = fieldName + "_" + docSlots.get(i) + "_matched_queries";
+                            hitContext.hit().setDocumentField(matchedFieldName, new DocumentField(matchedFieldName, matchedQueries));
+                        }
+                    }
                 }
             }
         };

+ 93 - 0
modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorQuerySearchTests.java

@@ -9,7 +9,9 @@ package org.elasticsearch.percolator;
 
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.join.ScoreMode;
+import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.action.support.WriteRequest;
+import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexService;
@@ -24,6 +26,7 @@ import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.script.MockScriptPlugin;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptType;
+import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.lookup.LeafDocLookup;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -36,6 +39,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
 
@@ -335,4 +339,93 @@ public class PercolatorQuerySearchTests extends ESSingleNodeTestCase {
         }
     }
 
+    public void testPercolateNamedQueries() {
+        String mapping = """
+            {
+              "dynamic" : "strict",
+              "properties" : {
+                "my_query" : { "type" : "percolator" },
+                "description" : { "type" : "text"},
+                "num_of_bedrooms" : { "type" : "integer"},
+                "type" : { "type" : "keyword"},
+                "price": { "type": "float"}
+              }
+            }
+            """;
+        indicesAdmin().prepareCreate("houses").setMapping(mapping).get();
+        String source = """
+            {
+              "my_query" : {
+                "bool": {
+                  "should": [
+                    { "match": { "description": { "query": "fireplace", "_name": "fireplace_query" } } },
+                    { "match": { "type": { "query": "detached", "_name": "detached_query" } } }
+                  ],
+                  "filter": {
+                    "match": {
+                      "num_of_bedrooms": {"query": 3, "_name": "3_bedrooms_query"}
+                    }
+                  }
+                }
+              }
+            }
+            """;
+        prepareIndex("houses").setId("query_3_bedroom_detached_house_with_fireplace").setSource(source, XContentType.JSON).get();
+        indicesAdmin().prepareRefresh().get();
+
+        source = """
+            {
+              "my_query" : {
+                "bool": {
+                  "filter": [
+                    { "match": { "description": { "query": "swimming pool", "_name": "swimming_pool_query" } } },
+                    { "match": { "num_of_bedrooms": {"query": 3, "_name": "3_bedrooms_query"} } }
+                  ]
+                }
+              }
+            }
+            """;
+        prepareIndex("houses").setId("query_3_bedroom_house_with_swimming_pool").setSource(source, XContentType.JSON).get();
+        indicesAdmin().prepareRefresh().get();
+
+        BytesArray house1_doc = new BytesArray("""
+            {
+              "description": "house with a beautiful fireplace and swimming pool",
+              "num_of_bedrooms": 3,
+              "type": "detached",
+              "price": 1000000
+            }
+            """);
+
+        BytesArray house2_doc = new BytesArray("""
+            {
+              "description": "house has a wood burning fireplace",
+              "num_of_bedrooms": 3,
+              "type": "semi-detached",
+              "price": 500000
+            }
+            """);
+
+        QueryBuilder query = new PercolateQueryBuilder("my_query", List.of(house1_doc, house2_doc), XContentType.JSON);
+        SearchResponse response = client().prepareSearch("houses").setQuery(query).get();
+        assertEquals(2, response.getHits().getTotalHits().value);
+
+        SearchHit[] hits = response.getHits().getHits();
+        assertThat(hits[0].getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0, 1)));
+        assertThat(
+            hits[0].getFields().get("_percolator_document_slot_0_matched_queries").getValues(),
+            equalTo(Arrays.asList("fireplace_query", "detached_query", "3_bedrooms_query"))
+        );
+        assertThat(
+            hits[0].getFields().get("_percolator_document_slot_1_matched_queries").getValues(),
+            equalTo(Arrays.asList("fireplace_query", "3_bedrooms_query"))
+        );
+
+        assertThat(hits[1].getFields().get("_percolator_document_slot").getValues(), equalTo(Arrays.asList(0)));
+        assertThat(
+            hits[1].getFields().get("_percolator_document_slot_0_matched_queries").getValues(),
+            equalTo(Arrays.asList("swimming_pool_query", "3_bedrooms_query"))
+        );
+    }
+
 }

+ 125 - 0
modules/percolator/src/yamlRestTest/resources/rest-api-spec/test/20_matched_queries.yml

@@ -0,0 +1,125 @@
+setup:
+  - skip:
+      version: " - 8.12.99"
+      reason: "Displaying matched named queries within percolator queries was added in 8.13"
+  - do:
+      indices.create:
+        index: houses
+        body:
+          mappings:
+            dynamic: strict
+            properties:
+              my_query:
+                type: percolator
+              description:
+                type: text
+              num_of_bedrooms:
+                type: integer
+              type:
+                type: keyword
+              price:
+                type: integer
+
+  - do:
+      index:
+        refresh: true
+        index: houses
+        id: query_3_bedroom_detached_house_with_fireplace
+        body:
+          my_query:
+            {
+              "bool": {
+                "should": [
+                  { "match": { "description": { "query": "fireplace"} } },
+                  { "match": { "type": { "query": "detached", "_name": "detached_query" } } }
+                ],
+                "filter": {
+                  "match": {
+                    "num_of_bedrooms": {"query": 3, "_name": "3_bedrooms_query"}
+                  }
+                }
+              }
+            }
+
+  - do:
+      index:
+        refresh: true
+        index: houses
+        id: query_3_bedroom_house_with_swimming_pool
+        body:
+          my_query:
+            {
+              "bool": {
+                "filter": [
+                  { "match": { "description": { "query": "swimming pool", "_name": "swimming_pool_query" } } },
+                  { "match": { "num_of_bedrooms": {"query": 3, "_name": "3_bedrooms_query"} } }
+                ]
+              }
+            }
+
+---
+"Matched named queries within percolator queries: percolate existing document":
+  - do:
+      index:
+        refresh: true
+        index: houses
+        id: house1
+        body:
+          description: "house with a beautiful fireplace and swimming pool"
+          num_of_bedrooms: 3
+          type: detached
+          price: 1000000
+
+  - do:
+      search:
+        index: houses
+        body:
+          query:
+            percolate:
+              field: my_query
+              index: houses
+              id: house1
+
+  - match: { hits.total.value: 2 }
+
+  - match: { hits.hits.0._id: query_3_bedroom_detached_house_with_fireplace }
+  - match: { hits.hits.0.fields._percolator_document_slot: [0] }
+  - match: { hits.hits.0.fields._percolator_document_slot_0_matched_queries: ["detached_query", "3_bedrooms_query"] }
+
+  - match: { hits.hits.1._id: query_3_bedroom_house_with_swimming_pool }
+  - match: { hits.hits.1.fields._percolator_document_slot: [0] }
+  - match: { hits.hits.1.fields._percolator_document_slot_0_matched_queries: ["swimming_pool_query", "3_bedrooms_query"] }
+
+
+---
+"Matched named queries within percolator queries: percolate multiple documents in request":
+  - do:
+      search:
+        index: houses
+        body:
+          query:
+            percolate:
+              field: my_query
+              documents:
+                - {
+                    "description": "house with a beautiful fireplace and swimming pool",
+                    "num_of_bedrooms": 3,
+                    "type": "detached",
+                    "price": 1000000
+                  }
+                - {
+                    "description": "house has a wood burning fireplace",
+                    "num_of_bedrooms": 3,
+                    "type": "semi-detached",
+                    "price": 500000
+                  }
+
+  - match: { hits.total.value: 2 }
+
+  - match: { hits.hits.0._id: query_3_bedroom_detached_house_with_fireplace }
+  - match: { hits.hits.0.fields._percolator_document_slot: [0, 1] }
+  - match: { hits.hits.0.fields._percolator_document_slot_0_matched_queries: ["detached_query", "3_bedrooms_query"] }
+
+  - match: { hits.hits.1._id: query_3_bedroom_house_with_swimming_pool }
+  - match: { hits.hits.1.fields._percolator_document_slot: [0] }
+  - match: { hits.hits.1.fields._percolator_document_slot_0_matched_queries: ["swimming_pool_query", "3_bedrooms_query"] }

+ 4 - 0
server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java

@@ -10,6 +10,7 @@ package org.elasticsearch.index.query;
 
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.NamedMatches;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.ParsingException;
@@ -121,6 +122,9 @@ public abstract class AbstractQueryBuilder<QB extends AbstractQueryBuilder<QB>>
                 }
             }
             if (queryName != null) {
+                if (context.rewriteToNamedQuery()) {
+                    query = NamedMatches.wrapQuery(queryName, query);
+                }
                 context.addNamedQuery(queryName, query);
             }
         }

+ 10 - 0
server/src/main/java/org/elasticsearch/index/query/FilteredSearchExecutionContext.java

@@ -378,4 +378,14 @@ public class FilteredSearchExecutionContext extends SearchExecutionContext {
     public Set<String> getMatchingFieldNames(String pattern) {
         return in.getMatchingFieldNames(pattern);
     }
+
+    @Override
+    public void setRewriteToNamedQueries() {
+        in.setRewriteToNamedQueries();
+    }
+
+    @Override
+    public boolean rewriteToNamedQuery() {
+        return in.rewriteToNamedQuery();
+    }
 }

+ 20 - 1
server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java

@@ -97,8 +97,8 @@ public class SearchExecutionContext extends QueryRewriteContext {
 
     private final Map<String, Query> namedQueries = new HashMap<>();
     private NestedScope nestedScope;
-
     private QueryBuilder aliasFilter;
+    private boolean rewriteToNamedQueries = false;
 
     /**
      * Build a {@linkplain SearchExecutionContext}.
@@ -297,6 +297,10 @@ public class SearchExecutionContext extends QueryRewriteContext {
         return Map.copyOf(namedQueries);
     }
 
+    public boolean hasNamedQueries() {
+        return (namedQueries.isEmpty() == false);
+    }
+
     /**
      * Parse a document with current mapping.
      */
@@ -619,4 +623,19 @@ public class SearchExecutionContext extends QueryRewriteContext {
     public NestedDocuments getNestedDocuments() {
         return new NestedDocuments(mappingLookup, bitsetFilterCache::getBitSetProducer, indexVersionCreated());
     }
+
+    /**
+     * Instructs this context to rewrite Elasticsearch queries that carry a {@code _name} to Lucene named queries.
+     * Calling this method only sets the {@code rewriteToNamedQueries} flag to {@code true};
+     * the flag can be read back through {@link #rewriteToNamedQuery()}.
+     */
+    public void setRewriteToNamedQueries() {
+        this.rewriteToNamedQueries = true;
+    }
+
+    /**
+     * Reports whether Elasticsearch queries with a {@code _name} must be rewritten to Lucene named queries.
+     *
+     * @return the current value of the {@code rewriteToNamedQueries} flag,
+     *         which {@link #setRewriteToNamedQueries()} sets to {@code true}
+     */
+    public boolean rewriteToNamedQuery() {
+        return rewriteToNamedQueries;
+    }
 }