Browse Source

Handle must_not clauses when disabling the weight matches highlighting mode (#108453)

This change makes sure we check all queries, even the must_not ones, to decide if we should disable weight matches highlighting or not.

Closes #101667
Closes #106693
Jim Ferenczi 1 year ago
parent
commit
da50207faa

+ 80 - 35
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml

@@ -14,12 +14,26 @@ setup:
                       "postings":
                         "type": "text"
                         "index_options": "offsets"
+                "nested":
+                  "type": "nested"
+                  "properties":
+                    "text":
+                      "type": "text"
+                "vectors":
+                  "type": "dense_vector"
+                  "dims": 2
+                  "index": true
+                  "similarity": "l2_norm"
+
   - do:
       index:
         index: test
         id:    "1"
         body:
             "text" : "The quick brown fox is brown."
+            "nested":
+              "text": "The quick brown fox is brown."
+            "vectors": [1, 2]
   - do:
       indices.refresh: {}
 
@@ -43,6 +57,7 @@ teardown:
           "query" : { "multi_match" : { "query" : "quick brown fox", "fields" : [ "text*"] } },
           "highlight" : { "type" : "unified", "fields" : { "*" : {} } } }
 
+  - length: { hits.hits.0.highlight: 3 }
   - match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
   - match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
   - match: {hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
@@ -58,6 +73,7 @@ teardown:
           "query" : { "combined_fields" : { "query" : "quick brown fox", "fields" : [ "text*"] } },
           "highlight" : { "type" : "unified", "fields" : { "*" : {} } } }
 
+  - length: { hits.hits.0.highlight: 3 }
   - match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
   - match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
   - match: {hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
@@ -72,11 +88,13 @@ teardown:
       search:
         body: {
           "query": { "multi_match": { "query": "quick brown fox", "type": "phrase", "fields": [ "text*" ] } },
-          "highlight": { "type": "unified", "fields": { "*": { } } } }
+          "highlight": { "type": "unified", "fields": { "*": { } } }
+        }
 
-  - match: { hits.hits.0.highlight.text.0: "The <em>quick brown fox</em> is brown." }
-  - match: { hits.hits.0.highlight.text\.fvh.0: "The <em>quick brown fox</em> is brown." }
-  - match: { hits.hits.0.highlight.text\.postings.0: "The <em>quick brown fox</em> is brown." }
+  - length: { hits.hits.0.highlight: 3 }
+  - match:  { hits.hits.0.highlight.text.0: "The <em>quick brown fox</em> is brown." }
+  - match:  { hits.hits.0.highlight.text\.fvh.0: "The <em>quick brown fox</em> is brown." }
+  - match:  { hits.hits.0.highlight.text\.postings.0: "The <em>quick brown fox</em> is brown." }
 
   - do:
       indices.put_settings:
@@ -90,6 +108,7 @@ teardown:
           "query" : { "multi_match" : { "query" : "quick brown fox", "type": "phrase", "fields" : [ "text*"] } },
           "highlight" : { "type" : "unified", "fields" : { "*" : {} } } }
 
+  - length: { hits.hits.0.highlight: 3 }
   - match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown."}
   - match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown."}
   - match: {hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown."}
@@ -100,43 +119,69 @@ teardown:
       reason: 'kNN was not correctly skipped until 8.12'
 
   - do:
-      indices.create:
-        index: test-highlighting-knn
-        body:
-          mappings:
-            "properties":
-              "vectors":
-                "type": "dense_vector"
-                "dims": 2
-                "index": true
-                "similarity": "l2_norm"
-              "text":
-                "type": "text"
-                "fields":
-                  "fvh":
-                    "type": "text"
-                    "term_vector": "with_positions_offsets"
-                  "postings":
-                    "type": "text"
-                    "index_options": "offsets"
-  - do:
-      index:
-        index: test-highlighting-knn
-        id:    "1"
-        body:
-          "text" : "The quick brown fox is brown."
-          "vectors": [1, 2]
+      search:
+        index: test
+        body: {
+          "query": { "multi_match": { "query": "quick brown fox", "type": "phrase", "fields": [ "text*" ] } },
+          "highlight": { "type": "unified", "fields": { "text*": { } } },
+          "knn": { "field": "vectors", "query_vector": [1, 2], "k": 10, "num_candidates": 10 } }
+
+  - length: { hits.hits.0.highlight: 3 }
+  - match: { hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }
+  - match: { hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }
+  - match: { hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }
+
+---
+"Test nested queries automatically disable weighted mode":
+  - requires:
+      cluster_features: "gte_v8.15.0"
+      reason: 'nested was not correctly skipped until 8.15'
+
   - do:
-      indices.refresh: {}
+      search:
+        index: test
+        body: {
+          "query": {
+            "nested": {
+              "path": "nested",
+              "query": {
+                "multi_match": {
+                  "query": "quick brown fox",
+                  "type": "phrase",
+                  "fields": [ "nested.text" ]
+                }
+              }
+            }
+          },
+          "highlight": { "type": "unified", "fields": { "*": { } } }
+        }
+
+  - length: { hits.hits.0.highlight: 1 }
+  - match: { hits.hits.0.highlight.nested\.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }
 
   - do:
       search:
-        index: test-highlighting-knn
+        index: test
         body: {
-          "query": { "multi_match": { "query": "quick brown fox", "type": "phrase", "fields": [ "text*" ] } },
-          "highlight": { "type": "unified", "fields": { "*": { } } },
-          "knn": { "field": "vectors", "query_vector": [1, 2], "k": 10, "num_candidates": 10 } }
+          "query": {
+            "bool": {
+              "must_not": {
+                "nested": {
+                  "path": "nested",
+                  "query": {
+                    "multi_match": { "query": "quick red fox", "type": "phrase", "fields": [ "nested.text" ] }
+                  }
+                }
+              },
+              "should": {
+                "multi_match": { "query": "quick brown fox", "type": "phrase", "fields": [ "text*" ] }
+              }
+            }
+          },
+          "highlight": { "type": "unified", "fields": { "text*": { } } }
+        }
 
+  - length: { hits.hits.0.highlight: 3 }
   - match: { hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }
   - match: { hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }
   - match: { hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown." }

+ 2 - 1
server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java

@@ -293,7 +293,8 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
                 if (parent instanceof ESToParentBlockJoinQuery) {
                     hasUnknownLeaf[0] = true;
                 }
-                return super.getSubVisitor(occur, parent);
+                // we want to visit all queries, including those within the must_not clauses.
+                return this;
             }
         });
         return hasUnknownLeaf[0];