Browse Source

Track max_score in collapse when requested (#97703)

Before we used to track max_score in collapse when requested (track_scores=true)
or when there is no sort in collapse (see PR#27122). But this feature
was lost through refactoring and changes.

This PR restores this feature.

Closes #97653
Mayya Sharipova 2 years ago
parent
commit
f8c626f792

+ 6 - 0
docs/changelog/97703.yaml

@@ -0,0 +1,6 @@
+pr: 97703
+summary: Track `max_score` in collapse when requested
+area: Search
+type: bug
+issues:
+ - 97653

+ 7 - 1
docs/reference/search/search-your-data/collapse-search-results.asciidoc

@@ -267,7 +267,7 @@ GET /my-index-000001/_search
                 "value" : 1,
                 "relation" : "eq"
               },
-              "max_score" : null,
+              "max_score" : 0.5753642,
               "hits" : [
                 {
                   "_index" : "my-index-000001",
@@ -311,3 +311,9 @@ GET /my-index-000001/_search
   }
 }
 ----
+
+[discrete]
+=== Track Scores
+
+When `collapse` is used with `sort` on a field, scores are not computed.
+Setting `track_scores` to true instructs {es} to compute and track scores.

+ 113 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/111_field_collapsing_with_max_score.yml

@@ -0,0 +1,113 @@
+setup:
+  - skip:
+      version: " - 8.9.99"
+      reason: Collapse with max score was fixed in 8.10.0
+      features: close_to
+
+  - do:
+      indices.create:
+        index: user_comments
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+          mappings:
+              properties:
+                user_id: { type: keyword }
+                comment: { type: text }
+
+  - do:
+      bulk:
+        index: user_comments
+        refresh: true
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"user_id": "user1", "comment": "Canada is beautiful."}'
+          - '{"index": {"_id": "2"}}'
+          - '{"user_id": "user1", "comment": "Canada is the second-largest country in the world by land area."}'
+          - '{"index": {"_id": "3"}}'
+          - '{"user_id": "user2", "comment": "The capital of Canada is Ottawa."}'
+          - '{"index": {"_id": "4"}}'
+          - '{"user_id": "user2", "comment": "Canada is cold."}'
+          - '{"index": {"_id": "5"}}'
+          - '{"user_id": "user3", "comment": "Canada celebrates Canada Day on July 1st."}'
+
+---
+"Max score returned when there is no sort":
+  - do:
+      search:
+        index: user_comments
+        body:
+          query:
+            match:
+              comment: "Canada"
+          collapse:
+            field: user_id
+
+  - match: {hits.total.value: 5 }
+  - length: {hits.hits: 3 }
+  - match: {hits.hits.0.fields.user_id: ["user3"] }
+  - close_to: { hits.max_score: { value: 0.11545, error: 0.00001 } }
+
+---
+"Max score is not returned when there is sort and track_scores is false (by default)":
+  - do:
+      search:
+        index: user_comments
+        body:
+          sort:
+            user_id: asc
+          query:
+            match:
+              comment: "Canada"
+          collapse:
+            field: user_id
+
+  - match: {hits.total.value: 5 }
+  - length: {hits.hits: 3 }
+  - match: {hits.hits.0.fields.user_id: ["user1"] }
+  - is_false: hits.max_score
+
+---
+"Max score returned when there is sort and track_scores is true":
+  - do:
+      search:
+        index: user_comments
+        body:
+          sort:
+            user_id: asc
+          query:
+            match:
+              comment: "Canada"
+          collapse:
+            field: user_id
+          track_scores: true
+
+  - match: {hits.total.value: 5 }
+  - length: {hits.hits: 3 }
+  - match: {hits.hits.0.fields.user_id: ["user1"] }
+  - close_to: { hits.max_score: { value: 0.11545, error: 0.00001 } }
+
+---
+"Max score returned over all documents not only top hits":
+  - do:
+      search:
+        index: user_comments
+        body:
+          size: 2
+          sort:
+            user_id: asc
+          query:
+            match:
+              comment: "Canada"
+          collapse:
+            field: user_id
+          track_scores: true
+
+  - match: {hits.total.value: 5 }
+  - length: {hits.hits: 2 }
+  - close_to: { hits.max_score: { value: 0.11545, error: 0.00001 } }
+  - match: {hits.hits.0.fields.user_id: ["user1"] }
+  - close_to: { hits.hits.0._score: { value: 0.11030, error: 0.00001 } }
+  - match: { hits.hits.1.fields.user_id: [ "user2" ] }
+  - close_to: { hits.hits.1._score: { value: 0.08817, error: 0.00001 } }

+ 14 - 7
server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorManagerFactory.java

@@ -59,6 +59,7 @@ import org.elasticsearch.search.sort.SortAndFormats;
 
 import java.io.IOException;
 import java.util.Objects;
+import java.util.function.Function;
 import java.util.function.Supplier;
 
 import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_COUNT;
@@ -147,8 +148,9 @@ abstract class TopDocsCollectorManagerFactory {
     }
 
     static class CollapsingTopDocsCollectorManagerFactory extends TopDocsCollectorManagerFactory {
+        private final Collector collector;
         private final SinglePassGroupingCollector<?> topDocsCollector;
-        private final Supplier<Float> maxScoreSupplier;
+        private final Function<TopDocs, Float> maxScoreSupplier;
 
         /**
          * Ctr
@@ -170,24 +172,29 @@ abstract class TopDocsCollectorManagerFactory {
             Sort sort = sortAndFormats == null ? Sort.RELEVANCE : sortAndFormats.sort;
             this.topDocsCollector = collapseContext.createTopDocs(sort, numHits, after);
 
-            MaxScoreCollector maxScoreCollector;
-            if (trackMaxScore) {
+            final MaxScoreCollector maxScoreCollector;
+            if (sortAndFormats == null) {
+                maxScoreCollector = null;
+                maxScoreSupplier = (topDocs) -> topDocs.scoreDocs.length == 0 ? Float.NaN : topDocs.scoreDocs[0].score;
+            } else if (trackMaxScore) {
                 maxScoreCollector = new MaxScoreCollector();
-                maxScoreSupplier = maxScoreCollector::getMaxScore;
+                maxScoreSupplier = (topDocs) -> maxScoreCollector.getMaxScore();
             } else {
-                maxScoreSupplier = () -> Float.NaN;
+                maxScoreCollector = null;
+                maxScoreSupplier = (topDocs) -> Float.NaN;
             }
+            this.collector = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
         }
 
         @Override
         Collector collector() {
-            return topDocsCollector;
+            return collector;
         }
 
         @Override
         TopDocsAndMaxScore topDocsAndMaxScore() throws IOException {
             TopFieldGroups topDocs = topDocsCollector.getTopGroups(0);
-            return new TopDocsAndMaxScore(topDocs, maxScoreSupplier.get());
+            return new TopDocsAndMaxScore(topDocs, maxScoreSupplier.apply(topDocs));
         }
     }
 

+ 1 - 0
x-pack/qa/runtime-fields/build.gradle

@@ -72,6 +72,7 @@ subprojects {
           /////// TO FIX ///////
           'aggregations/range/Date range', //source only date field should also emit values for numbers, it expects strings only
           'search/115_multiple_field_collapsing/two levels fields collapsing', // Field collapsing on a runtime field does not work
+          'search/111_field_collapsing_with_max_score/*', // Field collapsing on a runtime field does not work
           'field_caps/30_index_filter/Field caps with index filter', // We don't support filtering field caps on runtime fields. What should we do?
           'aggregations/filters_bucket/cache busting', // runtime keyword does not support split_queries_on_whitespace
           'search/140_pre_filter_search_shards/pre_filter_shard_size with shards that have no hit',