Browse Source

Fix composite early termination on sorted (#72101)

I broke composite early termination when reworking how aggregations'
contact for `getLeafCollector` around early termination in #70320. We
didn't see it in our tests because we weren't properly emulating the
aggregation collection stage. This fixes early termination by adhering
to the new contract and adds more tests.

Closes #72078

Co-authored-by: Benjamin Trent <4357155+benwtrent@users.noreply.github.com>
Nik Everett 4 years ago
parent
commit
39fee5e908

+ 179 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/235_composite_sorted.yml

@@ -0,0 +1,179 @@
+---
+setup:
+  - do:
+      indices.create:
+        index: sorted
+        body:
+          mappings:
+            properties:
+              date:
+                type: date
+              kw:
+                type: keyword
+          settings:
+            index:
+              number_of_shards: 1
+              number_of_replicas: 0
+              sort:
+                field: date
+                order: desc
+
+  # Index single documents at a time and refresh. That'll create as many
+  # segments as possible which has revealed bugs in the past.
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-01T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-02T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-03T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-04T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-05T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-06T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-07T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-08T00:00:00.000Z", "kw": "cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-09T00:00:00.000Z", "kw": "not a cat"}
+        refresh: true
+  - do:
+      index:
+        index:   sorted
+        body:    {"date": "2021-01-10T00:00:00.000Z", "kw": "also not a cat"}
+        refresh: true
+
+---
+one source - first page:
+  - do:
+      search:
+        index: sorted
+        body:
+          size: 0
+          aggs:
+            c:
+              composite:
+                size: 2
+                sources:
+                  - date:
+                      date_histogram:
+                        field: date
+                        calendar_interval: day
+                        format: iso8601 # Format makes the comparisons a little more obvious
+  - length: { aggregations.c.buckets: 2 }
+  - match: { aggregations.c.buckets.0.key.date: "2021-01-01T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.0.doc_count: 1 }
+  - match: { aggregations.c.buckets.1.key.date: "2021-01-02T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.1.doc_count: 1 }
+
+---
+one source - second page:
+  - do:
+      search:
+        index: sorted
+        body:
+          size: 0
+          aggs:
+            c:
+              composite:
+                size: 2
+                sources:
+                  - date:
+                      date_histogram:
+                        field: date
+                        calendar_interval: day
+                        format: iso8601 # Format makes the comparisons a little more obvious
+                after:
+                  date: "2021-01-02T00:00:00.000Z"
+  - length: { aggregations.c.buckets: 2 }
+  - match: { aggregations.c.buckets.0.key.date: "2021-01-03T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.0.doc_count: 1 }
+  - match: { aggregations.c.buckets.1.key.date: "2021-01-04T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.1.doc_count: 1 }
+
+---
+two sources - first page:
+  - do:
+      search:
+        index: sorted
+        body:
+          size: 0
+          aggs:
+            c:
+              composite:
+                size: 2
+                sources:
+                  - date:
+                      date_histogram:
+                        field: date
+                        calendar_interval: day
+                        format: iso8601 # Format makes the comparisons a little more obvious
+                  - kw:
+                      terms:
+                        field: kw
+  - length: { aggregations.c.buckets: 2 }
+  - match: { aggregations.c.buckets.0.key.date: "2021-01-01T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.0.key.kw: cat }
+  - match: { aggregations.c.buckets.0.doc_count: 1 }
+  - match: { aggregations.c.buckets.1.key.date: "2021-01-02T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.1.key.kw: cat }
+  - match: { aggregations.c.buckets.1.doc_count: 1 }
+
+---
+two sources - second page:
+  - do:
+      search:
+        index: sorted
+        body:
+          size: 0
+          aggs:
+            c:
+              composite:
+                size: 2
+                sources:
+                  - date:
+                      date_histogram:
+                        field: date
+                        calendar_interval: day
+                        format: iso8601 # Format makes the comparisons a little more obvious
+                  - kw:
+                      terms:
+                        field: kw
+                after:
+                  date: "2021-01-02T00:00:00.000Z"
+                  kw: cat
+  - length: { aggregations.c.buckets: 2 }
+  - match: { aggregations.c.buckets.0.key.date: "2021-01-03T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.0.key.kw: cat }
+  - match: { aggregations.c.buckets.0.doc_count: 1 }
+  - match: { aggregations.c.buckets.1.key.date: "2021-01-04T00:00:00.000Z" }
+  - match: { aggregations.c.buckets.1.key.kw: cat }
+  - match: { aggregations.c.buckets.1.doc_count: 1 }

+ 14 - 2
server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java

@@ -406,10 +406,22 @@ final class CompositeAggregator extends BucketsAggregator {
                 // We have an after key and index sort is applicable so we jump directly to the doc
                 // that is after the index sort prefix using the rawAfterKey and we start collecting
                 // document from there.
-                processLeafFromQuery(ctx, indexSortPrefix);
+                try {
+                    processLeafFromQuery(ctx, indexSortPrefix);
+                } catch (CollectionTerminatedException e) {
+                    /*
+                     * Signal that there isn't anything to collect. We're going
+                     * to return noop collector anyway so we can ignore it.
+                     */
+                }
                 return LeafBucketCollector.NO_OP_COLLECTOR;
             } else {
-                final LeafBucketCollector inner = queue.getLeafCollector(ctx, getFirstPassCollector(docIdSetBuilder, sortPrefixLen));
+                final LeafBucketCollector inner;
+                try {
+                    inner = queue.getLeafCollector(ctx, getFirstPassCollector(docIdSetBuilder, sortPrefixLen));
+                } catch (CollectionTerminatedException e) {
+                    return LeafBucketCollector.NO_OP_COLLECTOR;
+                }
                 return new LeafBucketCollector() {
                     @Override
                     public void collect(int doc, long zeroBucket) throws IOException {

+ 2 - 1
server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java

@@ -2657,7 +2657,8 @@ public class CompositeAggregatorTests  extends AggregatorTestCase {
     private static Sort buildIndexSort(List<CompositeValuesSourceBuilder<?>> sources, Map<String, MappedFieldType> fieldTypes) {
         List<SortField> sortFields = new ArrayList<>();
         Map<String, MappedFieldType> remainingFieldTypes = new HashMap<>(fieldTypes);
-        for (CompositeValuesSourceBuilder<?> source : sources) {
+        List<CompositeValuesSourceBuilder<?>> sourcesToCreateSorts = randomBoolean() ? sources : sources.subList(0, 1);
+        for (CompositeValuesSourceBuilder<?> source : sourcesToCreateSorts) {
             MappedFieldType type = fieldTypes.remove(source.field());
             remainingFieldTypes.remove(source.field());
             SortField sortField = sortFieldFrom(type);

+ 1 - 1
test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java

@@ -478,7 +478,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
             }
         } else {
             root.preCollection();
-            searcher.search(rewritten, root);
+            searcher.search(rewritten, MultiBucketCollector.wrap(true, List.of(root)));
             root.postCollection();
             aggs.add(root.buildTopLevel());
         }

+ 1 - 0
x-pack/qa/runtime-fields/build.gradle

@@ -78,6 +78,7 @@ subprojects {
           'search.aggregation/20_terms/string profiler via global ordinals native implementation',
           'search.aggregation/20_terms/Global ordinals are loaded with the global_ordinals execution hint',
           'search.aggregation/170_cardinality_metric/profiler string',
+          'search.aggregation/235_composite_sorted/*',
           /////// NOT SUPPORTED ///////
         ].join(',')
     }