
Speed up aggs with sub-aggregations (#69806)

This allows many of the optimizations added in #63643 and #68871 to run
on aggregations with sub-aggregations. This should:
* Speed up `terms` aggregations on fields with fewer than 1000 values that
  also have sub-aggregations. Locally I see 2 second searches run in 1.2
  seconds.
* Apply that same speedup to `range` and `date_histogram` aggregations, though
  it feels less impressive because the point range queries are a little
  slower to get up and go.
* Massively speed up `filters` aggregations with sub-aggregations that
  don't have a `parent` aggregation or collect "other" buckets (a sketch of
  a qualifying request follows below). Also save a ton of memory while
  collecting them.
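
For a rough sense of the shape of request that benefits (a sketch only; the index and field names here are hypothetical, not from this change), here is a `filters` aggregation with a metric sub-aggregation built with the same builders the new tests use. It now qualifies for the fast filter-by-filter path as long as it has no parent aggregation, no "other" bucket, and no sub-aggregation that needs scores:

import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter;
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;

class FilterByFilterExample {
    /**
     * Before this change the max_price sub-aggregation forced the slower
     * Compatible collector; now the aggregation can still run filter-by-filter.
     */
    static AggregationBuilder byQuarter() {
        return new FiltersAggregationBuilder(
            "by_quarter",
            new KeyedFilter("q1", new RangeQueryBuilder("timestamp").from("2021-01-01").to("2021-04-01").includeUpper(false)),
            new KeyedFilter("q2", new RangeQueryBuilder("timestamp").from("2021-04-01").to("2021-07-01").includeUpper(false))
        ).subAggregation(new MaxAggregationBuilder("max_price").field("price"));
    }
}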
Nik Everett 4 years ago
parent
commit
10e2f90560
18 changed files with 583 additions and 96 deletions
  1. +4 -1 modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java
  2. +3 -0 rest-api-spec/build.gradle
  3. +99 -38 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml
  4. +6 -0 server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java
  5. +108 -10 server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java
  6. +21 -1 server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java
  7. +11 -1 server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java
  8. +11 -2 server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java
  9. +243 -0 server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java
  10. +3 -0 server/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregatorTestCase.java
  11. +4 -0 server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java
  12. +11 -0 server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java
  13. +4 -0 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java
  14. +2 -0 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java
  15. +49 -41 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java
  16. +1 -0 x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/RollupResponseTranslationTests.java
  17. +1 -1 x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java
  18. +2 -1 x-pack/qa/runtime-fields/build.gradle

+ 4 - 1
modules/parent-join/src/test/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregatorTests.java

@@ -26,6 +26,7 @@ import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.index.mapper.Uid;
@@ -108,6 +109,7 @@ public class ParentToChildrenAggregatorTests extends AggregatorTestCase {
     }
 
     public void testParentChildAsSubAgg() throws IOException {
+        MappedFieldType kwd = new KeywordFieldMapper.KeywordFieldType("kwd", randomBoolean(), true, null);
         try (Directory directory = newDirectory()) {
             RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
 
@@ -146,7 +148,7 @@ public class ParentToChildrenAggregatorTests extends AggregatorTestCase {
                     indexSearcher,
                     new MatchAllDocsQuery(),
                     request,
-                    withJoinFields(longField("number"), keywordField("kwd"))
+                    withJoinFields(longField("number"), kwd)
                 );
 
                 StringTerms.Bucket evenBucket = result.getBucketByKey("even");
@@ -190,6 +192,7 @@ public class ParentToChildrenAggregatorTests extends AggregatorTestCase {
         return Arrays.asList(
                 new StringField(IdFieldMapper.NAME, Uid.encodeId(id), Field.Store.NO),
                 new SortedSetDocValuesField("kwd", new BytesRef(kwd)),
+                new Field("kwd", new BytesRef(kwd), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new StringField("join_field", PARENT_TYPE, Field.Store.NO),
                 createJoinField(PARENT_TYPE, id)
         );

+ 3 - 0
rest-api-spec/build.gradle

@@ -321,6 +321,9 @@ tasks.named("yamlRestCompatTest").configure {
     'mtermvectors/30_mix_typeless_typeful/mtermvectors without types on an index that has types',
     'search.aggregation/10_histogram/Deprecated _time order',
     'search.aggregation/200_top_hits_metric/top_hits aggregation with sequence numbers',
+    'search.aggregation/20_terms/string profiler via global ordinals',
+    'search.aggregation/20_terms/string profiler via global ordinals filters implementation',
+    'search.aggregation/20_terms/string profiler via global ordinals native implementation',
     'search.aggregation/20_terms/Deprecated _term order',
     'search.aggregation/280_geohash_grid/Basic test',
     'search.aggregation/290_geotile_grid/Basic test',

+ 99 - 38
rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

@@ -754,23 +754,23 @@ setup:
         body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
 
 ---
-"string profiler via global ordinals":
+"string profiler via global ordinals filters implementation":
   - skip:
-      version: " - 7.8.99"
-      reason: debug information added in 7.9.0
+      version: " - 7.99.99"
+      reason: filters implementation first supported with sub-aggregators in 8.0.0, being backported to 7.13.0
   - do:
       bulk:
         index: test_1
         refresh: true
         body: |
           { "index": {} }
-          { "str": "sheep", "number": 1 }
+          { "boolean": true, "str": "sheep", "number": 1 }
           { "index": {} }
-          { "str": "sheep", "number": 3 }
+          { "boolean": true, "str": "sheep", "number": 3 }
           { "index": {} }
-          { "str": "cow", "number": 1 }
+          { "boolean": true, "str": "cow", "number": 1 }
           { "index": {} }
-          { "str": "pig", "number": 1 }
+          { "boolean": true, "str": "pig", "number": 1 }
 
   - do:
       search:
@@ -794,17 +794,73 @@ setup:
   - match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
   - match: { aggregations.str_terms.buckets.2.key: pig }
   - match: { aggregations.str_terms.buckets.2.max_number.value: 1 }
-  - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.type: StringTermsAggregatorFromFilters }
   - match: { profile.shards.0.aggregations.0.description: str_terms }
-  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
-  - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
-  - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
-  - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
-  - gt:    { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.delegate: FiltersAggregator.FilterByFilter }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.0.query: str:cow }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.1.query: str:pig }
+  - match: { profile.shards.0.aggregations.0.debug.delegate_debug.filters.2.query: str:sheep }
   - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
   - match: { profile.shards.0.aggregations.0.children.0.description: max_number }
+  - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
+
+---
+"string profiler via global ordinals native implementation":
+  - skip:
+      version: " - 7.8.99"
+      reason: debug information added in 7.9.0
+  - do:
+      bulk:
+        index: test_1
+        refresh: true
+        body: |
+          { "index": {} }
+          { "boolean": true, "str": "sheep", "number": 1 }
+          { "index": {} }
+          { "boolean": true, "str": "sheep", "number": 3 }
+          { "index": {} }
+          { "boolean": true, "str": "cow", "number": 1 }
+          { "index": {} }
+          { "boolean": true, "str": "pig", "number": 1 }
+
+  - do:
+      search:
+        index: test_1
+        body:
+          profile: true
+          size: 0
+          aggs:
+            bool:    # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
+              terms:
+                field: boolean
+              aggs:
+                str_terms:
+                  terms:
+                    field: str
+                    collect_mode: breadth_first
+                    execution_hint: global_ordinals
+                  aggs:
+                    max_number:
+                      max:
+                        field: number
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: sheep }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 3 }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: cow }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 1 }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.2.key: pig }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.2.max_number.value: 1 }
+  - match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
+  - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 4 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: remap using many bucket ords }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
+  - gt:    { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
 
   - do:
       indices.create:
@@ -823,7 +879,7 @@ setup:
         refresh: true
         body: |
           { "index": {} }
-          { "str": ["pig", "sheep"], "number": 100 }
+          { "boolean": true, "str": ["pig", "sheep"], "number": 100 }
 
   - do:
       search:
@@ -832,30 +888,35 @@ setup:
           profile: true
           size: 0
           aggs:
-            str_terms:
+            bool:    # add a dummy agg "on top" of the child agg just to force it out of filter-by-filter mode
               terms:
-                field: str
-                collect_mode: breadth_first
-                execution_hint: global_ordinals
+                field: boolean
               aggs:
-                max_number:
-                  max:
-                    field: number
-  - match: { aggregations.str_terms.buckets.0.key: pig }
-  - match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
-  - match: { aggregations.str_terms.buckets.1.key: sheep }
-  - match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
-  - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
-  - match: { profile.shards.0.aggregations.0.description: str_terms }
-  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
-  - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
-  - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
-  - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
-  - match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
-  - gt:    { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
-  - match: { profile.shards.0.aggregations.0.debug.has_filter: false }
-  - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
-  - match: { profile.shards.0.aggregations.0.children.0.description: max_number }
+                str_terms:
+                  terms:
+                    field: str
+                    collect_mode: breadth_first
+                    execution_hint: global_ordinals
+                  aggs:
+                    max_number:
+                      max:
+                        field: number
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.key: pig }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.0.max_number.value: 100 }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.key: sheep }
+  - match: { aggregations.bool.buckets.0.str_terms.buckets.1.max_number.value: 100 }
+  - match: { profile.shards.0.aggregations.0.children.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.description: str_terms }
+  - match: { profile.shards.0.aggregations.0.children.0.breakdown.collect_count: 1 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.deferred_aggregators: [ max_number ] }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.collection_strategy: remap using many bucket ords }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.result_strategy: terms }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.segments_with_single_valued_ords: 0 }
+  - gt:    { profile.shards.0.aggregations.0.children.0.debug.segments_with_multi_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.children.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.type: MaxAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.children.0.description: max_number }
+
 
 ---
 "string profiler via map":

+ 6 - 0
server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java

@@ -585,6 +585,12 @@ public class StringTermsIT extends AbstractTermsTestCase {
                 } else {
                     throw e;
                 }
+            } else if (e.getCause() instanceof IllegalArgumentException) {
+                // Thrown when the terms agg runs as a filters agg
+                assertThat(
+                    e.getCause().getMessage(),
+                    equalTo("Invalid aggregation order path [inner_terms>avg]. Can't sort by a descendant of a [sterms] aggregation [avg]")
+                );
             } else {
                 throw e;
             }

+ 108 - 10
server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java

@@ -135,8 +135,28 @@ public abstract class FiltersAggregator extends BucketsAggregator {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        if (canUseFilterByFilter(parent, factories, otherBucketKey)) {
-            return buildFilterByFilter(name, factories, filters, keyed, otherBucketKey, context, parent, cardinality, metadata);
+        if (canUseFilterByFilter(parent, otherBucketKey)) {
+            FilterByFilter filterByFilter = buildFilterByFilter(
+                name,
+                factories,
+                filters,
+                keyed,
+                otherBucketKey,
+                context,
+                parent,
+                cardinality,
+                metadata
+            );
+            if (false == filterByFilter.scoreMode().needsScores()) {
+                /*
+                 * Filter by filter won't produce the correct results if the
+                 * sub-aggregators need scores because we're not careful with how
+                 * we merge filters. Right now we have to build the whole
+                 * aggregation in order to know if it'll need scores or not.
+                 */
+                // TODO make filter by filter produce the correct result or skip this in canUseFilterByFilter
+                return filterByFilter;
+            }
         }
         return new FiltersAggregator.Compatible(
             name,
@@ -155,8 +175,8 @@ public abstract class FiltersAggregator extends BucketsAggregator {
      * Can this aggregation be executed using the {@link FilterByFilter}? That
      * aggregator is much faster than the fallback {@link Compatible} aggregator.
      */
-    public static boolean canUseFilterByFilter(Aggregator parent, AggregatorFactories factories, String otherBucketKey) {
-        return parent == null && factories.countAggregators() == 0 && otherBucketKey == null;
+    public static boolean canUseFilterByFilter(Aggregator parent, String otherBucketKey) {
+        return parent == null && otherBucketKey == null;
     }
 
     /**
@@ -165,6 +185,10 @@ public abstract class FiltersAggregator extends BucketsAggregator {
      * collect filter by filter if there isn't a parent, there aren't children,
      * and we don't collect "other" buckets. Collecting {@link FilterByFilter}
      * is generally going to be much faster than the {@link Compatible} aggregator.
+     * <p>
+     * <strong>Important:</strong> This doesn't properly handle sub-aggregators
+     * that need scores so callers must check {@code #scoreMode()} and not use
     * this collector if it needs scores.
      */
     public static FilterByFilter buildFilterByFilter(
         String name,
@@ -177,7 +201,7 @@ public abstract class FiltersAggregator extends BucketsAggregator {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        if (false == canUseFilterByFilter(parent, factories, otherBucketKey)) {
+        if (false == canUseFilterByFilter(parent, otherBucketKey)) {
             throw new IllegalStateException("Can't execute filter-by-filter");
         }
         List<QueryToFilterAdapter<?>> filtersWithTopLevel = new ArrayList<>(filters.size());
@@ -186,6 +210,7 @@ public abstract class FiltersAggregator extends BucketsAggregator {
         }
         return new FiltersAggregator.FilterByFilter(
             name,
+            factories,
             filtersWithTopLevel,
             keyed,
             context,
@@ -274,9 +299,12 @@ public abstract class FiltersAggregator extends BucketsAggregator {
          * field.
          */
         private int segmentsWithDocCountField;
+        private int segmentsCollected;
+        private int segmentsCounted;
 
         private FilterByFilter(
             String name,
+            AggregatorFactories factories,
             List<QueryToFilterAdapter<?>> filters,
             boolean keyed,
             AggregationContext context,
@@ -284,7 +312,7 @@ public abstract class FiltersAggregator extends BucketsAggregator {
             CardinalityUpperBound cardinality,
             Map<String, Object> metadata
         ) throws IOException {
-            super(name, AggregatorFactories.EMPTY, filters, keyed, null, context, parent, cardinality, metadata);
+            super(name, factories, filters, keyed, null, context, parent, cardinality, metadata);
             this.profiling = context.profiling();
         }
 
@@ -294,6 +322,8 @@ public abstract class FiltersAggregator extends BucketsAggregator {
          */
         @SuppressWarnings("resource") // We're not in change of anything Closeable
         public long estimateCost(long maxCost) throws IOException {
+            assert scoreMode().needsScores() == false;
+            // TODO if we have children we should use a different cost estimate
             this.maxCost = maxCost;
             if (estimatedCost != -1) {
                 return estimatedCost;
@@ -303,7 +333,9 @@ public abstract class FiltersAggregator extends BucketsAggregator {
             for (LeafReaderContext ctx : searcher().getIndexReader().leaves()) {
                 CheckedSupplier<Boolean, IOException> canUseMetadata = canUseMetadata(ctx);
                 for (QueryToFilterAdapter<?> filter : filters()) {
-                    estimatedCost += filter.estimateCountCost(ctx, canUseMetadata);
+                    estimatedCost += subAggregators().length > 0
+                        ? filter.estimateCollectCost(ctx)
+                        : filter.estimateCountCost(ctx, canUseMetadata);
                     if (estimatedCost < 0) {
                         // We've overflowed so we cap out and stop counting.
                         estimatedCost = Long.MAX_VALUE;
@@ -344,21 +376,87 @@ public abstract class FiltersAggregator extends BucketsAggregator {
          */
         @Override
         protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
+            assert scoreMode().needsScores() == false;
+            if (filters().size() == 0) {
+                return LeafBucketCollector.NO_OP_COLLECTOR;
+            }
             Bits live = ctx.reader().getLiveDocs();
-            Counter counter = new Counter(docCountProvider);
             if (false == docCountProvider.alwaysOne()) {
                 segmentsWithDocCountField++;
             }
-            for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) {
-                incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live));
+            if (subAggregators.length == 0) {
+                // TODO we'd be better off if we could do sub.isNoop() or something.
+                /*
+                 * Without sub.isNoop we always end up in the `collectXXX` modes even if
+                 * the sub-aggregators opt out of traditional collection.
+                 */
+                segmentsCounted++;
+                collectCount(ctx, live);
+            } else {
+                segmentsCollected++;
+                collectSubs(ctx, live, sub);
             }
             // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment.
             throw new CollectionTerminatedException();
         }
 
+        /**
+         * Gather a count of the number of documents that match each filter
+         * without sending any documents to a sub-aggregator. This yields
+         * the correct response when there aren't any sub-aggregators or they
+         * all opt out of needing any sort of collection.
+         */
+        private void collectCount(LeafReaderContext ctx, Bits live) throws IOException {
+            Counter counter = new Counter(docCountProvider);
+            for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) {
+                incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live));
+            }
+        }
+
+        /**
+         * Collect all documents that match all filters and send them to
+         * the sub-aggregators. This method is only required when there are
+         * sub-aggregators that haven't opted out of being collected.
+         * <p>
+         * This collects each filter one at a time, resetting the
+         * sub-aggregators between each filter as though they were hitting
+         * a fresh segment.
+         * <p>
+         * It's <strong>very</strong> tempting to try and collect the
+         * filters into blocks of matches and then replay the whole block
+         * in ascending order without the resetting. That'd probably
+         * work better if the disk were very, very slow and we didn't have
+         * any kind of disk caching. But with disk caching it's about twice
+         * as fast to collect each filter one by one like this. And it uses
+         * less memory because there isn't a need to buffer a block of matches.
+         * And it's a hell of a lot less code.
+         */
+        private void collectSubs(LeafReaderContext ctx, Bits live, LeafBucketCollector sub) throws IOException {
+            class MatchCollector implements LeafCollector {
+                LeafBucketCollector subCollector = sub;
+                int filterOrd;
+
+                @Override
+                public void collect(int docId) throws IOException {
+                    collectBucket(subCollector, docId, filterOrd);
+                }
+
+                @Override
+                public void setScorer(Scorable scorer) throws IOException {
+                }
+            }
+            MatchCollector collector = new MatchCollector();
+            filters().get(0).collect(ctx, collector, live);
+            for (int filterOrd = 1; filterOrd < filters().size(); filterOrd++) {
+                collector.subCollector = collectableSubAggregators.getLeafCollector(ctx);
+                collector.filterOrd = filterOrd;
+                filters().get(filterOrd).collect(ctx, collector, live);
+            }
+        }
+
         @Override
         public void collectDebugInfo(BiConsumer<String, Object> add) {
             super.collectDebugInfo(add);
+            add.accept("segments_counted", segmentsCounted);
+            add.accept("segments_collected", segmentsCollected);
             add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs);
             add.accept("segments_with_doc_count_field", segmentsWithDocCountField);
             if (estimatedCost != -1) {
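
To make the collectSubs strategy above concrete, here is a minimal standalone sketch of the same pattern in plain Lucene (assumptions: the BucketSink interface is a hypothetical stand-in for BucketsAggregator#collectBucket, and doc-count fields and profiling are ignored). Each filter's BulkScorer is driven over the whole segment in turn, tagging every match with that filter's ordinal, rather than checking every document against every filter:

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;

class FilterByFilterSketch {
    /** Hypothetical stand-in for BucketsAggregator#collectBucket. */
    interface BucketSink {
        void collect(int docId, int filterOrd) throws IOException;
    }

    static void collectSegment(IndexSearcher searcher, LeafReaderContext ctx, List<Query> filters, BucketSink sink)
        throws IOException {
        Bits live = ctx.reader().getLiveDocs();
        for (int ord = 0; ord < filters.size(); ord++) {
            // Scores are never needed on this path; see the needsScores() checks above.
            Weight weight = searcher.createWeight(searcher.rewrite(filters.get(ord)), ScoreMode.COMPLETE_NO_SCORES, 1f);
            BulkScorer scorer = weight.bulkScorer(ctx);
            if (scorer == null) {
                continue; // no matches for this filter in this segment
            }
            int filterOrd = ord; // effectively-final copy for the anonymous class
            // Drive the whole segment through this one filter, sending each
            // match to the sink tagged with the filter's ordinal.
            scorer.score(new LeafCollector() {
                @Override
                public void setScorer(Scorable scorable) {}

                @Override
                public void collect(int docId) throws IOException {
                    sink.collect(docId, filterOrd);
                }
            }, live);
        }
    }
}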

+ 21 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java

@@ -17,6 +17,7 @@ import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
+import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.PointRangeQuery;
@@ -216,12 +217,31 @@ public class QueryToFilterAdapter<Q extends Query> {
      * Estimate the cost of calling {@code #count} in a leaf.
      */
     long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException {
+        return estimateCollectCost(ctx);
+    }
+
+    /**
+     * Collect all documents that match this filter in this leaf.
+     */
+    void collect(LeafReaderContext ctx, LeafCollector collector, Bits live) throws IOException {
+        BulkScorer scorer = bulkScorer(ctx, () -> {});
+        if (scorer == null) {
+            // No hits in this segment.
+            return;
+        }
+        scorer.score(collector, live);
+    }
+
+    /**
+     * Estimate the cost of calling {@code #collect} in a leaf.
+     */
+    long estimateCollectCost(LeafReaderContext ctx) throws IOException {
         BulkScorer scorer = bulkScorer(ctx, () -> scorersPreparedWhileEstimatingCost++);
         if (scorer == null) {
             // There aren't any matches for this filter in this leaf
             return 0;
         }
-        return scorer.cost();   // TODO in another PR (please) change this to ScorerSupplier.cost
+        return scorer.cost(); // TODO change this to ScorerSupplier.cost
     }
 
     /**

+ 11 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java

@@ -350,7 +350,7 @@ public abstract class RangeAggregator extends BucketsAggregator {
             // We don't generate sensible Queries for nanoseconds.
             return null;
         }
-        if (false == FiltersAggregator.canUseFilterByFilter(parent, factories, null)) {
+        if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) {
             return null;
         }
         boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint();
@@ -404,6 +404,16 @@ public abstract class RangeAggregator extends BucketsAggregator {
             rangeFactory,
             averageDocsPerRange
         );
+        if (fromFilters.scoreMode().needsScores()) {
+            /*
+             * Filter by filter won't produce the correct results if the
+             * sub-aggregators need scores because we're not careful with how
+             * we merge filters. Right now we have to build the whole
+             * aggregation in order to know if it'll need scores or not.
+             */
+            // TODO make filter by filter produce the correct result or skip this in canUseFilterByFilter
+            return null;
+        }
         return fromFilters;
     }
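
To illustrate the case this guard catches (illustrative only, not from the commit; the names are hypothetical): a top_hits sub-aggregation defaults to sorting by _score, so the built aggregator reports that it needs scores through scoreMode(), the adapter above is discarded, and the aggregation falls back to the regular range implementation:

import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;

class NeedsScoresExample {
    /**
     * top_hits sorts by _score by default, so scoreMode().needsScores()
     * is true for the built aggregator and the filters-based adapter
     * returns null instead of running filter-by-filter.
     */
    static AggregationBuilder rangeWithTopHits() {
        return AggregationBuilders.range("price_ranges")
            .field("price")
            .addRange(0, 100)
            .addRange(100, 200)
            .subAggregation(AggregationBuilders.topHits("best").size(1));
    }
}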
 

+ 11 - 2
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java

@@ -68,7 +68,7 @@ public class StringTermsAggregatorFromFilters extends AdaptingAggregator {
         if (false == valuesSourceConfig.alignesWithSearchIndex()) {
             return null;
         }
-        if (false == FiltersAggregator.canUseFilterByFilter(parent, factories, null)) {
+        if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) {
             return null;
         }
         List<QueryToFilterAdapter<?>> filters = new ArrayList<>();
@@ -112,7 +112,16 @@ public class StringTermsAggregatorFromFilters extends AdaptingAggregator {
             bucketCountThresholds,
             terms
         );
-        return adapted.delegate() == null ? null : adapted;
+        if (adapted.scoreMode().needsScores()) {
+            /*
+             * Filter by filter won't produce the correct results if the
+             * sub-aggregators need scores because we're not careful with how
+             * we merge filters. Right now we have to build the whole
+             * aggregation in order to know if it'll need scores or not.
+             */
+            // TODO make filter by filter produce the correct result or skip this in canUseFilterByFilter
+            return null;
+        }
+        return adapted;
     }
 
     private final boolean showTermDocCountError;

+ 243 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java

@@ -13,6 +13,7 @@ import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.CollectionTerminatedException;
@@ -26,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.CheckedConsumer;
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
 import org.elasticsearch.common.lucene.search.Queries;
+import org.elasticsearch.common.time.DateFormatter;
 import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
 import org.elasticsearch.index.mapper.CustomTermFreqField;
 import org.elasticsearch.index.mapper.DateFieldMapper;
@@ -34,6 +36,8 @@ import org.elasticsearch.index.mapper.DocCountFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
 import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
 import org.elasticsearch.index.mapper.ObjectMapper;
 import org.elasticsearch.index.query.MatchAllQueryBuilder;
 import org.elasticsearch.index.query.MatchQueryBuilder;
@@ -49,6 +53,10 @@ import org.elasticsearch.search.aggregations.AggregatorTestCase;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter;
 import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregatorTests;
+import org.elasticsearch.search.aggregations.metrics.InternalMax;
+import org.elasticsearch.search.aggregations.metrics.InternalSum;
+import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
+import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
@@ -56,17 +64,23 @@ import org.elasticsearch.search.internal.ContextIndexSearcherTests.DocumentSubse
 import org.junit.Before;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
+import static org.hamcrest.Matchers.both;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasEntry;
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.lessThan;
 import static org.mockito.Mockito.mock;
 
 public class FiltersAggregatorTests extends AggregatorTestCase {
@@ -101,6 +115,24 @@ public class FiltersAggregatorTests extends AggregatorTestCase {
         directory.close();
     }
 
+    public void testNoFilters() throws IOException {
+        testCase(new FiltersAggregationBuilder("test", new KeyedFilter[0]), new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(List.of());
+        }, (InternalFilters result) -> {
+            assertThat(result.getBuckets(), hasSize(0));
+        });
+    }
+
+    public void testNoFiltersWithSubAggs() throws IOException {
+        testCase(
+            new FiltersAggregationBuilder("test", new KeyedFilter[0]).subAggregation(new MaxAggregationBuilder("m").field("i")),
+            new MatchAllDocsQuery(),
+            iw -> { iw.addDocument(List.of(new SortedNumericDocValuesField("i", 1))); },
+            (InternalFilters result) -> { assertThat(result.getBuckets(), hasSize(0)); },
+            new NumberFieldMapper.NumberFieldType("m", NumberFieldMapper.NumberType.INTEGER)
+        );
+    }
+
     public void testKeyedFilter() throws Exception {
         Directory directory = newDirectory();
         RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
@@ -514,6 +546,217 @@ public class FiltersAggregatorTests extends AggregatorTestCase {
         }, ft);
     }
 
+    public void testSubAggs() throws IOException {
+        MappedFieldType dateFt = new DateFieldMapper.DateFieldType(
+            "test",
+            true,
+            false,
+            false,
+            DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
+            Resolution.MILLISECONDS,
+            null,
+            null
+        );
+        MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER);
+        AggregationBuilder builder = new FiltersAggregationBuilder(
+            "test",
+            new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)),
+            new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false))
+        ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int"));
+        List<List<IndexableField>> docs = new ArrayList<>();
+        docs.add(
+            List.of(
+                new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")),
+                new SortedNumericDocValuesField("int", 100)
+            )
+        );
+        docs.add(
+            List.of(
+                new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")),
+                new SortedNumericDocValuesField("int", 5)
+            )
+        );
+        docs.add(
+            List.of(
+                new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03")),
+                new SortedNumericDocValuesField("int", 10)
+            )
+        );
+         /*
+          * Shuffle the docs so we collect them in a random order which causes
+          * bad implementations of filter-by-filter aggregation to fail with
+          * assertion errors while executing.
+          */
+        Collections.shuffle(docs, random());
+        testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> {
+            InternalFilters filters = (InternalFilters) result;
+            assertThat(filters.getBuckets(), hasSize(2));
+
+            InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); 
+            assertThat(b.getDocCount(), equalTo(1L));
+            InternalMax max = b.getAggregations().get("m");
+            assertThat(max.getValue(), equalTo(100.0));
+            InternalSum sum = b.getAggregations().get("s");
+            assertThat(sum.getValue(), equalTo(100.0));
+
+            b = filters.getBucketByKey("q2");
+            assertThat(b.getDocCount(), equalTo(2L));
+            max = b.getAggregations().get("m");
+            assertThat(max.getValue(), equalTo(10.0));
+            sum = b.getAggregations().get("s");
+            assertThat(sum.getValue(), equalTo(15.0));
+        }, dateFt, intFt);
+        withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> {
+            assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
+            FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator;
+            int maxDoc = searcher.getIndexReader().maxDoc();
+            assertThat(filterByFilter.estimateCost(maxDoc), equalTo(3L));
+            Map<String, Object> debug = new HashMap<>();
+            filterByFilter.filters().get(0).collectDebugInfo(debug::put);
+            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1));
+            debug = new HashMap<>();
+            filterByFilter.filters().get(1).collectDebugInfo(debug::put);
+            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1));
+        }, dateFt, intFt);
+    }
+
+    public void testSubAggsManyDocs() throws IOException {
+        MappedFieldType dateFt = new DateFieldMapper.DateFieldType(
+            "test",
+            true,
+            false,
+            false,
+            DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
+            Resolution.MILLISECONDS,
+            null,
+            null
+        );
+        MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER);
+        AggregationBuilder builder = new FiltersAggregationBuilder(
+            "test",
+            new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)),
+            new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false))
+        ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int"));
+        List<List<IndexableField>> docs = new ArrayList<>();
+        long[] times = new long[] {
+            DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02"),
+            DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02"),
+            DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03"),
+        };
+        for (int i = 0; i < 10000; i++) {
+            docs.add(List.of(new LongPoint("test", times[i % 3]), new SortedNumericDocValuesField("int", i)));
+        }
+         /*
+          * Shuffle the docs so we collect them in a random order which causes
+          * bad implementations of filter-by-filter aggregation to fail with
+          * assertion errors while executing.
+          */
+        Collections.shuffle(docs, random());
+        testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> {
+            InternalFilters filters = (InternalFilters) result;
+            assertThat(filters.getBuckets(), hasSize(2));
+
+            InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); 
+            assertThat(b.getDocCount(), equalTo(3334L));
+            InternalMax max = b.getAggregations().get("m");
+            assertThat(max.getValue(), equalTo(9999.0));
+            InternalSum sum = b.getAggregations().get("s");
+            assertThat(sum.getValue(), equalTo(16668333.0));
+
+            b = filters.getBucketByKey("q2");
+            assertThat(b.getDocCount(), equalTo(6666L));
+            max = b.getAggregations().get("m");
+            assertThat(max.getValue(), equalTo(9998.0));
+            sum = b.getAggregations().get("s");
+            assertThat(sum.getValue(), equalTo(33326667.0));
+        }, dateFt, intFt);
+        withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> {
+            assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
+            FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator;
+            int maxDoc = searcher.getIndexReader().maxDoc();
+            assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L)));
+            Map<String, Object> debug = new HashMap<>();
+            filterByFilter.filters().get(0).collectDebugInfo(debug::put);
+            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1));
+            debug = new HashMap<>();
+            filterByFilter.filters().get(1).collectDebugInfo(debug::put);
+            assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1));
+        }, dateFt, intFt);
+    }
+
+    public void testSubAggsManyFilters() throws IOException {
+        MappedFieldType dateFt = new DateFieldMapper.DateFieldType(
+            "test",
+            true,
+            false,
+            false,
+            DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
+            Resolution.MILLISECONDS,
+            null,
+            null
+        );
+        MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER);
+        List<KeyedFilter> buckets = new ArrayList<>();
+        DateFormatter formatter = DateFormatter.forPattern("strict_date");
+        long start = formatter.parseMillis("2010-01-01");
+        long lastRange = formatter.parseMillis("2020-03-01");
+        while (start < lastRange) {
+            long end = start + TimeUnit.DAYS.toMillis(30);
+            String key = formatter.formatMillis(start) + " to " + formatter.formatMillis(end);
+            buckets.add(new KeyedFilter(key, new RangeQueryBuilder("test").from(start).to(end).includeUpper(false)));
+            start = end;
+        }
+        AggregationBuilder builder = new FiltersAggregationBuilder(
+            "test",
+            buckets.toArray(KeyedFilter[]::new)
+        ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int"));
+        List<List<IndexableField>> docs = new ArrayList<>();
+        long[] times = new long[] {
+            formatter.parseMillis("2010-01-02"),
+            formatter.parseMillis("2020-01-02"),
+            formatter.parseMillis("2020-01-03"), };
+        for (int i = 0; i < 10000; i++) {
+            docs.add(List.of(new LongPoint("test", times[i % 3]), new SortedNumericDocValuesField("int", i)));
+        }
+         /*
+          * Shuffle the docs so we collect them in a random order which causes
+          * bad implementations of filter-by-filter aggregation to fail with
+          * assertion errors while executing.
+          */
+        Collections.shuffle(docs, random());
+        testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> {
+            InternalFilters filters = (InternalFilters) result;
+            assertThat(filters.getBuckets(), hasSize(buckets.size()));
+
+            InternalFilters.InternalBucket b = filters.getBucketByKey("2010-01-01 to 2010-01-31");
+            assertThat(b.getDocCount(), equalTo(3334L));
+            InternalMax max = b.getAggregations().get("m");
+            assertThat(max.getValue(), equalTo(9999.0));
+            InternalSum sum = b.getAggregations().get("s");
+            assertThat(sum.getValue(), equalTo(16668333.0));
+
+            b = filters.getBucketByKey("2019-12-10 to 2020-01-09");
+            assertThat(b.getDocCount(), equalTo(6666L));
+            max = b.getAggregations().get("m");
+            assertThat(max.getValue(), equalTo(9998.0));
+            sum = b.getAggregations().get("s");
+            assertThat(sum.getValue(), equalTo(33326667.0));
+        }, dateFt, intFt);
+        withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> {
+            assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class));
+            FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator;
+            int maxDoc = searcher.getIndexReader().maxDoc();
+            assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L)));
+            for (int b = 0; b < buckets.size(); b++) {
+                Map<String, Object> debug = new HashMap<>();
+                filterByFilter.filters().get(b).collectDebugInfo(debug::put);
+                assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1));
+            }
+        }, dateFt, intFt);
+    }
+
     @Override
     protected List<ObjectMapper> objectMappers() {
         return MOCK_OBJECT_MAPPERS;

+ 3 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregatorTestCase.java

@@ -7,6 +7,7 @@
  */
 package org.elasticsearch.search.aggregations.bucket.geogrid;
 
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LatLonDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.geo.GeoEncodingUtils;
@@ -24,6 +25,7 @@ import org.elasticsearch.common.geo.GeoBoundingBox;
 import org.elasticsearch.common.geo.GeoBoundingBoxTests;
 import org.elasticsearch.common.geo.GeoUtils;
 import org.elasticsearch.index.mapper.GeoPointFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.aggregations.Aggregation;
 import org.elasticsearch.search.aggregations.AggregationBuilder;
@@ -150,6 +152,7 @@ public abstract class GeoGridAggregatorTestCase<T extends InternalGeoGridBucket>
             docs.add(doc);
             doc.add(new LatLonDocValuesField(FIELD_NAME, latLng[0], latLng[1]));
             doc.add(new SortedSetDocValuesField("t", new BytesRef(t)));
+            doc.add(new Field("t", new BytesRef(t), KeywordFieldMapper.Defaults.FIELD_TYPE));
 
             String hash = hashAsString(latLng[1], latLng[0], precision);
             Map<String, Long> expectedCountPerGeoHash = expectedCountPerTPerGeoHash.get(t);

+ 4 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java

@@ -9,6 +9,7 @@
 package org.elasticsearch.search.aggregations.bucket.histogram;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
@@ -28,6 +29,7 @@ import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DateFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.search.aggregations.Aggregation;
@@ -272,6 +274,8 @@ public class AutoDateHistogramAggregatorTests extends DateHistogramAggregatorTes
                     new SortedNumericDocValuesField(AGGREGABLE_DATE, d),
                     new SortedSetDocValuesField("k1", aBytes),
                     new SortedSetDocValuesField("k1", d < useC ? bBytes : cBytes),
+                    new Field("k1", aBytes, KeywordFieldMapper.Defaults.FIELD_TYPE),
+                    new Field("k1", d < useC ? bBytes : cBytes, KeywordFieldMapper.Defaults.FIELD_TYPE),
                     new SortedNumericDocValuesField("n", n++)
                 ));
             }

+ 11 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.search.aggregations.bucket.histogram;
 
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -43,31 +44,41 @@ public abstract class DateHistogramAggregatorTestCase extends AggregatorTestCase
             iw.addDocument(List.of(
                 new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-02-01T00:00:00Z")),
                 new SortedSetDocValuesField("k1", new BytesRef("a")),
+                new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedSetDocValuesField("k2", new BytesRef("a")),
+                new Field("k2", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedNumericDocValuesField("n", 1)
             ));
             iw.addDocument(List.of(
                 new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-03-01T00:00:00Z")),
                 new SortedSetDocValuesField("k1", new BytesRef("a")),
+                new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedSetDocValuesField("k2", new BytesRef("a")),
+                new Field("k2", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedNumericDocValuesField("n", 2)
             ));
             iw.addDocument(List.of(
                 new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2021-02-01T00:00:00Z")),
                 new SortedSetDocValuesField("k1", new BytesRef("a")),
+                new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedSetDocValuesField("k2", new BytesRef("a")),
+                new Field("k2", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedNumericDocValuesField("n", 3)
             ));
             iw.addDocument(List.of(
                 new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2021-03-01T00:00:00Z")),
                 new SortedSetDocValuesField("k1", new BytesRef("a")),
+                new Field("k1", new BytesRef("a"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedSetDocValuesField("k2", new BytesRef("b")),
+                new Field("k2", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedNumericDocValuesField("n", 4)
             ));
             iw.addDocument(List.of(
                 new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-02-01T00:00:00Z")),
                 new SortedSetDocValuesField("k1", new BytesRef("b")),
+                new Field("k1", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedSetDocValuesField("k2", new BytesRef("b")),
+                new Field("k2", new BytesRef("b"), KeywordFieldMapper.Defaults.FIELD_TYPE),
                 new SortedNumericDocValuesField("n", 5)
             ));
         };

+ 4 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java

@@ -557,7 +557,11 @@ public class RareTermsAggregatorTests extends AggregatorTestCase {
                     document.add(new SortedNumericDocValuesField(LONG_FIELD, value));
                     document.add(new LongPoint(LONG_FIELD, value));
                     document.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(Long.toString(value))));
+                    document.add(new Field(KEYWORD_FIELD, new BytesRef(Long.toString(value)), KeywordFieldMapper.Defaults.FIELD_TYPE));
                     document.add(new SortedSetDocValuesField("even_odd", new BytesRef(value % 2 == 0 ? "even" : "odd")));
+                    document.add(
+                        new Field("even_odd", new BytesRef(value % 2 == 0 ? "even" : "odd"), KeywordFieldMapper.Defaults.FIELD_TYPE)
+                    );
                     indexWriter.addDocument(document);
                     document.clear();
                 }

+ 2 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java

@@ -25,6 +25,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.mapper.BinaryFieldMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MockFieldMapper;
 import org.elasticsearch.index.mapper.TextFieldMapper;
@@ -305,6 +306,7 @@ public class SignificantTextAggregatorTests extends AggregatorTestCase {
                 " }";
             doc.add(new StoredField("_source", new BytesRef(json)));
             doc.add(new SortedSetDocValuesField("kwd", i % 2 == 0 ? new BytesRef("even") : new BytesRef("odd")));
+            doc.add(new Field("kwd", i % 2 == 0 ? new BytesRef("even") : new BytesRef("odd"), KeywordFieldMapper.Defaults.FIELD_TYPE));
             writer.addDocument(doc);
         }
     }

+ 49 - 41
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java

@@ -32,6 +32,7 @@ import org.apache.lucene.util.NumericUtils;
 import org.elasticsearch.common.CheckedConsumer;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.geo.GeoPoint;
+import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.network.InetAddresses;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.BigArrays;
@@ -317,9 +318,9 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         List<IndexableField> doc = new ArrayList<IndexableField>();
         for (String v : values) {
             BytesRef bytes = new BytesRef(v);
-            doc.add(new SortedSetDocValuesField("string", bytes));
+            doc.add(new SortedSetDocValuesField(ft.name(), bytes));
             if (ft.isSearchable()) {
-                doc.add(new KeywordField("string", bytes, KeywordFieldMapper.Defaults.FIELD_TYPE));
+                doc.add(new KeywordField(ft.name(), bytes, KeywordFieldMapper.Defaults.FIELD_TYPE));
             }
         }
         return doc;
@@ -645,7 +646,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     }
 
     public void testStringTermsAggregator() throws Exception {
-        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("field");
+        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("field", randomBoolean(), true, null);
         BiFunction<String, Boolean, List<IndexableField>> luceneFieldFactory = (val, mv) -> {
             List<IndexableField> result = new ArrayList<>(2);
             if (mv) {
@@ -660,8 +661,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         };
         termsAggregator(ValueType.STRING, fieldType, i -> Integer.toString(i),
             String::compareTo, luceneFieldFactory);
-        termsAggregatorWithNestedMaxAgg(ValueType.STRING, fieldType, i -> Integer.toString(i),
-            val -> new SortedDocValuesField("field", new BytesRef(val)));
+        termsAggregatorWithNestedMaxAgg(ValueType.STRING, fieldType, i -> Integer.toString(i), val -> luceneFieldFactory.apply(val, false));
     }
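
Editor's note: the four-argument constructor used here is, in this branch, KeywordFieldType(name, isSearchable, hasDocValues, meta). Randomizing isSearchable makes the test cover both execution paths, since only an indexed field is eligible for the filter rewrite, while hasDocValues=true keeps the fallback collector working either way. A sketch of the intent (both lines are illustrative, not from the diff):

    // The same aggregation must produce identical buckets whether or
    // not the field is indexed.
    MappedFieldType indexed = new KeywordFieldMapper.KeywordFieldType("field", true, true, null);
    MappedFieldType dvOnly  = new KeywordFieldMapper.KeywordFieldType("field", false, true, null);
    // With `indexed` the terms agg may run filter-by-filter; with
    // `dvOnly` it must fall back to collecting doc values.

The doc(...) helper patched above matches whichever flag was drawn: it always emits the doc-values variant and only adds the indexed KeywordField when ft.isSearchable() is true.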
 
     public void testLongTermsAggregator() throws Exception {
@@ -675,7 +675,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         MappedFieldType fieldType
             = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.LONG);
         termsAggregator(ValueType.LONG, fieldType, Integer::longValue, Long::compareTo, luceneFieldFactory);
-        termsAggregatorWithNestedMaxAgg(ValueType.LONG, fieldType, Integer::longValue, val -> new NumericDocValuesField("field", val));
+        termsAggregatorWithNestedMaxAgg(ValueType.LONG, fieldType, Integer::longValue, val -> luceneFieldFactory.apply(val, false));
     }
 
     public void testDoubleTermsAggregator() throws Exception {
@@ -690,7 +690,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
             = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE);
         termsAggregator(ValueType.DOUBLE, fieldType, Integer::doubleValue, Double::compareTo, luceneFieldFactory);
         termsAggregatorWithNestedMaxAgg(ValueType.DOUBLE, fieldType, Integer::doubleValue,
-            val -> new NumericDocValuesField("field", Double.doubleToRawLongBits(val)));
+            val -> luceneFieldFactory.apply(val, false));
     }
 
     public void testIpTermsAggregator() throws Exception {
@@ -857,7 +857,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
 
     private <T> void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFieldType fieldType,
                                      Function<Integer, T> valueFactory,
-                                     Function<T, IndexableField> luceneFieldFactory) throws Exception {
+                                     Function<T, List<IndexableField>> luceneFieldFactory) throws Exception {
         final Map<T, Long> counts = new HashMap<>();
         int numTerms = scaledRandomIntBetween(8, 128);
         for (int i = 0; i < numTerms; i++) {
@@ -867,8 +867,8 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
                 for (Map.Entry<T, Long> entry : counts.entrySet()) {
-                    Document document = new Document();
-                    document.add(luceneFieldFactory.apply(entry.getKey()));
+                    List<IndexableField> document = new ArrayList<>();
+                    document.addAll(luceneFieldFactory.apply(entry.getKey()));
                     document.add(new NumericDocValuesField("value", entry.getValue()));
                     indexWriter.addDocument(document);
                 }
@@ -901,7 +901,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
 
                     MappedFieldType fieldType2
                         = new NumberFieldMapper.NumberFieldType("value", NumberFieldMapper.NumberType.LONG);
-                    AggregationContext context = createAggregationContext(indexSearcher, null, fieldType, fieldType2);
+                    AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType, fieldType2);
                     Aggregator aggregator = createAggregator(aggregationBuilder, context);
                     aggregator.preCollection();
                     indexSearcher.search(new MatchAllDocsQuery(), aggregator);
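
Editor's note: passing an explicit MatchAllDocsQuery instead of null matters for this commit. The filter-based implementation counts each bucket by intersecting the bucket's filter with the top-level query, so the aggregation context needs a real query to rewrite against. A conceptual sketch of what gets intersected (not the production code; field and term are illustrative):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;

    // Each bucket is counted roughly as: top-level query AND bucket filter.
    Query topLevel = new MatchAllDocsQuery();
    Query bucket = new TermQuery(new Term("field", "a"));
    Query counted = new BooleanQuery.Builder()
        .add(topLevel, BooleanClause.Occur.FILTER)
        .add(bucket, BooleanClause.Occur.FILTER)
        .build();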
@@ -1091,19 +1091,21 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     }
 
     public void testNestedTermsAgg() throws Exception {
+        MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType("field1", randomBoolean(), true, null);
+        MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType("field2", randomBoolean(), true, null);
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
-                Document document = new Document();
-                document.add(new SortedDocValuesField("field1", new BytesRef("a")));
-                document.add(new SortedDocValuesField("field2", new BytesRef("b")));
+                List<IndexableField> document = new ArrayList<>();
+                document.addAll(doc(fieldType1, "a"));
+                document.addAll(doc(fieldType2, "b"));
                 indexWriter.addDocument(document);
-                document = new Document();
-                document.add(new SortedDocValuesField("field1", new BytesRef("c")));
-                document.add(new SortedDocValuesField("field2", new BytesRef("d")));
+                document = new ArrayList<>();
+                document.addAll(doc(fieldType1, "c"));
+                document.addAll(doc(fieldType2, "d"));
                 indexWriter.addDocument(document);
-                document = new Document();
-                document.add(new SortedDocValuesField("field1", new BytesRef("e")));
-                document.add(new SortedDocValuesField("field2", new BytesRef("f")));
+                document = new ArrayList<>();
+                document.addAll(doc(fieldType1, "e"));
+                document.addAll(doc(fieldType2, "f"));
                 indexWriter.addDocument(document);
                 try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
                     IndexSearcher indexSearcher = newIndexSearcher(indexReader);
@@ -1121,10 +1123,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                             .field("field2")
                             .order(BucketOrder.key(true))
                         );
-                    MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType("field1");
-                    MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType("field2");
-
-                    AggregationContext context = createAggregationContext(indexSearcher, null, fieldType1, fieldType2);
+                    AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType1, fieldType2);
                     Aggregator aggregator = createAggregator(aggregationBuilder, context);
                     aggregator.preCollection();
                     indexSearcher.search(new MatchAllDocsQuery(), aggregator);
@@ -1309,12 +1308,14 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     }
 
     public void testHeisenpig() throws IOException {
+        MappedFieldType nestedFieldType = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.LONG);
+        KeywordFieldType animalFieldType = new KeywordFieldType("str", randomBoolean(), true, null);
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
                 String[] tags = new String[] {"danger", "fluffiness"};
-                indexWriter.addDocuments(generateAnimalDocsWithNested("1", "sheep", tags, new int[] {1, 10}));
-                indexWriter.addDocuments(generateAnimalDocsWithNested("2", "cow", tags, new int[] {3, 1}));
-                indexWriter.addDocuments(generateAnimalDocsWithNested("3", "pig", tags, new int[] {100, 1}));
+                indexWriter.addDocuments(generateAnimalDocsWithNested("1", animalFieldType, "sheep", tags, new int[] {1, 10}));
+                indexWriter.addDocuments(generateAnimalDocsWithNested("2", animalFieldType, "cow", tags, new int[] {3, 1}));
+                indexWriter.addDocuments(generateAnimalDocsWithNested("3", animalFieldType, "pig", tags, new int[] {100, 1}));
                 indexWriter.commit();
                 NestedAggregationBuilder nested = new NestedAggregationBuilder("nested", "nested_object")
                     .subAggregation(
@@ -1326,12 +1327,10 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                     .shardSize(10)
                     .size(10)
                     .order(BucketOrder.aggregation("nested>max_number", false));
-                MappedFieldType nestedFieldType = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.LONG);
-                MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("str");
                 try (IndexReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
                     StringTerms result = searchAndReduce(newSearcher(indexReader, false, true),
                         // match root document only
-                        new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME), terms, fieldType, nestedFieldType);
+                        Queries.newNonNestedFilter(), terms, animalFieldType, nestedFieldType);
                     assertThat(result.getBuckets().get(0).getKeyAsString(), equalTo("pig"));
                     assertThat(result.getBuckets().get(0).docCount, equalTo(1L));
                     assertThat(((InternalMax) (((InternalNested)result.getBuckets().get(0).getAggregations().get("nested"))
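
Editor's note: both the old and the new top-level query select only root (non-nested) documents; the change swaps the doc-values existence trick on _primary_term for the standard non-nested filter helper, which states the intent directly now that the top-level query participates in the filter rewrite. Side by side (a sketch; PRIMARY_TERM_NAME comes from the test base class):

    // Both match root documents only; the second form is used after
    // this change.
    Query oldStyle = new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME);
    Query newStyle = Queries.newNonNestedFilter();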
@@ -1404,15 +1403,19 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     }
 
     private void threeLayerStringTestCase(String executionHint) throws IOException {
+        MappedFieldType ift = new KeywordFieldType("i", randomBoolean(), true, null);
+        MappedFieldType jft = new KeywordFieldType("j", randomBoolean(), true, null);
+        MappedFieldType kft = new KeywordFieldType("k", randomBoolean(), true, null);
+
         try (Directory dir = newDirectory()) {
             try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
                 for (int i = 0; i < 10; i++) {
                     for (int j = 0; j < 10; j++) {
                         for (int k = 0; k < 10; k++) {
-                            Document d = new Document();
-                            d.add(new SortedDocValuesField("i", new BytesRef(Integer.toString(i))));
-                            d.add(new SortedDocValuesField("j", new BytesRef(Integer.toString(j))));
-                            d.add(new SortedDocValuesField("k", new BytesRef(Integer.toString(k))));
+                            List<IndexableField> d = new ArrayList<>();
+                            d.addAll(doc(ift, Integer.toString(i)));
+                            d.addAll(doc(jft, Integer.toString(j)));
+                            d.addAll(doc(kft, Integer.toString(k)));
                             writer.addDocument(d);
                         }
                     }
@@ -1422,8 +1425,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                     TermsAggregationBuilder request = new TermsAggregationBuilder("i").field("i").executionHint(executionHint)
                         .subAggregation(new TermsAggregationBuilder("j").field("j").executionHint(executionHint)
                             .subAggregation(new TermsAggregationBuilder("k").field("k").executionHint(executionHint)));
-                    StringTerms result = searchAndReduce(searcher, new MatchAllDocsQuery(), request,
-                        keywordField("i"), keywordField("j"), keywordField("k"));
+                    StringTerms result = searchAndReduce(searcher, new MatchAllDocsQuery(), request, ift, jft, kft);
                     for (int i = 0; i < 10; i++) {
                         StringTerms.Bucket iBucket = result.getBucketByKey(Integer.toString(i));
                         assertThat(iBucket.getDocCount(), equalTo(100L));
@@ -1707,11 +1709,17 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         return documents;
     }
 
-    private List<Document> generateAnimalDocsWithNested(String id, String animal, String[] tags, int[] nestedValues) {
-        List<Document> documents = new ArrayList<>();
+    private List<List<IndexableField>> generateAnimalDocsWithNested(
+        String id,
+        KeywordFieldType animalFieldType,
+        String animal,
+        String[] tags,
+        int[] nestedValues
+    ) {
+        List<List<IndexableField>> documents = new ArrayList<>();
 
         for (int i = 0; i < tags.length; i++) {
-            Document document = new Document();
+            List<IndexableField> document = new ArrayList<>();
             document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
 
             document.add(new Field(NestedPathFieldMapper.NAME, "nested_object", NestedPathFieldMapper.Defaults.FIELD_TYPE));
@@ -1720,9 +1728,9 @@ public class TermsAggregatorTests extends AggregatorTestCase {
             documents.add(document);
         }
 
-        Document document = new Document();
+        List<IndexableField> document = new ArrayList<>();
         document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedDocValuesField("str", new BytesRef(animal)));
+        document.addAll(doc(animalFieldType, animal));
         document.add(new Field(NestedPathFieldMapper.NAME, "docs", NestedPathFieldMapper.Defaults.FIELD_TYPE));
         document.add(sequenceIDFields.primaryTerm);
         documents.add(document);

+ 1 - 0
x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/RollupResponseTranslationTests.java

@@ -1165,6 +1165,7 @@ public class RollupResponseTranslationTests extends AggregatorTestCase {
     private Document stringValueRollupDoc(String stringValue, long docCount) {
         Document doc = new Document();
         doc.add(new SortedSetDocValuesField("stringfield.terms." + RollupField.VALUE, new BytesRef(stringValue)));
+        doc.add(new Field("stringfield.terms." + RollupField.VALUE, new BytesRef(stringValue), KeywordFieldMapper.Defaults.FIELD_TYPE));
         doc.add(new SortedNumericDocValuesField("stringfield.terms." + RollupField.COUNT_FIELD, docCount));
         return doc;
     }
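
Editor's note: the rollup translation tests get the same treatment as the aggregator tests above. The synthetic rollup document now carries an indexed copy of the terms value, keeping the translated terms aggregation eligible for the filter path. A hypothetical usage of the patched helper (the writer variable is illustrative, not from this file):

    // Index one synthetic rollup doc: "foo" seen 3 times in the raw data.
    Document rollupDoc = stringValueRollupDoc("foo", 3);
    indexWriter.addDocument(rollupDoc); // indexWriter: illustrative only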

+ 1 - 1
x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java

@@ -388,7 +388,7 @@ public class GeoLineAggregatorTests extends AggregatorTestCase {
 
         try {
             MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("value_field");
-            MappedFieldType groupFieldType = new KeywordFieldMapper.KeywordFieldType("group_id");
+            MappedFieldType groupFieldType = new KeywordFieldMapper.KeywordFieldType("group_id", false, true, null);
             MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType("sort_field", fieldNumberType);
 
             Terms terms = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), aggregationBuilder,
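
Editor's note: unlike the randomized call sites above, this constructor pins isSearchable=false, likely because the geo_line test's documents only carry doc values for group_id; declaring the field searchable would let the filter rewrite run term queries against a field with no postings. (Argument order assumed to be name, isSearchable, hasDocValues, meta, matching the other call sites in this commit.)

    // Doc-values only: usable by the collector, never by the filter rewrite.
    MappedFieldType groupFieldType =
        new KeywordFieldMapper.KeywordFieldType("group_id", false, true, null);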

+ 2 - 1
x-pack/qa/runtime-fields/build.gradle

@@ -72,7 +72,8 @@ subprojects {
           'search.aggregation/10_histogram/*',
           'suggest/50_completion_with_multi_fields/Search by suggestion on geofield-hash on sub field should work',
           // Runtime fields don't have global ords
-          'search.aggregation/20_terms/string profiler via global ordinals',
+          'search.aggregation/20_terms/string profiler via global ordinals filters implementation',
+          'search.aggregation/20_terms/string profiler via global ordinals native implementation',
           'search.aggregation/20_terms/Global ordinals are loaded with the global_ordinals execution hint',
           'search.aggregation/170_cardinality_metric/profiler string',
           /////// NOT SUPPORTED ///////