Browse Source

Add extra profiling information to terms agg (#73636)

I was helping some folks debug an issue with the terms agg and noticed
that we didn't always have the `total_buckets` debug information. I also
noticed that we can't tell how many buckets we build, so I added that
too as `built_buckets`.

Finally, I noticed that when we're using segment ords we count segments
without any values as "multi-valued". We can do better there and count
them as no-valued. That will, mostly, just improve the profiling. When
we collect from global ords we have no way to tell how many values are
on the segment so segments without any values will, sadly, in this case
still be miscounted as multi-valued.
Nik Everett 4 years ago
parent
commit
8904ffe2be

+ 7 - 2
docs/reference/search/profile.asciidoc

@@ -805,7 +805,8 @@ This yields the following aggregation profile output:
             },
             "debug": {
               "total_buckets": 1,
-              "result_strategy": "long_terms"
+              "result_strategy": "long_terms",
+              "built_buckets": 1
             }
           },
           {
@@ -826,6 +827,9 @@ This yields the following aggregation profile output:
               "post_collection": 1584,
               "post_collection_count": 1
             },
+            "debug": {
+              "built_buckets": 1
+            },
             "children": [
               {
                 "type": "NumericTermsAggregator",
@@ -847,7 +851,8 @@ This yields the following aggregation profile output:
                 },
                 "debug": {
                   "total_buckets": 1,
-                  "result_strategy": "long_terms"
+                  "result_strategy": "long_terms",
+                  "built_buckets": 1
                 }
               }
             ]

+ 80 - 61
server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

@@ -8,6 +8,8 @@
 
 package org.elasticsearch.search.profile.aggregation;
 
+import io.github.nik9000.mapmatcher.MapMatcher;
+
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.Settings;
@@ -32,6 +34,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import static io.github.nik9000.mapmatcher.ListMatcher.matchesList;
 import static io.github.nik9000.mapmatcher.MapMatcher.assertMap;
 import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@@ -45,9 +48,6 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSear
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
-import static org.hamcrest.Matchers.hasEntry;
-import static org.hamcrest.Matchers.hasKey;
-import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.SuiteScopeTestCase
@@ -74,6 +74,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
     );
 
     private static final String TOTAL_BUCKETS = "total_buckets";
+    private static final String BUILT_BUCKETS = "built_buckets";
     private static final String DEFERRED = "deferred_aggregators";
     private static final String COLLECTION_STRAT = "collection_strategy";
     private static final String RESULT_STRAT = "result_strategy";
@@ -143,10 +144,10 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(breakdown.get(COLLECT), greaterThan(0L));
             assertThat(breakdown.get(BUILD_AGGREGATION).longValue(), greaterThan(0L));
             assertThat(breakdown.get(REDUCE), equalTo(0L));
-            Map<String, Object> debug = histoAggResult.getDebugInfo();
-            assertThat(debug, notNullValue());
-            assertThat(debug.keySet(), equalTo(Set.of(TOTAL_BUCKETS)));
-            assertThat(((Number) debug.get(TOTAL_BUCKETS)).longValue(), greaterThan(0L));
+            assertMap(
+                histoAggResult.getDebugInfo(),
+                matchesMap().entry(TOTAL_BUCKETS, greaterThan(0L)).entry(BUILT_BUCKETS, greaterThan(0))
+            );
         }
     }
 
@@ -187,11 +188,10 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(histoBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(histoBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(histoBreakdown.get(REDUCE), equalTo(0L));
-            Map<String, Object> histoDebugInfo = histoAggResult.getDebugInfo();
-            assertThat(histoDebugInfo, notNullValue());
-            assertThat(histoDebugInfo.keySet(), equalTo(Set.of(TOTAL_BUCKETS)));
-            assertThat(((Number) histoDebugInfo.get(TOTAL_BUCKETS)).longValue(), greaterThan(0L));
-            assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
+            assertMap(
+                histoAggResult.getDebugInfo(),
+                matchesMap().entry(TOTAL_BUCKETS, greaterThan(0L)).entry(BUILT_BUCKETS, greaterThan(0))
+            );
 
             ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0);
             assertThat(termsAggResult, notNullValue());
@@ -213,23 +213,33 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(avgAggResult.getQueryName(), equalTo("AvgAggregator"));
             assertThat(avgAggResult.getLuceneDescription(), equalTo("avg"));
             assertThat(avgAggResult.getTime(), greaterThan(0L));
-            Map<String, Long> avgBreakdown = termsAggResult.getTimeBreakdown();
+            Map<String, Long> avgBreakdown = avgAggResult.getTimeBreakdown();
             assertThat(avgBreakdown, notNullValue());
             assertThat(avgBreakdown.keySet(), equalTo(BREAKDOWN_KEYS));
             assertThat(avgBreakdown.get(INITIALIZE), greaterThan(0L));
             assertThat(avgBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(avgBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(avgBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(avgAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(
+                avgAggResult.getDebugInfo(),
+                matchesMap().entry(BUILT_BUCKETS, greaterThan(0))
+            );
             assertThat(avgAggResult.getProfiledChildren().size(), equalTo(0));
         }
     }
 
-    private void assertRemapTermsDebugInfo(ProfileResult termsAggResult) {
-        assertThat(termsAggResult.getDebugInfo(), hasEntry(COLLECTION_STRAT, "remap using many bucket ords"));
-        assertThat(termsAggResult.getDebugInfo(), hasEntry(RESULT_STRAT, "terms"));
-        assertThat(termsAggResult.getDebugInfo(), hasEntry(HAS_FILTER, false));
-        assertThat(termsAggResult.getDebugInfo().toString(), (int) termsAggResult.getDebugInfo().get(SEGMENTS_WITH_SINGLE), greaterThan(0));
+    private void assertRemapTermsDebugInfo(ProfileResult termsAggResult, String... deferredAggregators) {
+        MapMatcher matcher = matchesMap().entry(TOTAL_BUCKETS, greaterThan(0L))
+            .entry(BUILT_BUCKETS, greaterThan(0))
+            .entry(COLLECTION_STRAT, "remap using many bucket ords")
+            .entry(RESULT_STRAT, "terms")
+            .entry(HAS_FILTER, false)
+            .entry(SEGMENTS_WITH_SINGLE, greaterThan(0))
+            .entry(SEGMENTS_WITH_MULTI, 0);
+        if (deferredAggregators.length > 0) {
+            matcher = matcher.entry(DEFERRED, List.of(deferredAggregators));
+        }
+        assertMap(termsAggResult.getDebugInfo(), matcher);
     }
 
     public void testMultiLevelProfileBreadthFirst() {
@@ -261,10 +271,10 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(histoBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(histoBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(histoBreakdown.get(REDUCE), equalTo(0L));
-            Map<String, Object> histoDebugInfo = histoAggResult.getDebugInfo();
-            assertThat(histoDebugInfo, notNullValue());
-            assertThat(histoDebugInfo.keySet(), equalTo(Set.of(TOTAL_BUCKETS)));
-            assertThat(((Number) histoDebugInfo.get(TOTAL_BUCKETS)).longValue(), greaterThan(0L));
+            assertMap(
+                histoAggResult.getDebugInfo(),
+                matchesMap().entry(TOTAL_BUCKETS, greaterThan(0L)).entry(BUILT_BUCKETS, greaterThan(0))
+            );
             assertThat(histoAggResult.getProfiledChildren().size(), equalTo(1));
 
             ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0);
@@ -279,7 +289,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(termsBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(termsBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(termsBreakdown.get(REDUCE), equalTo(0L));
-            assertRemapTermsDebugInfo(termsAggResult);
+            assertRemapTermsDebugInfo(termsAggResult, "avg");
             assertThat(termsAggResult.getProfiledChildren().size(), equalTo(1));
 
             ProfileResult avgAggResult = termsAggResult.getProfiledChildren().get(0);
@@ -294,7 +304,10 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(avgBreakdown.get(COLLECT), greaterThan(0L));
             assertThat(avgBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(avgBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(avgAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(
+                avgAggResult.getDebugInfo(),
+                matchesMap().entry(BUILT_BUCKETS, greaterThan(0))
+            );
             assertThat(avgAggResult.getProfiledChildren().size(), equalTo(0));
         }
     }
@@ -330,8 +343,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(diversifyBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(diversifyBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(diversifyBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(diversifyAggResult.getDebugInfo(), equalTo(Map.of(DEFERRED, List.of("max"))));
-            assertThat(diversifyAggResult.getProfiledChildren().size(), equalTo(1));
+            assertMap(diversifyAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)).entry(DEFERRED, List.of("max")));
 
             ProfileResult maxAggResult = diversifyAggResult.getProfiledChildren().get(0);
             assertThat(maxAggResult, notNullValue());
@@ -347,7 +359,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(diversifyBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(maxBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(maxBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(maxAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(maxAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(maxAggResult.getProfiledChildren().size(), equalTo(0));
         }
     }
@@ -391,10 +403,10 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(histoBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(histoBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(histoBreakdown.get(REDUCE), equalTo(0L));
-            Map<String, Object> histoDebugInfo = histoAggResult.getDebugInfo();
-            assertThat(histoDebugInfo, notNullValue());
-            assertThat(histoDebugInfo.keySet(), equalTo(Set.of(TOTAL_BUCKETS)));
-            assertThat(((Number) histoDebugInfo.get(TOTAL_BUCKETS)).longValue(), greaterThan(0L));
+            assertMap(
+                histoAggResult.getDebugInfo(),
+                matchesMap().entry(TOTAL_BUCKETS, greaterThan(0L)).entry(BUILT_BUCKETS, greaterThan(0))
+            );
             assertThat(histoAggResult.getProfiledChildren().size(), equalTo(2));
 
             Map<String, ProfileResult> histoAggResultSubAggregations = histoAggResult.getProfiledChildren().stream()
@@ -432,7 +444,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(avgBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(avgBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(avgBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(avgAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(avgAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(avgAggResult.getProfiledChildren().size(), equalTo(0));
 
             ProfileResult maxAggResult = tagsAggResultSubAggregations.get("max");
@@ -448,7 +460,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(maxBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(maxBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(maxBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(maxAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(maxAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(maxAggResult.getProfiledChildren().size(), equalTo(0));
 
             ProfileResult stringsAggResult = histoAggResultSubAggregations.get("strings");
@@ -483,7 +495,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(avgBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(avgBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(avgBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(avgAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(avgAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(avgAggResult.getProfiledChildren().size(), equalTo(0));
 
             maxAggResult = stringsAggResultSubAggregations.get("max");
@@ -499,7 +511,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(maxBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(maxBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(maxBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(maxAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(maxAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(maxAggResult.getProfiledChildren().size(), equalTo(0));
 
             tagsAggResult = stringsAggResultSubAggregations.get("tags");
@@ -535,7 +547,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(avgBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(avgBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(avgBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(avgAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(avgAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(avgAggResult.getProfiledChildren().size(), equalTo(0));
 
             maxAggResult = tagsAggResultSubAggregations.get("max");
@@ -551,7 +563,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(maxBreakdown.get(POST_COLLECTION), greaterThan(0L));
             assertThat(maxBreakdown.get(BUILD_AGGREGATION), greaterThan(0L));
             assertThat(maxBreakdown.get(REDUCE), equalTo(0L));
-            assertThat(maxAggResult.getDebugInfo(), equalTo(Map.of()));
+            assertMap(maxAggResult.getDebugInfo(), matchesMap().entry(BUILT_BUCKETS, greaterThan(0)));
             assertThat(maxAggResult.getProfiledChildren().size(), equalTo(0));
         }
     }
@@ -623,30 +635,36 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(breakdown.get(COLLECT), equalTo(0L));
             assertThat(breakdown.get(BUILD_AGGREGATION).longValue(), greaterThan(0L));
             assertThat(breakdown.get(REDUCE), equalTo(0L));
-            Map<String, Object> debug = histoAggResult.getDebugInfo();
-            assertThat(debug, notNullValue());
-            assertThat(debug.keySet(), equalTo(Set.of("delegate", "delegate_debug")));
-            assertThat(debug.get("delegate"), equalTo("RangeAggregator.FromFilters"));
-            Map<?, ?> delegate = (Map<?, ?>) debug.get("delegate_debug");
-            assertThat(delegate.keySet(), equalTo(Set.of("average_docs_per_range", "ranges", "delegate", "delegate_debug")));
-            assertThat(
-                ((Number) delegate.get("average_docs_per_range")).doubleValue(),
-                equalTo(RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
+            assertMap(
+                histoAggResult.getDebugInfo(),
+                matchesMap().entry(BUILT_BUCKETS, greaterThan(0))
+                    .entry("delegate", "RangeAggregator.FromFilters")
+                    .entry(
+                        "delegate_debug",
+                        matchesMap().entry("average_docs_per_range", equalTo(RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2))
+                            .entry("ranges", 1)
+                            .entry("delegate", "FiltersAggregator.FilterByFilter")
+                            .entry(
+                                "delegate_debug",
+                                matchesMap().entry("segments_with_deleted_docs", 0)
+                                    .entry("segments_with_doc_count_field", 0)
+                                    .entry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
+                                    .entry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2)
+                                    .entry("estimate_cost_time", greaterThanOrEqualTo(0L)) // ~1,276,734 nanos is normal
+                                    .entry("segments_counted", 0)
+                                    .entry("segments_collected", greaterThan(0))
+                                    .entry(
+                                        "filters",
+                                        matchesList().item(
+                                            matchesMap().entry("scorers_prepared_while_estimating_cost", greaterThan(0))
+                                                .entry("query", "DocValuesFieldExistsQuery [field=date]")
+                                                .entry("specialized_for", "docvalues_field_exists")
+                                                .entry("results_from_metadata", 0)
+                                        )
+                                    )
+                            )
+                    )
             );
-            assertThat(((Number) delegate.get("ranges")).longValue(), equalTo(1L));
-            assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
-            Map<?, ?> delegateDebug = (Map<?, ?>) delegate.get("delegate_debug");
-            assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
-            assertThat(delegateDebug, hasEntry("segments_with_doc_count_field", 0));
-            assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
-            assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
-            assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L));  // ~1,276,734 nanos is normal
-            List<?> filtersDebug = (List<?>) delegateDebug.get("filters");
-            assertThat(filtersDebug, hasSize(1));
-            Map<?, ?> queryDebug = (Map<?, ?>) filtersDebug.get(0);
-            assertThat(queryDebug, hasKey("scorers_prepared_while_estimating_cost"));
-            assertThat((int) queryDebug.get("scorers_prepared_while_estimating_cost"), greaterThan(0));
-            assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
         }
     }
 
@@ -721,6 +739,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
                 assertMap(
                     debug,
                     matchesMap().entry("delegate", "RangeAggregator.NoOverlap")
+                        .entry("built_buckets", 1)
                         .entry("delegate_debug", matchesMap().entry("ranges", 1).entry("average_docs_per_range", 10000.0))
                 );
             }

+ 20 - 8
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

@@ -191,7 +191,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
     public void collectDebugInfo(BiConsumer<String, Object> add) {
         super.collectDebugInfo(add);
         add.accept("collection_strategy", collectionStrategy.describe());
-        collectionStrategy.collectDebugInfo(add);
+        add.accept("total_buckets", collectionStrategy.totalBuckets());
         add.accept("result_strategy", resultStrategy.describe());
         add.accept("segments_with_single_valued_ords", segmentsWithSingleValuedOrds);
         add.accept("segments_with_multi_valued_ords", segmentsWithMultiValuedOrds);
@@ -258,6 +258,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
 
         private LongUnaryOperator mapping;
         private LongArray segmentDocCounts;
+        protected int segmentsWithoutValues = 0;
 
         LowCardinality(
             String name,
@@ -303,10 +304,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
                 mapSegmentCountsToGlobalCounts(mapping);
             }
             final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx);
+            mapping = valuesSource.globalOrdinalsMapping(ctx);
+            if (segmentOrds.getValueCount() == 0) {
+                segmentsWithoutValues++;
+                return LeafBucketCollector.NO_OP_COLLECTOR;
+            }
             segmentDocCounts = bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
             assert sub.isNoop();
             final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
-            mapping = valuesSource.globalOrdinalsMapping(ctx);
             // Dense mode doesn't support include/exclude so we don't have to check it here.
             if (singleValues != null) {
                 segmentsWithSingleValuedOrds++;
@@ -347,6 +352,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             }
         }
 
+        @Override
+        public void collectDebugInfo(BiConsumer<String, Object> add) {
+            super.collectDebugInfo(add);
+            add.accept("segments_without_values", segmentsWithoutValues);
+        }
+
         @Override
         protected void doClose() {
             Releasables.close(resultStrategy, segmentDocCounts, collectionStrategy);
@@ -383,10 +394,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
          */
         abstract String describe();
         /**
-         * Collect debug information to add to the profiling results. This will
-         * only be called if the aggregation is being profiled.
+         * The total number of buckets collected by this strategy.
          */
-        abstract void collectDebugInfo(BiConsumer<String, Object> add);
+        abstract long totalBuckets();
         /**
          * Called when the global ordinals are ready.
          */
@@ -431,7 +441,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         }
 
         @Override
-        void collectDebugInfo(BiConsumer<String, Object> add) {}
+        long totalBuckets() {
+            return valueCount;
+        }
 
         @Override
         void globalOrdsReady(SortedSetDocValues globalOrds) {
@@ -487,8 +499,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         }
 
         @Override
-        void collectDebugInfo(BiConsumer<String, Object> add) {
-            add.accept("total_buckets", bucketOrds.size());
+        long totalBuckets() {
+            return bucketOrds.size();
         }
 
         @Override

+ 2 - 1
server/src/main/java/org/elasticsearch/search/profile/aggregation/AggregationProfileBreakdown.java

@@ -29,7 +29,8 @@ public class AggregationProfileBreakdown extends AbstractProfileBreakdown<Aggreg
      * Add extra debugging information about the aggregation.
      */
     public void addDebugInfo(String key, Object value) {
-        extra.put(key, value);
+        Object old = extra.put(key, value);
+        assert old == null : "debug info duplicate key [" + key + "] was [" + old + "] is [" + value + "]";
     }
 
     @Override

+ 5 - 2
server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java

@@ -69,13 +69,16 @@ public class ProfilingAggregator extends Aggregator {
     @Override
     public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
         Timer timer = profileBreakdown.getTimer(AggregationTimingType.BUILD_AGGREGATION);
+        InternalAggregation[] result;
         timer.start();
         try {
-            return delegate.buildAggregations(owningBucketOrds);
+            result = delegate.buildAggregations(owningBucketOrds);
         } finally {
             timer.stop();
-            delegate.collectDebugInfo(profileBreakdown::addDebugInfo);
         }
+        profileBreakdown.addDebugInfo("built_buckets", result.length);
+        delegate.collectDebugInfo(profileBreakdown::addDebugInfo);
+        return result;
     }
 
     @Override

+ 83 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java

@@ -7,6 +7,8 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import io.github.nik9000.mapmatcher.MapMatcher;
+
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -124,6 +126,8 @@ import java.util.function.BiFunction;
 import java.util.function.Consumer;
 import java.util.function.Function;
 
+import static io.github.nik9000.mapmatcher.MapMatcher.assertMap;
+import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap;
 import static java.util.Collections.singleton;
 import static java.util.stream.Collectors.toList;
 import static org.elasticsearch.index.mapper.SeqNoFieldMapper.PRIMARY_TERM_NAME;
@@ -132,8 +136,10 @@ import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.b
 import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasEntry;
 import static org.hamcrest.Matchers.hasKey;
+import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.not;
 
@@ -1424,6 +1430,83 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         ));
     }
 
+    public void topLevelProfileTestCase(
+        int count,
+        int extra,
+        IncludeExclude includeExclude,
+        Class<? extends Aggregator> expectedImpl,
+        Function<MapMatcher, MapMatcher> extraMatcher
+    ) throws IOException {
+        randomizeAggregatorImpl = false;
+        KeywordFieldType strFt = new KeywordFieldType("str", false, true, Collections.emptyMap());
+        AggregationBuilder builder = new TermsAggregationBuilder("str").field("str").includeExclude(includeExclude);
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
+            for (int i = 0; i < count; i++) {
+                iw.addDocument(List.of(new SortedDocValuesField("str", new BytesRef(Integer.toString(i)))));
+            }
+            for (int i = 0; i < extra; i++) {
+                iw.addDocument(List.of());
+            }
+        };
+        debugTestCase(
+            builder,
+            new MatchAllDocsQuery(),
+            buildIndex,
+            (StringTerms result, Class<? extends Aggregator> impl, Map<String, Map<String, Object>> debug) -> {
+                assertThat(impl, equalTo(expectedImpl));
+                assertThat(result.getBuckets(), hasSize(10));
+                assertMap(
+                    debug,
+                    matchesMap().entry(
+                        "str",
+                        extraMatcher.apply(
+                            matchesMap()
+                                .entry("result_strategy", "terms")
+                                .entry("total_buckets", (long) count)
+                                .entry("segments_with_single_valued_ords", greaterThan(0))
+                                .entry("segments_with_multi_valued_ords", 0)
+                        )
+                    )
+                );
+            },
+            strFt
+        );
+    }
+
+    public void testDenseProfile() throws IOException {
+        topLevelProfileTestCase(
+            between(3000, 4000),
+            0,
+            null,
+            GlobalOrdinalsStringTermsAggregator.class,
+            m -> m.entry("has_filter", false).entry("collection_strategy", "dense")
+        );
+    }
+
+    public void testRemapProfile() throws IOException {
+        topLevelProfileTestCase(
+            between(3000, 4000),
+            0,
+            new IncludeExclude(null, "missing"),
+            GlobalOrdinalsStringTermsAggregator.class,
+            m -> m.entry("has_filter", true).entry("collection_strategy", "remap using single bucket ords")
+        );
+    }
+
+    public void testLowCardinalityProfile() throws IOException {
+        int count = between(1000, 2000);
+        int extra = count * between(3, 5);
+        topLevelProfileTestCase(
+            count,
+            extra,
+            null,
+            GlobalOrdinalsStringTermsAggregator.LowCardinality.class,
+            m -> m.entry("has_filter", false)
+                .entry("collection_strategy", "dense")
+                .entry("segments_without_values", greaterThanOrEqualTo(0))
+        );
+    }
+
     public void testNumberToStringValueScript() throws IOException {
         MappedFieldType fieldType
             = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.INTEGER);

+ 7 - 5
test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java

@@ -41,14 +41,11 @@ import org.apache.lucene.util.NumericUtils;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.CheckedBiConsumer;
-import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.common.TriConsumer;
 import org.elasticsearch.common.TriFunction;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.core.Releasable;
-import org.elasticsearch.core.Releasables;
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
 import org.elasticsearch.common.network.NetworkAddress;
 import org.elasticsearch.common.settings.Settings;
@@ -56,6 +53,9 @@ import org.elasticsearch.common.util.MockBigArrays;
 import org.elasticsearch.common.util.MockPageCacheRecycler;
 import org.elasticsearch.common.xcontent.ContextParser;
 import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
@@ -602,7 +602,6 @@ public abstract class AggregatorTestCase extends ESTestCase {
         MappedFieldType... fieldTypes
     ) throws IOException {
         // Don't use searchAndReduce because we only want a single aggregator.
-        IndexReaderContext ctx = searcher.getTopReaderContext();
         CircuitBreakerService breakerService = new NoneCircuitBreakerService();
         AggregationContext context = createAggregationContext(
             searcher,
@@ -637,7 +636,10 @@ public abstract class AggregatorTestCase extends ESTestCase {
 
     private void collectDebugInfo(String prefix, Aggregator aggregator, Map<String, Map<String, Object>> allDebug) {
         Map<String, Object> debug = new HashMap<>();
-        aggregator.collectDebugInfo(debug::put);
+        aggregator.collectDebugInfo((key, value) -> {
+            Object old = debug.put(key, value);
+            assertNull("debug info duplicate key [" + key + "] was [" + old + "] is [" + value + "]", old);
+        });
         allDebug.put(prefix + aggregator.name(), debug);
         for (Aggregator sub : aggregator.subAggregators()) {
             collectDebugInfo(aggregator.name() + ".", sub, allDebug);