
Modest memory savings in date_histogram>terms (#68592)

This saves 16 bytes of memory per bucket for some aggregations.
Specifically, it kicks in when there is a parent bucket, we have a good
estimate of the upper bound on its cardinality, we have a good estimate
of the per-bucket cardinality of this aggregation, and both of those
upper bounds together fit into a single `long`.
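
The trick is to pack the owning bucket ordinal into the high bits of a
single `long` and the value into the low bits, so the backing hash only
has to key on one `long` (a `LongHash`) instead of two (a `LongLongHash`).
Here's a standalone sketch of that packing; it mirrors the `FromManySmall`
implementation in the diff below but is illustrative, not the actual code:

```
// Illustrative sketch of the bit packing, not code from this commit.
// Assumes owning bucket ords stay below the cardinality upper bound and
// values are non-negative and fit in the remaining low bits.
public class PackedOrdsSketch {
    public static void main(String[] args) {
        long cardinalityUpperBound = 21;  // upper bound on owning bucket ords
        int shift = Long.numberOfLeadingZeros(cardinalityUpperBound); // low bits left for values
        long ownerMask = -1L << shift;    // high bits hold the owning bucket ord

        long owningBucketOrd = 13;        // < 21, fits in the high bits
        long value = 4242;                // fits in the low bits

        long encoded = (owningBucketOrd << shift) | value; // pack both into one long
        long decodedOwner = encoded >>> shift;             // recover the owning bucket ord
        long decodedValue = encoded & ~ownerMask;          // recover the value

        System.out.println(decodedOwner == owningBucketOrd && decodedValue == value); // prints true
    }
}
```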

That sounds unlikely, but there is a fairly common case where we have it:
a `date_histogram` followed by a `terms` aggregation powered by global
ordinals; the shape is sketched right after this list. This is common
enough that we already had at least two rally operations for it:
* `date-histo-string-terms-via-global-ords`
* `filtered-date-histo-string-terms-via-global-ords`
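
For concreteness, that's the shape exercised by the `testAsSubAgg` test
added in this commit: a `terms` aggregation on a keyword field (collected
via global ordinals) under a `date_histogram`. Roughly:

```
// The aggregation shape this change targets, built the way the new
// testAsSubAgg test below builds it; "d" is a date field and "k" is a
// keyword field in that test.
AggregationBuilder builder = new DateHistogramAggregationBuilder("dh").field("d")
    .calendarInterval(DateHistogramInterval.YEAR)
    .subAggregation(new TermsAggregationBuilder("k").field("k"));
```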

Running those rally tracks shows that the space savings yield a small
but statistically significant performance bump. The 90th percentile
service time drops by about 4% in the unfiltered case and 1% in the
filtered case. That's not great, but it's good to know saving 16 bytes
doesn't slow us down.

```
|                        Metric |                Task | Baseline | Contender |     Diff | Unit |
|       50th percentile latency |          date-histo | 3185.77 | 3028.65 | -157.118 | ms |
|       90th percentile latency |          date-histo | 3237.07 | 3101.32 | -135.752 | ms |
|      100th percentile latency |          date-histo | 3270.53 |  3178.7 | -91.8319 | ms |
|  50th percentile service time |          date-histo | 3181.55 | 3024.32 | -157.238 | ms |
|  90th percentile service time |          date-histo | 3232.91 | 3097.67 | -135.238 | ms |
| 100th percentile service time |          date-histo | 3266.63 | 3175.08 | -91.5494 | ms |
|       50th percentile latency | filtered-date-histo | 1349.22 | 1331.94 | -17.2717 | ms |
|       90th percentile latency | filtered-date-histo | 1402.71 |  1383.7 | -19.0131 | ms |
|      100th percentile latency | filtered-date-histo | 1412.41 |  1397.7 | -14.7139 | ms |
|  50th percentile service time | filtered-date-histo | 1345.18 |  1326.2 | -18.9806 | ms |
|  90th percentile service time | filtered-date-histo | 1397.24 | 1378.14 | -19.1031 | ms |
| 100th percentile service time | filtered-date-histo | 1406.69 | 1391.63 | -15.0529 | ms |
```

The microbenchmarks for `LongKeyedBucketOrds`, the interface we're
targeting, show a performance boost of about 13% on the method in the
hot path (`multiBucketMany` at 10.038 ns/op versus `multiBucketManySmall`
at 8.738 ns/op below). This is obviously not the entire hot path, given
that the 13% savings on the method translated to a 4% savings over the
whole agg; that ratio suggests the method accounts for roughly a third
of the total time. But it's something.

```
Benchmark                                            Mode  Cnt   Score   Error  Units
multiBucketMany                                      avgt    5  10.038 ± 0.009  ns/op
multiBucketManySmall                                 avgt    5   8.738 ± 0.029  ns/op
singleBucketIntoMulti                                avgt    5   7.701 ± 0.073  ns/op
singleBucketIntoSingleImmutableBimorphicInvocation   avgt    5   6.160 ± 0.029  ns/op
singleBucketIntoSingleImmutableMonmorphicInvocation  avgt    5   6.571 ± 0.043  ns/op
singleBucketIntoSingleMutableBimorphicInvocation     avgt    5   7.714 ± 0.010  ns/op
singleBucketIntoSingleMutableMonmorphicInvocation    avgt    5   7.459 ± 0.017  ns/op
```

While I was touching the JMH benchmarks for `LongKeyedBucketOrds` I took
the opportunity to try and make the runs that collect from a single
bucket more comparable to the ones that collect from many buckets. It
only seemed fair.
Nik Everett committed 4 years ago · commit 9545dafdd5

benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/bucket/terms/LongKeyedBucketOrdsBenchmark.java (+72, −15)

@@ -41,14 +41,18 @@ public class LongKeyedBucketOrdsBenchmark {
     /**
      * The number of distinct values to add to the buckets.
      */
-    private static final long DISTINCT_VALUES = 10;
+    private static final long DISTINCT_VALUES = 210;
     /**
      * The number of buckets to create in the {@link #multiBucket} case.
      * <p>
-     * If this is not relatively prime to {@link #DISTINCT_VALUES} then the
-     * values won't be scattered evenly across the buckets.
+     * If this is not relatively prime to {@link #DISTINCT_VALUES_IN_BUCKETS}
+     * then the values won't be scattered evenly across the buckets.
      */
     private static final long DISTINCT_BUCKETS = 21;
+    /**
+     * Number of distinct values to add to values within buckets.
+     */
+    private static final long DISTINCT_VALUES_IN_BUCKETS = 10;
 
     private final PageCacheRecycler recycler = new PageCacheRecycler(Settings.EMPTY);
     private final BigArrays bigArrays = new BigArrays(recycler, null, "REQUEST");
@@ -63,6 +67,7 @@ public class LongKeyedBucketOrdsBenchmark {
     public void forceLoadClasses(Blackhole bh) {
         bh.consume(LongKeyedBucketOrds.FromSingle.class);
         bh.consume(LongKeyedBucketOrds.FromMany.class);
+        bh.consume(LongKeyedBucketOrds.FromManySmall.class);
     }
 
     /**
@@ -75,6 +80,9 @@ public class LongKeyedBucketOrdsBenchmark {
             for (long i = 0; i < LIMIT; i++) {
                 ords.add(0, i % DISTINCT_VALUES);
             }
+            if (ords.size() != DISTINCT_VALUES) {
+                throw new IllegalArgumentException("Expected [" + DISTINCT_VALUES + "] but found [" + ords.size() + "]");
+            }
             bh.consume(ords);
         }
     }
@@ -83,11 +91,14 @@ public class LongKeyedBucketOrdsBenchmark {
      * Emulates the way that most aggregations use {@link LongKeyedBucketOrds}.
      */
     @Benchmark
-    public void singleBucketIntoSingleImmutableBimorphicInvocation(Blackhole bh) {
+    public void singleBucketIntoSingleImmutableMegamorphicInvocation(Blackhole bh) {
         try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE)) {
             for (long i = 0; i < LIMIT; i++) {
                 ords.add(0, i % DISTINCT_VALUES);
             }
+            if (ords.size() != DISTINCT_VALUES) {
+                throw new IllegalArgumentException("Expected [" + DISTINCT_VALUES + "] but found [" + ords.size() + "]");
+            }
             bh.consume(ords);
         }
     }
@@ -106,6 +117,10 @@ public class LongKeyedBucketOrdsBenchmark {
             }
             ords.add(0, i % DISTINCT_VALUES);
         }
+        if (ords.size() != DISTINCT_VALUES) {
+            ords.close();
+            throw new IllegalArgumentException("Expected [" + DISTINCT_VALUES + "] but found [" + ords.size() + "]");
+        }
         bh.consume(ords);
         ords.close();
     }
@@ -116,7 +131,7 @@ public class LongKeyedBucketOrdsBenchmark {
      * {@link #singleBucketIntoSingleMutableMonmorphicInvocation monomorphic invocation}.
      */
     @Benchmark
-    public void singleBucketIntoSingleMutableBimorphicInvocation(Blackhole bh) {
+    public void singleBucketIntoSingleMutableMegamorphicInvocation(Blackhole bh) {
         LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
         for (long i = 0; i < LIMIT; i++) {
             if (i % 100_000 == 0) {
@@ -125,7 +140,9 @@ public class LongKeyedBucketOrdsBenchmark {
                 ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
             }
             ords.add(0, i % DISTINCT_VALUES);
-
+        }
+        if (ords.size() != DISTINCT_VALUES) {
+            throw new IllegalArgumentException("Expected [" + DISTINCT_VALUES + "] but found [" + ords.size() + "]");
         }
         bh.consume(ords);
         ords.close();
@@ -134,28 +151,68 @@ public class LongKeyedBucketOrdsBenchmark {
     /**
      * Emulates an aggregation that collects from a single bucket "by accident".
      * This can happen if an aggregation is under, say, a {@code terms}
-     * aggregation and there is only a single value for that term in the index.
+     * aggregation and there is only a single value for that term in the index
+     * but we can't tell that up front.
      */
     @Benchmark
     public void singleBucketIntoMulti(Blackhole bh) {
         try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
-            for (long i = 0; i < LIMIT; i++) {
-                ords.add(0, i % DISTINCT_VALUES);
-            }
+            singleBucketIntoMultiSmall(ords);
             bh.consume(ords);
         }
     }
 
+    /**
+     * Emulates an aggregation that collects from a single bucket "by accident"
+     * and gets a "small" bucket ords. This can happen to a {@code terms} inside
+     * of another {@code terms} when the "inner" terms only even has a single
+     * bucket.
+     */
+    @Benchmark
+    public void singleBucketIntoMultiSmall(Blackhole bh) {
+        try (LongKeyedBucketOrds ords = new LongKeyedBucketOrds.FromManySmall(bigArrays, 60)) {
+            singleBucketIntoMultiSmall(ords);
+            bh.consume(ords);
+        }
+    }
+
+    private void singleBucketIntoMultiSmall(LongKeyedBucketOrds ords) {
+        for (long i = 0; i < LIMIT; i++) {
+            ords.add(0, i % DISTINCT_VALUES);
+        }
+        if (ords.size() != DISTINCT_VALUES) {
+            throw new IllegalArgumentException("Expected [" + DISTINCT_VALUES + "] but found [" + ords.size() + "]");
+        }
+    }
+
+    /**
+     * Emulates an aggregation that collects from many buckets with known
+     * bounds on the values.
+     */
+    @Benchmark
+    public void multiBucketManySmall(Blackhole bh) {
+        try (LongKeyedBucketOrds ords = new LongKeyedBucketOrds.FromManySmall(bigArrays, 5)) {
+            multiBucket(bh, ords);
+        }
+    }
+
     /**
      * Emulates an aggregation that collects from many buckets.
      */
     @Benchmark
-    public void multiBucket(Blackhole bh) {
+    public void multiBucketMany(Blackhole bh) {
         try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
-            for (long i = 0; i < LIMIT; i++) {
-                ords.add(i % DISTINCT_BUCKETS, i % DISTINCT_VALUES);
-            }
-            bh.consume(ords);
+            multiBucket(bh, ords);
         }
     }
+
+    private void multiBucket(Blackhole bh, LongKeyedBucketOrds ords) {
+        for (long i = 0; i < LIMIT; i++) {
+            ords.add(i % DISTINCT_BUCKETS, i % DISTINCT_VALUES_IN_BUCKETS);
+        }
+        if (ords.size() != DISTINCT_VALUES) {
+            throw new IllegalArgumentException("Expected [" + DISTINCT_VALUES + "] but found [" + ords.size() + "]");
+        }
+        bh.consume(ords);
+    }
 }

server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java (+1, −1)

@@ -219,7 +219,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
     }
 
     private void assertRemapTermsDebugInfo(ProfileResult termsAggResult) {
-        assertThat(termsAggResult.getDebugInfo(), hasEntry(COLLECTION_STRAT, "remap"));
+        assertThat(termsAggResult.getDebugInfo(), hasEntry(COLLECTION_STRAT, "remap using many bucket ords"));
         assertThat(termsAggResult.getDebugInfo(), hasEntry(RESULT_STRAT, "terms"));
         assertThat(termsAggResult.getDebugInfo(), hasEntry(HAS_FILTER, false));
         // TODO we only index single valued docs but the ordinals ends up with multi valued sometimes

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java (+2, −2)

@@ -464,12 +464,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         private final LongKeyedBucketOrds bucketOrds;
 
         private RemapGlobalOrds(CardinalityUpperBound cardinality) {
-            bucketOrds = LongKeyedBucketOrds.build(bigArrays(), cardinality);
+            bucketOrds = LongKeyedBucketOrds.buildForValueRange(bigArrays(), cardinality, 0, valueCount - 1);
         }
 
         @Override
         String describe() {
-            return "remap";
+            return "remap using " + bucketOrds.decribe();
         }
 
         @Override

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java (+177, −3)

@@ -14,17 +14,41 @@ import org.elasticsearch.common.util.LongHash;
 import org.elasticsearch.common.util.LongLongHash;
 import org.elasticsearch.search.aggregations.CardinalityUpperBound;
 
+import java.util.Locale;
+
 /**
- * Maps long bucket keys to bucket ordinals.
+ * Maps owning bucket ordinals and long bucket keys to bucket ordinals.
  */
 public abstract class LongKeyedBucketOrds implements Releasable {
     /**
-     * Build a {@link LongKeyedBucketOrds}.
+     * Build a {@link LongKeyedBucketOrds} whose values have unknown bounds.
      */
     public static LongKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
         return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
     }
 
+    /**
+     * Build a {@link LongKeyedBucketOrds} whose values have known bounds.
+     */
+    public static LongKeyedBucketOrds buildForValueRange(BigArrays bigArrays, CardinalityUpperBound cardinality, long min, long max) {
+        return cardinality.map((int cardinalityUpperBound) -> {
+            if (cardinalityUpperBound < 2) {
+                return new FromSingle(bigArrays);
+            }
+            if (min < 0 || cardinalityUpperBound == Integer.MAX_VALUE) {
+                // cardinalityUpperBound tops out at Integer.MAX_VALUE. If we see MAX_VALUE the true cardinality could be anything above it.
+                return new FromMany(bigArrays);
+            }
+            int owningBucketOrdShift = Long.numberOfLeadingZeros(cardinalityUpperBound);
+            int maxBits = 64 - Long.numberOfLeadingZeros(max);
+            if (maxBits < owningBucketOrdShift) {
+                // There is enough space in a long to contain both the owning bucket and the entire range of values
+                return new FromManySmall(bigArrays, owningBucketOrdShift);
+            }
+            return new FromMany(bigArrays);
+        });
+    }
+
     private LongKeyedBucketOrds() {}
 
     /**
@@ -49,7 +73,7 @@ public abstract class LongKeyedBucketOrds implements Releasable {
    public abstract long find(long owningBucketOrd, long value);
 
     /**
-     * Returns the value currently associated with the bucket ordinal
+     * Returns the value currently associated with the bucket ordinal.
      */
     public abstract long get(long ordinal);
 
@@ -63,6 +87,11 @@ public abstract class LongKeyedBucketOrds implements Releasable {
      */
     public abstract long maxOwningBucketOrd();
 
+    /**
+     * Description used in profile results.
+     */
+    public abstract String decribe();
+
     /**
      * Build an iterator for buckets inside {@code owningBucketOrd} in order
      * of increasing ord.
@@ -148,6 +177,11 @@ public abstract class LongKeyedBucketOrds implements Releasable {
             return 0;
         }
 
+        @Override
+        public String decribe() {
+            return "single bucket ords";
+        }
+
         @Override
         public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
             assert owningBucketOrd == 0;
@@ -236,6 +270,11 @@ public abstract class LongKeyedBucketOrds implements Releasable {
             return max;
         }
 
+        @Override
+        public String decribe() {
+            return "many bucket ords";
+        }
+
         @Override
         public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
             // TODO it'd be faster to iterate many ords at once rather than one at a time
@@ -274,4 +313,139 @@ public abstract class LongKeyedBucketOrds implements Releasable {
             ords.close();
         }
     }
+
+    /**
+     * Implementation that packs the {@code owningBucketOrd} into the top
+     * bits of a {@code long} and uses the bottom bits for the value.
+     */
+    public static class FromManySmall extends LongKeyedBucketOrds {
+        private final LongHash ords;
+        private final int owningBucketOrdShift;
+        private final long owningBucketOrdMask;
+
+        public FromManySmall(BigArrays bigArrays, int owningBucketOrdShift) {
+            ords = new LongHash(2, bigArrays);
+            this.owningBucketOrdShift = owningBucketOrdShift;
+            this.owningBucketOrdMask = -1L << owningBucketOrdShift;
+        }
+
+        private long encode(long owningBucketOrd, long value) {
+            // This is in the critical path for collecting some aggs. Be careful of performance.
+            return (owningBucketOrd << owningBucketOrdShift) | value;
+        }
+
+        @Override
+        public long add(long owningBucketOrd, long value) {
+            // This is in the critical path for collecting lots of aggs. Be careful of performance.
+            long enc = encode(owningBucketOrd, value);
+            if (owningBucketOrd != (enc >>> owningBucketOrdShift) || (enc & ~owningBucketOrdMask) != value) {
+                throw new IllegalArgumentException(
+                    String.format(
+                        Locale.ROOT,
+                        "[%s] and [%s] must fit in [%s..%s] bits",
+                        owningBucketOrd,
+                        value,
+                        64 - owningBucketOrdShift,
+                        owningBucketOrdShift
+                    )
+                );
+            }
+            return ords.add(enc);
+        }
+
+        @Override
+        public long find(long owningBucketOrd, long value) {
+            if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) {
+                return -1;
+            }
+            if ((value & owningBucketOrdMask) != 0) {
+                return -1;
+            }
+            return ords.find(encode(owningBucketOrd, value));
+        }
+
+        @Override
+        public long get(long ordinal) {
+            return ords.get(ordinal) & ~owningBucketOrdMask;
+        }
+
+        @Override
+        public long bucketsInOrd(long owningBucketOrd) {
+            // TODO it'd be faster to count the number of buckets in a list of these ords rather than one at a time
+            if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) {
+                return 0;
+            }
+            long count = 0;
+            long enc = owningBucketOrd << owningBucketOrdShift;
+            for (long i = 0; i < ords.size(); i++) {
+                if ((ords.get(i) & owningBucketOrdMask) == enc) {
+                    count++;
+                }
+            }
+            return count;
+        }
+
+        @Override
+        public long size() {
+            return ords.size();
+        }
+
+        @Override
+        public long maxOwningBucketOrd() {
+            // TODO this is fairly expensive to compute. Can we avoid needing it?
+            long max = -1;
+            for (long i = 0; i < ords.size(); i++) {
+                max = Math.max(max, (ords.get(i) & owningBucketOrdMask) >>> owningBucketOrdShift);
+            }
+            return max;
+        }
+
+        @Override
+        public String decribe() {
+            return "many bucket ords packed using [" + (64 - owningBucketOrdShift) + "/" + owningBucketOrdShift + "] bits";
+        }
+
+        @Override
+        public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
+            // TODO it'd be faster to iterate many ords at once rather than one at a time
+            if (Long.numberOfLeadingZeros(owningBucketOrd) < owningBucketOrdShift) {
+                return BucketOrdsEnum.EMPTY;
+            }
+            final long encodedOwningBucketOrd = owningBucketOrd << owningBucketOrdShift;
+            return new BucketOrdsEnum() {
+                private long ord = -1;
+                private long value;
+
+                @Override
+                public boolean next() {
+                    while (true) {
+                        ord++;
+                        if (ord >= ords.size()) {
+                            return false;
+                        }
+                        long encoded = ords.get(ord);
+                        if ((encoded & owningBucketOrdMask) == encodedOwningBucketOrd) {
+                            value = encoded & ~owningBucketOrdMask;
+                            return true;
+                        }
+                    }
+                }
+
+                @Override
+                public long value() {
+                    return value;
+                }
+
+                @Override
+                public long ord() {
+                    return ord;
+                }
+            };
+        }
+
+        @Override
+        public void close() {
+            ords.close();
+        }
+    }
 }
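
For reference, here's a hedged usage sketch of the new factory, following
the contract the tests below exercise: `add` returns a fresh bucket
ordinal for a new key and `-1 - ord` for a key that already exists. The
`bigArrays` instance is assumed (e.g. `BigArrays.NON_RECYCLING_INSTANCE`
outside of tests):

```
// Sketch, not code from this commit. With at most 21 owning buckets and
// values in [0, 209], both fit together in one long, so buildForValueRange
// picks FromManySmall.
CardinalityUpperBound cardinality = CardinalityUpperBound.ONE.multiply(21);
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.buildForValueRange(bigArrays, cardinality, 0, 209)) {
    long ord = ords.add(3, 7);                // new key: returns its bucket ordinal
    assert ords.find(3, 7) == ord;            // find returns the same ordinal
    assert ords.add(3, 7) == -1 - ord;        // existing key: returns -1 - ordinal
    assert ords.decribe().startsWith("many bucket ords packed"); // FromManySmall was chosen
}
```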

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrdsTests.java (+67, −56)

@@ -20,7 +20,6 @@ import java.util.Objects;
 import java.util.Set;
 
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 
 public class LongKeyedBucketOrdsTests extends ESTestCase {
     private final MockBigArrays bigArrays = new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService());
@@ -96,72 +95,84 @@ public class LongKeyedBucketOrdsTests extends ESTestCase {
 
     public void testCollectsFromManyBuckets() {
         try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
-            // Test a few explicit values
-            assertThat(ords.add(0, 0), equalTo(0L));
-            assertThat(ords.add(1, 0), equalTo(1L));
-            assertThat(ords.add(0, 0), equalTo(-1L));
-            assertThat(ords.add(1, 0), equalTo(-2L));
-            assertThat(ords.size(), equalTo(2L));
-            assertThat(ords.find(0, 0), equalTo(0L));
-            assertThat(ords.find(1, 0), equalTo(1L));
+            assertCollectsFromManyBuckets(ords, scaledRandomIntBetween(1, 10000), Long.MIN_VALUE, Long.MAX_VALUE);
+        }
+    }
 
-            // And some random values
-            Set<OwningBucketOrdAndValue> seen = new HashSet<>();
-            seen.add(new OwningBucketOrdAndValue(0, 0));
-            seen.add(new OwningBucketOrdAndValue(1, 0));
-            OwningBucketOrdAndValue[] values = new OwningBucketOrdAndValue[scaledRandomIntBetween(1, 10000)];
-            long maxAllowedOwningBucketOrd = scaledRandomIntBetween(0, values.length);
-            long maxOwningBucketOrd = Long.MIN_VALUE;
-            for (int i = 0; i < values.length; i++) {
-                values[i] = randomValueOtherThanMany(seen::contains, () ->
-                        new OwningBucketOrdAndValue(randomLongBetween(0, maxAllowedOwningBucketOrd), randomLong()));
-                seen.add(values[i]);
-                maxOwningBucketOrd = Math.max(maxOwningBucketOrd, values[i].owningBucketOrd);
-            }
-            for (int i = 0; i < values.length; i++) {
-                assertThat(ords.find(values[i].owningBucketOrd, values[i].value), equalTo(-1L));
-                assertThat(ords.add(values[i].owningBucketOrd, values[i].value), equalTo(i + 2L));
-                assertThat(ords.find(values[i].owningBucketOrd, values[i].value), equalTo(i + 2L));
-                assertThat(ords.size(), equalTo(i + 3L));
-                if (randomBoolean()) {
-                    assertThat(ords.add(0, 0), equalTo(-1L));
-                }
-            }
-            for (int i = 0; i < values.length; i++) {
-                assertThat(ords.add(values[i].owningBucketOrd, values[i].value), equalTo(-1 - (i + 2L)));
+    public void testCollectsFromManyBucketsSmall() {
+        int owningBucketOrds = scaledRandomIntBetween(1, 10000);
+        long maxValue = randomLongBetween(10000 / owningBucketOrds, 1L << (16 * 3));
+        CardinalityUpperBound cardinality = CardinalityUpperBound.ONE.multiply(owningBucketOrds);
+        try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.buildForValueRange(bigArrays, cardinality, 0, maxValue)) {
+            assertCollectsFromManyBuckets(ords, owningBucketOrds, 0, maxValue);
+        }
+    }
+
+    private void assertCollectsFromManyBuckets(LongKeyedBucketOrds ords, int maxAllowedOwningBucketOrd, long minValue, long maxValue) {
+        // Test a few explicit values
+        assertThat(ords.add(0, 0), equalTo(0L));
+        assertThat(ords.add(1, 0), equalTo(1L));
+        assertThat(ords.add(0, 0), equalTo(-1L));
+        assertThat(ords.add(1, 0), equalTo(-2L));
+        assertThat(ords.size(), equalTo(2L));
+        assertThat(ords.find(0, 0), equalTo(0L));
+        assertThat(ords.find(1, 0), equalTo(1L));
+
+        // And some random values
+        Set<OwningBucketOrdAndValue> seen = new HashSet<>();
+        seen.add(new OwningBucketOrdAndValue(0, 0));
+        seen.add(new OwningBucketOrdAndValue(1, 0));
+        OwningBucketOrdAndValue[] values = new OwningBucketOrdAndValue[scaledRandomIntBetween(1, 10000)];
+        long maxOwningBucketOrd = Long.MIN_VALUE;
+        for (int i = 0; i < values.length; i++) {
+            values[i] = randomValueOtherThanMany(seen::contains, () ->
+                    new OwningBucketOrdAndValue(randomLongBetween(0, maxAllowedOwningBucketOrd), randomLongBetween(minValue, maxValue)));
+            seen.add(values[i]);
+            maxOwningBucketOrd = Math.max(maxOwningBucketOrd, values[i].owningBucketOrd);
+        }
+        for (int i = 0; i < values.length; i++) {
+            assertThat(ords.find(values[i].owningBucketOrd, values[i].value), equalTo(-1L));
+            assertThat(ords.add(values[i].owningBucketOrd, values[i].value), equalTo(i + 2L));
+            assertThat(ords.find(values[i].owningBucketOrd, values[i].value), equalTo(i + 2L));
+            assertThat(ords.size(), equalTo(i + 3L));
+            if (randomBoolean()) {
+                assertThat(ords.add(0, 0), equalTo(-1L));
             }
+        }
+        for (int i = 0; i < values.length; i++) {
+            assertThat(ords.add(values[i].owningBucketOrd, values[i].value), equalTo(-1 - (i + 2L)));
+        }
 
-            // And the explicit values are still ok
-            assertThat(ords.add(0, 0), equalTo(-1L));
-            assertThat(ords.add(1, 0), equalTo(-2L));
+        // And the explicit values are still ok
+        assertThat(ords.add(0, 0), equalTo(-1L));
+        assertThat(ords.add(1, 0), equalTo(-2L));
 
 
-            for (long owningBucketOrd = 0; owningBucketOrd <= maxAllowedOwningBucketOrd; owningBucketOrd++) {
-                long expectedCount = 0;
-                LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = ords.ordsEnum(owningBucketOrd);
-                if (owningBucketOrd <= 1) {
+        for (long owningBucketOrd = 0; owningBucketOrd <= maxAllowedOwningBucketOrd; owningBucketOrd++) {
+            long expectedCount = 0;
+            LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = ords.ordsEnum(owningBucketOrd);
+            if (owningBucketOrd <= 1) {
+                expectedCount++;
+                assertTrue(ordsEnum.next());
+                assertThat(ordsEnum.ord(), equalTo(owningBucketOrd));
+                assertThat(ordsEnum.value(), equalTo(0L));
+            }
+            for (int i = 0; i < values.length; i++) {
+                if (values[i].owningBucketOrd == owningBucketOrd) {
                     expectedCount++;
                     assertTrue(ordsEnum.next());
-                    assertThat(ordsEnum.ord(), equalTo(owningBucketOrd));
-                    assertThat(ordsEnum.value(), equalTo(0L));
+                    assertThat(ordsEnum.ord(), equalTo(i + 2L));
+                    assertThat(ordsEnum.value(), equalTo(values[i].value));
                 }
-                for (int i = 0; i < values.length; i++) {
-                    if (values[i].owningBucketOrd == owningBucketOrd) {
-                        expectedCount++;
-                        assertTrue(ordsEnum.next());
-                        assertThat(ordsEnum.ord(), equalTo(i + 2L));
-                        assertThat(ordsEnum.value(), equalTo(values[i].value));
-                    }
-                }
-                assertFalse(ordsEnum.next());
-
-                assertThat(ords.bucketsInOrd(owningBucketOrd), equalTo(expectedCount));
             }
-            assertFalse(ords.ordsEnum(randomLongBetween(maxOwningBucketOrd + 1, Long.MAX_VALUE)).next());
-            assertThat(ords.bucketsInOrd(randomLongBetween(maxOwningBucketOrd + 1, Long.MAX_VALUE)), equalTo(0L));
+            assertFalse(ordsEnum.next());
 
-            assertThat(ords.maxOwningBucketOrd(), greaterThanOrEqualTo(maxOwningBucketOrd));
+            assertThat(ords.bucketsInOrd(owningBucketOrd), equalTo(expectedCount));
         }
+        assertFalse(ords.ordsEnum(randomLongBetween(maxOwningBucketOrd + 1, Long.MAX_VALUE)).next());
+        assertThat(ords.bucketsInOrd(randomLongBetween(maxOwningBucketOrd + 1, Long.MAX_VALUE)), equalTo(0L));
+
+        assertThat(ords.maxOwningBucketOrd(), equalTo(maxOwningBucketOrd));
     }
 
     private class OwningBucketOrdAndValue {

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java (+72, −3)

@@ -12,6 +12,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.InetAddressPoint;
 import org.apache.lucene.document.LatLonDocValuesField;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -28,6 +29,7 @@ import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
+import org.elasticsearch.common.CheckedConsumer;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.common.network.InetAddresses;
@@ -35,10 +37,12 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.MockBigArrays;
 import org.elasticsearch.common.util.MockPageCacheRecycler;
+import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
 import org.elasticsearch.index.mapper.GeoPointFieldMapper;
 import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.IpFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NestedPathFieldMapper;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
@@ -72,6 +76,9 @@ import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuil
 import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
 import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal;
+import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
+import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
 import org.elasticsearch.search.aggregations.bucket.nested.InternalNested;
 import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregatorTests;
@@ -112,6 +119,7 @@ import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.b
 import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasEntry;
 import static org.hamcrest.Matchers.instanceOf;
 
 public class TermsAggregatorTests extends AggregatorTestCase {
@@ -192,7 +200,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
         globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
         assertThat(globalAgg.collectMode, equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
-        assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap"));
+        assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap using single bucket ords"));
 
         aggregationBuilder
             .collectMode(Aggregator.SubAggCollectionMode.DEPTH_FIRST);
@@ -200,7 +208,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
         globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
         assertThat(globalAgg.collectMode, equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
-        assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap"));
+        assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap using single bucket ords"));
 
         aggregationBuilder
             .collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST);
@@ -215,7 +223,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
         assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
         globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
-        assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap"));
+        assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap using single bucket ords"));
 
         indexReader.close();
         directory.close();
@@ -1603,6 +1611,67 @@ public class TermsAggregatorTests extends AggregatorTestCase {
         }
     }
 
+    public void testAsSubAgg() throws IOException {
+        DateFieldType dft = new DateFieldType("d");
+        KeywordFieldType kft = new KeywordFieldType("k", false, true, null);
+        AggregationBuilder builder = new DateHistogramAggregationBuilder("dh").field("d")
+            .calendarInterval(DateHistogramInterval.YEAR)
+            .subAggregation(new TermsAggregationBuilder("k").field("k"));
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
+            iw.addDocument(
+                List.of(
+                    new SortedNumericDocValuesField("d", dft.parse("2020-02-01T00:00:00Z")),
+                    new LongPoint("d", dft.parse("2020-02-01T00:00:00Z")),
+                    new SortedSetDocValuesField("k", new BytesRef("a"))
+                )
+            );
+            iw.addDocument(
+                List.of(
+                    new SortedNumericDocValuesField("d", dft.parse("2020-03-01T00:00:00Z")),
+                    new LongPoint("d", dft.parse("2020-03-01T00:00:00Z")),
+                    new SortedSetDocValuesField("k", new BytesRef("a"))
+                )
+            );
+            iw.addDocument(
+                List.of(
+                    new SortedNumericDocValuesField("d", dft.parse("2021-02-01T00:00:00Z")),
+                    new LongPoint("d", dft.parse("2021-02-01T00:00:00Z")),
+                    new SortedSetDocValuesField("k", new BytesRef("a"))
+                )
+            );
+            iw.addDocument(
+                List.of(
+                    new SortedNumericDocValuesField("d", dft.parse("2021-03-01T00:00:00Z")),
+                    new LongPoint("d", dft.parse("2021-03-01T00:00:00Z")),
+                    new SortedSetDocValuesField("k", new BytesRef("a"))
+                )
+            );
+            iw.addDocument(
+                List.of(
+                    new SortedNumericDocValuesField("d", dft.parse("2020-02-01T00:00:00Z")),
+                    new LongPoint("d", dft.parse("2020-02-01T00:00:00Z")),
+                    new SortedSetDocValuesField("k", new BytesRef("b"))
+                )
+            );
+        };
+        testCase(builder, new MatchAllDocsQuery(), buildIndex, (InternalDateHistogram dh) -> {
+            assertThat(
+                dh.getBuckets().stream().map(InternalDateHistogram.Bucket::getKeyAsString).collect(toList()),
+                equalTo(List.of("2020-01-01T00:00:00.000Z", "2021-01-01T00:00:00.000Z"))
+            );
+            StringTerms terms = dh.getBuckets().get(0).getAggregations().get("k");
+            assertThat(terms.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()), equalTo(List.of("a", "b")));
+            terms = dh.getBuckets().get(1).getAggregations().get("k");
+            assertThat(terms.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()), equalTo(List.of("a")));
+        }, dft, kft);
+        withAggregator(builder, new MatchAllDocsQuery(), buildIndex, (searcher, aggregator) -> {
+            TermsAggregator terms = (TermsAggregator) aggregator.subAggregator("k");
+            Map<String, Object> info = new HashMap<>();
+            terms.collectDebugInfo(info::put);
+            assertThat(info, hasEntry("collection_strategy", "remap using many bucket ords packed using [2/62] bits"));
+        }, dft, kft);
+    }
+
     private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
     private List<Document> generateDocsWithNested(String id, int value, int[] nestedValues) {
         List<Document> documents = new ArrayList<>();