Browse Source

Speed up time_series agg by caching current tsid ordinal, parent bucket ordinal and bucket ordinal (#91784)

This avoids needlessly adding the same parent bucket ordinal or TSIDs to `BytesKeyedBucketOrds`.

Relates to #74660
Martijn van Groningen 2 years ago
parent
commit
da119b0d4d

+ 19 - 0
modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java

@@ -92,8 +92,23 @@ public class TimeSeriesAggregator extends BucketsAggregator {
     protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException {
         return new LeafBucketCollectorBase(sub, null) {
 
+            // Keeping track of these fields helps to reduce time spent attempting to add bucket + tsid combos that already were added.
+            long currentTsidOrd = -1;
+            long currentBucket = -1;
+            long currentBucketOrdinal;
+
             @Override
             public void collect(int doc, long bucket) throws IOException {
+                // Naively comparing bucket against currentBucket and tsid ord against currentTsidOrd can work really well.
+                // TimeSeriesIndexSearcher ensures that docs are emitted in tsid and timestamp order, so if tsid ordinal
+                // changes from what is stored in currentTsidOrd then that ordinal will never occur again. Same applies to
+                // currentBucket if there is no parent aggregation or the immediate parent aggregation creates buckets
+                // based on @timestamp field or dimension fields (fields that make up the tsid).
+                if (currentBucket == bucket && currentTsidOrd == aggCtx.getTsidOrd()) {
+                    collectExistingBucket(sub, doc, currentBucketOrdinal);
+                    return;
+                }
+
                 long bucketOrdinal = bucketOrds.add(bucket, aggCtx.getTsid());
                 if (bucketOrdinal < 0) { // already seen
                     bucketOrdinal = -1 - bucketOrdinal;
@@ -101,6 +116,10 @@ public class TimeSeriesAggregator extends BucketsAggregator {
                 } else {
                     collectBucket(sub, doc, bucketOrdinal);
                 }
+
+                currentBucketOrdinal = bucketOrdinal;
+                currentTsidOrd = aggCtx.getTsidOrd();
+                currentBucket = bucket;
             }
         };
     }