
Prevent `date_histogram` from OOMing (#72081)

This prevents the `date_histogram` from running out of memory allocating
empty buckets when you set the interval to something tiny like `seconds`
and aggregate over a very wide date range. Without this change we'd
allocate memory very quickly and throw an out-of-memory error, taking
down the node. With it we instead throw the standard "too many buckets"
error.

Relates to #71758
Nik Everett 4 years ago
parent
commit
5f281ceedd
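
Conceptually, the change pre-counts the empty buckets it is about to add and reports them to the circuit breaker in fixed-size chunks, so a pathological range is rejected with the standard "too many buckets" error before any bucket objects exist. A minimal, self-contained Java sketch of that pattern (the class and method names are illustrative, and the IntConsumer stands in for ReduceContext#consumeBucketsAndMaybeBreak rather than the real implementation):

import java.util.function.IntConsumer;

class ChunkedBucketAccounting {
    // Report pending buckets to the breaker in chunks rather than one at a time,
    // so pathological ranges fail fast without per-bucket breaker overhead.
    private static final int REPORT_EVERY = 10_000;

    static void countEmptyBuckets(long min, long max, long intervalMillis, IntConsumer breaker) {
        int pending = 0;
        for (long key = min; key <= max; key += intervalMillis) {
            if (++pending >= REPORT_EVERY) {
                breaker.accept(pending); // may throw "too many buckets"
                pending = 0;
            }
        }
        breaker.accept(pending); // report whatever is left over
    }

    public static void main(String[] args) {
        // Stand-in for the bucket consumer with the default search.max_buckets
        // limit of 65,536 that the yaml test below asserts on.
        IntConsumer breaker = new IntConsumer() {
            int total;

            @Override
            public void accept(int value) {
                total += value;
                if (total > 65_536) {
                    throw new IllegalArgumentException(
                        "Trying to create too many buckets. Must be less than or equal to: [65536]");
                }
            }
        };
        try {
            // Three years of one-second buckets trips the breaker after a handful
            // of reports, long before ~94 million Bucket objects could be allocated.
            countEmptyBuckets(0L, 3L * 365 * 24 * 60 * 60 * 1000, 1_000L, breaker);
        } catch (IllegalArgumentException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}
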

+ 32 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/10_histogram.yml

@@ -660,3 +660,35 @@ setup:
               histogram:
                 field: number
                 interval: 5e-10
+
+---
+"Tiny tiny tiny date_range":
+  - skip:
+      version: " - 7.99.99"
+      reason:  fixed in 8.0 and being backported to 7.13.0
+
+  - do:
+      bulk:
+        index: test_1
+        refresh: true
+        body:
+            - '{"index": {}}'
+            - '{"date": "2018-01-01T00:00:00Z"}'
+            - '{"index": {}}'
+            - '{"date": "2019-01-01T00:00:00Z"}'
+            - '{"index": {}}'
+            - '{"date": "2020-01-01T00:00:00Z"}'
+            - '{"index": {}}'
+            - '{"date": "2021-01-01T00:00:00Z"}'
+
+  - do:
+      catch: '/Trying to create too many buckets. Must be less than or equal to: \[65536\]/'
+      search:
+        index: test_1
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date
+                interval: second

+ 52 - 10
server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java

@@ -33,6 +33,7 @@ import java.util.List;
 import java.util.ListIterator;
 import java.util.Map;
 import java.util.Objects;
+import java.util.function.LongConsumer;
 
 /**
  * Implementation of {@link Histogram}.
@@ -289,7 +290,7 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
         for (InternalAggregation aggregation : aggregations) {
             InternalDateHistogram histogram = (InternalDateHistogram) aggregation;
             if (histogram.buckets.isEmpty() == false) {
-                pq.add(new IteratorAndCurrent(histogram.buckets.iterator()));
+                pq.add(new IteratorAndCurrent<Bucket>(histogram.buckets.iterator()));
             }
         }
 
@@ -351,14 +352,50 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
         return createBucket(buckets.get(0).key, docCount, aggs);
     }
 
+    /**
+     * When we pre-count the empty buckets we report them periodically
+     * because you can configure the date_histogram to create an astounding
+     * number of buckets. It'd take a while to count that high only to abort.
+     * So we report every few thousand buckets. It'd be simpler to report
+     * every single bucket we plan to allocate one at a time but that'd cause
+     * needless overhead on the circuit breakers. Counting a few thousand
+     * buckets is plenty fast to fail quickly in pathological cases and
+     * plenty large to keep the overhead minimal.
+     */
+    private static final int REPORT_EMPTY_EVERY = 10_000;
+
     private void addEmptyBuckets(List<Bucket> list, ReduceContext reduceContext) {
-        Bucket lastBucket = null;
-        LongBounds bounds = emptyBucketInfo.bounds;
+        /*
+         * Make sure we have space for the empty buckets we're going to add by
+         * counting all of the empties we plan to add and firing them into
+         * consumeBucketsAndMaybeBreak.
+         */
+        class Counter implements LongConsumer {
+            private int size = list.size();
+
+            @Override
+            public void accept(long key) {
+                size++;
+                if (size >= REPORT_EMPTY_EVERY) {
+                    reduceContext.consumeBucketsAndMaybeBreak(size);
+                    size = 0;
+                }
+            }
+        }
+        Counter counter = new Counter();
+        iterateEmptyBuckets(list, list.listIterator(), counter);
+        reduceContext.consumeBucketsAndMaybeBreak(counter.size);
+
+        InternalAggregations reducedEmptySubAggs = InternalAggregations.reduce(List.of(emptyBucketInfo.subAggregations), reduceContext);
         ListIterator<Bucket> iter = list.listIterator();
+        iterateEmptyBuckets(list, iter, key -> iter.add(new InternalDateHistogram.Bucket(key, 0, keyed, format, reducedEmptySubAggs)));
+    }
+
+    private void iterateEmptyBuckets(List<Bucket> list, ListIterator<Bucket> iter, LongConsumer onBucket) {
+        LongBounds bounds = emptyBucketInfo.bounds;
 
         // first adding all the empty buckets *before* the actual data (based on the extended_bounds.min the user requested)
-        InternalAggregations reducedEmptySubAggs = InternalAggregations.reduce(Collections.singletonList(emptyBucketInfo.subAggregations),
-                reduceContext);
+
         if (bounds != null) {
             Bucket firstBucket = iter.hasNext() ? list.get(iter.nextIndex()) : null;
             if (firstBucket == null) {
@@ -366,7 +403,7 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
                     long key = bounds.getMin() + offset;
                     long max = bounds.getMax() + offset;
                     while (key <= max) {
-                        iter.add(new InternalDateHistogram.Bucket(key, 0, keyed, format, reducedEmptySubAggs));
+                        onBucket.accept(key);
                         key = nextKey(key).longValue();
                     }
                 }
@@ -375,7 +412,7 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
                     long key = bounds.getMin() + offset;
                     if (key < firstBucket.key) {
                         while (key < firstBucket.key) {
-                            iter.add(new InternalDateHistogram.Bucket(key, 0, keyed, format, reducedEmptySubAggs));
+                            onBucket.accept(key);
                             key = nextKey(key).longValue();
                         }
                     }
@@ -383,6 +420,7 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
             }
         }
 
+        Bucket lastBucket = null;
         // now adding the empty buckets within the actual data,
         // e.g. if the data series is [1,2,3,7] there're 3 empty buckets that will be created for 4,5,6
         while (iter.hasNext()) {
@@ -390,7 +428,7 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
             if (lastBucket != null) {
                 long key = nextKey(lastBucket.key).longValue();
                 while (key < nextBucket.key) {
-                    iter.add(new InternalDateHistogram.Bucket(key, 0, keyed, format, reducedEmptySubAggs));
+                    onBucket.accept(key);
                     key = nextKey(key).longValue();
                 }
                 assert key == nextBucket.key : "key: " + key + ", nextBucket.key: " + nextBucket.key;
@@ -403,7 +441,7 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
             long key = nextKey(lastBucket.key).longValue();
             long max = bounds.getMax() + offset;
             while (key <= max) {
-                iter.add(new InternalDateHistogram.Bucket(key, 0, keyed, format, reducedEmptySubAggs));
+                onBucket.accept(key);
                 key = nextKey(key).longValue();
             }
         }
@@ -412,9 +450,11 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
     @Override
     public InternalAggregation reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
         List<Bucket> reducedBuckets = reduceBuckets(aggregations, reduceContext);
+        boolean alreadyAccountedForBuckets = false;
         if (reduceContext.isFinalReduce()) {
             if (minDocCount == 0) {
                 addEmptyBuckets(reducedBuckets, reduceContext);
+                alreadyAccountedForBuckets = true;
             }
             if (InternalOrder.isKeyDesc(order)) {
                 // we just need to reverse here...
@@ -428,7 +468,9 @@ public final class InternalDateHistogram extends InternalMultiBucketAggregation<
                 CollectionUtil.introSort(reducedBuckets, order.comparator());
             }
         }
-        reduceContext.consumeBucketsAndMaybeBreak(reducedBuckets.size());
+        if (false == alreadyAccountedForBuckets) {
+            reduceContext.consumeBucketsAndMaybeBreak(reducedBuckets.size());
+        }
         return new InternalDateHistogram(getName(), reducedBuckets, order, minDocCount, offset, emptyBucketInfo,
                 format, keyed, getMetadata());
     }
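
The key structural change in InternalDateHistogram above is that the empty-bucket traversal now takes a LongConsumer callback, so the same walk runs twice: once with a counter that feeds the circuit breaker and once with a consumer that actually inserts buckets. A simplified sketch of that two-pass shape (hypothetical names, not the actual iterateEmptyBuckets signature):

import java.util.ArrayList;
import java.util.List;
import java.util.function.LongConsumer;

class TwoPassEmptyBuckets {
    // Walk every empty key once; the consumer decides what happens to each one.
    static void forEachEmptyKey(long min, long max, long interval, LongConsumer onKey) {
        for (long key = min; key <= max; key += interval) {
            onKey.accept(key);
        }
    }

    public static void main(String[] args) {
        long min = 0, max = 1_000_000, interval = 1_000;

        // Pass 1: just count (the real code feeds these counts to the breaker in chunks).
        long[] count = new long[1];
        forEachEmptyKey(min, max, interval, key -> count[0]++);
        System.out.println("would allocate " + count[0] + " empty buckets");

        // Pass 2: only reached when pass 1 didn't trip the bucket limit.
        List<Long> keys = new ArrayList<>();
        forEachEmptyKey(min, max, interval, keys::add);
        System.out.println("allocated " + keys.size());
    }
}

The second pass only runs if the first one did not trip the breaker, which is what keeps the allocation bounded.
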

+ 13 - 12
server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java

@@ -347,30 +347,31 @@ public final class InternalHistogram extends InternalMultiBucketAggregation<Inte
         return Math.floor((key - emptyBucketInfo.offset) / emptyBucketInfo.interval) * emptyBucketInfo.interval + emptyBucketInfo.offset;
     }
 
+    /**
+     * When we pre-count the empty buckets we report them periodically
+     * because you can configure the histogram to create more buckets than
+     * there are atoms in the universe. It'd take a while to count that high
+     * only to abort. So we report every few thousand buckets. It'd be
+     * simpler to report every single bucket we plan to allocate one at a time
+     * but that'd cause needless overhead on the circuit breakers. Counting a
+     * few thousand buckets is plenty fast to fail quickly in
+     * pathological cases and plenty large to keep the overhead minimal.
+     */
+    private static final int REPORT_EMPTY_EVERY = 10_000;
+
     private void addEmptyBuckets(List<Bucket> list, ReduceContext reduceContext) {
         /*
          * Make sure we have space for the empty buckets we're going to add by
          * counting all of the empties we plan to add and firing them into
          * consumeBucketsAndMaybeBreak.
-         *
-         * We don't count all of the buckets we plan to allocate and then report
-         * them all at once because we you can configure the histogram to create
-         * more buckets than there are hydrogen atoms in the universe. It'd take
-         * a while to count that high only to abort. So we report every couple
-         * thousand buckets. It's be simpler to report every single bucket we plan
-         * to allocate one at a time but that'd cause needless overhead on the
-         * circuit breakers. Counting a couple thousand buckets is plenty fast
-         * to fail this quickly in pathological cases and plenty large to keep
-         * the overhead minimal.
          */
-        int reportEmptyEvery = 10000;
         class Counter implements DoubleConsumer {
             private int size = list.size();
 
             @Override
             public void accept(double key) {
                 size++;
-                if (size >= reportEmptyEvery) {
+                if (size >= REPORT_EMPTY_EVERY) {
                     reduceContext.consumeBucketsAndMaybeBreak(size);
                     size = 0;
                 }

+ 26 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogramTests.java

@@ -9,7 +9,9 @@
 package org.elasticsearch.search.aggregations.bucket.histogram;
 
 import org.elasticsearch.common.Rounding;
+import org.elasticsearch.common.Rounding.DateTimeUnit;
 import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.aggregations.BucketOrder;
 import org.elasticsearch.search.aggregations.InternalAggregations;
@@ -180,4 +182,28 @@ public class InternalDateHistogramTests extends InternalMultiBucketAggregationTe
         }
         return new InternalDateHistogram(name, buckets, order, minDocCount, offset, emptyBucketInfo, format, keyed, metadata);
     }
+
+    public void testLargeReduce() {
+        expectReduceUsesTooManyBuckets(
+            new InternalDateHistogram(
+                "h",
+                List.of(),
+                BucketOrder.key(true),
+                0,
+                0,
+                new InternalDateHistogram.EmptyBucketInfo(
+                    Rounding.builder(DateTimeUnit.SECOND_OF_MINUTE).build(),
+                    InternalAggregations.EMPTY,
+                    new LongBounds(
+                        DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2018-01-01T00:00:00Z"),
+                        DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2021-01-01T00:00:00Z")
+                    )
+                ),
+                DocValueFormat.RAW,
+                false,
+                null
+            ),
+            100000
+        );
+    }
 }

+ 2 - 26
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogramTests.java

@@ -9,12 +9,9 @@
 package org.elasticsearch.search.aggregations.bucket.histogram;
 
 import org.apache.lucene.util.TestUtil;
-import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.aggregations.BucketOrder;
-import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.InternalAggregations;
-import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree;
 import org.elasticsearch.test.InternalAggregationTestCase;
 import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;
 
@@ -24,9 +21,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
-import java.util.function.IntConsumer;
-
-import static org.hamcrest.Matchers.equalTo;
 
 public class InternalHistogramTests extends InternalMultiBucketAggregationTestCase<InternalHistogram> {
 
@@ -103,7 +97,7 @@ public class InternalHistogramTests extends InternalMultiBucketAggregationTestCa
     }
 
     public void testLargeReduce() {
-        InternalHistogram histo = new InternalHistogram(
+        expectReduceUsesTooManyBuckets(new InternalHistogram(
             "h",
             List.of(),
             BucketOrder.key(true),
@@ -112,25 +106,7 @@ public class InternalHistogramTests extends InternalMultiBucketAggregationTestCa
             DocValueFormat.RAW,
             false,
             null
-        );
-        InternalAggregation.ReduceContext reduceContext = InternalAggregation.ReduceContext.forFinalReduction(
-            BigArrays.NON_RECYCLING_INSTANCE,
-            null,
-            new IntConsumer() {
-                int buckets;
-
-                @Override
-                public void accept(int value) {
-                    buckets += value;
-                    if (buckets > 100000) {
-                        throw new IllegalArgumentException("too big!");
-                    }
-                }
-            },
-            PipelineTree.EMPTY
-        );
-        Exception e = expectThrows(IllegalArgumentException.class, () -> histo.reduce(List.of(histo), reduceContext));
-        assertThat(e.getMessage(), equalTo("too big!"));
+        ), 100000);
     }
 
     @Override

+ 28 - 0
test/framework/src/main/java/org/elasticsearch/test/InternalMultiBucketAggregationTestCase.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.test;
 
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.search.aggregations.Aggregation;
 import org.elasticsearch.search.aggregations.Aggregations;
 import org.elasticsearch.search.aggregations.InternalAggregation;
@@ -17,15 +18,18 @@ import org.elasticsearch.search.aggregations.MultiBucketConsumerService;
 import org.elasticsearch.search.aggregations.ParsedAggregation;
 import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
 import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.function.IntConsumer;
 import java.util.function.Supplier;
 
 import static java.util.Collections.emptyMap;
+import static org.hamcrest.Matchers.equalTo;
 
 public abstract class InternalMultiBucketAggregationTestCase<T extends InternalAggregation & MultiBucketsAggregation>
         extends InternalAggregationTestCase<T> {
@@ -185,4 +189,28 @@ public abstract class InternalMultiBucketAggregationTestCase<T extends InternalA
          * No-op.
          */
     }
+
+    /**
+     * Expect reducing {@code agg} to try to build more than {@code bucketLimit} buckets and abort.
+     */
+    protected static void expectReduceUsesTooManyBuckets(InternalAggregation agg, int bucketLimit) {
+        InternalAggregation.ReduceContext reduceContext = InternalAggregation.ReduceContext.forFinalReduction(
+            BigArrays.NON_RECYCLING_INSTANCE,
+            null,
+            new IntConsumer() {
+                int buckets;
+
+                @Override
+                public void accept(int value) {
+                    buckets += value;
+                    if (buckets > bucketLimit) {
+                        throw new IllegalArgumentException("too big!");
+                    }
+                }
+            },
+            PipelineTree.EMPTY
+        );
+        Exception e = expectThrows(IllegalArgumentException.class, () -> agg.reduce(List.of(agg), reduceContext));
+        assertThat(e.getMessage(), equalTo("too big!"));
+    }
 }