Browse Source

Fail variable_width_histogram that collects from many (#58619)

Adds an explicit check to `variable_width_histogram` to stop it from
trying to collect from many buckets because it can't. I tried to make it
do so but that is more than an afternoon's project, sadly. So for now we
just disallow it.

Relates to #42035
Nik Everett 5 years ago
parent
commit
32bdf8549b

+ 2 - 0
docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc

@@ -57,6 +57,8 @@ Response:
 --------------------------------------------------
 // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
 
+IMPORTANT: This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket.
+
 ==== Clustering Algorithm
 Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents
 are sorted and linearly separated into `3/4 * shard_size buckets`.

+ 7 - 0
server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorFactory.java

@@ -65,6 +65,13 @@ public class VariableWidthHistogramAggregatorFactory extends ValuesSourceAggrega
                                           Aggregator parent,
                                           boolean collectsFromSingleBucket,
                                           Map<String, Object> metadata) throws IOException{
+        if (collectsFromSingleBucket == false) {
+            throw new IllegalArgumentException(
+                "["
+                    + VariableWidthHistogramAggregationBuilder.NAME
+                    + "] cannot be nested inside an aggregation that collects more than a single bucket."
+            );
+        }
         AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config,
             VariableWidthHistogramAggregationBuilder.NAME);
         if (aggregatorSupplier instanceof VariableWidthHistogramAggregatorSupplier == false) {

+ 45 - 1
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorTests.java

@@ -31,13 +31,16 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.NumericUtils;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.CheckedConsumer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
 import org.elasticsearch.search.aggregations.AggregationBuilders;
 import org.elasticsearch.search.aggregations.AggregatorTestCase;
 import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms;
+import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
 import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
 import org.elasticsearch.search.aggregations.metrics.InternalStats;
 import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder;
@@ -51,12 +54,15 @@ import java.util.List;
 import java.util.Map;
 import java.util.function.Consumer;
 
+import static java.util.stream.Collectors.toList;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
 public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase {
 
     private static final String NUMERIC_FIELD = "numeric";
 
     private static final Query DEFAULT_QUERY = new MatchAllDocsQuery();
-    private VariableWidthHistogramAggregationBuilder aggregationBuilder;
 
     public void testNoDocs() throws Exception{
         final List<Number> dataset = Arrays.asList();
@@ -424,6 +430,44 @@ public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase {
 
     }
 
+    public void testAsSubAggregation() throws IOException {
+        AggregationBuilder builder = new TermsAggregationBuilder("t").field("t")
+            .subAggregation(new VariableWidthHistogramAggregationBuilder("v").field("v").setNumBuckets(2));
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
+            iw.addDocument(List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 1)));
+            iw.addDocument(List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 10)));
+            iw.addDocument(List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 11)));
+
+            iw.addDocument(List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 20)));
+            iw.addDocument(List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 30)));
+        };
+        Consumer<LongTerms> verify = terms -> {
+            /*
+             * This is what the result should be but it never gets called because of
+             * the explicit check. We do expect to remove the check in the future,
+             * thus, this stays.
+             */
+            LongTerms.Bucket t1 = terms.getBucketByKey("1");
+            InternalVariableWidthHistogram v1 = t1.getAggregations().get("v");
+            assertThat(
+                v1.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()),
+                equalTo(List.of(1.0, 10.5))
+            );
+
+            LongTerms.Bucket t2 = terms.getBucketByKey("1");
+            InternalVariableWidthHistogram v2 = t2.getAggregations().get("v");
+            assertThat(
+                v2.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()),
+                equalTo(List.of(20.0, 30))
+            );
+        };
+        Exception e = expectThrows(
+            IllegalArgumentException.class,
+            () -> testCase(builder, DEFAULT_QUERY, buildIndex, verify, longField("t"), longField("v"))
+        );
+        assertThat(e.getMessage(), containsString("cannot be nested"));
+    }
+
 
     private void testSearchCase(final Query query, final List<Number> dataset, boolean multipleSegments,
                                 final Consumer<VariableWidthHistogramAggregationBuilder> configure,