Kaynağa Gözat

Add histogram field type support to boxplot aggs (#52265)

Add support for the histogram field type to boxplot aggs.

Closes #52233
Relates to #33112
Igor Motov 5 yıl önce
ebeveyn
işleme
0898df4aac

+ 1 - 0
docs/reference/aggregations/metrics.asciidoc

@@ -45,6 +45,7 @@ include::metrics/valuecount-aggregation.asciidoc[]
 
 include::metrics/median-absolute-deviation-aggregation.asciidoc[]
 
+include::metrics/boxplot-aggregation.asciidoc[]
 
 
 

+ 2 - 1
docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc

@@ -4,7 +4,8 @@
 === Boxplot Aggregation
 
 A `boxplot` metrics aggregation that computes a boxplot of numeric values extracted from the aggregated documents.
-These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
+These values can be generated by a provided script or extracted from specific numeric or
+<<histogram,histogram fields>> in the documents.
 
 The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum,
 median, first quartile (25th percentile), and third quartile (75th percentile) values.

+ 2 - 2
docs/reference/aggregations/metrics/percentile-aggregation.asciidoc

@@ -285,7 +285,7 @@ GET latency/_search
 
 <1> Compression controls memory usage and approximation error
 
-// tag::[t-digest]
+// tag::t-digest[]
 The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
 more nodes available, the higher the accuracy (and the larger the memory footprint) proportional
 to the volume of data.  The `compression` parameter limits the maximum number of
@@ -301,7 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a
 of data which arrives sorted and in-order) the default settings will produce a
 TDigest roughly 64KB in size.  In practice data tends to be more random and
 the TDigest will use less memory.
-// tag::[t-digest]
+// end::t-digest[]
 
 ==== HDR Histogram
 

+ 1 - 0
docs/reference/mapping/types/histogram.asciidoc

@@ -37,6 +37,7 @@ following aggregations and queries:
 
 * <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
 * <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
+* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
 * <<query-dsl-exists-query,exists>> query
 
 [[mapping-types-histogram-building-histogram]]

+ 3 - 3
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java

@@ -29,7 +29,7 @@ import java.util.Objects;
 
 import static org.elasticsearch.search.aggregations.metrics.PercentilesMethod.COMPRESSION_FIELD;
 
-public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
+public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
     BoxplotAggregationBuilder> {
     public static final String NAME = "boxplot";
 
@@ -37,7 +37,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
 
     static {
         PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
-        ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false);
+        ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
         PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
     }
 
@@ -98,7 +98,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
 
     @Override
     protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
-                                                  ValuesSourceConfig<ValuesSource.Numeric> config,
+                                                  ValuesSourceConfig<ValuesSource> config,
                                                   AggregatorFactory parent,
                                                   AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
         return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);

+ 31 - 14
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java

@@ -11,6 +11,8 @@ import org.apache.lucene.search.ScoreMode;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.ObjectArray;
+import org.elasticsearch.index.fielddata.HistogramValue;
+import org.elasticsearch.index.fielddata.HistogramValues;
 import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.aggregations.Aggregator;
@@ -29,12 +31,12 @@ import java.util.Map;
 
 public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
 
-    private final ValuesSource.Numeric valuesSource;
+    private final ValuesSource valuesSource;
     private final DocValueFormat format;
     protected ObjectArray<TDigestState> states;
     protected final double compression;
 
-    BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
+    BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
                       SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
                       Map<String, Object> metaData) throws IOException {
         super(name, context, parent, pipelineAggregators, metaData);
@@ -58,23 +60,38 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
             return LeafBucketCollector.NO_OP_COLLECTOR;
         }
         final BigArrays bigArrays = context.bigArrays();
-        final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
-        return new LeafBucketCollectorBase(sub, values) {
-            @Override
-            public void collect(int doc, long bucket) throws IOException {
-                states = bigArrays.grow(states, bucket + 1);
-
-                if (values.advanceExact(doc)) {
+        if (valuesSource instanceof ValuesSource.Histogram) {
+            final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
+            return new LeafBucketCollectorBase(sub, values) {
+                @Override
+                public void collect(int doc, long bucket) throws IOException {
                     TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
                     if (values.advanceExact(doc)) {
-                        final int valueCount = values.docValueCount();
-                        for (int i = 0; i < valueCount; i++) {
-                            state.add(values.nextValue());
+                        final HistogramValue sketch = values.histogram();
+                        while(sketch.next()) {
+                            state.add(sketch.value(), sketch.count());
                         }
                     }
                 }
-            }
-        };
+            };
+        } else {
+            final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
+            return new LeafBucketCollectorBase(sub, values) {
+                @Override
+                public void collect(int doc, long bucket) throws IOException {
+                    states = bigArrays.grow(states, bucket + 1);
+                    if (values.advanceExact(doc)) {
+                        TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
+                        if (values.advanceExact(doc)) {
+                            final int valueCount = values.docValueCount();
+                            for (int i = 0; i < valueCount; i++) {
+                                state.add(values.nextValue());
+                            }
+                        }
+                    }
+                }
+            };
+        }
     }
 
     private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {

+ 3 - 3
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java

@@ -20,12 +20,12 @@ import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
-public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
+public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
 
     private final double compression;
 
     BoxplotAggregatorFactory(String name,
-                             ValuesSourceConfig<ValuesSource.Numeric> config,
+                             ValuesSourceConfig<ValuesSource> config,
                              double compression,
                              QueryShardContext queryShardContext,
                              AggregatorFactory parent,
@@ -46,7 +46,7 @@ public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<Valu
     }
 
     @Override
-    protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
+    protected Aggregator doCreateInternal(ValuesSource valuesSource,
                                           SearchContext searchContext,
                                           Aggregator parent,
                                           boolean collectsFromSingleBucket,

+ 33 - 4
x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java

@@ -27,6 +27,8 @@ import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
 import org.elasticsearch.search.aggregations.metrics.TDigestState;
 import org.elasticsearch.test.ESSingleNodeTestCase;
 import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
+import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
+import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
 import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
 
 import java.util.ArrayList;
@@ -131,8 +133,7 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
         }
     }
 
-    public void testTDigestHistogram() throws Exception {
-
+    private void setupTDigestHistogram(int compression) throws Exception {
         XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
             .startObject()
               .startObject("_doc")
@@ -170,8 +171,6 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
         PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2);
         client().admin().indices().putMapping(request2).actionGet();
 
-
-        int compression = TestUtil.nextInt(random(), 200, 300);
         TDigestState histogram = new TDigestState(compression);
         BulkRequest bulkRequest = new BulkRequest();
 
@@ -218,6 +217,11 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
 
         response = client().prepareSearch("pre_agg").get();
         assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
+    }
+
+    public void testTDigestHistogram() throws Exception {
+        int compression = TestUtil.nextInt(random(), 200, 300);
+        setupTDigestHistogram(compression);
 
         PercentilesAggregationBuilder builder =
             AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
@@ -236,6 +240,31 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
         }
     }
 
+    public void testBoxplotHistogram() throws Exception {
+        int compression = TestUtil.nextInt(random(), 200, 300);
+        setupTDigestHistogram(compression);
+        BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression);
+
+        SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get();
+        SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get();
+        SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get();
+
+        Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg");
+        Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg");
+        Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg");
+        assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0);
+        assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0);
+        assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0);
+        assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0);
+
+        assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0);
+        assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0);
+        assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0);
+        assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0);
+        assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0);
+        assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0);
+    }
+
     @Override
     protected Collection<Class<? extends Plugin>> getPlugins() {
         List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());