소스 검색

Add Boxplot Aggregation (#51948)

Adds a `boxplot` aggregation that calculates min, max, median and the first
and the third quartiles of the given data set.

Closes #33112
Igor Motov 5 년 전
부모
커밋
c50cfa0668

+ 185 - 0
docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc

@@ -0,0 +1,185 @@
+[role="xpack"]
+[testenv="basic"]
+[[search-aggregations-metrics-boxplot-aggregation]]
+=== Boxplot Aggregation
+
+A `boxplot` metrics aggregation computes a boxplot of numeric values extracted from the aggregated documents.
+These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
+
+The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum,
+median, first quartile (25th percentile) and third quartile (75th percentile) values.
+
+==== Syntax
+
+A `boxplot` aggregation looks like this in isolation:
+
+[source,js]
+--------------------------------------------------
+{
+    "boxplot": {
+        "field": "load_time"
+    }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+Let's look at a boxplot representing load time:
+
+[source,console]
+--------------------------------------------------
+GET latency/_search
+{
+    "size": 0,
+    "aggs" : {
+        "load_time_boxplot" : {
+            "boxplot" : {
+                "field" : "load_time" <1>
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TEST[setup:latency]
+<1> The field `load_time` must be a numeric field
+
+The response will look like this:
+
+[source,console-result]
+--------------------------------------------------
+{
+    ...
+
+   "aggregations": {
+      "load_time_boxplot": {
+         "min": 0.0,
+         "max": 990.0,
+         "q1": 165.0,
+         "q2": 445.0,
+         "q3": 725.0
+      }
+   }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
+
+==== Script
+
+The boxplot metric supports scripting.  For example, if our load times
+are in milliseconds but we want values calculated in seconds, we could use
+a script to convert them on-the-fly:
+
+[source,console]
+--------------------------------------------------
+GET latency/_search
+{
+    "size": 0,
+    "aggs" : {
+        "load_time_boxplot" : {
+            "boxplot" : {
+                "script" : {
+                    "lang": "painless",
+                    "source": "doc['load_time'].value / params.timeUnit", <1>
+                    "params" : {
+                        "timeUnit" : 1000   <2>
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TEST[setup:latency]
+
+<1> The `field` parameter is replaced with a `script` parameter, which uses the
+script to generate the values on which the boxplot is calculated
+<2> Scripting supports parameterized input just like any other script
+
+This will interpret the `script` parameter as an `inline` script with the `painless` script language and one script parameter (`timeUnit`). To use a
+stored script use the following syntax:
+
+[source,console]
+--------------------------------------------------
+GET latency/_search
+{
+    "size": 0,
+    "aggs" : {
+        "load_time_boxplot" : {
+            "boxplot" : {
+                "script" : {
+                    "id": "my_script",
+                    "params": {
+                        "field": "load_time"
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TEST[setup:latency,stored_example_script]
+
+[[search-aggregations-metrics-boxplot-aggregation-approximation]]
+==== Boxplot values are (usually) approximate
+
+The algorithm used by the `boxplot` metric is called TDigest (introduced by
+Ted Dunning in
+https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf[Computing Accurate Quantiles using T-Digests]).
+
+[WARNING]
+====
+Like other percentile aggregations, the boxplot aggregation is also
+https://en.wikipedia.org/wiki/Nondeterministic_algorithm[non-deterministic].
+This means you can get slightly different results using the same data.
+====
+
+[[search-aggregations-metrics-boxplot-aggregation-compression]]
+==== Compression
+
+Approximate algorithms must balance memory utilization with estimation accuracy.
+This balance can be controlled using a `compression` parameter:
+
+[source,console]
+--------------------------------------------------
+GET latency/_search
+{
+    "size": 0,
+    "aggs" : {
+        "load_time_boxplot" : {
+            "boxplot" : {
+                "field" : "load_time",
+                "compression" : 200 <1>
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TEST[setup:latency]
+
+<1> Compression controls memory usage and approximation error
+
+include::percentile-aggregation.asciidoc[tags=t-digest]
+
+==== Missing value
+
+The `missing` parameter defines how documents that are missing a value should be treated.
+By default they will be ignored but it is also possible to treat them as if they
+had a value.
+
+[source,console]
+--------------------------------------------------
+GET latency/_search
+{
+    "size": 0,
+    "aggs" : {
+        "grade_boxplot" : {
+            "boxplot" : {
+                "field" : "grade",
+                "missing": 10 <1>
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TEST[setup:latency]
+
+<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`.

+ 2 - 0
docs/reference/aggregations/metrics/percentile-aggregation.asciidoc

@@ -285,6 +285,7 @@ GET latency/_search
 
 <1> Compression controls memory usage and approximation error
 
+// tag::t-digest[]
 The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
 more nodes available, the higher the accuracy (and large memory footprint) proportional
 to the volume of data.  The `compression` parameter limits the maximum number of
@@ -300,6 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a
 of data which arrives sorted and in-order) the default settings will produce a
 TDigest roughly 64KB in size.  In practice data tends to be more random and
 the TDigest will use less memory.
+// end::t-digest[]
 
 ==== HDR Histogram
 

+ 1 - 1
server/src/test/java/org/elasticsearch/search/aggregations/metrics/MinAggregatorTests.java

@@ -378,7 +378,7 @@ public class MinAggregatorTests extends AggregatorTestCase {
             iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
             iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
         }, (Consumer<InternalGlobal>) global -> {
-            assertEquals(1.0, global.getDocCount(), 2);
+            assertEquals(2, global.getDocCount());
             assertTrue(AggregationInspectionHelper.hasValue(global));
             assertNotNull(global.getAggregations().asMap().get("min"));
 

+ 11 - 2
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java

@@ -7,12 +7,15 @@ package org.elasticsearch.xpack.analytics;
 
 import org.elasticsearch.action.ActionRequest;
 import org.elasticsearch.action.ActionResponse;
+import org.elasticsearch.common.xcontent.ContextParser;
 import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.license.XPackLicenseState;
 import org.elasticsearch.plugins.ActionPlugin;
 import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.plugins.SearchPlugin;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+import org.elasticsearch.xpack.analytics.boxplot.InternalBoxplot;
 import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
 import org.elasticsearch.xpack.core.XPackPlugin;
 import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction;
@@ -21,6 +24,7 @@ import org.elasticsearch.xpack.core.analytics.action.AnalyticsStatsAction;
 import org.elasticsearch.xpack.analytics.action.AnalyticsInfoTransportAction;
 import org.elasticsearch.xpack.analytics.action.AnalyticsUsageTransportAction;
 import org.elasticsearch.xpack.analytics.action.TransportAnalyticsStatsAction;
+import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
 import org.elasticsearch.xpack.analytics.cumulativecardinality.CumulativeCardinalityPipelineAggregationBuilder;
 import org.elasticsearch.xpack.analytics.cumulativecardinality.CumulativeCardinalityPipelineAggregator;
 import org.elasticsearch.xpack.analytics.stringstats.InternalStringStats;
@@ -56,11 +60,16 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
 
     @Override
     public List<AggregationSpec> getAggregations() {
-        return singletonList(
+        return Arrays.asList(
             new AggregationSpec(
                 StringStatsAggregationBuilder.NAME,
                 StringStatsAggregationBuilder::new,
-                StringStatsAggregationBuilder::parse).addResultReader(InternalStringStats::new)
+                StringStatsAggregationBuilder::parse).addResultReader(InternalStringStats::new),
+            new AggregationSpec(
+                BoxplotAggregationBuilder.NAME,
+                BoxplotAggregationBuilder::new,
+                (ContextParser<String, AggregationBuilder>) (p, c) -> BoxplotAggregationBuilder.parse(c, p))
+                .addResultReader(InternalBoxplot::new)
         );
     }
 

+ 63 - 0
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/Boxplot.java

@@ -0,0 +1,63 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
+
+/**
+ * Read-only view of the result of a {@code boxplot} aggregation: the minimum, the maximum
+ * and the three quartiles of the aggregated values, each available as a {@code double}
+ * and as a {@code String}.
+ */
+public interface Boxplot extends NumericMetricsAggregation.MultiValue {
+
+    /**
+     * @return The minimum value of all aggregated values.
+     */
+    double getMin();
+
+    /**
+     * @return The maximum value of all aggregated values.
+     */
+    double getMax();
+
+    /**
+     * @return The first quartile (25th percentile) of all aggregated values.
+     */
+    double getQ1();
+
+    /**
+     * @return The second quartile (the median) of all aggregated values.
+     */
+    double getQ2();
+
+    /**
+     * @return The third quartile (75th percentile) of all aggregated values.
+     */
+    double getQ3();
+
+    /**
+     * @return The minimum value of all aggregated values as a String.
+     */
+    String getMinAsString();
+
+    /**
+     * @return The maximum value of all aggregated values as a String.
+     */
+    String getMaxAsString();
+
+    /**
+     * @return The first quartile (25th percentile) of all aggregated values as a String.
+     */
+    String getQ1AsString();
+
+    /**
+     * @return The second quartile (the median) of all aggregated values as a String.
+     */
+    String getQ2AsString();
+
+    /**
+     * @return The third quartile (75th percentile) of all aggregated values as a String.
+     */
+    String getQ3AsString();
+
+}

+ 132 - 0
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilder.java

@@ -0,0 +1,132 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+import org.elasticsearch.search.aggregations.AggregatorFactories;
+import org.elasticsearch.search.aggregations.AggregatorFactory;
+import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
+import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
+import org.elasticsearch.search.aggregations.support.ValueType;
+import org.elasticsearch.search.aggregations.support.ValuesSource;
+import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder;
+import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
+import org.elasticsearch.search.aggregations.support.ValuesSourceParserHelper;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder.COMPRESSION_FIELD;
+
+/**
+ * Builder for the {@code boxplot} metrics aggregation over a numeric values source.
+ * On top of the shared values-source options it carries a single {@code compression}
+ * setting, which is handed to the {@link BoxplotAggregatorFactory} when the aggregation is built.
+ */
+public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
+    BoxplotAggregationBuilder> {
+    public static final String NAME = "boxplot";
+
+    private static final ObjectParser<BoxplotAggregationBuilder, Void> PARSER = createParser();
+
+    /**
+     * Assembles the request parser: the numeric values-source fields plus {@code compression}.
+     */
+    private static ObjectParser<BoxplotAggregationBuilder, Void> createParser() {
+        final ObjectParser<BoxplotAggregationBuilder, Void> parser = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
+        ValuesSourceParserHelper.declareNumericFields(parser, true, true, false);
+        parser.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
+        return parser;
+    }
+
+    /**
+     * Parses the body of a {@code boxplot} aggregation into a new builder named {@code aggregationName}.
+     */
+    public static AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException {
+        final BoxplotAggregationBuilder target = new BoxplotAggregationBuilder(aggregationName);
+        return PARSER.parse(parser, target, null);
+    }
+
+    // used unless the request (or a caller) overrides it through compression(double)
+    private double compression = 100.0;
+
+    public BoxplotAggregationBuilder(String name) {
+        super(name, CoreValuesSourceType.NUMERIC, ValueType.NUMERIC);
+    }
+
+    /**
+     * Copy constructor backing {@link #shallowCopy}; carries the compression over from {@code clone}.
+     */
+    protected BoxplotAggregationBuilder(BoxplotAggregationBuilder clone,
+                                        AggregatorFactories.Builder factoriesBuilder, Map<String, Object> metaData) {
+        super(clone, factoriesBuilder, metaData);
+        this.compression = clone.compression;
+    }
+
+    /**
+     * Read from a stream. The compression is read after the state owned by the superclass,
+     * mirroring {@link #innerWriteTo}.
+     */
+    public BoxplotAggregationBuilder(StreamInput in) throws IOException {
+        super(in, CoreValuesSourceType.NUMERIC, ValueType.NUMERIC);
+        this.compression = in.readDouble();
+    }
+
+    @Override
+    protected AggregationBuilder shallowCopy(AggregatorFactories.Builder factoriesBuilder, Map<String, Object> metaData) {
+        return new BoxplotAggregationBuilder(this, factoriesBuilder, metaData);
+    }
+
+    @Override
+    protected void innerWriteTo(StreamOutput out) throws IOException {
+        out.writeDouble(compression);
+    }
+
+    /**
+     * Expert: get the compression. Higher values improve accuracy but also
+     * memory usage. Only relevant when using {@link PercentilesMethod#TDIGEST}.
+     */
+    public double compression() {
+        return compression;
+    }
+
+    /**
+     * Expert: set the compression. Higher values improve accuracy but also
+     * memory usage. Only relevant when using {@link PercentilesMethod#TDIGEST}.
+     *
+     * @throws IllegalArgumentException if {@code compression} is negative
+     */
+    public BoxplotAggregationBuilder compression(double compression) {
+        if (compression < 0.0) {
+            final String message = "[compression] must be greater than or equal to 0. Found [" + compression + "] in [" + name + "]";
+            throw new IllegalArgumentException(message);
+        }
+        this.compression = compression;
+        return this;
+    }
+
+    @Override
+    protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
+                                                  ValuesSourceConfig<ValuesSource.Numeric> config,
+                                                  AggregatorFactory parent,
+                                                  AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
+        return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);
+    }
+
+    @Override
+    public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
+        return builder.field(COMPRESSION_FIELD.getPreferredName(), compression);
+    }
+
+    @Override
+    public String getType() {
+        return NAME;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(super.hashCode(), compression);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass() || super.equals(obj) == false) {
+            return false;
+        }
+        // same semantics as comparing the boxed values: NaN equals NaN, 0.0 differs from -0.0
+        return Double.compare(compression, ((BoxplotAggregationBuilder) obj).compression) == 0;
+    }
+}
+

+ 136 - 0
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregator.java

@@ -0,0 +1,136 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ScoreMode;
+import org.elasticsearch.common.lease.Releasables;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.ObjectArray;
+import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.Aggregator;
+import org.elasticsearch.search.aggregations.InternalAggregation;
+import org.elasticsearch.search.aggregations.LeafBucketCollector;
+import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
+import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
+import org.elasticsearch.search.aggregations.metrics.TDigestState;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.elasticsearch.search.aggregations.support.ValuesSource;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Metrics aggregator that feeds every numeric value of the collected documents into a
+ * per-bucket {@link TDigestState}; the boxplot values are derived from that state.
+ * When constructed without a values source it collects nothing and only produces
+ * empty results.
+ */
+public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
+
+    private final ValuesSource.Numeric valuesSource;
+    private final DocValueFormat format;
+    // one state per bucket ordinal; stays null when there is no values source
+    protected ObjectArray<TDigestState> states;
+    protected final double compression;
+
+    BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
+                      SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
+                      Map<String, Object> metaData) throws IOException {
+        super(name, context, parent, pipelineAggregators, metaData);
+        this.valuesSource = valuesSource;
+        this.format = formatter;
+        this.compression = compression;
+        if (valuesSource != null) {
+            states = context.bigArrays().newObjectArray(1);
+        }
+    }
+
+    @Override
+    public ScoreMode scoreMode() {
+        return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
+    }
+
+    @Override
+    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
+                                                final LeafBucketCollector sub) throws IOException {
+        if (valuesSource == null) {
+            return LeafBucketCollector.NO_OP_COLLECTOR;
+        }
+        final BigArrays bigArrays = context.bigArrays();
+        final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
+        return new LeafBucketCollectorBase(sub, values) {
+            @Override
+            public void collect(int doc, long bucket) throws IOException {
+                // Position the iterator once; documents without a value are skipped entirely.
+                if (values.advanceExact(doc)) {
+                    // grows the states array as needed and lazily creates the bucket's digest
+                    final TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
+                    final int valueCount = values.docValueCount();
+                    for (int i = 0; i < valueCount; i++) {
+                        state.add(values.nextValue());
+                    }
+                }
+            }
+        };
+    }
+
+    /**
+     * Returns the digest of the given bucket, growing the backing array and creating the
+     * digest with the configured compression if the bucket has not been seen before.
+     */
+    private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
+        states = bigArrays.grow(states, bucket + 1);
+        TDigestState state = states.get(bucket);
+        if (state == null) {
+            state = new TDigestState(compression);
+            states.set(bucket, state);
+        }
+        return state;
+    }
+
+    @Override
+    public boolean hasMetric(String name) {
+        try {
+            InternalBoxplot.Metrics.resolve(name);
+            return true;
+        } catch (IllegalArgumentException iae) {
+            // resolve rejects names that are not one of the boxplot metrics
+            return false;
+        }
+    }
+
+    /**
+     * Returns the named boxplot value for one bucket. Buckets without a digest are passed
+     * on as {@code null}, which {@link InternalBoxplot.Metrics#value(TDigestState)} handles.
+     *
+     * @throws IllegalArgumentException if {@code name} is not a boxplot metric
+     */
+    @Override
+    public double metric(String name, long owningBucketOrd) {
+        return InternalBoxplot.Metrics.resolve(name).value(getState(owningBucketOrd));
+    }
+
+
+    @Override
+    public InternalAggregation buildAggregation(long owningBucketOrdinal) {
+        TDigestState state = getState(owningBucketOrdinal);
+        if (state == null) {
+            return buildEmptyAggregation();
+        } else {
+            return new InternalBoxplot(name, state, format, pipelineAggregators(), metaData());
+        }
+    }
+
+    /**
+     * @return the digest collected for {@code bucketOrd}, or {@code null} when there is no
+     * values source, the ordinal lies beyond the states array, or nothing was collected for it
+     */
+    TDigestState getState(long bucketOrd) {
+        if (valuesSource == null || bucketOrd >= states.size()) {
+            return null;
+        }
+        return states.get(bucketOrd);
+    }
+
+    @Override
+    public InternalAggregation buildEmptyAggregation() {
+        return new InternalBoxplot(name, new TDigestState(compression), format, pipelineAggregators(), metaData());
+    }
+
+    @Override
+    public void doClose() {
+        Releasables.close(states);
+    }
+}

+ 59 - 0
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorFactory.java

@@ -0,0 +1,59 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.aggregations.Aggregator;
+import org.elasticsearch.search.aggregations.AggregatorFactories;
+import org.elasticsearch.search.aggregations.AggregatorFactory;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.elasticsearch.search.aggregations.support.ValuesSource;
+import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
+import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Factory that creates {@link BoxplotAggregator} instances for a numeric values source,
+ * passing along the compression chosen on the aggregation builder.
+ */
+public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
+
+    private final double compression;
+
+    BoxplotAggregatorFactory(String name,
+                             ValuesSourceConfig<ValuesSource.Numeric> config,
+                             double compression,
+                             QueryShardContext queryShardContext,
+                             AggregatorFactory parent,
+                             AggregatorFactories.Builder subFactoriesBuilder,
+                             Map<String, Object> metaData) throws IOException {
+        super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData);
+        this.compression = compression;
+    }
+
+    @Override
+    protected Aggregator createUnmapped(SearchContext searchContext,
+                                        Aggregator parent,
+                                        List<PipelineAggregator> pipelineAggregators,
+                                        Map<String, Object> metaData)
+        throws IOException {
+        // unmapped: the aggregator is created without a values source
+        return newAggregator(null, searchContext, parent, pipelineAggregators, metaData);
+    }
+
+    @Override
+    protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
+                                          SearchContext searchContext,
+                                          Aggregator parent,
+                                          boolean collectsFromSingleBucket,
+                                          List<PipelineAggregator> pipelineAggregators,
+                                          Map<String, Object> metaData) throws IOException {
+        return newAggregator(valuesSource, searchContext, parent, pipelineAggregators, metaData);
+    }
+
+    /**
+     * Shared construction path for the mapped and the unmapped case.
+     */
+    private BoxplotAggregator newAggregator(ValuesSource.Numeric valuesSource,
+                                            SearchContext searchContext,
+                                            Aggregator parent,
+                                            List<PipelineAggregator> pipelineAggregators,
+                                            Map<String, Object> metaData) throws IOException {
+        final DocValueFormat formatter = config.format();
+        return new BoxplotAggregator(name, valuesSource, formatter, compression, searchContext, parent,
+            pipelineAggregators, metaData);
+    }
+
+}

+ 212 - 0
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/boxplot/InternalBoxplot.java

@@ -0,0 +1,212 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.InternalAggregation;
+import org.elasticsearch.search.aggregations.metrics.InternalNumericMetricsAggregation;
+import org.elasticsearch.search.aggregations.metrics.TDigestState;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Result of a {@code boxplot} aggregation. Every reported value is read from a single
+ * {@link TDigestState}, which is also what is serialized and what gets merged in
+ * {@link #reduce}.
+ */
+public class InternalBoxplot extends InternalNumericMetricsAggregation.MultiValue implements Boxplot {
+
+    /**
+     * The named values a boxplot exposes. Names are matched case-insensitively.
+     */
+    enum Metrics {
+
+        MIN, MAX, Q1, Q2, Q3;
+
+        /**
+         * Looks a metric up by name, ignoring case.
+         *
+         * @throws IllegalArgumentException if {@code name} does not match any metric
+         */
+        public static Metrics resolve(String name) {
+            return Metrics.valueOf(name.toUpperCase(Locale.ROOT));
+        }
+
+        /**
+         * @return the lower-case name of this metric
+         */
+        public String value() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+
+        /**
+         * Reads this metric from an already built boxplot result.
+         */
+        double value(InternalBoxplot boxplot) {
+            switch (this) {
+                case MIN:
+                    return boxplot.getMin();
+                case MAX:
+                    return boxplot.getMax();
+                case Q1:
+                    return boxplot.getQ1();
+                case Q2:
+                    return boxplot.getQ2();
+                case Q3:
+                    return boxplot.getQ3();
+                default:
+                    throw new IllegalArgumentException("Unknown value [" + this.value() + "] in the boxplot aggregation");
+            }
+        }
+
+        /**
+         * Reads this metric straight from a digest.
+         *
+         * @param state the digest to read, or {@code null} when nothing was collected
+         * @return the metric value; for a {@code null} state the quartiles are {@code NaN},
+         * the minimum is {@code +Infinity} and the maximum is {@code -Infinity}
+         */
+        double value(TDigestState state) {
+            switch (this) {
+                case MIN:
+                    // Identity element of min: "no data" must never compare as the smallest value.
+                    // NOTE(review): assumed to match what an empty TDigestState reports -- confirm.
+                    return state == null ? Double.POSITIVE_INFINITY : state.getMin();
+                case MAX:
+                    // Identity element of max, see MIN above.
+                    return state == null ? Double.NEGATIVE_INFINITY : state.getMax();
+                case Q1:
+                    return state == null ? Double.NaN : state.quantile(0.25);
+                case Q2:
+                    return state == null ? Double.NaN : state.quantile(0.5);
+                case Q3:
+                    return state == null ? Double.NaN : state.quantile(0.75);
+                default:
+                    throw new IllegalArgumentException("Unknown value [" + this.value() + "] in the boxplot aggregation");
+            }
+        }
+    }
+
+    private final TDigestState state;
+
+    InternalBoxplot(String name, TDigestState state, DocValueFormat formatter, List<PipelineAggregator> pipelineAggregators,
+                    Map<String, Object> metaData) {
+        super(name, pipelineAggregators, metaData);
+        this.state = state;
+        this.format = formatter;
+    }
+
+    /**
+     * Read from a stream. The format is read before the digest, mirroring {@link #doWriteTo}.
+     */
+    public InternalBoxplot(StreamInput in) throws IOException {
+        super(in);
+        format = in.readNamedWriteable(DocValueFormat.class);
+        state = TDigestState.read(in);
+    }
+
+    @Override
+    protected void doWriteTo(StreamOutput out) throws IOException {
+        out.writeNamedWriteable(format);
+        TDigestState.write(state, out);
+    }
+
+    @Override
+    public String getWriteableName() {
+        return BoxplotAggregationBuilder.NAME;
+    }
+
+    @Override
+    public double getMin() {
+        return state.getMin();
+    }
+
+    @Override
+    public double getMax() {
+        return state.getMax();
+    }
+
+    @Override
+    public double getQ1() {
+        return state.quantile(0.25);
+    }
+
+    @Override
+    public double getQ2() {
+        return state.quantile(0.5);
+    }
+
+    @Override
+    public double getQ3() {
+        return state.quantile(0.75);
+    }
+
+    @Override
+    public String getMinAsString() {
+        return valueAsString(Metrics.MIN.name());
+    }
+
+    @Override
+    public String getMaxAsString() {
+        return valueAsString(Metrics.MAX.name());
+    }
+
+    @Override
+    public String getQ1AsString() {
+        return valueAsString(Metrics.Q1.name());
+    }
+
+    @Override
+    public String getQ2AsString() {
+        return valueAsString(Metrics.Q2.name());
+    }
+
+    @Override
+    public String getQ3AsString() {
+        return valueAsString(Metrics.Q3.name());
+    }
+
+    /**
+     * @throws IllegalArgumentException if {@code name} is not a boxplot metric
+     */
+    @Override
+    public double value(String name) {
+        return Metrics.resolve(name).value(this);
+    }
+
+    // for testing only
+    DocValueFormat format() {
+        return format;
+    }
+
+    // for testing only
+    TDigestState state() {
+        return state;
+    }
+
+    /**
+     * Merges the digests of all given boxplot results into a new digest. The merged digest
+     * is created with the compression of the first result in the list.
+     */
+    @Override
+    public InternalBoxplot reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
+        TDigestState merged = null;
+        for (InternalAggregation aggregation : aggregations) {
+            final InternalBoxplot boxplot = (InternalBoxplot) aggregation;
+            if (merged == null) {
+                merged = new TDigestState(boxplot.state.compression());
+            }
+            merged.add(boxplot.state);
+        }
+        return new InternalBoxplot(name, merged, format, pipelineAggregators(), metaData);
+    }
+
+    @Override
+    public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
+        builder.field("min", getMin());
+        builder.field("max", getMax());
+        builder.field("q1", getQ1());
+        builder.field("q2", getQ2());
+        builder.field("q3", getQ3());
+        // the formatted variants are only emitted when a non-raw format is configured
+        if (format != DocValueFormat.RAW) {
+            builder.field("min_as_string", format.format(getMin()));
+            builder.field("max_as_string", format.format(getMax()));
+            builder.field("q1_as_string", format.format(getQ1()));
+            builder.field("q2_as_string", format.format(getQ2()));
+            builder.field("q3_as_string", format.format(getQ3()));
+        }
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(super.hashCode(), state);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) return true;
+        if (obj == null || getClass() != obj.getClass()) return false;
+        if (super.equals(obj) == false) return false;
+
+        InternalBoxplot that = (InternalBoxplot) obj;
+        return Objects.equals(state, that.state);
+    }
+}
+

+ 65 - 0
x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregationBuilderTests.java

@@ -0,0 +1,65 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.search.SearchModule;
+import org.elasticsearch.search.aggregations.AggregatorFactories;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import static org.hamcrest.Matchers.hasSize;
+
+/**
+ * Serialization test case for {@link BoxplotAggregationBuilder}, built on
+ * {@link AbstractSerializingTestCase}.
+ */
+public class BoxplotAggregationBuilderTests extends AbstractSerializingTestCase<BoxplotAggregationBuilder> {
+    String aggregationName;
+
+    /** Picks a fresh random aggregation name before every test. */
+    @Before
+    public void setupName() {
+        this.aggregationName = randomAlphaOfLength(10);
+    }
+
+    /** Registry containing the named XContent entries contributed by the analytics plugin. */
+    @Override
+    protected NamedXContentRegistry xContentRegistry() {
+        final SearchModule module = new SearchModule(Settings.EMPTY, Collections.singletonList(new AnalyticsPlugin()));
+        return new NamedXContentRegistry(module.getNamedXContents());
+    }
+
+    /**
+     * Parses exactly one boxplot aggregation from the parser and verifies that
+     * nothing else (no pipeline aggregation, no trailing token) is present.
+     */
+    @Override
+    protected BoxplotAggregationBuilder doParseInstance(XContentParser parser) throws IOException {
+        // The content must open with START_OBJECT.
+        final XContentParser.Token first = parser.nextToken();
+        assertSame(XContentParser.Token.START_OBJECT, first);
+
+        final AggregatorFactories.Builder factories = AggregatorFactories.parseAggregators(parser);
+        assertThat(factories.getAggregatorFactories(), hasSize(1));
+        assertThat(factories.getPipelineAggregatorFactories(), hasSize(0));
+
+        final BoxplotAggregationBuilder result =
+            (BoxplotAggregationBuilder) factories.getAggregatorFactories().iterator().next();
+        // No token may follow the aggregation definition.
+        assertNull(parser.nextToken());
+        assertNotNull(result);
+        return result;
+    }
+
+    /** Builds a boxplot builder on a random field, with a random compression half of the time. */
+    @Override
+    protected BoxplotAggregationBuilder createTestInstance() {
+        final BoxplotAggregationBuilder builder =
+            new BoxplotAggregationBuilder(aggregationName).field(randomAlphaOfLength(10));
+        final boolean withCompression = randomBoolean();
+        if (withCompression) {
+            builder.compression(randomDoubleBetween(0, 100, true));
+        }
+        return builder;
+    }
+
+    /** Reader used to deserialize instances from the wire. */
+    @Override
+    protected Writeable.Reader<BoxplotAggregationBuilder> instanceReader() {
+        return BoxplotAggregationBuilder::new;
+    }
+}
+

+ 323 - 0
x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/boxplot/BoxplotAggregatorTests.java

@@ -0,0 +1,323 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.InternalAggregation;
+import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal;
+import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
+import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
+
+import java.io.IOException;
+import java.util.function.Consumer;
+
+import static java.util.Collections.singleton;
+import static org.hamcrest.Matchers.equalTo;
+
+public class BoxplotAggregatorTests extends AggregatorTestCase {
+
+    public void testNoMatchingField() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 3)));
+        }, boxplot -> {
+            assertEquals(Double.POSITIVE_INFINITY, boxplot.getMin(), 0);
+            assertEquals(Double.NEGATIVE_INFINITY, boxplot.getMax(), 0);
+            assertEquals(Double.NaN, boxplot.getQ1(), 0);
+            assertEquals(Double.NaN, boxplot.getQ2(), 0);
+            assertEquals(Double.NaN, boxplot.getQ3(), 0);
+        });
+    }
+
+    public void testMatchesSortedNumericDocValues() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 4)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 5)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 10)));
+        }, boxplot -> {
+            assertEquals(2, boxplot.getMin(), 0);
+            assertEquals(10, boxplot.getMax(), 0);
+            assertEquals(2, boxplot.getQ1(), 0);
+            assertEquals(3.5, boxplot.getQ2(), 0);
+            assertEquals(5, boxplot.getQ3(), 0);
+        });
+    }
+
+    public void testMatchesNumericDocValues() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 4)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 5)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 10)));
+        }, boxplot -> {
+            assertEquals(2, boxplot.getMin(), 0);
+            assertEquals(10, boxplot.getMax(), 0);
+            assertEquals(2, boxplot.getQ1(), 0);
+            assertEquals(3.5, boxplot.getQ2(), 0);
+            assertEquals(5, boxplot.getQ3(), 0);
+        });
+    }
+
+    public void testSomeMatchesSortedNumericDocValues() throws IOException {
+        testCase(new DocValuesFieldExistsQuery("number"), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number2", 2)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 4)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 5)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 10)));
+        }, boxplot -> {
+            assertEquals(2, boxplot.getMin(), 0);
+            assertEquals(10, boxplot.getMax(), 0);
+            assertEquals(2, boxplot.getQ1(), 0);
+            assertEquals(3.5, boxplot.getQ2(), 0);
+            assertEquals(5, boxplot.getQ3(), 0);
+        });
+    }
+
+    public void testSomeMatchesNumericDocValues() throws IOException {
+        testCase(new DocValuesFieldExistsQuery("number"), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number2", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 4)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 5)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 10)));
+        }, boxplot -> {
+            assertEquals(2, boxplot.getMin(), 0);
+            assertEquals(10, boxplot.getMax(), 0);
+            assertEquals(2, boxplot.getQ1(), 0);
+            assertEquals(3.5, boxplot.getQ2(), 0);
+            assertEquals(5, boxplot.getQ3(), 0);
+        });
+    }
+
+    public void testUnmappedWithMissingField() throws IOException {
+        BoxplotAggregationBuilder aggregationBuilder = new BoxplotAggregationBuilder("boxplot")
+            .field("does_not_exist").missing(0L);
+
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+
+        testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
+        }, (Consumer<InternalBoxplot>) boxplot -> {
+            assertEquals(0, boxplot.getMin(), 0);
+            assertEquals(0, boxplot.getMax(), 0);
+            assertEquals(0, boxplot.getQ1(), 0);
+            assertEquals(0, boxplot.getQ2(), 0);
+            assertEquals(0, boxplot.getQ3(), 0);
+        }, fieldType);
+    }
+
+    public void testUnsupportedType() {
+        BoxplotAggregationBuilder aggregationBuilder = new BoxplotAggregationBuilder("boxplot").field("not_a_number");
+
+        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
+        fieldType.setName("not_a_number");
+        fieldType.setHasDocValues(true);
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+                iw.addDocument(singleton(new SortedSetDocValuesField("string", new BytesRef("foo"))));
+            }, (Consumer<InternalBoxplot>) boxplot -> {
+                fail("Should have thrown exception");
+            }, fieldType));
+        assertEquals(e.getMessage(), "Expected numeric type on field [not_a_number], but got [keyword]");
+    }
+
+    public void testBadMissingField() {
+        BoxplotAggregationBuilder aggregationBuilder = new BoxplotAggregationBuilder("boxplot").field("number")
+            .missing("not_a_number");
+
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+
+        expectThrows(NumberFormatException.class,
+            () -> testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+                iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 4)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 5)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 10)));
+            }, (Consumer<InternalBoxplot>) boxplot -> {
+                fail("Should have thrown exception");
+            }, fieldType));
+    }
+
+    public void testUnmappedWithBadMissingField() {
+        BoxplotAggregationBuilder aggregationBuilder = new BoxplotAggregationBuilder("boxplot")
+            .field("does_not_exist").missing("not_a_number");
+
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+
+        expectThrows(NumberFormatException.class,
+            () -> testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+                iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 4)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 5)));
+                iw.addDocument(singleton(new NumericDocValuesField("number", 10)));
+            }, (Consumer<InternalBoxplot>) boxplot -> {
+                fail("Should have thrown exception");
+            }, fieldType));
+    }
+
+    public void testEmptyBucket() throws IOException {
+        HistogramAggregationBuilder histogram = new HistogramAggregationBuilder("histo").field("number").interval(10).minDocCount(0)
+            .subAggregation(new BoxplotAggregationBuilder("boxplot").field("number"));
+
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+
+        testCase(histogram, new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 21)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 23)));
+        }, (Consumer<InternalHistogram>) histo -> {
+            assertThat(histo.getBuckets().size(), equalTo(3));
+
+            assertNotNull(histo.getBuckets().get(0).getAggregations().asMap().get("boxplot"));
+            InternalBoxplot boxplot = (InternalBoxplot) histo.getBuckets().get(0).getAggregations().asMap().get("boxplot");
+            assertEquals(1, boxplot.getMin(), 0);
+            assertEquals(3, boxplot.getMax(), 0);
+            assertEquals(1, boxplot.getQ1(), 0);
+            assertEquals(2, boxplot.getQ2(), 0);
+            assertEquals(3, boxplot.getQ3(), 0);
+
+            assertNotNull(histo.getBuckets().get(1).getAggregations().asMap().get("boxplot"));
+            boxplot = (InternalBoxplot) histo.getBuckets().get(1).getAggregations().asMap().get("boxplot");
+            assertEquals(Double.POSITIVE_INFINITY, boxplot.getMin(), 0);
+            assertEquals(Double.NEGATIVE_INFINITY, boxplot.getMax(), 0);
+            assertEquals(Double.NaN, boxplot.getQ1(), 0);
+            assertEquals(Double.NaN, boxplot.getQ2(), 0);
+            assertEquals(Double.NaN, boxplot.getQ3(), 0);
+
+            assertNotNull(histo.getBuckets().get(2).getAggregations().asMap().get("boxplot"));
+            boxplot = (InternalBoxplot) histo.getBuckets().get(2).getAggregations().asMap().get("boxplot");
+            assertEquals(21, boxplot.getMin(), 0);
+            assertEquals(23, boxplot.getMax(), 0);
+            assertEquals(21, boxplot.getQ1(), 0);
+            assertEquals(22, boxplot.getQ2(), 0);
+            assertEquals(23, boxplot.getQ3(), 0);
+        }, fieldType);
+    }
+
+    public void testFormatter() throws IOException {
+        BoxplotAggregationBuilder aggregationBuilder = new BoxplotAggregationBuilder("boxplot").field("number")
+            .format("0000.0");
+
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+
+        testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 4)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 5)));
+        }, (Consumer<InternalBoxplot>) boxplot -> {
+            assertEquals(1, boxplot.getMin(), 0);
+            assertEquals(5, boxplot.getMax(), 0);
+            assertEquals(1.75, boxplot.getQ1(), 0);
+            assertEquals(3, boxplot.getQ2(), 0);
+            assertEquals(4.25, boxplot.getQ3(), 0);
+            assertEquals("0001.0", boxplot.getMinAsString());
+            assertEquals("0005.0", boxplot.getMaxAsString());
+            assertEquals("0001.8", boxplot.getQ1AsString());
+            assertEquals("0003.0", boxplot.getQ2AsString());
+            assertEquals("0004.2", boxplot.getQ3AsString());
+        }, fieldType);
+    }
+
+    public void testGetProperty() throws IOException {
+        GlobalAggregationBuilder globalBuilder = new GlobalAggregationBuilder("global")
+            .subAggregation(new BoxplotAggregationBuilder("boxplot").field("number"));
+
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+
+        testCase(globalBuilder, new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 2)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 3)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 4)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 5)));
+        }, (Consumer<InternalGlobal>) global -> {
+            assertEquals(5, global.getDocCount());
+            assertTrue(AggregationInspectionHelper.hasValue(global));
+            assertNotNull(global.getAggregations().asMap().get("boxplot"));
+            InternalBoxplot boxplot = (InternalBoxplot) global.getAggregations().asMap().get("boxplot");
+            assertThat(global.getProperty("boxplot"), equalTo(boxplot));
+            assertThat(global.getProperty("boxplot.min"), equalTo(1.0));
+            assertThat(global.getProperty("boxplot.max"), equalTo(5.0));
+            assertThat(boxplot.getProperty("min"), equalTo(1.0));
+            assertThat(boxplot.getProperty("max"), equalTo(5.0));
+        }, fieldType);
+    }
+
+    private void testCase(Query query,
+                          CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
+                          Consumer<InternalBoxplot> verify) throws IOException {
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
+        fieldType.setName("number");
+        BoxplotAggregationBuilder aggregationBuilder = new BoxplotAggregationBuilder("boxplot").field("number");
+        testCase(aggregationBuilder, query, buildIndex, verify, fieldType);
+    }
+
+    private <T extends AggregationBuilder, V extends InternalAggregation> void testCase(
+        T aggregationBuilder, Query query,
+        CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
+        Consumer<V> verify, MappedFieldType fieldType) throws IOException {
+        try (Directory directory = newDirectory()) {
+            RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+            buildIndex.accept(indexWriter);
+            indexWriter.close();
+
+            try (IndexReader indexReader = DirectoryReader.open(directory)) {
+                IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+
+                V agg = searchAndReduce(indexSearcher, query, aggregationBuilder, fieldType);
+                verify.accept(agg);
+
+            }
+        }
+    }
+
+
+}

+ 112 - 0
x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/boxplot/InternalBoxplotTests.java

@@ -0,0 +1,112 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.analytics.boxplot;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.Aggregation;
+import org.elasticsearch.search.aggregations.ParsedAggregation;
+import org.elasticsearch.search.aggregations.metrics.TDigestState;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.elasticsearch.test.InternalAggregationTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Tests for {@link InternalBoxplot}: random instance creation, reduction, and mutation
+ * for equality checks. XContent parsing is not covered because there is no parsed
+ * counterpart of the aggregation yet.
+ */
+public class InternalBoxplotTests extends InternalAggregationTestCase<InternalBoxplot> {
+    /**
+     * Creates an instance whose digest holds between 0 and 100 random doubles
+     * (frequently non-zero, otherwise empty) and a random numeric format.
+     */
+    @Override
+    protected InternalBoxplot createTestInstance(String name, List<PipelineAggregator> pipelineAggregators,
+                                                 Map<String, Object> metaData) {
+        int numValues = frequently() ? randomInt(100) : 0;
+        TDigestState state = new TDigestState(100);
+        for (int i = 0; i < numValues; ++i) {
+            state.add(randomDouble());
+        }
+        DocValueFormat formatter = randomNumericDocValueFormat();
+
+        return new InternalBoxplot(name, state, formatter, pipelineAggregators, metaData);
+    }
+
+    @Override
+    protected Writeable.Reader<InternalBoxplot> instanceReader() {
+        return InternalBoxplot::new;
+    }
+
+    /**
+     * Rebuilds the expected digest by merging all input digests and compares
+     * its min and max with the reduced result.
+     */
+    @Override
+    protected void assertReduced(InternalBoxplot reduced, List<InternalBoxplot> inputs) {
+        TDigestState expected = new TDigestState(reduced.state().compression());
+        for (InternalBoxplot input : inputs) {
+            expected.add(input.state());
+        }
+        // NOTE(review): only min and max are compared; quartiles are not checked here,
+        // presumably because the merged digest is approximate -- confirm.
+        assertEquals(expected.getMax(), reduced.getMax(), 0);
+        assertEquals(expected.getMin(), reduced.getMin(), 0);
+    }
+
+    @Override
+    protected void assertFromXContent(InternalBoxplot min, ParsedAggregation parsedAggregation) {
+        // There is no ParsedBoxplot yet so we cannot test it here
+    }
+
+    /**
+     * Returns a copy of the instance that differs in exactly one of: name,
+     * digest content, or metadata.
+     */
+    @Override
+    protected InternalBoxplot mutateInstance(InternalBoxplot instance) {
+        String name = instance.getName();
+        TDigestState state;
+        // Copy the digest through a write/read round trip so the original is left untouched.
+        try (BytesStreamOutput output = new BytesStreamOutput()) {
+            TDigestState.write(instance.state(), output);
+            try (StreamInput in = new NamedWriteableAwareStreamInput(output.bytes().streamInput(), getNamedWriteableRegistry())) {
+                state = TDigestState.read(in);
+            }
+        } catch (IOException ex) {
+            throw new IllegalStateException(ex);
+        }
+        DocValueFormat formatter = instance.format();
+        List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
+        Map<String, Object> metaData = instance.getMetaData();
+        switch (between(0, 2)) {
+            case 0:
+                name += randomAlphaOfLength(5);
+                break;
+            case 1:
+                state.add(randomDouble());
+                break;
+            case 2:
+                if (metaData == null) {
+                    metaData = new HashMap<>(1);
+                } else {
+                    metaData = new HashMap<>(instance.getMetaData());
+                }
+                metaData.put(randomAlphaOfLength(15), randomInt());
+                break;
+            default:
+                throw new AssertionError("Illegal randomisation branch");
+        }
+        return new InternalBoxplot(name, state, formatter, pipelineAggregators, metaData);
+    }
+
+    /**
+     * Adds a parser entry for the boxplot aggregation name whose parser skips the test
+     * via a failing assumption, since no parsed boxplot class exists yet.
+     */
+    @Override
+    protected List<NamedXContentRegistry.Entry> getNamedXContents() {
+        List<NamedXContentRegistry.Entry> extendedNamedXContents = new ArrayList<>(super.getNamedXContents());
+        extendedNamedXContents.add(new NamedXContentRegistry.Entry(Aggregation.class,
+            new ParseField(BoxplotAggregationBuilder.NAME),
+            (p, c) -> {
+                assumeTrue("There is no ParsedBoxplot yet", false);
+                return null;
+            }
+        ));
+        return extendedNamedXContents;
+    }
+}

+ 56 - 0
x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/boxplot.yml

@@ -0,0 +1,56 @@
+setup:  # creates and populates the `latency` index queried by the test below
+  - skip:
+      features: headers  # the bulk request below sends a custom Authorization header
+  - do:
+      indices.create:
+        index: latency
+        body:
+          mappings:
+            properties:
+              load_time:
+                type: double  # numeric field the boxplot aggregation runs on
+
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      bulk:
+        refresh: true  # refresh so the documents are searchable right away
+        body:  # five documents with load_time = 2, 3, 5, 10, 15
+          - index:
+              _index: "latency"
+          - load_time: 2
+
+          - index:
+              _index: "latency"
+          - load_time: 3
+
+          - index:
+              _index: "latency"
+          - load_time: 5
+
+          - index:
+              _index: "latency"
+          - load_time: 10
+
+          - index:
+              _index: "latency"
+          - load_time: 15
+---
+"Basic Search":  # boxplot over the five load_time values indexed in setup
+
+  - do:
+      search:
+        index: "latency"
+        body:
+          size: 0  # no hits requested; only the aggregation result is checked
+          aggs:
+            plot:
+              boxplot:
+                field: "load_time"
+
+  - match: { aggregations.plot.min: 2.0 }
+  - match: { aggregations.plot.max: 15.0 }
+  - match: { aggregations.plot.q1: 2.75 }  # first quartile (25th percentile)
+  - match: { aggregations.plot.q2: 5.0 }  # median
+  - match: { aggregations.plot.q3: 11.25 }  # third quartile (75th percentile)
+