Browse Source

Adds a minimum interval to `auto_date_histogram`. (#42814)

Adds a minimum interval to `auto_date_histogram`. We do this by
restricting the roundings passed into the aggregator.
Paul Sanwald 6 years ago
parent
commit
6357857bba

+ 33 - 0
docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc

@@ -258,6 +258,39 @@ Like with the normal <<search-aggregations-bucket-datehistogram-aggregation, `da
 scripts and value level scripts are supported. This aggregation does not, however, support the `min_doc_count`, 
 `extended_bounds` and `order` parameters.  
 
+==== Minimum Interval parameter
+
+The `minimum_interval` parameter allows the caller to specify the minimum rounding interval that should be used.
+This can make the collection process more efficient, as the aggregation will not attempt to round at
+any interval lower than `minimum_interval`.
+
+The accepted units for `minimum_interval` are:
+
+* year
+* month
+* day
+* hour
+* minute
+* second
+
+[source,js]
+--------------------------------------------------
+POST /sales/_search?size=0
+{
+    "aggs" : {
+        "sale_date" : {
+             "auto_date_histogram" : {
+                 "field" : "date",
+                 "buckets": 10,
+                 "minimum_interval": "minute"
+             }
+         }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[setup:sales]
+
 ==== Missing value
 
 The `missing` parameter defines how documents that are missing a value should be treated.

+ 88 - 21
server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilder.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.search.aggregations.bucket.histogram;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.Rounding;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -48,42 +49,65 @@ import java.util.Arrays;
 import java.util.Map;
 import java.util.Objects;
 
+import static java.util.Map.entry;
+
 public class AutoDateHistogramAggregationBuilder
         extends ValuesSourceAggregationBuilder<ValuesSource.Numeric, AutoDateHistogramAggregationBuilder> {
 
     public static final String NAME = "auto_date_histogram";
 
     private static final ParseField NUM_BUCKETS_FIELD = new ParseField("buckets");
+    private static final ParseField MINIMUM_INTERVAL_FIELD = new ParseField("minimum_interval");
 
     private static final ObjectParser<AutoDateHistogramAggregationBuilder, Void> PARSER;
     static {
         PARSER = new ObjectParser<>(AutoDateHistogramAggregationBuilder.NAME);
         ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, true);
-
         PARSER.declareInt(AutoDateHistogramAggregationBuilder::setNumBuckets, NUM_BUCKETS_FIELD);
+        PARSER.declareStringOrNull(AutoDateHistogramAggregationBuilder::setMinimumIntervalExpression, MINIMUM_INTERVAL_FIELD);
     }
 
+    public static final Map<Rounding.DateTimeUnit, String> ALLOWED_INTERVALS = Map.ofEntries(
+        entry(Rounding.DateTimeUnit.YEAR_OF_CENTURY, "year"),
+        entry(Rounding.DateTimeUnit.MONTH_OF_YEAR, "month"),
+        entry(Rounding.DateTimeUnit.DAY_OF_MONTH, "day"),
+        entry( Rounding.DateTimeUnit.HOUR_OF_DAY, "hour"),
+        entry(Rounding.DateTimeUnit.MINUTES_OF_HOUR, "minute"),
+        entry(Rounding.DateTimeUnit.SECOND_OF_MINUTE, "second")
+    );
+
     /**
      *
      * Build roundings, computed dynamically as roundings are time zone dependent.
      * The current implementation probably should not be invoked in a tight loop.
      * @return Array of RoundingInfo
      */
-    static RoundingInfo[] buildRoundings(ZoneId timeZone) {
+    static RoundingInfo[] buildRoundings(ZoneId timeZone, String minimumInterval) {
+
+        int indexToSliceFrom = 0;
+
         RoundingInfo[] roundings = new RoundingInfo[6];
-        roundings[0] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.SECOND_OF_MINUTE, timeZone),
-            1000L, "s", 1, 5, 10, 30);
-        roundings[1] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.MINUTES_OF_HOUR, timeZone),
+        roundings[0] = new RoundingInfo(Rounding.DateTimeUnit.SECOND_OF_MINUTE,
+            timeZone, 1000L, "s",1, 5, 10, 30);
+        roundings[1] = new RoundingInfo(Rounding.DateTimeUnit.MINUTES_OF_HOUR, timeZone,
             60 * 1000L, "m", 1, 5, 10, 30);
-        roundings[2] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.HOUR_OF_DAY, timeZone),
-            60 * 60 * 1000L, "h",1, 3, 12);
-        roundings[3] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.DAY_OF_MONTH, timeZone),
+        roundings[2] = new RoundingInfo(Rounding.DateTimeUnit.HOUR_OF_DAY, timeZone,
+            60 * 60 * 1000L, "h", 1, 3, 12);
+        roundings[3] = new RoundingInfo(Rounding.DateTimeUnit.DAY_OF_MONTH, timeZone,
             24 * 60 * 60 * 1000L, "d", 1, 7);
-        roundings[4] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.MONTH_OF_YEAR, timeZone),
+        roundings[4] = new RoundingInfo(Rounding.DateTimeUnit.MONTH_OF_YEAR, timeZone,
             30 * 24 * 60 * 60 * 1000L, "M", 1, 3);
-        roundings[5] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.YEAR_OF_CENTURY, timeZone),
+        roundings[5] = new RoundingInfo(Rounding.DateTimeUnit.YEAR_OF_CENTURY, timeZone,
             365 * 24 * 60 * 60 * 1000L, "y", 1, 5, 10, 20, 50, 100);
-        return roundings;
+
+        for (int i = 0; i < roundings.length; i++) {
+            RoundingInfo roundingInfo = roundings[i];
+            if (roundingInfo.getDateTimeUnit().equals(minimumInterval)) {
+                indexToSliceFrom = i;
+                break;
+            }
+        }
+        return Arrays.copyOfRange(roundings, indexToSliceFrom, roundings.length);
     }
 
     public static AutoDateHistogramAggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException {
@@ -92,6 +116,22 @@ public class AutoDateHistogramAggregationBuilder
 
     private int numBuckets = 10;
 
+    private String minimumIntervalExpression;
+
+    public String getMinimumIntervalExpression() {
+        return minimumIntervalExpression;
+    }
+
+    public AutoDateHistogramAggregationBuilder setMinimumIntervalExpression(String minimumIntervalExpression) {
+        if (minimumIntervalExpression != null && !ALLOWED_INTERVALS.containsValue(minimumIntervalExpression)) {
+            throw new IllegalArgumentException(MINIMUM_INTERVAL_FIELD.getPreferredName() +
+                " must be one of [" + ALLOWED_INTERVALS.values().toString() + "]");
+        }
+        this.minimumIntervalExpression = minimumIntervalExpression;
+        return this;
+    }
+
+
     /** Create a new builder with the given name. */
     public AutoDateHistogramAggregationBuilder(String name) {
         super(name, ValuesSourceType.NUMERIC, ValueType.DATE);
@@ -101,12 +141,17 @@ public class AutoDateHistogramAggregationBuilder
     public AutoDateHistogramAggregationBuilder(StreamInput in) throws IOException {
         super(in, ValuesSourceType.NUMERIC, ValueType.DATE);
         numBuckets = in.readVInt();
+        //TODO[PCS] update after backport
+        if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
+            minimumIntervalExpression = in.readOptionalString();
+        }
     }
 
     protected AutoDateHistogramAggregationBuilder(AutoDateHistogramAggregationBuilder clone, Builder factoriesBuilder,
             Map<String, Object> metaData) {
         super(clone, factoriesBuilder, metaData);
         this.numBuckets = clone.numBuckets;
+        this.minimumIntervalExpression = clone.minimumIntervalExpression;
     }
 
     @Override
@@ -117,6 +162,9 @@ public class AutoDateHistogramAggregationBuilder
     @Override
     protected void innerWriteTo(StreamOutput out) throws IOException {
         out.writeVInt(numBuckets);
+        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
+            out.writeOptionalString(minimumIntervalExpression);
+        }
     }
 
     @Override
@@ -139,7 +187,7 @@ public class AutoDateHistogramAggregationBuilder
     @Override
     protected ValuesSourceAggregatorFactory<Numeric, ?> innerBuild(SearchContext context, ValuesSourceConfig<Numeric> config,
             AggregatorFactory<?> parent, Builder subFactoriesBuilder) throws IOException {
-        RoundingInfo[] roundings = buildRoundings(timeZone());
+        RoundingInfo[] roundings = buildRoundings(timeZone(), getMinimumIntervalExpression());
         int maxRoundingInterval = Arrays.stream(roundings,0, roundings.length-1)
             .map(rounding -> rounding.innerIntervals)
             .flatMapToInt(Arrays::stream)
@@ -152,7 +200,9 @@ public class AutoDateHistogramAggregationBuilder
             throw new IllegalArgumentException(NUM_BUCKETS_FIELD.getPreferredName()+
                 " must be less than " + bucketCeiling);
         }
-        return new AutoDateHistogramAggregatorFactory(name, config, numBuckets, roundings, context, parent, subFactoriesBuilder, metaData);
+        return new AutoDateHistogramAggregatorFactory(name, config, numBuckets, roundings, context, parent,
+            subFactoriesBuilder,
+            metaData);
     }
 
     static Rounding createRounding(Rounding.DateTimeUnit interval, ZoneId timeZone) {
@@ -167,18 +217,19 @@ public class AutoDateHistogramAggregationBuilder
     @Override
     protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(NUM_BUCKETS_FIELD.getPreferredName(), numBuckets);
+        builder.field(MINIMUM_INTERVAL_FIELD.getPreferredName(), minimumIntervalExpression);
         return builder;
     }
 
     @Override
     protected int innerHashCode() {
-        return Objects.hash(numBuckets);
+        return Objects.hash(numBuckets, minimumIntervalExpression);
     }
 
     @Override
     protected boolean innerEquals(Object obj) {
         AutoDateHistogramAggregationBuilder other = (AutoDateHistogramAggregationBuilder) obj;
-        return Objects.equals(numBuckets, other.numBuckets);
+        return Objects.equals(numBuckets, other.numBuckets) && Objects.equals(minimumIntervalExpression, other.minimumIntervalExpression);
     }
 
     public static class RoundingInfo implements Writeable {
@@ -186,12 +237,22 @@ public class AutoDateHistogramAggregationBuilder
         final int[] innerIntervals;
         final long roughEstimateDurationMillis;
         final String unitAbbreviation;
-
-        public RoundingInfo(Rounding rounding, long roughEstimateDurationMillis, String unitAbbreviation, int... innerIntervals) {
-            this.rounding = rounding;
+        final String dateTimeUnit;
+
+        public RoundingInfo(Rounding.DateTimeUnit dateTimeUnit,
+                            ZoneId timeZone,
+                            long roughEstimateDurationMillis,
+                            String unitAbbreviation,
+                            int... innerIntervals) {
+            this.rounding = createRounding(dateTimeUnit, timeZone);
             this.roughEstimateDurationMillis = roughEstimateDurationMillis;
             this.unitAbbreviation = unitAbbreviation;
             this.innerIntervals = innerIntervals;
+            Objects.requireNonNull(dateTimeUnit, "dateTimeUnit cannot be null");
+            if (!ALLOWED_INTERVALS.containsKey(dateTimeUnit)) {
+                throw new IllegalArgumentException("dateTimeUnit must be one of " + ALLOWED_INTERVALS.keySet().toString());
+            }
+            this.dateTimeUnit = ALLOWED_INTERVALS.get(dateTimeUnit);
         }
 
         public RoundingInfo(StreamInput in) throws IOException {
@@ -199,6 +260,7 @@ public class AutoDateHistogramAggregationBuilder
             roughEstimateDurationMillis = in.readVLong();
             innerIntervals = in.readIntArray();
             unitAbbreviation = in.readString();
+            dateTimeUnit = in.readString();
         }
 
         @Override
@@ -207,19 +269,22 @@ public class AutoDateHistogramAggregationBuilder
             out.writeVLong(roughEstimateDurationMillis);
             out.writeIntArray(innerIntervals);
             out.writeString(unitAbbreviation);
+            out.writeString(dateTimeUnit);
         }
 
         public int getMaximumInnerInterval() {
             return innerIntervals[innerIntervals.length - 1];
         }
 
+        public String getDateTimeUnit() { return this.dateTimeUnit; }
+
         public long getRoughEstimateDurationMillis() {
             return roughEstimateDurationMillis;
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(rounding, Arrays.hashCode(innerIntervals));
+            return Objects.hash(rounding, Arrays.hashCode(innerIntervals), dateTimeUnit);
         }
 
         @Override
@@ -231,8 +296,10 @@ public class AutoDateHistogramAggregationBuilder
                 return false;
             }
             RoundingInfo other = (RoundingInfo) obj;
-            return Objects.equals(rounding, other.rounding) &&
-                    Objects.deepEquals(innerIntervals, other.innerIntervals);
+            return Objects.equals(rounding, other.rounding)
+                && Objects.deepEquals(innerIntervals, other.innerIntervals)
+                && Objects.equals(dateTimeUnit, other.dateTimeUnit)
+                ;
         }
     }
 }

+ 8 - 3
server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorFactory.java

@@ -40,9 +40,14 @@ public final class AutoDateHistogramAggregatorFactory
     private final int numBuckets;
     private RoundingInfo[] roundingInfos;
 
-    public AutoDateHistogramAggregatorFactory(String name, ValuesSourceConfig<Numeric> config, int numBuckets, RoundingInfo[] roundingInfos,
-            SearchContext context, AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder,
-            Map<String, Object> metaData) throws IOException {
+    public AutoDateHistogramAggregatorFactory(String name,
+                                              ValuesSourceConfig<Numeric> config,
+                                              int numBuckets,
+                                              RoundingInfo[] roundingInfos,
+                                              SearchContext context,
+                                              AggregatorFactory<?> parent,
+                                              AggregatorFactories.Builder subFactoriesBuilder,
+                                              Map<String, Object> metaData) throws IOException {
         super(name, config, context, parent, subFactoriesBuilder, metaData);
         this.numBuckets = numBuckets;
         this.roundingInfos = roundingInfos;

+ 1 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/AutoDateHistogramTests.java

@@ -29,6 +29,7 @@ public class AutoDateHistogramTests extends BaseAggregationTestCase<AutoDateHist
         AutoDateHistogramAggregationBuilder builder = new AutoDateHistogramAggregationBuilder(randomAlphaOfLengthBetween(1, 10));
         builder.field(INT_FIELD_NAME);
         builder.setNumBuckets(randomIntBetween(1, 100000));
+        //TODO[PCS]: add builder pattern here
         if (randomBoolean()) {
             builder.format("###.##");
         }

+ 67 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregationBuilderTests.java

@@ -0,0 +1,67 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.histogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class AutoDateHistogramAggregationBuilderTests extends ESTestCase {
+
+    public void testInvalidInterval() {
+        AutoDateHistogramAggregationBuilder builder = new AutoDateHistogramAggregationBuilder("name");
+        IllegalArgumentException wrongIntervalName = expectThrows(IllegalArgumentException.class,
+            () -> builder.setMinimumIntervalExpression("foobar"));
+        assertTrue(wrongIntervalName.getMessage().startsWith("minimum_interval must be one of"));
+    }
+
+    public void testBuildRoundingsWithNullParameter() {
+        int expectedLength = AutoDateHistogramAggregationBuilder.ALLOWED_INTERVALS.size();
+        AutoDateHistogramAggregationBuilder.RoundingInfo[] roundings = AutoDateHistogramAggregationBuilder.buildRoundings(null, null);
+        assertThat(roundings.length, equalTo(expectedLength));
+    }
+
+    public void testBuildRoundingsWithMinIntervalOfAYear() {
+        int[] expectedYearIntervals = { 1, 5, 10, 20, 50, 100 };
+        AutoDateHistogramAggregationBuilder.RoundingInfo[] roundings = AutoDateHistogramAggregationBuilder.buildRoundings(null, "year");
+        assertThat(roundings.length, equalTo(1));
+        AutoDateHistogramAggregationBuilder.RoundingInfo year = roundings[0];
+        assertEquals(year.unitAbbreviation, "y");
+        assertEquals(year.dateTimeUnit, "year");
+        assertEquals(year.roughEstimateDurationMillis, 31536000000L);
+        assertArrayEquals(year.innerIntervals, expectedYearIntervals);
+    }
+
+    public void testRoundingsMatchAllowedIntervals() {
+        AutoDateHistogramAggregationBuilder.RoundingInfo[] roundings = AutoDateHistogramAggregationBuilder.buildRoundings(
+            null, "second");
+        Set<String> actualDateTimeUnits = Arrays.stream(roundings)
+            .map(AutoDateHistogramAggregationBuilder.RoundingInfo::getDateTimeUnit)
+            .collect(Collectors.toSet());
+        Set<String> expectedDateTimeUnits = new HashSet<>(AutoDateHistogramAggregationBuilder.ALLOWED_INTERVALS.values());
+        assertEquals(actualDateTimeUnits, expectedDateTimeUnits);
+    }
+
+}

+ 8 - 8
server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java

@@ -44,7 +44,6 @@ import java.util.TreeMap;
 import static org.elasticsearch.common.unit.TimeValue.timeValueHours;
 import static org.elasticsearch.common.unit.TimeValue.timeValueMinutes;
 import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
-import static org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder.createRounding;
 import static org.hamcrest.Matchers.equalTo;
 
 public class InternalAutoDateHistogramTests extends InternalMultiBucketAggregationTestCase<InternalAutoDateHistogram> {
@@ -64,7 +63,7 @@ public class InternalAutoDateHistogramTests extends InternalMultiBucketAggregati
                                                        Map<String, Object> metaData,
                                                        InternalAggregations aggregations) {
 
-        roundingInfos = AutoDateHistogramAggregationBuilder.buildRoundings(null);
+        roundingInfos = AutoDateHistogramAggregationBuilder.buildRoundings(null, null);
         int nbBuckets = randomNumberOfBuckets();
         int targetBuckets = randomIntBetween(1, nbBuckets * 2 + 1);
         List<InternalAutoDateHistogram.Bucket> buckets = new ArrayList<>(nbBuckets);
@@ -93,12 +92,12 @@ public class InternalAutoDateHistogramTests extends InternalMultiBucketAggregati
         // Since we pass 0 as the starting index to getAppropriateRounding, we'll also use
         // an innerInterval that is quite large, such that targetBuckets * roundings[i].getMaximumInnerInterval()
         // will be larger than the estimate.
-        roundings[0] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.SECOND_OF_MINUTE, timeZone),
-            1000L, "s",1000);
-        roundings[1] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.MINUTES_OF_HOUR, timeZone),
-            60 * 1000L, "m",1, 5, 10, 30);
-        roundings[2] = new RoundingInfo(createRounding(Rounding.DateTimeUnit.HOUR_OF_DAY, timeZone),
-            60 * 60 * 1000L, "h",1, 3, 12);
+        roundings[0] = new RoundingInfo(Rounding.DateTimeUnit.SECOND_OF_MINUTE, timeZone,
+            1000L, "s", 1000);
+        roundings[1] = new RoundingInfo(Rounding.DateTimeUnit.MINUTES_OF_HOUR, timeZone,
+            60 * 1000L, "m", 1, 5, 10, 30);
+        roundings[2] = new RoundingInfo(Rounding.DateTimeUnit.HOUR_OF_DAY, timeZone,
+            60 * 60 * 1000L, "h", 1, 3, 12);
 
         OffsetDateTime timestamp = Instant.parse("2018-01-01T00:00:01.000Z").atOffset(ZoneOffset.UTC);
         // We want to pass a roundingIdx of zero, because in order to reproduce this bug, we need the function
@@ -109,6 +108,7 @@ public class InternalAutoDateHistogramTests extends InternalMultiBucketAggregati
         assertThat(result, equalTo(2));
     }
 
+
     @Override
     @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/39497")
     // TODO: When resolving the above AwaitsFix, just delete this override. Method is only overridden to apply the annotation.

+ 2 - 1
server/src/test/java/org/elasticsearch/search/aggregations/pipeline/PipelineAggregationHelperTests.java

@@ -172,7 +172,8 @@ public class PipelineAggregationHelperTests extends ESTestCase {
             case 2:
             default:
                 AutoDateHistogramAggregationBuilder.RoundingInfo[] roundings = new AutoDateHistogramAggregationBuilder.RoundingInfo[1];
-                factory = new AutoDateHistogramAggregatorFactory("name", mock(ValuesSourceConfig.class), 1, roundings,
+                factory = new AutoDateHistogramAggregatorFactory("name", mock(ValuesSourceConfig.class),
+                    1, roundings,
                     mock(SearchContext.class), null, new AggregatorFactories.Builder(), Collections.emptyMap());
         }