Browse Source

SQL: Use calendar_interval of 1d for HISTOGRAMs with 1 DAY intervals (#52749)

Andrei Stefan 5 years ago
parent
commit
556f5fa33b

+ 2 - 2
docs/reference/sql/functions/grouping.asciidoc

@@ -87,8 +87,8 @@ actually used will be `INTERVAL '2' DAY`. If the interval specified is less than
 
 [IMPORTANT]
 All intervals specified for a date/time HISTOGRAM will use a <<search-aggregations-bucket-datehistogram-aggregation,fixed interval>>
-in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR` AND `INTERVAL '1' MONTH`  where a calendar interval is used.
-The choice for a calendar interval was made for having a more intuitive result for YEAR and MONTH groupings. In the case of YEAR, for example, the calendar intervals consider a one year
+in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR`, `INTERVAL '1' MONTH` and `INTERVAL '1' DAY`, where a calendar interval is used.
+A calendar interval was chosen to give more intuitive results for YEAR, MONTH and DAY groupings. In the case of YEAR, for example, a calendar interval considers a one-year
 bucket as the one starting on January 1st that specific year, whereas a fixed interval one-year-bucket considers one year as a number
 of milliseconds (for example, `31536000000ms` corresponding to 365 days, 24 hours per day, 60 minutes per hour etc.). With fixed intervals,
 the day of February 5th, 2019 for example, belongs to a bucket that starts on December 20th, 2018 and {es} (and implicitly {es-sql}) would

+ 24 - 0
x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec

@@ -531,6 +531,30 @@ null                    |10             |null
 1953-11-01T00:00:00.000Z|1              |1953-11-07T00:00:00.000Z
 ;
 
+histogramOneDay
+schema::h:ts|c:l|birth_date:ts
+SELECT HISTOGRAM(birth_date, INTERVAL 1 DAY) AS h, COUNT(*) as c, birth_date FROM test_emp WHERE YEAR(birth_date) BETWEEN 1959 AND 1960 GROUP BY h, birth_date ORDER BY h ASC;
+
+           h            |       c       |       birth_date       
+------------------------+---------------+------------------------
+1959-01-27T00:00:00.000Z|1              |1959-01-27T00:00:00.000Z
+1959-04-07T00:00:00.000Z|1              |1959-04-07T00:00:00.000Z
+1959-07-23T00:00:00.000Z|2              |1959-07-23T00:00:00.000Z
+1959-08-10T00:00:00.000Z|1              |1959-08-10T00:00:00.000Z
+1959-08-19T00:00:00.000Z|1              |1959-08-19T00:00:00.000Z
+1959-10-01T00:00:00.000Z|1              |1959-10-01T00:00:00.000Z
+1959-12-03T00:00:00.000Z|1              |1959-12-03T00:00:00.000Z
+1959-12-25T00:00:00.000Z|1              |1959-12-25T00:00:00.000Z
+1960-02-20T00:00:00.000Z|1              |1960-02-20T00:00:00.000Z
+1960-03-09T00:00:00.000Z|1              |1960-03-09T00:00:00.000Z
+1960-05-25T00:00:00.000Z|1              |1960-05-25T00:00:00.000Z
+1960-07-20T00:00:00.000Z|1              |1960-07-20T00:00:00.000Z
+1960-08-09T00:00:00.000Z|1              |1960-08-09T00:00:00.000Z
+1960-09-06T00:00:00.000Z|1              |1960-09-06T00:00:00.000Z
+1960-10-04T00:00:00.000Z|1              |1960-10-04T00:00:00.000Z
+1960-12-17T00:00:00.000Z|1              |1960-12-17T00:00:00.000Z
+;
+
 histogramDateTimeWithMonthOnTop
 schema::h:i|c:l
 SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;

+ 1 - 0
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java

@@ -31,6 +31,7 @@ public class Histogram extends GroupingFunction {
     private final ZoneId zoneId;
     public static String YEAR_INTERVAL = DateHistogramInterval.YEAR.toString();
     public static String MONTH_INTERVAL = DateHistogramInterval.MONTH.toString();
+    public static String DAY_INTERVAL = DateHistogramInterval.DAY.toString();
 
     public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) {
         super(source, field, Collections.singletonList(interval));

+ 15 - 2
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java

@@ -41,6 +41,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.CompoundNumeric
 import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits;
 import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
 import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeHistogramFunction;
+import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalDayTime;
 import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalYearMonth;
 import org.elasticsearch.xpack.sql.expression.literal.interval.Intervals;
 import org.elasticsearch.xpack.sql.plan.logical.Pivot;
@@ -80,6 +81,7 @@ import org.elasticsearch.xpack.sql.session.EmptyExecutable;
 import org.elasticsearch.xpack.sql.util.Check;
 import org.elasticsearch.xpack.sql.util.DateUtils;
 
+import java.time.Duration;
 import java.time.Period;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -90,6 +92,7 @@ import java.util.Map.Entry;
 import java.util.concurrent.atomic.AtomicReference;
 
 import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine;
+import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.DAY_INTERVAL;
 import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.MONTH_INTERVAL;
 import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.YEAR_INTERVAL;
 import static org.elasticsearch.xpack.sql.planner.QueryTranslator.toAgg;
@@ -332,14 +335,24 @@ class QueryFolder extends RuleExecutor<PhysicalPlan> {
 
                                     // When the histogram is `INTERVAL '1' YEAR` or `INTERVAL '1' MONTH`, the interval used in 
                                     // the ES date_histogram will be a calendar_interval with value "1y" or "1M" respectively.
-                                    // All other intervals will be fixed_intervals expressed in ms.
                                     if (field instanceof FieldAttribute) {
                                         key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), calendarInterval, h.zoneId());
                                     } else if (field instanceof Function) {
                                         key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), calendarInterval, h.zoneId());
                                     }
                                 }
-                                // typical interval
+                                // interval of exactly 1 day
+                                else if (value instanceof IntervalDayTime 
+                                        && ((IntervalDayTime) value).interval().equals(Duration.ofDays(1))) {
+                                    // When the histogram is `INTERVAL '1' DAY`, the interval used in
+                                    // the ES date_histogram will be a calendar_interval with value "1d".
+                                    if (field instanceof FieldAttribute) {
+                                        key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), DAY_INTERVAL, h.zoneId());
+                                    } else if (field instanceof Function) {
+                                        key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), DAY_INTERVAL, h.zoneId());
+                                    }
+                                }
+                                // All other intervals will be fixed_intervals expressed in ms.
                                 else {
                                     long intervalAsMillis = Intervals.inMillis(h.interval());
 

+ 39 - 0
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java

@@ -1078,6 +1078,45 @@ public class QueryTranslatorTests extends ESTestCase {
                     + "\"fixed_interval\":\"12960000000ms\",\"time_zone\":\"Z\"}}}]}}}"));
     }
 
+    public void testGroupByOneDayHistogramQueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL 1 DAY) AS h FROM test GROUP BY h");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("h", eqe.output().get(0).qualifiedName());
+        assertEquals(DATETIME, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+            endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+                    + "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+    
+    public void testGroupByMoreDaysHistogramQueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL '1 5' DAY TO HOUR) AS h FROM test GROUP BY h");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("h", eqe.output().get(0).qualifiedName());
+        assertEquals(DATETIME, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+            endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+                    + "\"fixed_interval\":\"104400000ms\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+
+    public void testGroupByMoreDaysHistogram_WithFunction_QueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date + INTERVAL 5 DAYS, INTERVAL 1 DAY) AS h FROM test GROUP BY h");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("h", eqe.output().get(0).qualifiedName());
+        assertEquals(DATETIME, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+                endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.add(" +
+                        "InternalSqlScriptUtils.docValue(doc,params.v0),InternalSqlScriptUtils.intervalDayTime(params.v1,params.v2))\"," +
+                        "\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"PT120H\",\"v2\":\"INTERVAL_DAY\"}}," +
+                        "\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
+                        "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+
     public void testGroupByYearAndScalarsQueryTranslator() {
         PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
         assertEquals(EsQueryExec.class, p.getClass());