Browse Source

SQL: Fix issue with GROUP BY YEAR() (#49559)

Grouping By YEAR() is translated to a histogram aggregation, but
previously if there was a scalar function invloved (e.g.:
`YEAR(date + INTERVAL 2 YEARS)`), there was no proper script created
and the histogram was applied on a field with name: `date + INTERVAL 2 YEARS`
which doesn't make sense, and resulted in null result.

Check the underlying field of YEAR() and if it's a function call
`asScript()` to properly get the painless script on which the histogram
is applied.

Fixes: #49386
Marios Trivyzas 5 years ago
parent
commit
93c37abc94

+ 23 - 0
x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec

@@ -534,6 +534,29 @@ SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY
 null           |10   
 ;
 
+histogramYearOnDateTimeWithScalars
+schema::year:i|c:l
+SELECT YEAR(CAST(birth_date + INTERVAL 5 YEARS AS DATE) + INTERVAL 20 MONTHS) AS year, COUNT(*) as c FROM test_emp GROUP BY 1;
+
+     year      |   c
+---------------+---------------
+null           |10
+1958           |2
+1959           |12
+1960           |7
+1961           |7
+1962           |4
+1963           |5
+1964           |5
+1965           |7
+1966           |9
+1967           |7
+1968           |7
+1969           |8
+1970           |6
+1971           |4
+;
+
 histogramNumericWithExpression
 schema::h:i|c:l
 SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;

+ 14 - 4
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java

@@ -283,10 +283,20 @@ final class QueryTranslator {
                     // dates are handled differently because of date histograms
                     if (exp instanceof DateTimeHistogramFunction) {
                         DateTimeHistogramFunction dthf = (DateTimeHistogramFunction) exp;
-                        if (dthf.calendarInterval() != null) {
-                            key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.calendarInterval(), dthf.zoneId());
-                        } else {
-                            key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.fixedInterval(), dthf.zoneId());
+                        Expression field = dthf.field();
+                        if (field instanceof FieldAttribute) {
+                            if (dthf.calendarInterval() != null) {
+                                key = new GroupByDateHistogram(aggId, nameOf(field), dthf.calendarInterval(), dthf.zoneId());
+                            } else {
+                                key = new GroupByDateHistogram(aggId, nameOf(field), dthf.fixedInterval(), dthf.zoneId());
+                            }
+                        } else if (field instanceof Function) {
+                            ScriptTemplate script = ((Function) field).asScript();
+                            if (dthf.calendarInterval() != null) {
+                                key = new GroupByDateHistogram(aggId, script, dthf.calendarInterval(), dthf.zoneId());
+                            } else {
+                                key = new GroupByDateHistogram(aggId, script, dthf.fixedInterval(), dthf.zoneId());
+                            }
                         }
                     }
                     // all other scalar functions become a script

+ 16 - 0
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java

@@ -999,6 +999,22 @@ public class QueryTranslatorTests extends ESTestCase {
                     + "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
     }
 
+    public void testGroupByYearAndScalarsQueryTranslator() {
+        PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        assertEquals(1, eqe.output().size());
+        assertEquals("YEAR(CAST(date + INTERVAL 5 months AS DATE))", eqe.output().get(0).qualifiedName());
+        assertEquals(DataType.INTEGER, eqe.output().get(0).dataType());
+        assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
+                endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast(" +
+                        "InternalSqlScriptUtils.add(InternalSqlScriptUtils.docValue(doc,params.v0)," +
+                        "InternalSqlScriptUtils.intervalYearMonth(params.v1,params.v2)),params.v3)\"," +
+                        "\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"P5M\",\"v2\":\"INTERVAL_MONTH\"," +
+                        "\"v3\":\"DATE\"}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
+                        "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
+    }
+
     public void testGroupByHistogramWithDate() {
         LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)");
         assertTrue(p instanceof Aggregate);