Browse Source

SQL: handle MIN and MAX functions on dates in Painless scripts (#57605)

* Convert the result of numeric aggregations (MIN, MAX) to date/datetime
in Painless scripts
Andrei Stefan 5 years ago
parent
commit
f1de99e2a6

+ 14 - 2
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java

@@ -14,6 +14,7 @@ import org.elasticsearch.xpack.ql.expression.function.grouping.GroupingFunction;
 import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
 import org.elasticsearch.xpack.ql.expression.gen.script.Scripts;
 import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataTypes;
 import org.elasticsearch.xpack.ql.util.DateUtils;
 
 import java.time.OffsetTime;
@@ -108,19 +109,30 @@ public abstract class ScalarFunction extends Function {
     }
     
     protected ScriptTemplate scriptWithAggregate(AggregateFunction aggregate) {
-        String template = "{}";
+        String template = basicTemplate(aggregate);
         return new ScriptTemplate(processScript(template),
                 paramsBuilder().agg(aggregate).build(),
                 dataType());
     }
 
+    // This method isn't actually used at the moment, since there is no grouping function (such as HISTOGRAM)
+    // that currently results in a script being generated.
     protected ScriptTemplate scriptWithGrouping(GroupingFunction grouping) {
-        String template = "{}";
+        String template = basicTemplate(grouping);
         return new ScriptTemplate(processScript(template),
                 paramsBuilder().grouping(grouping).build(),
                 dataType());
     }
 
+    // FIXME: this needs to be refactored to account for different data types in different projects (e.g. DATE from SQL)
+    private String basicTemplate(Function function) {
+        if (function.dataType().name().equals("DATE") || function.dataType() == DataTypes.DATETIME) {
+            return "{sql}.asDateTime({})";
+        } else {
+            return "{}";
+        }
+    }
+
     protected ScriptTemplate scriptWithField(FieldAttribute field) {
         return new ScriptTemplate(processScript(Scripts.DOC_VALUE),
                 paramsBuilder().variable(field.name()).build(),

+ 79 - 0
x-pack/plugin/sql/qa/server/src/main/resources/agg.csv-spec

@@ -1188,3 +1188,82 @@ GROUP BY gender ORDER BY gender;
 17811.071545718776|1.2151168881502939E11|3.1723426960671306E8|F
 15904.093950318531|1.699198993070239E11 |2.529402043805585E8 |M
 ;
+
+
+aggWithMinOfDatesAndCastAsDate
+schema::g:s|m:date
+SELECT gender g, MIN(CAST(birth_date AS DATE)) m FROM test_emp GROUP BY gender HAVING MIN(CAST(birth_date AS DATE)) < NOW() ORDER BY g;
+
+       g       |           m            
+---------------+------------------------
+null           |1953-01-23T00:00:00.000Z
+F              |1952-04-19T00:00:00.000Z
+M              |1952-02-27T00:00:00.000Z
+;
+
+aggWithMinOfDatetime
+schema::g:s|m:ts
+SELECT gender g, MIN(birth_date) m FROM test_emp GROUP BY gender HAVING m < NOW() ORDER BY gender;
+
+       g       |           m            
+---------------+------------------------
+null           |1953-01-23T00:00:00.000Z
+F              |1952-04-19T00:00:00.000Z
+M              |1952-02-27T00:00:00.000Z
+;
+
+aggWithMinOfDatetimeAndDate
+schema::g:s|mc:date|m:ts
+SELECT gender g, MIN(CAST(birth_date AS DATE)) mc, MIN(birth_date) m FROM test_emp GROUP BY gender HAVING MIN(CAST(birth_date AS DATE)) < NOW() AND MIN(birth_date) <= CURRENT_TIMESTAMP() ORDER BY g;
+
+       g       |           mc           |           m            
+---------------+------------------------+------------------------
+null           |1953-01-23T00:00:00.000Z|1953-01-23T00:00:00.000Z
+F              |1952-04-19T00:00:00.000Z|1952-04-19T00:00:00.000Z
+M              |1952-02-27T00:00:00.000Z|1952-02-27T00:00:00.000Z
+;
+
+aggWithMaxOfDatetime
+schema::g:s|m:ts
+SELECT gender g, MAX(birth_date) m FROM test_emp GROUP BY gender HAVING m < NOW() ORDER BY gender;
+
+       g       |           m            
+---------------+------------------------
+null           |1963-06-07T00:00:00.000Z
+F              |1964-10-18T00:00:00.000Z
+M              |1965-01-03T00:00:00.000Z
+;
+
+aggWithMaxOfDate
+schema::g:s|m:date
+SELECT gender g, MAX(CAST(birth_date AS DATE)) m FROM test_emp GROUP BY gender HAVING m < CAST('2020-01-01' AS DATE) ORDER BY gender;
+
+       g       |           m            
+---------------+------------------------
+null           |1963-06-07T00:00:00.000Z
+F              |1964-10-18T00:00:00.000Z
+M              |1965-01-03T00:00:00.000Z
+;
+
+aggWithMinMaxOfDatetime
+schema::g:s|mx:ts|mn:ts
+SELECT gender g, MAX(birth_date) mx, MIN(birth_date) mn FROM test_emp GROUP BY gender HAVING mn < NOW() AND mx > CAST('1950-01-01' AS DATE) ORDER BY gender;
+
+       g       |           mx           |           mn           
+---------------+------------------------+------------------------
+null           |1963-06-07T00:00:00.000Z|1953-01-23T00:00:00.000Z
+F              |1964-10-18T00:00:00.000Z|1952-04-19T00:00:00.000Z
+M              |1965-01-03T00:00:00.000Z|1952-02-27T00:00:00.000Z
+;
+
+aggWithMinMaxOfDate
+schema::g:s|mx:date|mn:date
+SELECT gender g, MAX(CAST(birth_date AS DATE)) mx, MIN(CAST(birth_date AS DATE)) mn FROM test_emp GROUP BY gender HAVING mn < CAST('2020-01-01' AS DATE) OR mx < CAST('1980-01-01T12:00:00' AS DATETIME) ORDER BY gender;
+
+       g       |           mx           |           mn           
+---------------+------------------------+------------------------
+null           |1963-06-07T00:00:00.000Z|1953-01-23T00:00:00.000Z
+F              |1964-10-18T00:00:00.000Z|1952-04-19T00:00:00.000Z
+M              |1965-01-03T00:00:00.000Z|1952-02-27T00:00:00.000Z
+;
+

+ 37 - 2
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java

@@ -912,8 +912,8 @@ public class QueryTranslatorTests extends ESTestCase {
                 "\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":" +
                 "{\"bucket_selector\":{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"" +
                 "InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(InternalSqlScriptUtils.coalesce(" +
-                "[params.a0]),InternalSqlScriptUtils.asDateTime(params.v0)))\",\"lang\":\"painless\",\"params\":" +
-                "{\"v0\":\"2020-01-01T00:00:00.000Z\"}}"));
+                "[InternalSqlScriptUtils.asDateTime(params.a0)]),InternalSqlScriptUtils.asDateTime(params.v0)))\"," +
+                "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-01-01T00:00:00.000Z\"}}"));
         assertTrue(esQExec.queryContainer().query() instanceof ScriptQuery);
         ScriptQuery sq = (ScriptQuery) esQExec.queryContainer().query();
         assertEquals("InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(" +
@@ -2124,4 +2124,39 @@ public class QueryTranslatorTests extends ESTestCase {
             }
         }
     }
+
+    public void testScriptsInsideAggregateFunctions_WithDatetimeField() {
+        PhysicalPlan p = optimizeAndPlan("SELECT MAX(date) FROM test HAVING MAX(date) > CAST('2020-05-03T12:34:56.000Z' AS DATETIME)");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        AggregationBuilder aggBuilder = eqe.queryContainer().aggs().asAggBuilder();
+        assertEquals(1, aggBuilder.getSubAggregations().size());
+        assertEquals(1, aggBuilder.getPipelineAggregations().size());
+        String aggName = aggBuilder.getSubAggregations().iterator().next().getName();
+        String havingName = aggBuilder.getPipelineAggregations().iterator().next().getName();
+        assertThat(eqe.queryContainer().toString().replaceAll("\\s+", ""), containsString(
+            "\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":{\"bucket_selector\":"
+            + "{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"InternalQlScriptUtils.nullSafeFilter("
+            + "InternalQlScriptUtils.gt(InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)))\","
+            + "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-05-03T12:34:56.000Z\"}},\"gap_policy\":\"skip\"}}}}}}"));
+    }
+
+    public void testScriptsInsideAggregateFunctions_WithDateField_AndExtendedStats() {
+        PhysicalPlan p = optimizeAndPlan("SELECT MIN(CAST(date AS DATE)), MAX(CAST(date AS DATE)) FROM test HAVING "
+            + "MIN(CAST(date AS DATE)) > CAST('2020-05-03T12:34:56.000Z' AS DATE)");
+        assertEquals(EsQueryExec.class, p.getClass());
+        EsQueryExec eqe = (EsQueryExec) p;
+        AggregationBuilder aggBuilder = eqe.queryContainer().aggs().asAggBuilder();
+        assertEquals(1, aggBuilder.getSubAggregations().size());
+        assertEquals(1, aggBuilder.getPipelineAggregations().size());
+        String aggName = aggBuilder.getSubAggregations().iterator().next().getName();
+        String havingName = aggBuilder.getPipelineAggregations().iterator().next().getName();
+        assertThat(eqe.queryContainer().toString().replaceAll("\\s+", ""), containsString(
+            "\"aggregations\":{\"" + aggName + "\":{\"stats\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast("
+            + "InternalQlScriptUtils.docValue(doc,params.v0),params.v1)\",\"lang\":\"painless\",\"params\":"
+            + "{\"v0\":\"date\",\"v1\":\"DATE\"}}}},\"" + havingName + "\":{\"bucket_selector\":{\"buckets_path\":"
+            + "{\"a0\":\"" + aggName + ".min\"},\"script\":{\"source\":\"InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt("
+            + "InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)))\",\"lang\":\"painless\","
+            + "\"params\":{\"v0\":\"2020-05-03T00:00:00.000Z\"}},\"gap_policy\":\"skip\"}}}}}}"));
+    }
 }