Browse Source

Esql bucket function for date nanos (#118474) (#118670)

This adds support for running the bucket function over a date nanos field. Code-wise, this just delegates to DateTrunc, which already supports date nanos, so most of the PR is just tests and the auto-generated docs.

Resolves #118031
Mark Tozzi 10 months ago
parent
commit
9166cd8d37

+ 6 - 0
docs/changelog/118474.yaml

@@ -0,0 +1,6 @@
+pr: 118474
+summary: Esql bucket function for date nanos
+area: ES|QL
+type: enhancement
+issues:
+ - 118031

+ 306 - 0
docs/reference/esql/functions/kibana/definition/bucket.json

@@ -310,6 +310,312 @@
       "variadic" : false,
       "returnType" : "date"
     },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "date_period",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "date",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "date",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "date",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "keyword",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "date",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "text",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "keyword",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "date",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "keyword",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "keyword",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "keyword",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "text",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "text",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "date",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "text",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "keyword",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        },
+        {
+          "name" : "from",
+          "type" : "text",
+          "optional" : true,
+          "description" : "Start of the range. Can be a number, a date or a date expressed as a string."
+        },
+        {
+          "name" : "to",
+          "type" : "text",
+          "optional" : true,
+          "description" : "End of the range. Can be a number, a date or a date expressed as a string."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "Numeric or date expression from which to derive buckets."
+        },
+        {
+          "name" : "buckets",
+          "type" : "time_duration",
+          "optional" : false,
+          "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
     {
       "params" : [
         {

+ 11 - 0
docs/reference/esql/functions/types/bucket.asciidoc

@@ -16,6 +16,17 @@ date | integer | text | date | date
 date | integer | text | keyword | date
 date | integer | text | text | date
 date | time_duration | | | date
+date_nanos | date_period | | | date_nanos
+date_nanos | integer | date | date | date_nanos
+date_nanos | integer | date | keyword | date_nanos
+date_nanos | integer | date | text | date_nanos
+date_nanos | integer | keyword | date | date_nanos
+date_nanos | integer | keyword | keyword | date_nanos
+date_nanos | integer | keyword | text | date_nanos
+date_nanos | integer | text | date | date_nanos
+date_nanos | integer | text | keyword | date_nanos
+date_nanos | integer | text | text | date_nanos
+date_nanos | time_duration | | | date_nanos
 double | double | | | double
 double | integer | double | double | double
 double | integer | double | integer | double

+ 74 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec

@@ -548,6 +548,80 @@ yr:date_nanos                  | mo:date_nanos                  | mn:date_nanos
 2023-01-01T00:00:00.000000000Z | 2023-10-01T00:00:00.000000000Z | 2023-10-23T12:10:00.000000000Z | 2023-10-23T12:15:03.360000000Z
 ;
 
+Bucket Date nanos by Year
+required_capability: date_trunc_date_nanos
+required_capability: date_nanos_bucket
+
+FROM date_nanos 
+| WHERE millis > "2020-01-01" 
+| STATS ct = count(*) BY yr = BUCKET(nanos, 1 year);
+
+ct:long | yr:date_nanos
+8       | 2023-01-01T00:00:00.000000000Z 
+;
+
+Bucket Date nanos by Year, range version
+required_capability: date_trunc_date_nanos
+required_capability: date_nanos_bucket
+
+FROM date_nanos 
+| WHERE millis > "2020-01-01" 
+| STATS ct = count(*) BY yr = BUCKET(nanos, 5, "1999-01-01", NOW());
+
+ct:long | yr:date_nanos
+8       | 2023-01-01T00:00:00.000000000Z 
+;
+
+Bucket Date nanos by Month
+required_capability: date_trunc_date_nanos
+required_capability: date_nanos_bucket
+
+FROM date_nanos 
+| WHERE millis > "2020-01-01" 
+| STATS ct = count(*) BY mo = BUCKET(nanos, 1 month);
+
+ct:long | mo:date_nanos
+8       | 2023-10-01T00:00:00.000000000Z 
+;
+
+Bucket Date nanos by Month, range version
+required_capability: date_trunc_date_nanos
+required_capability: date_nanos_bucket
+
+FROM date_nanos 
+| WHERE millis > "2020-01-01" 
+| STATS ct = count(*) BY mo = BUCKET(nanos, 20, "2023-01-01", "2023-12-31");
+
+ct:long | mo:date_nanos
+8       | 2023-10-01T00:00:00.000000000Z 
+;
+
+Bucket Date nanos by Week, range version
+required_capability: date_trunc_date_nanos
+required_capability: date_nanos_bucket
+
+FROM date_nanos 
+| WHERE millis > "2020-01-01" 
+| STATS ct = count(*) BY mo = BUCKET(nanos, 55, "2023-01-01", "2023-12-31");
+
+ct:long | mo:date_nanos
+8       | 2023-10-23T00:00:00.000000000Z 
+;
+Bucket Date nanos by 10 minutes
+required_capability: date_trunc_date_nanos
+required_capability: date_nanos_bucket
+
+FROM date_nanos 
+| WHERE millis > "2020-01-01" 
+| STATS ct = count(*) BY mn = BUCKET(nanos, 10 minutes);
+
+ct:long | mn:date_nanos                 
+4       | 2023-10-23T13:50:00.000000000Z
+1       | 2023-10-23T13:30:00.000000000Z
+1       | 2023-10-23T12:20:00.000000000Z
+2       | 2023-10-23T12:10:00.000000000Z
+;
+
 Add date nanos
 required_capability: date_nanos_add_subtract
 

+ 5 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -357,6 +357,11 @@ public class EsqlCapabilities {
          */
         DATE_TRUNC_DATE_NANOS(),
 
+        /**
+         * Support date nanos values as the field argument to bucket
+         */
+        DATE_NANOS_BUCKET(),
+
         /**
          * support aggregations on date nanos
          */

+ 6 - 6
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java

@@ -90,7 +90,7 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
     private final Expression to;
 
     @FunctionInfo(
-        returnType = { "double", "date" },
+        returnType = { "double", "date", "date_nanos" },
         description = """
             Creates groups of values - buckets - out of a datetime or numeric input.
             The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.""",
@@ -169,7 +169,7 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
         Source source,
         @Param(
             name = "field",
-            type = { "integer", "long", "double", "date" },
+            type = { "integer", "long", "double", "date", "date_nanos" },
             description = "Numeric or date expression from which to derive buckets."
         ) Expression field,
         @Param(
@@ -241,7 +241,7 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
 
     @Override
     public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
-        if (field.dataType() == DataType.DATETIME) {
+        if (field.dataType() == DataType.DATETIME || field.dataType() == DataType.DATE_NANOS) {
             Rounding.Prepared preparedRounding;
             if (buckets.dataType().isWholeNumber()) {
                 int b = ((Number) buckets.fold()).intValue();
@@ -314,8 +314,8 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
     }
 
     // supported parameter type combinations (1st, 2nd, 3rd, 4th):
-    // datetime, integer, string/datetime, string/datetime
-    // datetime, rounding/duration, -, -
+    // datetime/date_nanos, integer, string/datetime, string/datetime
+    // datetime/date_nanos, rounding/duration, -, -
     // numeric, integer, numeric, numeric
     // numeric, numeric, -, -
     @Override
@@ -329,7 +329,7 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
             return TypeResolution.TYPE_RESOLVED;
         }
 
-        if (fieldType == DataType.DATETIME) {
+        if (fieldType == DataType.DATETIME || fieldType == DataType.DATE_NANOS) {
             TypeResolution resolution = isType(
                 buckets,
                 dt -> dt.isWholeNumber() || DataType.isTemporalAmount(dt),

+ 90 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/BucketTests.java

@@ -12,15 +12,19 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Rounding;
+import org.elasticsearch.common.time.DateUtils;
 import org.elasticsearch.index.mapper.DateFieldMapper;
+import org.elasticsearch.logging.LogManager;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
 import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
 import org.hamcrest.Matcher;
+import org.hamcrest.Matchers;
 
 import java.time.Duration;
+import java.time.Instant;
 import java.time.Period;
 import java.util.ArrayList;
 import java.util.List;
@@ -38,6 +42,7 @@ public class BucketTests extends AbstractScalarFunctionTestCase {
     public static Iterable<Object[]> parameters() {
         List<TestCaseSupplier> suppliers = new ArrayList<>();
         dateCases(suppliers, "fixed date", () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z"));
+        dateNanosCases(suppliers, "fixed date nanos", () -> DateUtils.toLong(Instant.parse("2023-02-17T09:00:00.00Z")));
         dateCasesWithSpan(
             suppliers,
             "fixed date with period",
@@ -54,6 +59,22 @@ public class BucketTests extends AbstractScalarFunctionTestCase {
             Duration.ofDays(1L),
             "[86400000 in Z][fixed]"
         );
+        dateNanosCasesWithSpan(
+            suppliers,
+            "fixed date nanos with period",
+            () -> DateUtils.toLong(Instant.parse("2023-01-01T00:00:00.00Z")),
+            DataType.DATE_PERIOD,
+            Period.ofYears(1),
+            "[YEAR_OF_CENTURY in Z][fixed to midnight]"
+        );
+        dateNanosCasesWithSpan(
+            suppliers,
+            "fixed date nanos with duration",
+            () -> DateUtils.toLong(Instant.parse("2023-02-17T09:00:00.00Z")),
+            DataType.TIME_DURATION,
+            Duration.ofDays(1L),
+            "[86400000 in Z][fixed]"
+        );
         numberCases(suppliers, "fixed long", DataType.LONG, () -> 100L);
         numberCasesWithSpan(suppliers, "fixed long with span", DataType.LONG, () -> 100L);
         numberCases(suppliers, "fixed int", DataType.INTEGER, () -> 100);
@@ -142,6 +163,62 @@ public class BucketTests extends AbstractScalarFunctionTestCase {
         }));
     }
 
+    /**
+     * Registers a single {@code BUCKET} test case whose field is a {@code DATE_NANOS} value and whose
+     * {@code buckets} argument is a span literal of the given type.
+     *
+     * @param suppliers list the new test case is appended to
+     * @param name      name of the generated test case
+     * @param date      supplies the field value, passed as a {@code DATE_NANOS} value
+     * @param spanType  data type of the {@code buckets} argument
+     * @param span      value of the {@code buckets} argument, passed as a forced literal
+     * @param spanStr   not used by this method
+     */
+    private static void dateNanosCasesWithSpan(
+        List<TestCaseSupplier> suppliers,
+        String name,
+        LongSupplier date,
+        DataType spanType,
+        Object span,
+        String spanStr
+    ) {
+        List<DataType> signature = List.of(DataType.DATE_NANOS, spanType);
+        TestCaseSupplier supplier = new TestCaseSupplier(name, signature, () -> {
+            TestCaseSupplier.TypedData fieldArg = new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATE_NANOS, "field");
+            TestCaseSupplier.TypedData spanArg = new TestCaseSupplier.TypedData(span, spanType, "buckets").forceLiteral();
+            List<TestCaseSupplier.TypedData> typedData = new ArrayList<>();
+            typedData.add(fieldArg);
+            typedData.add(spanArg);
+            // Only the leading part of the evaluator description is matched.
+            return new TestCaseSupplier.TestCase(
+                typedData,
+                Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["),
+                DataType.DATE_NANOS,
+                resultsMatcher(typedData)
+            );
+        });
+        suppliers.add(supplier);
+    }
+
+    /**
+     * Adds {@code BUCKET(date_nanos, integer, from, to)} test cases for every pairing of bound types in
+     * {@code DATE_BOUNDS_TYPE}. Each pairing gets two cases: one with 50 buckets over the 2023-02-01 to 2023-03-01
+     * range, whose expected value comes from {@code resultsMatcher}, and one with 4 buckets over a three-hour
+     * range, whose expected value is the field rounded with an {@code HOUR_OF_DAY} rounding.
+     *
+     * @param suppliers list the new test cases are appended to
+     * @param name      name given to every generated test case
+     * @param date      supplies the field value, passed as a {@code DATE_NANOS} value
+     */
+    private static void dateNanosCases(List<TestCaseSupplier> suppliers, String name, LongSupplier date) {
+        for (DataType fromType : DATE_BOUNDS_TYPE) {
+            for (DataType toType : DATE_BOUNDS_TYPE) {
+                suppliers.add(new TestCaseSupplier(name, List.of(DataType.DATE_NANOS, DataType.INTEGER, fromType, toType), () -> {
+                    List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+                    args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATE_NANOS, "field"));
+                    // TODO more "from" and "to" and "buckets"
+                    args.add(new TestCaseSupplier.TypedData(50, DataType.INTEGER, "buckets").forceLiteral());
+                    args.add(dateBound("from", fromType, "2023-02-01T00:00:00.00Z"));
+                    args.add(dateBound("to", toType, "2023-03-01T09:00:00.00Z"));
+                    return new TestCaseSupplier.TestCase(
+                        args,
+                        Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["),
+                        DataType.DATE_NANOS,
+                        resultsMatcher(args)
+                    );
+                }));
+                // same as above, but a low bucket count and datetime bounds that match it (at hour span)
+                suppliers.add(new TestCaseSupplier(name, List.of(DataType.DATE_NANOS, DataType.INTEGER, fromType, toType), () -> {
+                    // Read the supplier once so the argument and the expected value are derived from the same input.
+                    long nanos = date.getAsLong();
+                    List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+                    args.add(new TestCaseSupplier.TypedData(nanos, DataType.DATE_NANOS, "field"));
+                    args.add(new TestCaseSupplier.TypedData(4, DataType.INTEGER, "buckets").forceLiteral());
+                    args.add(dateBound("from", fromType, "2023-02-17T09:00:00Z"));
+                    args.add(dateBound("to", toType, "2023-02-17T12:00:00Z"));
+                    // Round in milliseconds and convert back to nanoseconds, as the DATE_NANOS branch of resultsMatcher
+                    // does. Feeding the nanosecond value straight into the rounding only gives the right answer when
+                    // the input already sits exactly on an hour boundary.
+                    long expected = DateUtils.toNanoSeconds(
+                        Rounding.builder(Rounding.DateTimeUnit.HOUR_OF_DAY).build().prepareForUnknown().round(DateUtils.toMilliSeconds(nanos))
+                    );
+                    return new TestCaseSupplier.TestCase(
+                        args,
+                        Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["),
+                        DataType.DATE_NANOS,
+                        equalTo(expected)
+                    );
+                }));
+            }
+        }
+    }
+
     private static final DataType[] NUMBER_BOUNDS_TYPES = new DataType[] { DataType.INTEGER, DataType.LONG, DataType.DOUBLE };
 
     private static void numberCases(List<TestCaseSupplier> suppliers, String name, DataType numberType, Supplier<Number> number) {
@@ -221,7 +298,19 @@ public class BucketTests extends AbstractScalarFunctionTestCase {
     private static Matcher<Object> resultsMatcher(List<TestCaseSupplier.TypedData> typedData) { // expected-result matcher, chosen by the type of the first argument
         if (typedData.get(0).type() == DataType.DATETIME) {
             long millis = ((Number) typedData.get(0).data()).longValue();
-            return equalTo(Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis));
+            long expected = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis); // DAY_OF_MONTH rounding of the millis value
+            LogManager.getLogger(getTestClass()).info("Expected: " + Instant.ofEpochMilli(expected)); // log both values as readable Instants
+            LogManager.getLogger(getTestClass()).info("Input: " + Instant.ofEpochMilli(millis));
+            return equalTo(expected);
+        }
+        if (typedData.get(0).type() == DataType.DATE_NANOS) {
+            long nanos = ((Number) typedData.get(0).data()).longValue();
+            long expected = DateUtils.toNanoSeconds( // convert nanos to millis, apply the same DAY_OF_MONTH rounding, convert back to nanos
+                Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(DateUtils.toMilliSeconds(nanos))
+            );
+            LogManager.getLogger(getTestClass()).info("Expected: " + DateUtils.toInstant(expected)); // log both values as readable Instants
+            LogManager.getLogger(getTestClass()).info("Input: " + DateUtils.toInstant(nanos));
+            return equalTo(expected);
         }
         return equalTo(((Number) typedData.get(0).data()).doubleValue()); // any other field type: expect the input value itself, as a double
     }