
ESQL: extend BUCKET with spans. Turn it into a grouping function (#107272)

This extends the `BUCKET` function to accept a two-parameter-only
invocation: the first parameter remains as is, while the second is a
span. The span can be numeric (floating point) if the first argument is
numeric, or a date period or time duration if the first argument is a
date.

The function can now also be invoked via the alias `BIN`.

Additionally, the function has been turned into a grouping-only function
and can thus only be used within a `STATS` command.
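
For illustration, a minimal sketch of the new invocation forms, using the `employees` index and the `hire_date`/`salary` fields from the test fixtures touched below (span values taken from the added csv-spec tests):

[source,esql]
----
// Span form: the second argument is a date period or time duration...
FROM employees
| STATS c = COUNT(*) BY month = BUCKET(hire_date, 1 month)

// ...or a floating point span for numeric fields (note the trailing dot).
FROM employees
| STATS avg_salary = AVG(salary) BY band = BUCKET(salary, 5000.)

// The BIN alias and the original four-argument form still work, but only
// inside STATS now; an EVAL b = BUCKET(...) invocation is rejected.
FROM employees
| STATS c = COUNT(*) BY b = BIN(salary, 20, 25324, 74999)
----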
Bogdan Pintea, 1 year ago (parent commit a2c2e8fe47)
22 changed files with 995 additions and 508 deletions
  1. docs/changelog/107272.yaml (+5 -0)
  2. docs/reference/esql/esql-get-started.asciidoc (+3 -3)
  3. docs/reference/esql/functions/bucket.asciidoc (+11 -11)
  4. docs/reference/esql/functions/description/bucket.asciidoc (+1 -1)
  5. docs/reference/esql/functions/kibana/definition/bucket.json (+144 -54)
  6. docs/reference/esql/functions/kibana/docs/bucket.md (+2 -2)
  7. x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec (+475 -0)
  8. x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec (+0 -276)
  9. x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec (+0 -22)
  10. x-pack/plugin/esql/qa/testFixtures/src/main/resources/ints.csv-spec (+0 -50)
  11. x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec (+10 -6)
  12. x-pack/plugin/esql/qa/testFixtures/src/main/resources/unsigned_long.csv-spec (+0 -14)
  13. x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java (+39 -6)
  14. x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java (+3 -2)
  15. x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java (+99 -45)
  16. x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java (+33 -0)
  17. x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java (+13 -5)
  18. x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java (+68 -0)
  19. x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java (+4 -6)
  20. x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/BucketTests.java (+76 -1)
  21. x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java (+4 -4)
  22. x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/Expression.java (+5 -0)

+ 5 - 0
docs/changelog/107272.yaml

@@ -0,0 +1,5 @@
+pr: 107272
+summary: "ESQL: extend BUCKET with spans"
+area: ES|QL
+type: enhancement
+issues: []

+ 3 - 3
docs/reference/esql/esql-get-started.asciidoc

@@ -248,7 +248,7 @@ For example, to create hourly buckets for the data on October 23rd:
 
 [source,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=gs-bucket]
+include::{esql-specs}/bucket.csv-spec[tag=gs-bucket]
 ----
 
 Combine `BUCKET` with <<esql-stats-by>> to create a histogram. For example,
@@ -256,14 +256,14 @@ to count the number of events per hour:
 
 [source,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=gs-bucket-stats-by]
+include::{esql-specs}/bucket.csv-spec[tag=gs-bucket-stats-by]
 ----
 
 Or the median duration per hour:
 
 [source,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=gs-bucket-stats-by-median]
+include::{esql-specs}/bucket.csv-spec[tag=gs-bucket-stats-by-median]
 ----
 
 [discrete]

+ 11 - 11
docs/reference/esql/functions/bucket.asciidoc

@@ -35,11 +35,11 @@ in monthly buckets:
 
 [source.merge.styled,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=docsBucketMonth]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonth]
 ----
 [%header.monospaced.styled,format=dsv,separator=|]
 |===
-include::{esql-specs}/date.csv-spec[tag=docsBucketMonth-result]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonth-result]
 |===
 
 The goal isn't to provide *exactly* the target number of buckets, it's to pick a
@@ -51,11 +51,11 @@ Combine `BUCKET` with
 
 [source.merge.styled,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=docsBucketMonthlyHistogram]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonthlyHistogram]
 ----
 [%header.monospaced.styled,format=dsv,separator=|]
 |===
-include::{esql-specs}/date.csv-spec[tag=docsBucketMonthlyHistogram-result]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonthlyHistogram-result]
 |===
 
 NOTE: `BUCKET` does not create buckets that don't match any documents.
@@ -66,11 +66,11 @@ at most 100 buckets in a year results in weekly buckets:
 
 [source.merge.styled,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=docsBucketWeeklyHistogram]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogram]
 ----
 [%header.monospaced.styled,format=dsv,separator=|]
 |===
-include::{esql-specs}/date.csv-spec[tag=docsBucketWeeklyHistogram-result]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogram-result]
 |===
 
 NOTE: `BUCKET` does not filter any rows. It only uses the provided range to
@@ -83,11 +83,11 @@ salary histogram:
 
 [source.merge.styled,esql]
 ----
-include::{esql-specs}/ints.csv-spec[tag=docsBucketNumeric]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumeric]
 ----
 [%header.monospaced.styled,format=dsv,separator=|]
 |===
-include::{esql-specs}/ints.csv-spec[tag=docsBucketNumeric-result]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumeric-result]
 |===
 
 Unlike the earlier example that intentionally filters on a date range, you
@@ -102,7 +102,7 @@ per hour:
 
 [source.styled,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=docsBucketLast24hr]
+include::{esql-specs}/bucket.csv-spec[tag=docsBucketLast24hr]
 ----
 
 Create monthly buckets for the year 1985, and calculate the average salary by
@@ -110,9 +110,9 @@ hiring month:
 
 [source.merge.styled,esql]
 ----
-include::{esql-specs}/date.csv-spec[tag=bucket_in_agg]
+include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg]
 ----
 [%header.monospaced.styled,format=dsv,separator=|]
 |===
-include::{esql-specs}/date.csv-spec[tag=bucket_in_agg-result]
+include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg-result]
 |===

+ 1 - 1
docs/reference/esql/functions/description/bucket.asciidoc

@@ -2,4 +2,4 @@
 
 *Description*
 
-Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into.
+Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.
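
The two sizing modes the new description mentions can be compared directly in the added csv-spec tests (`bucketNumeric` vs. `bucketNumericWithSpan`): a direct span, and a recommended count plus range that resolves to the same span. A condensed sketch:

[source,esql]
----
// Direct span: the bucket width is given explicitly.
FROM employees
| STATS c = COUNT(*) BY b = BUCKET(salary, 5000.)

// Count + range: a human-friendly width is derived from the hint; here it
// also resolves to 5000, so both queries produce identical groupings.
FROM employees
| STATS c = COUNT(*) BY b = BUCKET(salary, 20, 25324, 74999)
----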

+ 144 - 54
docs/reference/esql/functions/kibana/definition/bucket.json

@@ -2,8 +2,26 @@
   "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
   "type" : "eval",
   "name" : "bucket",
-  "description" : "Creates human-friendly buckets and returns a datetime value\nfor each row that corresponds to the resulting bucket the row falls into.",
+  "description" : "Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either\nbe provided directly, or chosen based on a recommended count and values range.",
   "signatures" : [
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "datetime",
+          "optional" : false,
+          "description" : ""
+        },
+        {
+          "name" : "buckets",
+          "type" : "date_period",
+          "optional" : false,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "datetime"
+    },
     {
       "params" : [
         {
@@ -21,12 +39,30 @@
         {
           "name" : "from",
           "type" : "datetime",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "datetime",
+          "optional" : true,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "datetime"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "datetime",
+          "optional" : false,
+          "description" : ""
+        },
+        {
+          "name" : "buckets",
+          "type" : "time_duration",
           "optional" : false,
           "description" : ""
         }
@@ -34,6 +70,24 @@
       "variadic" : false,
       "returnType" : "datetime"
     },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "double",
+          "optional" : false,
+          "description" : ""
+        },
+        {
+          "name" : "buckets",
+          "type" : "double",
+          "optional" : false,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "double"
+    },
     {
       "params" : [
         {
@@ -51,13 +105,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -81,13 +135,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -111,13 +165,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -141,13 +195,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -171,13 +225,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -201,13 +255,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -231,13 +285,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -261,13 +315,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -291,12 +345,30 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
+          "optional" : true,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "double"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "integer",
+          "optional" : false,
+          "description" : ""
+        },
+        {
+          "name" : "buckets",
+          "type" : "double",
           "optional" : false,
           "description" : ""
         }
@@ -321,13 +393,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -351,13 +423,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -381,13 +453,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -411,13 +483,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -441,13 +513,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -471,13 +543,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -501,13 +573,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -531,13 +603,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -561,12 +633,30 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
+          "optional" : true,
+          "description" : ""
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "double"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "long",
+          "optional" : false,
+          "description" : ""
+        },
+        {
+          "name" : "buckets",
+          "type" : "double",
           "optional" : false,
           "description" : ""
         }
@@ -591,13 +681,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -621,13 +711,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -651,13 +741,13 @@
         {
           "name" : "from",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -681,13 +771,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -711,13 +801,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -741,13 +831,13 @@
         {
           "name" : "from",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -771,13 +861,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "double",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -801,13 +891,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "integer",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],
@@ -831,13 +921,13 @@
         {
           "name" : "from",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         },
         {
           "name" : "to",
           "type" : "long",
-          "optional" : false,
+          "optional" : true,
           "description" : ""
         }
       ],

+ 2 - 2
docs/reference/esql/functions/kibana/docs/bucket.md

@@ -3,6 +3,6 @@ This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
 -->
 
 ### BUCKET
-Creates human-friendly buckets and returns a datetime value
-for each row that corresponds to the resulting bucket the row falls into.
+Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either
+be provided directly, or chosen based on a recommended count and values range.
 

+ 475 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec

@@ -0,0 +1,475 @@
+// BUCKET-specific tests
+
+//
+// Date bucketing
+//
+
+bucketSimpleMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::bucket_month[]
+ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
+| STATS date=VALUES(date) BY bucket=BUCKET(date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+// end::bucket_month[]
+;
+
+// tag::bucket_month-result[]
+           date:datetime | bucket:datetime
+1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z
+// end::bucket_month-result[]
+;
+
+bucketSimpleWeek#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::bucket_week[]
+ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
+| STATS date=VALUES(date) BY bucket=BUCKET(date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+// end::bucket_week[]
+;
+
+// tag::bucket_week-result[]
+           date:datetime | bucket:datetime
+1985-07-09T00:00:00.000Z | 1985-07-08T00:00:00.000Z
+// end::bucket_week-result[]
+;
+
+bucketMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS hd = MV_SORT(VALUES(hire_date)) BY b = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| SORT hd
+;
+
+                                      hd:date                                 |           b:date
+[1985-02-18T00:00:00.000Z, 1985-02-24T00:00:00.000Z]                          |1985-02-01T00:00:00.000Z
+1985-05-13T00:00:00.000Z                                                      |1985-05-01T00:00:00.000Z
+1985-07-09T00:00:00.000Z                                                      |1985-07-01T00:00:00.000Z
+1985-09-17T00:00:00.000Z                                                      |1985-09-01T00:00:00.000Z
+[1985-10-14T00:00:00.000Z, 1985-10-20T00:00:00.000Z]                          |1985-10-01T00:00:00.000Z
+[1985-11-19T00:00:00.000Z, 1985-11-20T00:00:00.000Z, 1985-11-21T00:00:00.000Z]|1985-11-01T00:00:00.000Z
+;
+
+bucketWeek#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+from employees
+| where hire_date >= "1985-01-01T00:00:00Z" and hire_date < "1986-01-01T00:00:00Z"
+| stats hire_date = mv_sort(values(hire_date)) by hd = bucket(hire_date, 55, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| sort hire_date
+;
+
+hire_date:date                                                                | hd:date
+[1985-02-18T00:00:00.000Z, 1985-02-24T00:00:00.000Z]                          |1985-02-18T00:00:00.000Z
+1985-05-13T00:00:00.000Z                                                      |1985-05-13T00:00:00.000Z
+1985-07-09T00:00:00.000Z                                                      |1985-07-08T00:00:00.000Z
+1985-09-17T00:00:00.000Z                                                      |1985-09-16T00:00:00.000Z
+[1985-10-14T00:00:00.000Z, 1985-10-20T00:00:00.000Z]                          |1985-10-14T00:00:00.000Z
+[1985-11-19T00:00:00.000Z, 1985-11-20T00:00:00.000Z, 1985-11-21T00:00:00.000Z]|1985-11-18T00:00:00.000Z
+;
+
+bucketYearInAgg#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1999-01-01T00:00:00Z"
+| STATS COUNT(*) by bucket = BUCKET(hire_date, 5, "1999-01-01T00:00:00Z", NOW())
+| sort bucket;
+
+COUNT(*):long            | bucket:date
+1                        | 1999-01-01T00:00:00.000Z
+;
+
+bucketYearInAggConstRefsString#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1999-01-01T00:00:00Z"
+| EVAL bucket_start = "1999-01-01T00:00:00Z"
+| EVAL bucket_end = NOW()
+| STATS COUNT(*) BY bucket = BUCKET(hire_date, 5, bucket_start, bucket_end)
+| SORT bucket
+;
+
+COUNT(*):long            | bucket:date
+1                        | 1999-01-01T00:00:00.000Z
+;
+
+bucketYearInAggConstRefsConcat#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1999-01-01T00:00:00Z"
+| EVAL bucket_start = CONCAT("1999-01-01", "T", "00:00:00Z")
+| EVAL bucket_end = NOW()
+| STATS COUNT(*) by bucket = BUCKET(hire_date, 5, bucket_start, bucket_end)
+| sort bucket
+;
+
+COUNT(*):long            | bucket:date
+1                        | 1999-01-01T00:00:00.000Z
+;
+
+bucketYearInAggConstRefsDate#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1999-01-01T00:00:00Z"
+| EVAL bucket_start = TO_DATETIME("1999-01-01T00:00:00.000Z")
+| EVAL bucket_end = NOW()
+| STATS COUNT(*) BY bucket = BUCKET(hire_date, 5, bucket_start, bucket_end)
+| SORT bucket
+;
+
+COUNT(*):long            | bucket:date
+1                        | 1999-01-01T00:00:00.000Z
+;
+
+bucketYearInAggConstRefsRename#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1999-01-01T00:00:00Z"
+| EVAL bucket_start = "1999-01-01T00:00:00Z"
+| EVAL bucket_end = NOW()
+| RENAME bucket_end as be, bucket_start as bs
+| STATS c = COUNT(*) by BUCKET(hire_date, 5, bs, be)
+| SORT c
+;
+
+c:long            | BUCKET(hire_date, 5, bs, be):date
+1                 | 1999-01-01T00:00:00.000Z
+;
+
+bucketMonthInAgg#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::bucket_in_agg[]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| SORT bucket
+// end::bucket_in_agg[]
+;
+
+// tag::bucket_in_agg-result[]
+AVG(salary):double | bucket:date
+          46305.0  | 1985-02-01T00:00:00.000Z
+          44817.0  | 1985-05-01T00:00:00.000Z
+          62405.0  | 1985-07-01T00:00:00.000Z
+          49095.0  | 1985-09-01T00:00:00.000Z
+          51532.0  | 1985-10-01T00:00:00.000Z
+          54539.75 | 1985-11-01T00:00:00.000Z
+// end::bucket_in_agg-result[]
+;
+
+docsBucketMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+//tag::docsBucketMonth[]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| SORT hire_date
+//end::docsBucketMonth[]
+;
+
+//tag::docsBucketMonth-result[]
+   hire_date:date                                                             |    month:date
+[1985-02-18T00:00:00.000Z, 1985-02-24T00:00:00.000Z]                          |1985-02-01T00:00:00.000Z
+1985-05-13T00:00:00.000Z                                                      |1985-05-01T00:00:00.000Z
+1985-07-09T00:00:00.000Z                                                      |1985-07-01T00:00:00.000Z
+1985-09-17T00:00:00.000Z                                                      |1985-09-01T00:00:00.000Z
+[1985-10-14T00:00:00.000Z, 1985-10-20T00:00:00.000Z]                          |1985-10-01T00:00:00.000Z
+[1985-11-19T00:00:00.000Z, 1985-11-20T00:00:00.000Z, 1985-11-21T00:00:00.000Z]|1985-11-01T00:00:00.000Z
+//end::docsBucketMonth-result[]
+;
+
+docsBucketMonthlyHistogram#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+//tag::docsBucketMonthlyHistogram[]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| SORT month
+//end::docsBucketMonthlyHistogram[]
+;
+
+//tag::docsBucketMonthlyHistogram-result[]
+  hires_per_month:long    |    month:date
+2              |1985-02-01T00:00:00.000Z
+1              |1985-05-01T00:00:00.000Z
+1              |1985-07-01T00:00:00.000Z
+1              |1985-09-01T00:00:00.000Z
+2              |1985-10-01T00:00:00.000Z
+4              |1985-11-01T00:00:00.000Z
+//end::docsBucketMonthlyHistogram-result[]
+;
+
+docsBucketWeeklyHistogram#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+//tag::docsBucketWeeklyHistogram[]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| SORT week
+//end::docsBucketWeeklyHistogram[]
+;
+
+//tag::docsBucketWeeklyHistogram-result[]
+  hires_per_week:long    |    week:date
+2              |1985-02-18T00:00:00.000Z
+1              |1985-05-13T00:00:00.000Z
+1              |1985-07-08T00:00:00.000Z
+1              |1985-09-16T00:00:00.000Z
+2              |1985-10-14T00:00:00.000Z
+4              |1985-11-18T00:00:00.000Z
+//end::docsBucketWeeklyHistogram-result[]
+;
+
+docsBucketLast24hr#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+//tag::docsBucketLast24hr[]
+FROM sample_data 
+| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
+| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
+//end::docsBucketLast24hr[]
+;
+
+   COUNT(*):long    |    bucket:date
+;
+
+docsGettingStartedBucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::gs-bucket[]
+FROM sample_data
+| STATS BY bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", NOW())
+// end::gs-bucket[]
+| LIMIT 0
+;
+
+bucket:date
+;
+
+docsGettingStartedBucketStatsBy#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::gs-bucket-stats-by[]
+FROM sample_data
+| STATS c = COUNT(*) BY bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+// end::gs-bucket-stats-by[]
+| SORT bucket
+;
+
+ c:long        | bucket:date
+2              |2023-10-23T12:00:00.000Z
+5              |2023-10-23T13:00:00.000Z
+;
+
+docsGettingStartedBucketStatsByMedian#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::gs-bucket-stats-by-median[]
+FROM sample_data
+| KEEP @timestamp, event_duration
+| STATS median_duration = MEDIAN(event_duration) BY bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+// end::gs-bucket-stats-by-median[]
+| SORT bucket
+;
+
+median_duration:double | bucket:date
+3107561.0              |2023-10-23T12:00:00.000Z
+1756467.0              |2023-10-23T13:00:00.000Z
+;
+
+bucketByTimeDuration#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+FROM sample_data
+| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = BUCKET(@timestamp, 30 minutes)
+| SORT min
+;
+
+          min:date      |          max:date      |         bucket:date
+2023-10-23T12:15:03.360Z|2023-10-23T12:27:28.948Z|2023-10-23T12:00:00.000Z
+2023-10-23T13:33:34.937Z|2023-10-23T13:55:01.543Z|2023-10-23T13:30:00.000Z
+;
+
+aggByTimeDurationBucket#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+FROM sample_data
+| STATS c = COUNT(1) BY b = BUCKET(@timestamp, 30 minutes)
+| SORT c
+;
+
+       c:long  |           b:date
+2              |2023-10-23T12:00:00.000Z
+5              |2023-10-23T13:30:00.000Z
+;
+
+bucketByDatePeriod#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+FROM sample_data
+| EVAL adjusted = CASE(TO_LONG(@timestamp) % 2 == 0, @timestamp + 1 month, @timestamp + 2 years)
+| STATS c = COUNT(*) BY b = BUCKET(adjusted, 1 month)
+| SORT c
+;
+
+       c:long  |           b:date
+3              |2025-10-01T00:00:00.000Z
+4              |2023-11-01T00:00:00.000Z
+;
+
+aggByDatePeriodBucket#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+FROM sample_data
+| EVAL adjusted = CASE(TO_LONG(@timestamp) % 2 == 0, @timestamp + 1 month, @timestamp + 2 years)
+| STATS c = COUNT(1) BY b = BUCKET(adjusted, 1 month)
+| SORT c DESC
+;
+
+       c:long  |           b:date
+4              |2023-11-01T00:00:00.000Z
+3              |2025-10-01T00:00:00.000Z
+;
+
+//
+// Numeric bucketing
+//
+
+bucketNumeric#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS c = COUNT(*) BY b = BUCKET(salary, 20, 25324, 74999)
+| SORT b
+;
+
+ c:long        | b:double       
+1              |25000.0
+1              |30000.0
+1              |40000.0
+2              |45000.0
+2              |50000.0
+1              |55000.0
+1              |60000.0
+1              |65000.0
+1              |70000.0
+;
+
+docsBucketNumeric#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+//tag::docsBucketNumeric[]
+FROM employees
+| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999)
+| SORT bs
+//end::docsBucketNumeric[]
+;
+
+//tag::docsBucketNumeric-result[]
+   COUNT(*):long    |    bs:double
+9              |25000.0
+9              |30000.0
+18             |35000.0
+11             |40000.0
+11             |45000.0
+10             |50000.0
+7              |55000.0
+9              |60000.0
+8              |65000.0
+8              |70000.0
+//end::docsBucketNumeric-result[]
+;
+
+// bucketing in span mode (identical results to above)
+bucketNumericWithSpan#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)
+| SORT b
+;
+
+       c:long  |       b:double
+1              |25000.0
+1              |30000.0
+1              |40000.0
+2              |45000.0
+2              |50000.0
+1              |55000.0
+1              |60000.0
+1              |65000.0
+1              |70000.0
+;
+
+bucketNumericMixedTypes#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+ROW long = TO_LONG(100), double = 99., int = 100
+| STATS BY b1 = BUCKET(long, 99.), b2 = BUCKET(double, 100.), b3 = BUCKET(int, 49.5)
+;
+
+      b1:double|      b2:double|      b3:double
+99.0           |0.0            |99.0
+;
+
+bucketWithFloats#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS hire_date = MV_SORT(VALUES(hire_date)) BY  bh = ROUND(BUCKET(height, 20, 1.41, 2.10), 2)
+| SORT hire_date, bh
+;
+
+                     hire_date:date                 |      bh:double
+1985-02-18T00:00:00.000Z                            |1.85
+[1985-02-24T00:00:00.000Z, 1985-05-13T00:00:00.000Z]|2.0
+[1985-07-09T00:00:00.000Z, 1985-11-19T00:00:00.000Z]|1.8
+1985-09-17T00:00:00.000Z                            |1.4
+1985-10-14T00:00:00.000Z                            |1.75
+[1985-10-20T00:00:00.000Z, 1985-11-20T00:00:00.000Z]|1.9
+1985-11-20T00:00:00.000Z                            |1.95
+1985-11-21T00:00:00.000Z                            |2.05
+;
+
+bucketWithUnsignedLong#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM ul_logs
+| WHERE @timestamp >= "2017-11-10T20:30:00Z" AND @timestamp < "2017-11-10T20:35:00Z"
+| STATS ts = VALUES(@timestamp) BY bh = bucket(bytes_in, 20, 5480608687137202404, 17764691215469285192)
+| SORT ts
+;
+
+           ts:date      |      bh:double
+2017-11-10T20:32:57.000Z|8.0E18
+2017-11-10T20:33:06.000Z|5.0E18
+2017-11-10T20:34:43.000Z|1.75E19
+;
+
+bucketMultipleAndExpressions#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| STATS sumK = (b1k + b2k) / 1000 BY b1k = BUCKET(salary, 1000.), b2k = BUCKET(salary, 2000.)
+| SORT sumK
+| LIMIT 4
+;
+
+ sumK:double   | b1k:double    | b2k:double      
+49.0           |25000.0        |24000.0
+52.0           |26000.0        |26000.0
+53.0           |27000.0        |26000.0
+56.0           |28000.0        |28000.0
+;
+
+//
+// BIN copies
+//
+
+docsGettingStartedBin#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM sample_data
+| STATS BY bin = BIN(@timestamp, 24, "2023-10-23T00:00:00Z", NOW())
+| LIMIT 0
+;
+
+bin:date
+;
+
+aggByTimeDurationBin#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+FROM sample_data
+| STATS c = COUNT(1) BY b = BIN(@timestamp, 30 minutes)
+| SORT c
+;
+
+       c:long  |           b:date
+2              |2023-10-23T12:00:00.000Z
+5              |2023-10-23T13:30:00.000Z
+;
+
+binNumeric#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| STATS COUNT(*) by bin = BIN(salary, 20, 25324, 74999)
+| SORT bin
+;
+
+   COUNT(*):long    |    bin:double
+9              |25000.0
+9              |30000.0
+18             |35000.0
+11             |40000.0
+11             |45000.0
+10             |50000.0
+7              |55000.0
+9              |60000.0
+8              |65000.0
+8              |70000.0
+;
+
+binNumericMixedTypes#[skip:-8.13.99, reason:BUCKET extended in 8.14]
+ROW long = TO_LONG(100), double = 99., int = 100
+| STATS BY b1 = BIN(long, 99.), b2 = BIN(double, 100.), b3 = BIN(int, 49.5)
+;
+
+      b1:double|      b2:double|      b3:double
+99.0           |0.0            |99.0
+;

+ 0 - 276
x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec

@@ -261,74 +261,6 @@ int:integer |dt:date
 // end::to_datetime-int-result[]
 ;
 
-bucketSimpleMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::bucket_month[]
-ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
-| EVAL bucket=BUCKET(date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-// end::bucket_month[]
-;
-
-// tag::bucket_month-result[]
-           date:datetime | bucket:datetime
-1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z
-// end::bucket_month-result[]
-;
-
-bucketSimpleWeek#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::bucket_week[]
-ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
-| EVAL bucket=BUCKET(date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-// end::bucket_week[]
-;
-
-// tag::bucket_week-result[]
-           date:datetime | bucket:datetime
-1985-07-09T00:00:00.000Z | 1985-07-08T00:00:00.000Z
-// end::bucket_week-result[]
-;
-
-bucketMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-from employees
-| where hire_date >= "1985-01-01T00:00:00Z" and hire_date < "1986-01-01T00:00:00Z"
-| eval hd = bucket(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| sort hire_date
-| keep hire_date, hd;
-
-hire_date:date           | hd:date
-1985-02-18T00:00:00.000Z | 1985-02-01T00:00:00.000Z
-1985-02-24T00:00:00.000Z | 1985-02-01T00:00:00.000Z
-1985-05-13T00:00:00.000Z | 1985-05-01T00:00:00.000Z
-1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z
-1985-09-17T00:00:00.000Z | 1985-09-01T00:00:00.000Z
-1985-10-14T00:00:00.000Z | 1985-10-01T00:00:00.000Z
-1985-10-20T00:00:00.000Z | 1985-10-01T00:00:00.000Z
-1985-11-19T00:00:00.000Z | 1985-11-01T00:00:00.000Z
-1985-11-20T00:00:00.000Z | 1985-11-01T00:00:00.000Z
-1985-11-20T00:00:00.000Z | 1985-11-01T00:00:00.000Z
-1985-11-21T00:00:00.000Z | 1985-11-01T00:00:00.000Z
-;
-
-bucketWeek#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-from employees
-| where hire_date >= "1985-01-01T00:00:00Z" and hire_date < "1986-01-01T00:00:00Z"
-| eval hd = bucket(hire_date, 55, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| sort hire_date
-| keep hire_date, hd;
-
-hire_date:date           | hd:date
-1985-02-18T00:00:00.000Z | 1985-02-18T00:00:00.000Z
-1985-02-24T00:00:00.000Z | 1985-02-18T00:00:00.000Z
-1985-05-13T00:00:00.000Z | 1985-05-13T00:00:00.000Z
-1985-07-09T00:00:00.000Z | 1985-07-08T00:00:00.000Z
-1985-09-17T00:00:00.000Z | 1985-09-16T00:00:00.000Z
-1985-10-14T00:00:00.000Z | 1985-10-14T00:00:00.000Z
-1985-10-20T00:00:00.000Z | 1985-10-14T00:00:00.000Z
-1985-11-19T00:00:00.000Z | 1985-11-18T00:00:00.000Z
-1985-11-20T00:00:00.000Z | 1985-11-18T00:00:00.000Z
-1985-11-20T00:00:00.000Z | 1985-11-18T00:00:00.000Z
-1985-11-21T00:00:00.000Z | 1985-11-18T00:00:00.000Z
-;
-
 now
 row a = now() | eval x = a == now(), y = substring(date_format("yyyy", a), 0, 2) | keep x, y;
 
@@ -350,92 +282,6 @@ from employees | where birth_date > now() | sort emp_no asc | keep emp_no, birth
 emp_no:integer  | birth_date:date
 ;
 
-bucketYearInAgg#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM employees
-| WHERE hire_date >= "1999-01-01T00:00:00Z"
-| EVAL bucket = BUCKET(hire_date, 5, "1999-01-01T00:00:00Z", NOW())
-| STATS COUNT(*) by bucket
-| sort bucket;
-
-COUNT(*):long            | bucket:date
-1                        | 1999-01-01T00:00:00.000Z
-;
-
-bucketYearInAggConstRefsString#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM employees
-| WHERE hire_date >= "1999-01-01T00:00:00Z"
-| EVAL bucket_start = "1999-01-01T00:00:00Z"
-| EVAL bucket_end = NOW()
-| EVAL bucket = BUCKET(hire_date, 5, bucket_start, bucket_end)
-| STATS COUNT(*) by bucket
-| sort bucket;
-
-COUNT(*):long            | bucket:date
-1                        | 1999-01-01T00:00:00.000Z
-;
-
-bucketYearInAggConstRefsConcat#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM employees
-| WHERE hire_date >= "1999-01-01T00:00:00Z"
-| EVAL bucket_start = CONCAT("1999-01-01", "T", "00:00:00Z")
-| EVAL bucket_end = NOW()
-| EVAL bucket = BUCKET(hire_date, 5, bucket_start, bucket_end)
-| STATS COUNT(*) by bucket
-| sort bucket;
-
-COUNT(*):long            | bucket:date
-1                        | 1999-01-01T00:00:00.000Z
-;
-
-bucketYearInAggConstRefsDate#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM employees
-| WHERE hire_date >= "1999-01-01T00:00:00Z"
-| EVAL bucket_start = TO_DATETIME("1999-01-01T00:00:00.000Z")
-| EVAL bucket_end = NOW()
-| EVAL bucket = BUCKET(hire_date, 5, bucket_start, bucket_end)
-| keep bucket_start, bucket_end, bucket
-| STATS COUNT(*) by bucket
-| sort bucket;
-
-COUNT(*):long            | bucket:date
-1                        | 1999-01-01T00:00:00.000Z
-;
-
-bucketYearInAggConstRefsRename#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM employees
-| WHERE hire_date >= "1999-01-01T00:00:00Z"
-| EVAL bucket_start = "1999-01-01T00:00:00Z"
-| EVAL bucket_end = NOW()
-| RENAME bucket_end as be, bucket_start as bs
-| STATS c = COUNT(*) by BUCKET(hire_date, 5, bs, be)
-| SORT c
-;
-
-c:long            | BUCKET(hire_date, 5, bs, be):date
-1                 | 1999-01-01T00:00:00.000Z
-;
-
-bucketMonthInAgg#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::bucket_in_agg[]
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| EVAL bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| STATS AVG(salary) BY bucket
-| SORT bucket
-// end::bucket_in_agg[]
-;
-
-// tag::bucket_in_agg-result[]
-AVG(salary):double | bucket:date
-          46305.0  | 1985-02-01T00:00:00.000Z
-          44817.0  | 1985-05-01T00:00:00.000Z
-          62405.0  | 1985-07-01T00:00:00.000Z
-          49095.0  | 1985-09-01T00:00:00.000Z
-          51532.0  | 1985-10-01T00:00:00.000Z
-          54539.75 | 1985-11-01T00:00:00.000Z
-// end::bucket_in_agg-result[]
-;
-
 evalDateDiffInNanoAndMicroAndMilliSeconds#[skip:-8.12.99, reason:date_diff added in 8.13]
 ROW date1=to_datetime("2023-12-02T11:00:00.000Z"), date2=to_datetime("2023-12-02T11:00:00.001Z") 
 | EVAL dd_ns1=date_diff("nanoseconds", date1, date2), dd_ns2=date_diff("ns", date1, date2)
@@ -961,128 +807,6 @@ birth_date:datetime
 1953-04-21T00:00:00.000Z
 ;
 
-docsBucketMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-//tag::docsBucketMonth[]
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| EVAL month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| KEEP hire_date, month
-| SORT hire_date
-//end::docsBucketMonth[]
-;
-
-//tag::docsBucketMonth-result[]
-   hire_date:date    |    month:date
-1985-02-18T00:00:00.000Z|1985-02-01T00:00:00.000Z
-1985-02-24T00:00:00.000Z|1985-02-01T00:00:00.000Z
-1985-05-13T00:00:00.000Z|1985-05-01T00:00:00.000Z
-1985-07-09T00:00:00.000Z|1985-07-01T00:00:00.000Z
-1985-09-17T00:00:00.000Z|1985-09-01T00:00:00.000Z
-1985-10-14T00:00:00.000Z|1985-10-01T00:00:00.000Z
-1985-10-20T00:00:00.000Z|1985-10-01T00:00:00.000Z
-1985-11-19T00:00:00.000Z|1985-11-01T00:00:00.000Z
-1985-11-20T00:00:00.000Z|1985-11-01T00:00:00.000Z
-1985-11-20T00:00:00.000Z|1985-11-01T00:00:00.000Z
-1985-11-21T00:00:00.000Z|1985-11-01T00:00:00.000Z
-//end::docsBucketMonth-result[]
-;
-
-docsBucketMonthlyHistogram#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-//tag::docsBucketMonthlyHistogram[]
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| EVAL month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| STATS hires_per_month = COUNT(*) BY month
-| SORT month
-//end::docsBucketMonthlyHistogram[]
-;
-
-//tag::docsBucketMonthlyHistogram-result[]
-  hires_per_month:long    |    month:date
-2              |1985-02-01T00:00:00.000Z
-1              |1985-05-01T00:00:00.000Z
-1              |1985-07-01T00:00:00.000Z
-1              |1985-09-01T00:00:00.000Z
-2              |1985-10-01T00:00:00.000Z
-4              |1985-11-01T00:00:00.000Z
-//end::docsBucketMonthlyHistogram-result[]
-;
-
-docsBucketWeeklyHistogram#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-//tag::docsBucketWeeklyHistogram[]
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| EVAL week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-| STATS hires_per_week = COUNT(*) BY week
-| SORT week
-//end::docsBucketWeeklyHistogram[]
-;
-
-//tag::docsBucketWeeklyHistogram-result[]
-  hires_per_week:long    |    week:date
-2              |1985-02-18T00:00:00.000Z
-1              |1985-05-13T00:00:00.000Z
-1              |1985-07-08T00:00:00.000Z
-1              |1985-09-16T00:00:00.000Z
-2              |1985-10-14T00:00:00.000Z
-4              |1985-11-18T00:00:00.000Z
-//end::docsBucketWeeklyHistogram-result[]
-;
-
-docsBucketLast24hr#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-//tag::docsBucketLast24hr[]
-FROM sample_data 
-| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
-| EVAL bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
-| STATS COUNT(*) BY bucket
-//end::docsBucketLast24hr[]
-;
-
-   COUNT(*):long    |    bucket:date
-;
-
-docsGettingStartedBucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::gs-bucket[]
-FROM sample_data
-| KEEP @timestamp
-| EVAL bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", NOW())
-// end::gs-bucket[]
-| LIMIT 0
-;
-
-@timestamp:date | bucket:date
-;
-
-docsGettingStartedBucketStatsBy#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::gs-bucket-stats-by[]
-FROM sample_data
-| KEEP @timestamp, event_duration
-| EVAL bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
-| STATS COUNT(*) BY bucket
-// end::gs-bucket-stats-by[]
-| SORT bucket
-;
-
-COUNT(*):long | bucket:date
-2              |2023-10-23T12:00:00.000Z
-5              |2023-10-23T13:00:00.000Z
-;
-
-docsGettingStartedBucketStatsByMedian#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::gs-bucket-stats-by-median[]
-FROM sample_data
-| KEEP @timestamp, event_duration
-| EVAL bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
-| STATS median_duration = MEDIAN(event_duration) BY bucket
-// end::gs-bucket-stats-by-median[]
-| SORT bucket
-;
-
-median_duration:double | bucket:date
-3107561.0              |2023-10-23T12:00:00.000Z
-1756467.0              |2023-10-23T13:00:00.000Z
-;
-
 dateExtract
 // tag::dateExtract[]
 ROW date = DATE_PARSE("yyyy-MM-dd", "2022-05-06")

+ 0 - 22
x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec

@@ -257,28 +257,6 @@ emp_no:integer | salary_change:double    | a1:double
 10005          | [-2.14,13.07]           | [-2.14,13.07]
 ;
 
-bucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| EVAL bh = bucket(height, 20, 1.41, 2.10)
-| SORT hire_date, height
-| KEEP hire_date, height, bh
-;
-
-hire_date:date           | height:double | bh:double
-1985-02-18T00:00:00.000Z | 1.85          | 1.85
-1985-02-24T00:00:00.000Z | 2.0           | 2.0
-1985-05-13T00:00:00.000Z | 2.0           | 2.0
-1985-07-09T00:00:00.000Z | 1.83          | 1.8
-1985-09-17T00:00:00.000Z | 1.45          | 1.4000000000000001
-1985-10-14T00:00:00.000Z | 1.77          | 1.75
-1985-10-20T00:00:00.000Z | 1.94          | 1.9000000000000001
-1985-11-19T00:00:00.000Z | 1.8           | 1.8
-1985-11-20T00:00:00.000Z | 1.93          | 1.9000000000000001
-1985-11-20T00:00:00.000Z | 1.99          | 1.9500000000000002
-1985-11-21T00:00:00.000Z | 2.08          | 2.0500000000000003
-;
-
 cos
 // tag::cos[]
 ROW a=1.8 

+ 0 - 50
x-pack/plugin/esql/qa/testFixtures/src/main/resources/ints.csv-spec

@@ -642,56 +642,6 @@ emp_no:integer | salary_change.long:long | a1:long
 10005          | [-2, 13]                | [-2, 13]
 ;
 
-bucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-// tag::bucket[]
-FROM employees
-| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| EVAL bs = BUCKET(salary, 20, 25324, 74999)
-| SORT hire_date, salary
-| KEEP hire_date, salary, bs
-// end::bucket[]
-;
-
-// tag::bucket-result[]
-hire_date:date           | salary:integer | bs:double
-1985-02-18T00:00:00.000Z | 66174          | 65000.0
-1985-02-24T00:00:00.000Z | 26436          | 25000.0
-1985-05-13T00:00:00.000Z | 44817          | 40000.0
-1985-07-09T00:00:00.000Z | 62405          | 60000.0
-1985-09-17T00:00:00.000Z | 49095          | 45000.0
-1985-10-14T00:00:00.000Z | 54329          | 50000.0
-1985-10-20T00:00:00.000Z | 48735          | 45000.0
-1985-11-19T00:00:00.000Z | 52833          | 50000.0
-1985-11-20T00:00:00.000Z | 33956          | 30000.0
-1985-11-20T00:00:00.000Z | 74999          | 70000.0
-1985-11-21T00:00:00.000Z | 56371          | 55000.0
-// end::bucket-result[]
-;
-
-docsBucketNumeric#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-//tag::docsBucketNumeric[]
-FROM employees
-| EVAL bs = BUCKET(salary, 20, 25324, 74999)
-| STATS COUNT(*) by bs
-| SORT bs
-//end::docsBucketNumeric[]
-;
-
-//tag::docsBucketNumeric-result[]
-   COUNT(*):long    |    bs:double
-9              |25000.0        
-9              |30000.0        
-18             |35000.0        
-11             |40000.0        
-11             |45000.0        
-10             |50000.0        
-7              |55000.0        
-9              |60000.0        
-8              |65000.0        
-8              |70000.0        
-//end::docsBucketNumeric-result[]
-;
-
 cos
 ROW a=2 | EVAL cos=COS(a);
 

+ 10 - 6
x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec

@@ -8,7 +8,8 @@ synopsis:keyword
 "double atan(number:double|integer|long|unsigned_long)"
 "double atan2(y_coordinate:double|integer|long|unsigned_long, x_coordinate:double|integer|long|unsigned_long)"
 "double avg(number:double|integer|long)"
-"double|date bucket(field:integer|long|double|date, buckets:integer, from:integer|long|double|date, to:integer|long|double|date)"
+"double|date bin(field:integer|long|double|date, buckets:integer|double|date_period|time_duration, ?from:integer|long|double|date, ?to:integer|long|double|date)"
+"double|date bucket(field:integer|long|double|date, buckets:integer|double|date_period|time_duration, ?from:integer|long|double|date, ?to:integer|long|double|date)"
 "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version case(condition:boolean, trueValue...:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)"
 "double|integer|long|unsigned_long ceil(number:double|integer|long|unsigned_long)"
 "boolean cidr_match(ip:ip, blockX...:keyword|text)"
@@ -120,7 +121,8 @@ asin          |number                              |"double|integer|long|unsigne
 atan          |number                              |"double|integer|long|unsigned_long"                                                                                               |Numeric expression. If `null`, the function returns `null`.
 atan2         |[y_coordinate, x_coordinate]        |["double|integer|long|unsigned_long", "double|integer|long|unsigned_long"]                                                        |[y coordinate. If `null`\, the function returns `null`., x coordinate. If `null`\, the function returns `null`.]
 avg           |number                              |"double|integer|long"                                                                                                             |[""]
-bucket        |[field, buckets, from, to]          |["integer|long|double|date", integer, "integer|long|double|date", "integer|long|double|date"]           |["", "", "", ""]
+bin           |[field, buckets, from, to]          |["integer|long|double|date", "integer|double|date_period|time_duration", "integer|long|double|date", "integer|long|double|date"]  |["", "", "", ""]
+bucket        |[field, buckets, from, to]          |["integer|long|double|date", "integer|double|date_period|time_duration", "integer|long|double|date", "integer|long|double|date"]  |["", "", "", ""]
 case          |[condition, trueValue]              |[boolean, "boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version"]                     |["", ""]
 ceil          |number                              |"double|integer|long|unsigned_long"                                                                                               |Numeric expression. If `null`, the function returns `null`.
 cidr_match    |[ip, blockX]                        |[ip, "keyword|text"]                                                                                                              |[, CIDR block to test the IP against.]
@@ -233,7 +235,8 @@ asin          |Returns the {wikipedia}/Inverse_trigonometric_functions[arcsine]
 atan          |Returns the {wikipedia}/Inverse_trigonometric_functions[arctangent] of the input numeric expression as an angle, expressed in radians.
 atan2         |The {wikipedia}/Atan2[angle] between the positive x-axis and the ray from the origin to the point (x , y) in the Cartesian plane, expressed in radians.
 avg           |The average of a numeric field.
-bucket        |Creates human-friendly buckets and returns a datetime value for each row that corresponds to the resulting bucket the row falls into.
+bin           |Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.
+bucket        |Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.
 case          |Accepts pairs of conditions and values. The function returns the value that belongs to the first condition that evaluates to true.
 ceil          |Round a number up to the nearest integer.
 cidr_match    |Returns true if the provided IP is contained in one of the provided CIDR blocks.
@@ -347,7 +350,8 @@ asin          |double
 atan          |double                                                                                                                      |false                       |false           |false
 atan2         |double                                                                                                                      |[false, false]              |false           |false
 avg           |double                                                                                                                      |false                       |false           |true
-bucket        |"double|date"                                                                                                               |[false, false, false, false]|false           |false
+bin           |"double|date"                                                                                                               |[false, false, true, true]  |false           |false
+bucket        |"double|date"                                                                                                               |[false, false, true, true]  |false           |false
 case          |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version"                          |[false, false]              |true            |false
 ceil          |"double|integer|long|unsigned_long"                                                                                         |false                       |false           |false
 cidr_match    |boolean                                                                                                                     |[false, false]              |true            |false
@@ -459,9 +463,9 @@ sinh         |"double sinh(angle:double|integer|long|unsigned_long)"  |angle
 
 
 // see https://github.com/elastic/elasticsearch/issues/102120
-countFunctions#[skip:-8.13.99]
+countFunctions#[skip:-8.13.99, reason:BIN added]
 meta functions |  stats  a = count(*), b = count(*), c = count(*) |  mv_expand c;
 
 a:long | b:long | c:long
-104    | 104    | 104
+105    | 105    | 105
 ;

+ 0 - 14
x-pack/plugin/esql/qa/testFixtures/src/main/resources/unsigned_long.csv-spec

@@ -150,20 +150,6 @@ warning:Line 1:27: java.lang.IllegalArgumentException: single-value function enc
 2017-11-10T20:21:58.000Z|154551962150890564|9382204513185396493|63             |OK
 ;
 
-bucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
-FROM ul_logs
-| WHERE @timestamp >= "2017-11-10T20:30:00Z" AND @timestamp < "2017-11-10T20:35:00Z"
-| EVAL bh = bucket(bytes_in, 20, 5480608687137202404, 17764691215469285192)
-| SORT @timestamp
-| KEEP @timestamp, bytes_in, bh
-;
-
-@timestamp:date          | bytes_in:ul          | bh:double
-2017-11-10T20:32:57.000Z |  8420006392678593250 | 8.0E18
-2017-11-10T20:33:06.000Z |  5480608687137202404 | 5.0E18
-2017-11-10T20:34:43.000Z | 17764691215469285192 | 1.75E19
-;
-
 toDegrees
 required_feature: esql.mv_warn
 

+ 39 - 6
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java

@@ -10,6 +10,7 @@ package org.elasticsearch.xpack.esql.analysis;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.Equals;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.NotEquals;
 import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
+import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg;
 import org.elasticsearch.xpack.esql.plan.logical.Enrich;
 import org.elasticsearch.xpack.esql.plan.logical.Eval;
@@ -140,7 +141,7 @@ public class Verifier {
                 return;
             }
             checkFilterConditionType(p, failures);
-            checkAggregate(p, failures, aliases);
+            checkAggregate(p, failures);
             checkRegexExtractOnlyOnStrings(p, failures);
 
             checkRow(p, failures);
@@ -160,7 +161,7 @@ public class Verifier {
         return failures;
     }
 
-    private static void checkAggregate(LogicalPlan p, Set<Failure> failures, AttributeMap<Expression> aliases) {
+    private static void checkAggregate(LogicalPlan p, Set<Failure> failures) {
         if (p instanceof Aggregate agg) {
             List<Expression> groupings = agg.groupings();
             AttributeSet groupRefs = new AttributeSet();
@@ -170,6 +171,21 @@ public class Verifier {
                 e.forEachUp(g -> {
                     if (g instanceof AggregateFunction af) {
                         failures.add(fail(g, "cannot use an aggregate [{}] for grouping", af));
+                    } else if (g instanceof GroupingFunction gf) {
+                        gf.children()
+                            .forEach(
+                                c -> c.forEachDown(
+                                    GroupingFunction.class,
+                                    inner -> failures.add(
+                                        fail(
+                                            inner,
+                                            "cannot nest grouping functions; found [{}] inside [{}]",
+                                            inner.sourceText(),
+                                            gf.sourceText()
+                                        )
+                                    )
+                                )
+                            );
                     }
                 });
                 // keep the grouping attributes (common case)
@@ -191,11 +207,16 @@ public class Verifier {
                 // traverse the tree to find invalid matches
                 checkInvalidNamedExpressionUsage(exp, groupings, groupRefs, failures, 0);
             });
+        } else {
+            p.forEachExpression(
+                GroupingFunction.class,
+                gf -> failures.add(fail(gf, "cannot use grouping function [{}] outside of a STATS command", gf.sourceText()))
+            );
         }
     }
 
     // traverse the expression and look either for an agg function or a grouping match
-    // stop either when no children are left, the leafs are literals or a reference attribute is given
+    // stop either when no children are left, the leaves are literals or a reference attribute is given
     private static void checkInvalidNamedExpressionUsage(
         Expression e,
         List<Expression> groups,
@@ -208,13 +229,19 @@ public class Verifier {
             af.field().forEachDown(AggregateFunction.class, f -> {
                 failures.add(fail(f, "nested aggregations [{}] not allowed inside other aggregations [{}]", f, af));
             });
+        } else if (e instanceof GroupingFunction gf) {
+            // the optimizer will later unroll expressions mixing aggs and non-aggs with a grouping function into an EVAL, which is
+            // no longer verified (by the check above in checkAggregate()), so do it explicitly here
+            if (groups.stream().anyMatch(ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) {
+                failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText()));
+            } else if (level == 0) {
+                addFailureOnGroupingUsedNakedInAggs(failures, gf, "function");
+            }
         } else if (e.foldable()) {
             // don't do anything
         } else if (groups.contains(e) || groupRefs.contains(e)) {
             if (level == 0) {
-                failures.add(
-                    fail(e, "grouping key [{}] cannot be used as an aggregate once declared in the STATS BY clause", e.sourceText())
-                );
+                addFailureOnGroupingUsedNakedInAggs(failures, e, "key");
             }
         }
         // if a reference is found, mark it as an error
@@ -246,6 +273,12 @@ public class Verifier {
         }
     }
 
+    private static void addFailureOnGroupingUsedNakedInAggs(Set<Failure> failures, Expression e, String element) {
+        failures.add(
+            fail(e, "grouping {} [{}] cannot be used as an aggregate once declared in the STATS BY clause", element, e.sourceText())
+        );
+    }
+
     private static void checkRegexExtractOnlyOnStrings(LogicalPlan p, Set<Failure> failures) {
         if (p instanceof RegexExtract re) {
             Expression expr = re.input();
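
The nesting check above walks each child subtree of a grouping function and rejects any grouping function found below it. A minimal standalone model of that rule, where Node and Grouping are illustrative stand-ins for Expression and GroupingFunction (not part of the PR):

    import java.util.List;

    // Toy expression tree: a grouping function may have arbitrary children, but no
    // grouping function may appear anywhere below another one.
    sealed interface Node permits Leaf, Grouping {
        List<Node> children();
    }

    record Leaf() implements Node {
        public List<Node> children() {
            return List.of();
        }
    }

    record Grouping(List<Node> children) implements Node {}

    final class NestingCheckSketch {
        // mirrors the forEachDown walk over each child subtree
        static boolean illegallyNested(Grouping g) {
            return g.children().stream().anyMatch(NestingCheckSketch::containsGrouping);
        }

        private static boolean containsGrouping(Node n) {
            return n instanceof Grouping || n.children().stream().anyMatch(NestingCheckSketch::containsGrouping);
        }

        public static void main(String[] args) {
            // models BUCKET(BUCKET(emp_no, 5.), 6.), which the Verifier now rejects
            Grouping nested = new Grouping(List.of(new Grouping(List.of(new Leaf()))));
            System.out.println(illegallyNested(nested)); // true
        }
    }

The real check additionally reports the source text of both the inner and outer function in the failure message.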

+ 3 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

@@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Percentile;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialCentroid;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.Values;
+import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
@@ -50,7 +51,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Acos;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Asin;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Atan;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Atan2;
-import org.elasticsearch.xpack.esql.expression.function.scalar.math.Bucket;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Ceil;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cos;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cosh;
@@ -176,6 +176,8 @@ public final class EsqlFunctionRegistry extends FunctionRegistry {
 
     private FunctionDefinition[][] functions() {
         return new FunctionDefinition[][] {
+            // grouping functions
+            new FunctionDefinition[] { def(Bucket.class, Bucket::new, "bucket", "bin"), },
             // aggregate functions
             new FunctionDefinition[] {
                 def(Avg.class, Avg::new, "avg"),
@@ -195,7 +197,6 @@ public final class EsqlFunctionRegistry extends FunctionRegistry {
                 def(Asin.class, Asin::new, "asin"),
                 def(Atan.class, Atan::new, "atan"),
                 def(Atan2.class, Atan2::new, "atan2"),
-                def(Bucket.class, Bucket::new, "bucket"),
                 def(Ceil.class, Ceil::new, "ceil"),
                 def(Cos.class, Cos::new, "cos"),
                 def(Cosh.class, Cosh::new, "cosh"),
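
Registering Bucket under both names makes BIN a pure alias of BUCKET. A toy model of multi-name registration (Def and RegistrySketch are illustrative, not the real FunctionRegistry API):

    import java.util.HashMap;
    import java.util.Map;

    // Stand-in for FunctionDefinition; only identity matters for this demo.
    record Def(String target) {}

    final class RegistrySketch {
        private final Map<String, Def> byName = new HashMap<>();

        // register the same definition object under every provided name
        void def(Def d, String... names) {
            for (String name : names) {
                byName.put(name, d);
            }
        }

        public static void main(String[] args) {
            RegistrySketch r = new RegistrySketch();
            r.def(new Def("Bucket"), "bucket", "bin");
            System.out.println(r.byName.get("bucket") == r.byName.get("bin")); // true
        }
    }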

+ 99 - 45
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/Bucket.java → x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java

@@ -5,7 +5,7 @@
  * 2.0.
  */
 
-package org.elasticsearch.xpack.esql.expression.function.scalar.math;
+package org.elasticsearch.xpack.esql.expression.function.grouping;
 
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Rounding;
@@ -15,46 +15,44 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
 import org.elasticsearch.xpack.esql.capabilities.Validatable;
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.Param;
-import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
 import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
+import org.elasticsearch.xpack.esql.expression.function.scalar.math.Floor;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
+import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
 import org.elasticsearch.xpack.ql.common.Failures;
 import org.elasticsearch.xpack.ql.expression.Expression;
 import org.elasticsearch.xpack.ql.expression.Foldables;
 import org.elasticsearch.xpack.ql.expression.Literal;
 import org.elasticsearch.xpack.ql.expression.TypeResolutions;
+import org.elasticsearch.xpack.ql.expression.function.TwoOptionalArguments;
 import org.elasticsearch.xpack.ql.tree.NodeInfo;
 import org.elasticsearch.xpack.ql.tree.Source;
 import org.elasticsearch.xpack.ql.type.DataType;
 import org.elasticsearch.xpack.ql.type.DataTypes;
 
+import java.time.ZoneId;
+import java.time.ZoneOffset;
 import java.util.List;
-import java.util.function.BiFunction;
 import java.util.function.Function;
 
+import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
 import static org.elasticsearch.xpack.esql.expression.Validations.isFoldable;
 import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToLong;
 import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FIRST;
 import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FOURTH;
 import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
 import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.THIRD;
-import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger;
 import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;
 import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isType;
 
 /**
- * Buckets dates into a given number of buckets.
- * <p>
- *     Takes a date field and three constants and picks a bucket size based on the
- *     constants. The constants are "target bucket count", "from", and "to". It looks like:
- *     {@code bucket(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")}.
- *     We have a list of "human" bucket sizes like "one month" and "four hours". We pick
- *     the largest range that covers the range in fewer than the target bucket count. So
- *     in the above case we'll pick month long buckets, yielding 12 buckets.
- * </p>
+ * Splits dates and numbers into a given number of buckets. There are two ways to invoke
+ * this function: with a user-provided span (explicit invocation mode), or a span derived
+ * from a number of desired buckets (as a hint) and a range (auto mode).
+ * In the former case, two parameters are provided; in the latter, four.
  */
-public class Bucket extends EsqlScalarFunction implements Validatable {
+public class Bucket extends GroupingFunction implements Validatable, TwoOptionalArguments {
     // TODO maybe we should just cover the whole of representable dates here - like ten years, 100 years, 1000 years, all the way up.
     // That way you never end up with more than the target number of buckets.
     private static final Rounding LARGEST_HUMAN_DATE_ROUNDING = Rounding.builder(Rounding.DateTimeUnit.YEAR_OF_CENTURY).build();
@@ -78,22 +76,24 @@ public class Bucket extends EsqlScalarFunction implements Validatable {
         Rounding.builder(TimeValue.timeValueMillis(10)).build(),
         Rounding.builder(TimeValue.timeValueMillis(1)).build(), };
 
+    private static final ZoneId DEFAULT_TZ = ZoneOffset.UTC; // TODO: plug in the config
+
     private final Expression field;
     private final Expression buckets;
     private final Expression from;
     private final Expression to;
 
     @FunctionInfo(returnType = { "double", "date" }, description = """
-        Creates human-friendly buckets and returns a datetime value
-        for each row that corresponds to the resulting bucket the row falls into.""")
+        Creates groups of values - buckets - out of a datetime or numeric input. The size of the buckets can either
+        be provided directly, or chosen based on a recommended count and values range.""")
     public Bucket(
         Source source,
         @Param(name = "field", type = { "integer", "long", "double", "date" }) Expression field,
-        @Param(name = "buckets", type = { "integer" }) Expression buckets,
-        @Param(name = "from", type = { "integer", "long", "double", "date" }) Expression from,
-        @Param(name = "to", type = { "integer", "long", "double", "date" }) Expression to
+        @Param(name = "buckets", type = { "integer", "double", "date_period", "time_duration" }) Expression buckets,
+        @Param(name = "from", type = { "integer", "long", "double", "date" }, optional = true) Expression from,
+        @Param(name = "to", type = { "integer", "long", "double", "date" }, optional = true) Expression to
     ) {
-        super(source, List.of(field, buckets, from, to));
+        super(source, from != null && to != null ? List.of(field, buckets, from, to) : List.of(field, buckets));
         this.field = field;
         this.buckets = buckets;
         this.from = from;
@@ -102,28 +102,38 @@ public class Bucket extends EsqlScalarFunction implements Validatable {
 
     @Override
     public boolean foldable() {
-        return field.foldable() && buckets.foldable() && from.foldable() && to.foldable();
+        return field.foldable() && buckets.foldable() && (from == null || from.foldable()) && (to == null || to.foldable());
     }
 
     @Override
     public ExpressionEvaluator.Factory toEvaluator(Function<Expression, ExpressionEvaluator.Factory> toEvaluator) {
-        int b = ((Number) buckets.fold()).intValue();
-
         if (field.dataType() == DataTypes.DATETIME) {
-            long f = foldToLong(from);
-            long t = foldToLong(to);
-            return DateTrunc.evaluator(
-                source(),
-                toEvaluator.apply(field),
-                new DateRoundingPicker(b, f, t).pickRounding().prepareForUnknown()
-            );
+            Rounding.Prepared preparedRounding;
+            if (buckets.dataType().isInteger()) {
+                int b = ((Number) buckets.fold()).intValue();
+                long f = foldToLong(from);
+                long t = foldToLong(to);
+                preparedRounding = new DateRoundingPicker(b, f, t).pickRounding().prepareForUnknown();
+            } else {
+                assert EsqlDataTypes.isTemporalAmount(buckets.dataType()) : "Unexpected span data type [" + buckets.dataType() + "]";
+                preparedRounding = DateTrunc.createRounding(buckets.fold(), DEFAULT_TZ);
+            }
+            return DateTrunc.evaluator(source(), toEvaluator.apply(field), preparedRounding);
         }
         if (field.dataType().isNumeric()) {
-            double f = ((Number) from.fold()).doubleValue();
-            double t = ((Number) to.fold()).doubleValue();
+            double roundTo;
+            if (from != null) {
+                int b = ((Number) buckets.fold()).intValue();
+                double f = ((Number) from.fold()).doubleValue();
+                double t = ((Number) to.fold()).doubleValue();
+                roundTo = pickRounding(b, f, t);
+            } else {
+                assert buckets.dataType().isRational() : "Unexpected rounding data type [" + buckets.dataType() + "]";
+                roundTo = ((Number) buckets.fold()).doubleValue();
+            }
+            Literal rounding = new Literal(source(), roundTo, DataTypes.DOUBLE);
 
             // We could make this more efficient, either by generating the evaluators with byte code or hand rolling this one.
-            Literal rounding = new Literal(source(), pickRounding(b, f, t), DataTypes.DOUBLE);
             Div div = new Div(source(), field, rounding);
             Floor floor = new Floor(source(), div);
             Mul mul = new Mul(source(), floor, rounding);
@@ -170,30 +180,70 @@ public class Bucket extends EsqlScalarFunction implements Validatable {
         return precise < halfPower ? halfPower : nextPowerOfTen;
     }
 
+    // supported parameter type combinations (1st, 2nd, 3rd, 4th):
+    // datetime, integer, string/datetime, string/datetime
+    // datetime, date_period/time_duration, -, -
+    // numeric, integer, numeric, numeric
+    // numeric, double, -, -
     @Override
     protected TypeResolution resolveType() {
         if (childrenResolved() == false) {
             return new TypeResolution("Unresolved children");
         }
+        var fieldType = field.dataType();
+        var bucketsType = buckets.dataType();
+        if (fieldType == DataTypes.NULL || bucketsType == DataTypes.NULL) {
+            return TypeResolution.TYPE_RESOLVED;
+        }
 
-        if (field.dataType() == DataTypes.DATETIME) {
-            return resolveType((e, o) -> isStringOrDate(e, sourceText(), o));
+        if (fieldType == DataTypes.DATETIME) {
+            TypeResolution resolution = isType(
+                buckets,
+                dt -> dt.isInteger() || EsqlDataTypes.isTemporalAmount(dt),
+                sourceText(),
+                SECOND,
+                "integral",
+                "date_period",
+                "time_duration"
+            );
+            return bucketsType.isInteger()
+                ? resolution.and(checkArgsCount(4))
+                    .and(() -> isStringOrDate(from, sourceText(), THIRD))
+                    .and(() -> isStringOrDate(to, sourceText(), FOURTH))
+                : resolution.and(checkArgsCount(2)); // temporal amount
         }
-        if (field.dataType().isNumeric()) {
-            return resolveType((e, o) -> isNumeric(e, sourceText(), o));
+        if (fieldType.isNumeric()) {
+            return bucketsType.isInteger()
+                ? checkArgsCount(4).and(() -> isNumeric(from, sourceText(), THIRD)).and(() -> isNumeric(to, sourceText(), FOURTH))
+                : isNumeric(buckets, sourceText(), SECOND).and(checkArgsCount(2));
         }
         return isType(field, e -> false, sourceText(), FIRST, "datetime", "numeric");
     }
 
-    private TypeResolution resolveType(BiFunction<Expression, TypeResolutions.ParamOrdinal, TypeResolution> checkThirdAndForth) {
-        TypeResolution resolution = isInteger(buckets, sourceText(), SECOND);
-        if (resolution.unresolved()) {
-            return resolution;
+    private TypeResolution checkArgsCount(int expectedCount) {
+        String expected = null;
+        if (expectedCount == 2 && (from != null || to != null)) {
+            expected = "two";
+        } else if (expectedCount == 4 && (from == null || to == null)) {
+            expected = "four";
+        } else if ((from == null && to != null) || (from != null && to == null)) {
+            expected = "two or four";
         }
-        return checkThirdAndForth.apply(from, THIRD).and(checkThirdAndForth.apply(to, FOURTH));
+
+        return expected == null
+            ? TypeResolution.TYPE_RESOLVED
+            : new TypeResolution(
+                format(
+                    null,
+                    "function expects exactly {} arguments when the first one is of type [{}] and the second of type [{}]",
+                    expected,
+                    field.dataType(),
+                    buckets.dataType()
+                )
+            );
     }
 
-    public static TypeResolution isStringOrDate(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
+    private static TypeResolution isStringOrDate(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
         return TypeResolutions.isType(
             e,
             exp -> DataTypes.isString(exp) || DataTypes.isDateTime(exp),
@@ -208,7 +258,9 @@ public class Bucket extends EsqlScalarFunction implements Validatable {
     public void validate(Failures failures) {
         String operation = sourceText();
 
-        failures.add(isFoldable(buckets, operation, SECOND)).add(isFoldable(from, operation, THIRD)).add(isFoldable(to, operation, FOURTH));
+        failures.add(isFoldable(buckets, operation, SECOND))
+            .add(from != null ? isFoldable(from, operation, THIRD) : null)
+            .add(to != null ? isFoldable(to, operation, FOURTH) : null);
     }
 
     private long foldToLong(Expression e) {
@@ -226,7 +278,9 @@ public class Bucket extends EsqlScalarFunction implements Validatable {
 
     @Override
     public Expression replaceChildren(List<Expression> newChildren) {
-        return new Bucket(source(), newChildren.get(0), newChildren.get(1), newChildren.get(2), newChildren.get(3));
+        Expression from = newChildren.size() > 2 ? newChildren.get(2) : null;
+        Expression to = newChildren.size() > 3 ? newChildren.get(3) : null;
+        return new Bucket(source(), newChildren.get(0), newChildren.get(1), from, to);
     }
 
     @Override
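
On the numeric path, the Mul(Floor(Div(field, span)), span) chain built in toEvaluator() reduces each value to the lower bound of its span-sized bucket. A self-contained sketch of that arithmetic (names are illustrative):

    // Each value maps to the lower bound of its bucket: floor(value / span) * span.
    final class NumericBucketSketch {
        static double bucket(double value, double span) {
            return Math.floor(value / span) * span;
        }

        public static void main(String[] args) {
            System.out.println(bucket(123.4, 50.0)); // 100.0
            System.out.println(bucket(-0.1, 50.0));  // -50.0 (floor, not truncation toward zero)
        }
    }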

+ 33 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java

@@ -0,0 +1,33 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.grouping;
+
+import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.function.Function;
+import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
+import org.elasticsearch.xpack.ql.tree.Source;
+
+import java.util.List;
+
+public abstract class GroupingFunction extends Function implements EvaluatorMapper {
+
+    protected GroupingFunction(Source source, List<Expression> fields) {
+        super(source, fields);
+    }
+
+    @Override
+    public Object fold() {
+        return EvaluatorMapper.super.fold();
+    }
+
+    @Override
+    public final ScriptTemplate asScript() {
+        throw new UnsupportedOperationException("functions do not support scripting");
+    }
+}

+ 13 - 5
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java

@@ -38,6 +38,8 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Percentile;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialCentroid;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.Values;
+import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
+import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
 import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
@@ -71,7 +73,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Acos;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Asin;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Atan;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Atan2;
-import org.elasticsearch.xpack.esql.expression.function.scalar.math.Bucket;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Ceil;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cos;
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cosh;
@@ -382,7 +383,6 @@ public final class PlanNamedTypes {
             of(ESQL_UNARY_SCLR_CLS, Trim.class, PlanNamedTypes::writeESQLUnaryScalar, PlanNamedTypes::readESQLUnaryScalar),
             // ScalarFunction
             of(ScalarFunction.class, Atan2.class, PlanNamedTypes::writeAtan2, PlanNamedTypes::readAtan2),
-            of(ScalarFunction.class, Bucket.class, PlanNamedTypes::writeBucket, PlanNamedTypes::readBucket),
             of(ScalarFunction.class, Case.class, PlanNamedTypes::writeVararg, PlanNamedTypes::readVarag),
             of(ScalarFunction.class, CIDRMatch.class, PlanNamedTypes::writeCIDRMatch, PlanNamedTypes::readCIDRMatch),
             of(ScalarFunction.class, Coalesce.class, PlanNamedTypes::writeVararg, PlanNamedTypes::readVarag),
@@ -421,6 +421,8 @@ public final class PlanNamedTypes {
             of(ArithmeticOperation.class, Mul.class, PlanNamedTypes::writeArithmeticOperation, PlanNamedTypes::readArithmeticOperation),
             of(ArithmeticOperation.class, Div.class, PlanNamedTypes::writeArithmeticOperation, PlanNamedTypes::readArithmeticOperation),
             of(ArithmeticOperation.class, Mod.class, PlanNamedTypes::writeArithmeticOperation, PlanNamedTypes::readArithmeticOperation),
+            // GroupingFunctions
+            of(GroupingFunction.class, Bucket.class, PlanNamedTypes::writeBucket, PlanNamedTypes::readBucket),
             // AggregateFunctions
             of(AggregateFunction.class, Avg.class, PlanNamedTypes::writeAggFunction, PlanNamedTypes::readAggFunction),
             of(AggregateFunction.class, Count.class, PlanNamedTypes::writeAggFunction, PlanNamedTypes::readAggFunction),
@@ -1402,15 +1404,21 @@ public final class PlanNamedTypes {
     }
 
     static Bucket readBucket(PlanStreamInput in) throws IOException {
-        return new Bucket(in.readSource(), in.readExpression(), in.readExpression(), in.readExpression(), in.readExpression());
+        return new Bucket(
+            in.readSource(),
+            in.readExpression(),
+            in.readExpression(),
+            in.readOptionalNamed(Expression.class),
+            in.readOptionalNamed(Expression.class)
+        );
     }
 
     static void writeBucket(PlanStreamOutput out, Bucket bucket) throws IOException {
         out.writeSource(bucket.source());
         out.writeExpression(bucket.field());
         out.writeExpression(bucket.buckets());
-        out.writeExpression(bucket.from());
-        out.writeExpression(bucket.to());
+        out.writeOptionalExpression(bucket.from());
+        out.writeOptionalExpression(bucket.to());
     }
 
     static final Map<String, TriFunction<Source, Expression, List<Expression>, ScalarFunction>> VARARG_CTORS = Map.ofEntries(
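
Since the two-argument form leaves from and to null, they now go over the wire via the optional read/write calls above. A sketch of the presence-flag convention this is assumed to follow, modeled with plain java.io streams rather than the real PlanStreamInput/PlanStreamOutput API:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    final class OptionalFieldSketch {
        static void writeOptional(DataOutputStream out, String value) throws IOException {
            out.writeBoolean(value != null); // presence flag first
            if (value != null) {
                out.writeUTF(value);
            }
        }

        static String readOptional(DataInputStream in) throws IOException {
            return in.readBoolean() ? in.readUTF() : null;
        }

        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(bytes);
            writeOptional(out, null); // e.g. Bucket.from in the two-argument form
            writeOptional(out, "1985-01-01");
            DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
            System.out.println(readOptional(in)); // null
            System.out.println(readOptional(in)); // 1985-01-01
        }
    }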

+ 68 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java

@@ -121,6 +121,59 @@ public class VerifierTests extends ESTestCase {
         );
     }
 
+    public void testGroupingInsideAggsAsAgg() {
+        assertEquals(
+            "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause",
+            error("from test| stats bucket(emp_no, 5.) by emp_no")
+        );
+        assertEquals(
+            "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause",
+            error("from test| stats bucket(emp_no, 5.)")
+        );
+        assertEquals(
+            "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause",
+            error("from test| stats bucket(emp_no, 5.) by bucket(emp_no, 6.)")
+        );
+        assertEquals(
+            "1:22: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause",
+            error("from test| stats 3 + bucket(emp_no, 5.) by bucket(emp_no, 6.)")
+        );
+    }
+
+    public void testGroupingInsideAggsAsGrouping() {
+        assertEquals(
+            "1:18: grouping function [bucket(emp_no, 5.)] cannot be used as an aggregate once declared in the STATS BY clause",
+            error("from test| stats bucket(emp_no, 5.) by bucket(emp_no, 5.)")
+        );
+        assertEquals(
+            "1:18: grouping function [bucket(emp_no, 5.)] cannot be used as an aggregate once declared in the STATS BY clause",
+            error("from test| stats bucket(emp_no, 5.) by emp_no, bucket(emp_no, 5.)")
+        );
+        assertEquals(
+            "1:18: grouping function [bucket(emp_no, 5.)] cannot be used as an aggregate once declared in the STATS BY clause",
+            error("from test| stats bucket(emp_no, 5.) by x = bucket(emp_no, 5.)")
+        );
+        assertEquals(
+            "1:22: grouping function [bucket(emp_no, 5.)] cannot be used as an aggregate once declared in the STATS BY clause",
+            error("from test| stats z = bucket(emp_no, 5.) by x = bucket(emp_no, 5.)")
+        );
+        assertEquals(
+            "1:22: grouping function [bucket(emp_no, 5.)] cannot be used as an aggregate once declared in the STATS BY clause",
+            error("from test| stats y = bucket(emp_no, 5.) by y = bucket(emp_no, 5.)")
+        );
+        assertEquals(
+            "1:22: grouping function [bucket(emp_no, 5.)] cannot be used as an aggregate once declared in the STATS BY clause",
+            error("from test| stats z = bucket(emp_no, 5.) by bucket(emp_no, 5.)")
+        );
+    }
+
+    public void testGroupingInsideGrouping() {
+        assertEquals(
+            "1:40: cannot nest grouping functions; found [bucket(emp_no, 5.)] inside [bucket(bucket(emp_no, 5.), 6.)]",
+            error("from test| stats max(emp_no) by bucket(bucket(emp_no, 5.), 6.)")
+        );
+    }
+
     public void testAggsWithInvalidGrouping() {
         assertEquals(
             "1:35: column [languages] cannot be used as an aggregate once declared in the STATS BY grouping key [l = languages % 3]",
@@ -177,6 +230,21 @@ public class VerifierTests extends ESTestCase {
             """));
     }
 
+    public void testBucketOnlyInAggs() {
+        assertEquals(
+            "1:23: cannot use grouping function [BUCKET(emp_no, 100.)] outside of a STATS command",
+            error("FROM test | WHERE ABS(BUCKET(emp_no, 100.)) > 0")
+        );
+        assertEquals(
+            "1:22: cannot use grouping function [BUCKET(emp_no, 100.)] outside of a STATS command",
+            error("FROM test | EVAL 3 + BUCKET(emp_no, 100.)")
+        );
+        assertEquals(
+            "1:18: cannot use grouping function [BUCKET(emp_no, 100.)] outside of a STATS command",
+            error("FROM test | SORT BUCKET(emp_no, 100.)")
+        );
+    }
+
     public void testDoubleRenamingField() {
         assertEquals(
             "1:44: Column [emp_no] renamed to [r1] and is no longer available [emp_no as r3]",

+ 4 - 6
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java

@@ -90,6 +90,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
@@ -548,12 +549,13 @@ public abstract class AbstractFunctionTestCase extends ESTestCase {
         for (int i = 0; i < args.size(); i++) {
             typesFromSignature.add(new HashSet<>());
         }
+        Function<DataType, String> typeName = dt -> dt.esType() != null ? dt.esType() : dt.typeName();
         for (Map.Entry<List<DataType>, DataType> entry : signatures().entrySet()) {
             List<DataType> types = entry.getKey();
             for (int i = 0; i < args.size() && i < types.size(); i++) {
-                typesFromSignature.get(i).add(signatureType(types.get(i)));
+                typesFromSignature.get(i).add(typeName.apply(types.get(i)));
             }
-            returnFromSignature.add(entry.getValue().esType());
+            returnFromSignature.add(typeName.apply(entry.getValue()));
         }
 
         for (int i = 0; i < args.size(); i++) {
@@ -573,10 +575,6 @@ public abstract class AbstractFunctionTestCase extends ESTestCase {
 
     }
 
-    private static String signatureType(DataType type) {
-        return type.esType() != null ? type.esType() : type.typeName();
-    }
-
     /**
      * Adds cases with {@code null} and asserts that the result is {@code null}.
      * <p>

+ 76 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/math/BucketTests.java

@@ -15,12 +15,16 @@ import org.elasticsearch.common.Rounding;
 import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase;
 import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
+import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
+import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
 import org.elasticsearch.xpack.ql.expression.Expression;
 import org.elasticsearch.xpack.ql.tree.Source;
 import org.elasticsearch.xpack.ql.type.DataType;
 import org.elasticsearch.xpack.ql.type.DataTypes;
 import org.hamcrest.Matcher;
 
+import java.time.Duration;
+import java.time.Period;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.function.LongSupplier;
@@ -37,9 +41,26 @@ public class BucketTests extends AbstractFunctionTestCase {
     public static Iterable<Object[]> parameters() {
         List<TestCaseSupplier> suppliers = new ArrayList<>();
         dateCases(suppliers, "fixed date", () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z"));
+        dateCasesWithSpan(
+            suppliers,
+            "fixed date with period",
+            () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00.00Z"),
+            EsqlDataTypes.DATE_PERIOD,
+            Period.ofYears(1)
+        );
+        dateCasesWithSpan(
+            suppliers,
+            "fixed date with duration",
+            () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z"),
+            EsqlDataTypes.TIME_DURATION,
+            Duration.ofDays(1L)
+        );
         numberCases(suppliers, "fixed long", DataTypes.LONG, () -> 100L);
+        numberCasesWithSpan(suppliers, "fixed long with span", DataTypes.LONG, () -> 100L);
         numberCases(suppliers, "fixed int", DataTypes.INTEGER, () -> 100);
+        numberCasesWithSpan(suppliers, "fixed int with span", DataTypes.INTEGER, () -> 100);
         numberCases(suppliers, "fixed double", DataTypes.DOUBLE, () -> 100.0);
+        numberCasesWithSpan(suppliers, "fixed double with span", DataTypes.DOUBLE, () -> 100.);
         // TODO make errorsForCasesWithoutExamples do something sensible for 4+ parameters
         return parameterSuppliersFromTypedData(
             anyNullIsNull(
@@ -86,6 +107,26 @@ public class BucketTests extends AbstractFunctionTestCase {
         return new TestCaseSupplier.TypedData(value, type, name).forceLiteral();
     }
 
+    private static void dateCasesWithSpan(
+        List<TestCaseSupplier> suppliers,
+        String name,
+        LongSupplier date,
+        DataType spanType,
+        Object span
+    ) {
+        suppliers.add(new TestCaseSupplier(name, List.of(DataTypes.DATETIME, spanType), () -> {
+            List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+            args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataTypes.DATETIME, "field"));
+            args.add(new TestCaseSupplier.TypedData(span, spanType, "buckets").forceLiteral());
+            return new TestCaseSupplier.TestCase(
+                args,
+                "DateTruncEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding[DAY_OF_MONTH in Z][fixed to midnight]]",
+                DataTypes.DATETIME,
+                dateResultsMatcher(args)
+            );
+        }));
+    }
+
     private static final DataType[] NUMBER_BOUNDS_TYPES = new DataType[] { DataTypes.INTEGER, DataTypes.LONG, DataTypes.DOUBLE };
 
     private static void numberCases(List<TestCaseSupplier> suppliers, String name, DataType numberType, Supplier<Number> number) {
@@ -131,6 +172,34 @@ public class BucketTests extends AbstractFunctionTestCase {
         return new TestCaseSupplier.TypedData(v, type, name).forceLiteral();
     }
 
+    private static void numberCasesWithSpan(List<TestCaseSupplier> suppliers, String name, DataType numberType, Supplier<Number> number) {
+        suppliers.add(new TestCaseSupplier(name, List.of(numberType, DataTypes.DOUBLE), () -> {
+            List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+            args.add(new TestCaseSupplier.TypedData(number.get(), "field"));
+            args.add(new TestCaseSupplier.TypedData(50., DataTypes.DOUBLE, "span").forceLiteral());
+            String attr = "Attribute[channel=0]";
+            if (numberType == DataTypes.INTEGER) {
+                attr = "CastIntToDoubleEvaluator[v=" + attr + "]";
+            } else if (numberType == DataTypes.LONG) {
+                attr = "CastLongToDoubleEvaluator[v=" + attr + "]";
+            }
+            return new TestCaseSupplier.TestCase(
+                args,
+                "MulDoublesEvaluator[lhs=FloorDoubleEvaluator[val=DivDoublesEvaluator[lhs="
+                    + attr
+                    + ", "
+                    + "rhs=LiteralsEvaluator[lit=50.0]]], rhs=LiteralsEvaluator[lit=50.0]]",
+                DataTypes.DOUBLE,
+                dateResultsMatcher(args)
+            );
+        }));
+
+    }
+
+    private static TestCaseSupplier.TypedData keywordDateLiteral(String name, DataType type, String date) {
+        return new TestCaseSupplier.TypedData(date, type, name).forceLiteral();
+    }
+
     private static Matcher<Object> dateResultsMatcher(List<TestCaseSupplier.TypedData> typedData) {
         if (typedData.get(0).type() == DataTypes.DATETIME) {
             long millis = ((Number) typedData.get(0).data()).longValue();
@@ -141,7 +210,13 @@ public class BucketTests extends AbstractFunctionTestCase {
 
     @Override
     protected Expression build(Source source, List<Expression> args) {
-        return new Bucket(source, args.get(0), args.get(1), args.get(2), args.get(3));
+        Expression from = null;
+        Expression to = null;
+        if (args.size() > 2) {
+            from = args.get(2);
+            to = args.get(3);
+        }
+        return new Bucket(source, args.get(0), args.get(1), from, to);
     }
 
     @Override
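
The "fixed date with duration" case exercises a fixed-length span, which in UTC amounts to flooring epoch millis to a multiple of the span; calendar-aware date_period spans go through Rounding and are not this simple. A java.time-only sketch under that assumption:

    import java.time.Duration;
    import java.time.Instant;

    final class DateSpanSketch {
        // floor the timestamp to a multiple of the fixed-length span, epoch-based
        static long bucket(long epochMillis, Duration span) {
            long spanMillis = span.toMillis();
            return Math.floorDiv(epochMillis, spanMillis) * spanMillis;
        }

        public static void main(String[] args) {
            long t = Instant.parse("2023-02-17T09:00:00Z").toEpochMilli();
            System.out.println(Instant.ofEpochMilli(bucket(t, Duration.ofDays(1))));
            // prints 2023-02-17T00:00:00Z, matching the expected day rounding
        }
    }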

+ 4 - 4
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

@@ -3303,9 +3303,9 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         var plan = plan("""
             from test
             | eval bucket_start = 1, bucket_end = 100000
-            | eval bucket(salary, 10, bucket_start, bucket_end)
+            | stats by bucket(salary, 10, bucket_start, bucket_end)
             """);
-        var ab = as(plan, Eval.class);
+        var ab = as(plan, Limit.class);
         assertTrue(ab.optimized());
     }
 
@@ -3313,12 +3313,12 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         VerificationException e = expectThrows(VerificationException.class, () -> plan("""
             from test
             | eval bucket_end = 100000
-            | eval bucket(salary, 10, emp_no, bucket_end)
+            | stats by bucket(salary, 10, emp_no, bucket_end)
             """));
         assertTrue(e.getMessage().startsWith("Found "));
         final String header = "Found 1 problem\nline ";
         assertEquals(
-            "3:27: third argument of [bucket(salary, 10, emp_no, bucket_end)] must be a constant, received [emp_no]",
+            "3:31: third argument of [bucket(salary, 10, emp_no, bucket_end)] must be a constant, received [emp_no]",
             e.getMessage().substring(header.length())
         );
     }

+ 5 - 0
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/Expression.java

@@ -15,6 +15,7 @@ import org.elasticsearch.xpack.ql.type.DataType;
 import org.elasticsearch.xpack.ql.util.StringUtils;
 
 import java.util.List;
+import java.util.function.Supplier;
 
 /**
  * In a SQL statement, an Expression is whatever a user specifies inside an
@@ -54,6 +55,10 @@ public abstract class Expression extends Node<Expression> implements Resolvable
             return failed ? this : other;
         }
 
+        public TypeResolution and(Supplier<TypeResolution> other) {
+            return failed ? this : other.get();
+        }
+
         public String message() {
             return message;
         }
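
The new overload short-circuits: the supplier is never evaluated once a resolution has failed, which is what lets Bucket.resolveType() chain checks that touch the optional from/to arguments only when the earlier checks pass. A standalone model, with Check standing in for TypeResolution:

    import java.util.function.Supplier;

    record Check(boolean failed, String message) {
        Check and(Supplier<Check> next) {
            return failed ? this : next.get(); // next.get() is skipped after a failure
        }

        public static void main(String[] args) {
            Check failedCheck = new Check(true, "second argument must be numeric");
            // the supplier would throw if evaluated; short-circuiting prevents that
            Check result = failedCheck.and(() -> { throw new AssertionError("not reached"); });
            System.out.println(result.message()); // second argument must be numeric
        }
    }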