1 year ago · a21242054b
--- a/docs/reference/esql/esql-functions-operators.asciidoc
+++ b/docs/reference/esql/esql-functions-operators.asciidoc
@@ -16,6 +16,12 @@ The reference documentation is divided into the following categories:
 
				 include::functions/aggregation-functions.asciidoc[tag=agg_list]
			
 
				 ====
			
 
				 
			
 
				+.*Grouping functions*
			
 
				+[%collapsible]
			
 
				+====
			
 
				+include::functions/grouping-functions.asciidoc[tag=group_list]
			
 
				+====
			
 
				+
			
 
				 .*Math functions*
			
 
				 [%collapsible]
			
 
				 ====
			
@@ -68,6 +74,7 @@ include::functions/operators.asciidoc[tag=op_list]
 
				 ====
			
 
				 
			
 
				 include::functions/aggregation-functions.asciidoc[]
			
 
				+include::functions/grouping-functions.asciidoc[]
			
 
				 include::functions/math-functions.asciidoc[]
			
 
				 include::functions/string-functions.asciidoc[]
			
 
				 include::functions/date-time-functions.asciidoc[]
			
--- a/docs/reference/esql/esql-get-started.asciidoc
+++ b/docs/reference/esql/esql-get-started.asciidoc
@@ -244,13 +244,6 @@ To track statistics over time, {esql} enables you to create histograms using the
 
				 and returns a value for each row that corresponds to the resulting bucket the
			
 
				 row falls into.
			
 
				 
			
 
				-For example, to create hourly buckets for the data on October 23rd:
			
 
				-
			
 
				-[source,esql]
			
 
				-----
			
 
				-include::{esql-specs}/bucket.csv-spec[tag=gs-bucket]
			
 
				-----
			
 
				-
			
 
				 Combine `BUCKET` with <<esql-stats-by>> to create a histogram. For example,
			
 
				 to count the number of events per hour:
			
 
				 
			
--- a/docs/reference/esql/functions/aggregation-functions.asciidoc
+++ b/docs/reference/esql/functions/aggregation-functions.asciidoc
@@ -5,7 +5,7 @@
 
				 <titleabbrev>Aggregate functions</titleabbrev>
			
 
				 ++++
			
 
				 
			
 
				-The <<esql-stats-by>> function supports these aggregate functions:
			
 
				+The <<esql-stats-by>> command supports these aggregate functions:
			
 
				 
			
 
				 // tag::agg_list[]
			
 
				 * <<esql-agg-avg>>
			
--- a/docs/reference/esql/functions/date-time-functions.asciidoc
+++ b/docs/reference/esql/functions/date-time-functions.asciidoc
@@ -8,7 +8,6 @@
 
				 {esql} supports these date-time functions:
			
 
				 
			
 
				 // tag::date_list[]
			
 
				-* <<esql-bucket>>
			
 
				 * <<esql-date_diff>>
			
 
				 * <<esql-date_extract>>
			
 
				 * <<esql-date_format>>
			
@@ -17,7 +16,6 @@
 
				 * <<esql-now>>
			
 
				 // end::date_list[]
			
 
				 
			
 
				-include::layout/bucket.asciidoc[]
			
 
				 include::layout/date_diff.asciidoc[]
			
 
				 include::layout/date_extract.asciidoc[]
			
 
				 include::layout/date_format.asciidoc[]
			
--- a/docs/reference/esql/functions/examples/bucket.asciidoc
+++ b/docs/reference/esql/functions/examples/bucket.asciidoc
@@ -2,6 +2,10 @@
 
				 
			
 
				 *Examples*
			
 
				 
			
 
				+`BUCKET` can work in two modes: one in which the size of the bucket is computed
			
 
				+based on a recommended bucket count and a range (four parameters), and
			
 
				+another in which the bucket size is provided directly (two parameters).
			
 
				+
			
 
				 Using a target number of buckets, a start of a range, and an end of a range,
			
 
				 `BUCKET` picks an appropriate bucket size to generate the target number of buckets or fewer.
			
 
				 For example, asking for at most 20 buckets over a year results in monthly buckets:
			
@@ -17,7 +21,7 @@ include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonth-result]
 
				 The goal isn't to provide *exactly* the target number of buckets,
			
 
				 it's to pick a range that people are comfortable with that provides at most the target number of buckets.
			
 
				 
			
 
				-Combine `BUCKET` with <<esql-stats-by>> to create a histogram:
			
 
				+Combine `BUCKET` with an <<esql-agg-functions,aggregation>> to create a histogram:
			
 
				 [source.merge.styled,esql]
			
 
				 ----
			
 
				 include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonthlyHistogram]
			
@@ -28,7 +32,7 @@ include::{esql-specs}/bucket.csv-spec[tag=docsBucketMonthlyHistogram-result]
 
				 |===
			
 
				 
			
 
				 NOTE: `BUCKET` does not create buckets that don't match any documents.
			
 
				-+ "That's why this example is missing `1985-03-01` and other dates.
			
 
				+That's why this example is missing `1985-03-01` and other dates.
			
 
				 
			
 
				 Asking for more buckets can result in a smaller range.
			
 
				 For example, asking for at most 100 buckets in a year results in weekly buckets:
			
@@ -45,6 +49,20 @@ NOTE: `BUCKET` does not filter any rows. It only uses the provided range to pick
 
				 For rows with a value outside of the range, it returns a bucket value that corresponds to a bucket outside the range.
			
 
				 Combine `BUCKET` with <<esql-where>> to filter rows.
			
 
				 
			
 
				+If the desired bucket size is known in advance, simply provide it as the second
			
 
				+argument, leaving the range out:
			
 
				+[source.merge.styled,esql]
			
 
				+----
			
 
				+include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogramWithSpan]
			
 
				+----
			
 
				+[%header.monospaced.styled,format=dsv,separator=|]
			
 
				+|===
			
 
				+include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogramWithSpan-result]
			
 
				+|===
			
 
				+
			
 
				+NOTE: When providing the bucket size as the second parameter, it must be a time
			
 
				+duration or date period.
			
 
				+
			
 
				 `BUCKET` can also operate on numeric fields. For example, to create a salary histogram:
			
 
				 [source.merge.styled,esql]
			
 
				 ----
			
@@ -58,6 +76,20 @@ include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumeric-result]
 
				 Unlike the earlier example that intentionally filters on a date range, you rarely want to filter on a numeric range.
			
 
				 You have to find the `min` and `max` separately. {esql} doesn't yet have an easy way to do that automatically.
			
 
				 
			
 
				+The range can be omitted if the desired bucket size is known in advance. Simply
			
 
				+provide it as the second argument:
			
 
				+[source.merge.styled,esql]
			
 
				+----
			
 
				+include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumericWithSpan]
			
 
				+----
			
 
				+[%header.monospaced.styled,format=dsv,separator=|]
			
 
				+|===
			
 
				+include::{esql-specs}/bucket.csv-spec[tag=docsBucketNumericWithSpan-result]
			
 
				+|===
			
 
				+
			
 
				+NOTE: When providing the bucket size as the second parameter, it must be
			
 
				+of a floating point type.
			
 
				+
			
 
				 Create hourly buckets for the last 24 hours, and calculate the number of events per hour:
			
 
				 [source.merge.styled,esql]
			
 
				 ----
			
@@ -77,3 +109,15 @@ include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg]
 
				 include::{esql-specs}/bucket.csv-spec[tag=bucket_in_agg-result]
			
 
				 |===
			
 
				 
			
 
				+`BUCKET` may be used in both the aggregating and grouping part of the
			
 
				+<<esql-stats-by, STATS ... BY ...>> command provided that in the aggregating
			
 
				+part the function is referenced by an alias defined in the
			
 
				+grouping part, or that it is invoked with the exact same expression:
			
 
				+[source.merge.styled,esql]
			
 
				+----
			
 
				+include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression]
			
 
				+----
			
 
				+[%header.monospaced.styled,format=dsv,separator=|]
			
 
				+|===
			
 
				+include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression-result]
			
 
				+|===
			
--- a/docs/reference/esql/functions/grouping-functions.asciidoc
+++ b/docs/reference/esql/functions/grouping-functions.asciidoc
@@ -0,0 +1,14 @@
 
				+[[esql-group-functions]]
			
 
				+==== {esql} grouping functions
			
 
				+
			
 
				+++++
			
 
				+<titleabbrev>Grouping functions</titleabbrev>
			
 
				+++++
			
 
				+
			
 
				+The <<esql-stats-by>> command supports these grouping functions:
			
 
				+
			
 
				+// tag::group_list[]
			
 
				+* <<esql-bucket>>
			
 
				+// end::group_list[]
			
 
				+
			
 
				+include::layout/bucket.asciidoc[]
			
--- a/docs/reference/esql/functions/kibana/definition/bucket.json
+++ b/docs/reference/esql/functions/kibana/definition/bucket.json
@@ -939,7 +939,9 @@
 
				     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT hire_date",
			
 
				     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT month",
			
 
				     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT week",
			
 
				+    "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week)\n| SORT week",
			
 
				     "FROM employees\n| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999)\n| SORT bs",
			
 
				+    "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)\n| SORT b",
			
 
				     "FROM sample_data \n| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()\n| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())",
			
 
				     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT bucket"
			
 
				   ]
			
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
@@ -175,13 +175,13 @@ FROM employees
 
				 ;
			
 
				 
			
 
				 //tag::docsBucketMonthlyHistogram-result[]
			
 
				-  hires_per_month:long    |    month:date
			
 
				-2              |1985-02-01T00:00:00.000Z
			
 
				-1              |1985-05-01T00:00:00.000Z
			
 
				-1              |1985-07-01T00:00:00.000Z
			
 
				-1              |1985-09-01T00:00:00.000Z
			
 
				-2              |1985-10-01T00:00:00.000Z
			
 
				-4              |1985-11-01T00:00:00.000Z
			
 
				+  hires_per_month:long |    month:date
			
 
				+2                      |1985-02-01T00:00:00.000Z
			
 
				+1                      |1985-05-01T00:00:00.000Z
			
 
				+1                      |1985-07-01T00:00:00.000Z
			
 
				+1                      |1985-09-01T00:00:00.000Z
			
 
				+2                      |1985-10-01T00:00:00.000Z
			
 
				+4                      |1985-11-01T00:00:00.000Z
			
 
				 //end::docsBucketMonthlyHistogram-result[]
			
 
				 ;
			
 
				 
			
@@ -196,15 +196,36 @@ FROM employees
 
				 
			
 
				 //tag::docsBucketWeeklyHistogram-result[]
			
 
				   hires_per_week:long    |    week:date
			
 
				-2              |1985-02-18T00:00:00.000Z
			
 
				-1              |1985-05-13T00:00:00.000Z
			
 
				-1              |1985-07-08T00:00:00.000Z
			
 
				-1              |1985-09-16T00:00:00.000Z
			
 
				-2              |1985-10-14T00:00:00.000Z
			
 
				-4              |1985-11-18T00:00:00.000Z
			
 
				+2                        |1985-02-18T00:00:00.000Z
			
 
				+1                        |1985-05-13T00:00:00.000Z
			
 
				+1                        |1985-07-08T00:00:00.000Z
			
 
				+1                        |1985-09-16T00:00:00.000Z
			
 
				+2                        |1985-10-14T00:00:00.000Z
			
 
				+4                        |1985-11-18T00:00:00.000Z
			
 
				 //end::docsBucketWeeklyHistogram-result[]
			
 
				 ;
			
 
				 
			
 
				+// bucketing in span mode (identical results to above)
			
 
				+docsBucketWeeklyHistogramWithSpan#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
			
 
				+//tag::docsBucketWeeklyHistogramWithSpan[]
			
 
				+FROM employees
			
 
				+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
			
 
				+| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week)
			
 
				+| SORT week
			
 
				+//end::docsBucketWeeklyHistogramWithSpan[]
			
 
				+;
			
 
				+
			
 
				+//tag::docsBucketWeeklyHistogramWithSpan-result[]
			
 
				+  hires_per_week:long    |    week:date
			
 
				+2                        |1985-02-18T00:00:00.000Z
			
 
				+1                        |1985-05-13T00:00:00.000Z
			
 
				+1                        |1985-07-08T00:00:00.000Z
			
 
				+1                        |1985-09-16T00:00:00.000Z
			
 
				+2                        |1985-10-14T00:00:00.000Z
			
 
				+4                        |1985-11-18T00:00:00.000Z
			
 
				+//end::docsBucketWeeklyHistogramWithSpan-result[]
			
 
				+;
			
 
				+
			
 
				 docsBucketLast24hr#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
			
 
				 //tag::docsBucketLast24hr[]
			
 
				 FROM sample_data 
			
@@ -218,17 +239,6 @@ FROM sample_data
 
				 //end::docsBucketLast24hr-result[]
			
 
				 ;
			
 
				 
			
 
				-docsGettingStartedBucket#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
			
 
				-// tag::gs-bucket[]
			
 
				-FROM sample_data
			
 
				-| STATS BY bucket = BUCKET(@timestamp, 24, "2023-10-23T00:00:00Z", NOW())
			
 
				-// end::gs-bucket[]
			
 
				-| LIMIT 0
			
 
				-;
			
 
				-
			
 
				-bucket:date
			
 
				-;
			
 
				-
			
 
				 docsGettingStartedBucketStatsBy#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
			
 
				 // tag::gs-bucket-stats-by[]
			
 
				 FROM sample_data
			
@@ -352,12 +362,15 @@ FROM employees
 
				 
			
 
				 // bucketing in span mode (identical results to above)
			
 
				 bucketNumericWithSpan#[skip:-8.13.99, reason:BUCKET extended in 8.14]
			
 
				+//tag::docsBucketNumericWithSpan[]
			
 
				 FROM employees
			
 
				 | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
			
 
				 | STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)
			
 
				 | SORT b
			
 
				+//end::docsBucketNumericWithSpan[]
			
 
				 ;
			
 
				 
			
 
				+//tag::docsBucketNumericWithSpan-result[]
			
 
				        c:long  |       b:double
			
 
				 1              |25000.0
			
 
				 1              |30000.0
			
@@ -368,6 +381,7 @@ FROM employees
 
				 1              |60000.0
			
 
				 1              |65000.0
			
 
				 1              |70000.0
			
 
				+//end::docsBucketNumericWithSpan-result[]
			
 
				 ;
			
 
				 
			
 
				 bucketNumericMixedTypes#[skip:-8.13.99, reason:BUCKET extended in 8.14]
			
@@ -439,14 +453,28 @@ FROM employees
 
				 ;
			
 
				 
			
 
				 reuseGroupingFunctionWithExpression#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
			
 
				+//tag::reuseGroupingFunctionWithExpression[]
			
 
				 FROM employees
			
 
				-| STATS sum = BUCKET(salary % 2 + 13, 1.) + 1 BY bucket = BUCKET(salary % 2 + 13, 1.)
			
 
				-| SORT sum
			
 
				-;
			
 
				-
			
 
				- sum:double    | bucket:double
			
 
				-14.0           |13.0
			
 
				-15.0           |14.0
			
 
				+| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
			
 
				+| SORT b1, b2
			
 
				+| KEEP s1, b1, s2, b2
			
 
				+//end::reuseGroupingFunctionWithExpression[]
			
 
				+;
			
 
				+
			
 
				+//tag::reuseGroupingFunctionWithExpression-result[]
			
 
				+ s1:double | b1:double | s2:double   | b2:double
			
 
				+351.0      |350.0      |1002.0       |1000.0
			
 
				+401.0      |400.0      |1002.0       |1000.0
			
 
				+451.0      |450.0      |1002.0       |1000.0
			
 
				+501.0      |500.0      |1002.0       |1000.0
			
 
				+551.0      |550.0      |1002.0       |1000.0
			
 
				+601.0      |600.0      |1002.0       |1000.0
			
 
				+601.0      |600.0      |1052.0       |1050.0
			
 
				+651.0      |650.0      |1052.0       |1050.0
			
 
				+701.0      |700.0      |1052.0       |1050.0
			
 
				+751.0      |750.0      |1052.0       |1050.0
			
 
				+801.0      |800.0      |1052.0       |1050.0
			
 
				+//end::reuseGroupingFunctionWithExpression-result[]
			
 
				 ;
			
 
				 
			
 
				 reuseGroupingFunctionWithinAggs#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
			
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java
@@ -92,6 +92,10 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
 
				         examples = {
			
 
				             @Example(
			
 
				                 description = """
			
 
				+                    `BUCKET` can work in two modes: one in which the size of the bucket is computed
			
 
				+                    based on a buckets count recommendation (four parameters) and a range and
			
 
				+                    another in which the bucket size is provided directly (two parameters).
			
 
				+
			
 
				                     Using a target number of buckets, a start of a range, and an end of a range,
			
 
				                     `BUCKET` picks an appropriate bucket size to generate the target number of buckets or fewer.
			
 
				                     For example, asking for at most 20 buckets over a year results in monthly buckets:""",
			
@@ -102,12 +106,12 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
 
				                     it's to pick a range that people are comfortable with that provides at most the target number of buckets."""
			
 
				             ),
			
 
				             @Example(
			
 
				-                description = "Combine `BUCKET` with <<esql-stats-by>> to create a histogram:",
			
 
				+                description = "Combine `BUCKET` with an <<esql-agg-functions,aggregation>> to create a histogram:",
			
 
				                 file = "bucket",
			
 
				                 tag = "docsBucketMonthlyHistogram",
			
 
				                 explanation = """
			
 
				                     NOTE: `BUCKET` does not create buckets that don't match any documents.
			
 
				-                    + "That's why this example is missing `1985-03-01` and other dates."""
			
 
				+                    That's why this example is missing `1985-03-01` and other dates."""
			
 
				             ),
			
 
				             @Example(
			
 
				                 description = """
			
@@ -120,6 +124,11 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
 
				                     For rows with a value outside of the range, it returns a bucket value that corresponds to a bucket outside the range.
			
 
				                     Combine`BUCKET` with <<esql-where>> to filter rows."""
			
 
				             ),
			
 
				+            @Example(description = """
			
 
				+                If the desired bucket size is known in advance, simply provide it as the second
			
 
				+                argument, leaving the range out:""", file = "bucket", tag = "docsBucketWeeklyHistogramWithSpan", explanation = """
			
 
				+                NOTE: When providing the bucket size as the second parameter, its type must be
			
 
				+                of a time duration or date period type."""),
			
 
				             @Example(
			
 
				                 description = "`BUCKET` can also operate on numeric fields. For example, to create a salary histogram:",
			
 
				                 file = "bucket",
			
@@ -128,6 +137,11 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional
 
				                     Unlike the earlier example that intentionally filters on a date range, you rarely want to filter on a numeric range.
			
 
				                     You have to find the `min` and `max` separately. {esql} doesn't yet have an easy way to do that automatically."""
			
 
				             ),
			
 
				+            @Example(description = """
			
 
				+                If the desired bucket size is known in advance, simply provide it as the second
			
 
				+                argument, leaving the range out:""", file = "bucket", tag = "docsBucketNumericWithSpan", explanation = """
			
 
				+                NOTE: When providing the bucket size as the second parameter, its type must be
			
 
				+                of a floating type."""),
			
 
				             @Example(
			
 
				                 description = "Create hourly buckets for the last 24 hours, and calculate the number of events per hour:",
			
 
				                 file = "bucket",