[8.19] Backport ES|QL sample agg function (#129627)

* ES|QL SAMPLE aggregation function (#127629)

* ES|QL SAMPLE aggregation function

* [CI] Auto commit changes from spotless

* ThreadLocalRandom -> SplittableRandom

* Update docs/changelog/127629.yaml

* fix yaml test

* Add SampleTests

* docs + example

* polish code

* mark generated imports

* comment with algorithm description

* use Randomness.get()

* close properly

* type checks

* reuse hash

* regen some files

* [CI] Auto commit changes from spotless

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>

* Fix + unmute SampleTests (#127959)

* Fix memory tracking of ES|QL sample agg (#128467)

* Fix memory tracking of ES|QL sample agg

* [CI] Auto commit changes from spotless

* polish code

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>

* ESQL: Unclean generated imports (#127723)

This removes a ton of the tricky juggling we do for generated java files
to keep the imports in order. Instead, we just live with them being out
of order a little. It's not great, but it's so so so much easier than
the terrible juggling we had been doing.

* ESQL: Disable format checks on generated imports (#127648)

This builds the infrastructure to disable spotless and some checkstyle
rules on generated imports. This works around the most frustrating part
of ESQL's string template generated files - the imports. It allows
unused and out of order imports. This can let us remove a lot of
cumbersome, tricky, and fairly useless `$if$` blocks from the templates.

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Nik Everett <nik9000@gmail.com>
Jan Kuipers, 4 months ago
parent commit 2db9127320
100 changed files with 4744 additions and 32 deletions
1. build-tools-internal/src/main/resources/checkstyle.xml (+5 -0)
2. docs/changelog/127629.yaml (+5 -0)
3. docs/reference/esql/functions/description/sample.asciidoc (+5 -0)
4. docs/reference/esql/functions/examples/sample.asciidoc (+13 -0)
5. docs/reference/esql/functions/kibana/definition/sample.json (+265 -0)
6. docs/reference/esql/functions/kibana/docs/sample.md (+11 -0)
7. docs/reference/esql/functions/layout/sample.asciidoc (+15 -0)
8. docs/reference/esql/functions/parameters/sample.asciidoc (+9 -0)
9. docs/reference/esql/functions/signature/sample.svg (+1 -0)
10. docs/reference/esql/functions/types/sample.asciidoc (+22 -0)
11. server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java (+1 -1)
12. test/framework/src/main/java/org/elasticsearch/test/MixWithIncrement.java (+65 -0)
13. x-pack/plugin/esql/build.gradle (+1 -0)
14. x-pack/plugin/esql/compute/build.gradle (+29 -1)
15. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleBooleanAggregator.java (+208 -0)
16. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleBytesRefAggregator.java (+208 -0)
17. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleDoubleAggregator.java (+208 -0)
18. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleIntAggregator.java (+208 -0)
19. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleLongAggregator.java (+208 -0)
20. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java (+3 -0)
21. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayVector.java (+5 -0)
22. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java (+2 -0)
23. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlockBuilder.java (+4 -0)
24. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanLookup.java (+2 -0)
25. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVector.java (+2 -0)
26. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java (+2 -0)
27. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java (+2 -0)
28. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayVector.java (+4 -0)
29. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlock.java (+2 -0)
30. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlockBuilder.java (+5 -0)
31. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefLookup.java (+2 -0)
32. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java (+2 -0)
33. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVectorBlock.java (+2 -0)
34. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBooleanVector.java (+4 -0)
35. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBytesRefVector.java (+3 -0)
36. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantDoubleVector.java (+4 -0)
37. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantFloatVector.java (+4 -0)
38. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantIntVector.java (+4 -0)
39. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantLongVector.java (+4 -0)
40. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java (+3 -0)
41. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayVector.java (+5 -0)
42. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBlock.java (+2 -0)
43. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBlockBuilder.java (+4 -0)
44. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleLookup.java (+2 -0)
45. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVector.java (+2 -0)
46. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorBlock.java (+2 -0)
47. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatArrayBlock.java (+3 -0)
48. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatArrayVector.java (+5 -0)
49. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatBlock.java (+2 -0)
50. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatBlockBuilder.java (+4 -0)
51. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatLookup.java (+2 -0)
52. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatVector.java (+2 -0)
53. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatVectorBlock.java (+2 -0)
54. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java (+3 -0)
55. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayVector.java (+5 -0)
56. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java (+2 -0)
57. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlockBuilder.java (+4 -0)
58. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntLookup.java (+2 -0)
59. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVector.java (+2 -0)
60. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorBlock.java (+2 -0)
61. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java (+3 -0)
62. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayVector.java (+5 -0)
63. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBlock.java (+2 -0)
64. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBlockBuilder.java (+4 -0)
65. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongLookup.java (+2 -0)
66. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVector.java (+2 -0)
67. x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorBlock.java (+2 -0)
68. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBooleanAggregatorFunction.java (+167 -0)
69. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBooleanAggregatorFunctionSupplier.java (+50 -0)
70. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBooleanGroupingAggregatorFunction.java (+214 -0)
71. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBytesRefAggregatorFunction.java (+171 -0)
72. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBytesRefAggregatorFunctionSupplier.java (+50 -0)
73. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBytesRefGroupingAggregatorFunction.java (+217 -0)
74. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleDoubleAggregatorFunction.java (+168 -0)
75. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleDoubleAggregatorFunctionSupplier.java (+50 -0)
76. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleDoubleGroupingAggregatorFunction.java (+214 -0)
77. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleIntAggregatorFunction.java (+168 -0)
78. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleIntAggregatorFunctionSupplier.java (+50 -0)
79. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleIntGroupingAggregatorFunction.java (+212 -0)
80. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleLongAggregatorFunction.java (+168 -0)
81. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleLongAggregatorFunctionSupplier.java (+50 -0)
82. x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleLongGroupingAggregatorFunction.java (+214 -0)
83. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-SampleAggregator.java.st (+208 -0)
84. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st (+3 -5)
85. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayVector.java.st (+2 -11)
86. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st (+2 -0)
87. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BlockBuilder.java.st (+4 -9)
88. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ConstantVector.java.st (+2 -4)
89. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Lookup.java.st (+2 -0)
90. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st (+2 -0)
91. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st (+2 -0)
92. x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoder.java (+1 -1)
93. x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleBooleanAggregatorFunctionTests.java (+92 -0)
94. x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleBytesRefAggregatorFunctionTests.java (+99 -0)
95. x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleDoubleAggregatorFunctionTests.java (+94 -0)
96. x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleIntAggregatorFunctionTests.java (+91 -0)
97. x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleLongAggregatorFunctionTests.java (+92 -0)
98. x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/SequenceBooleanBlockSourceOperator.java (+5 -0)
99. x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_sample.csv-spec (+249 -0)
100. x-pack/plugin/esql/src/main/generated-src/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceBooleanEvaluator.java (+2 -0)

+ 5 - 0
build-tools-internal/src/main/resources/checkstyle.xml

@@ -68,6 +68,11 @@
 
     <!-- Unused imports are forbidden -->
     <module name="UnusedImports" />
+    <module name="SuppressionCommentFilter">
+      <property name="offCommentFormat" value="begin generated imports"/>
+      <property name="onCommentFormat" value="end generated imports"/>
+      <property name="checkFormat" value="UnusedImports"/>
+    </module>
 
     <!-- Non-inner classes must be in files that match their names. -->
     <module name="OuterTypeFilename" />

+ 5 - 0
docs/changelog/127629.yaml

@@ -0,0 +1,5 @@
+pr: 127629
+summary: ES|QL SAMPLE aggregation function
+area: Machine Learning
+type: feature
+issues: []

+ 5 - 0
docs/reference/esql/functions/description/sample.asciidoc

@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+Collects sample values for a field.

+ 13 - 0
docs/reference/esql/functions/examples/sample.asciidoc

@@ -0,0 +1,13 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Example*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/stats_sample.csv-spec[tag=doc]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/stats_sample.csv-spec[tag=doc-result]
+|===
+

+ 265 - 0
docs/reference/esql/functions/kibana/definition/sample.json

@@ -0,0 +1,265 @@
+{
+  "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+  "type" : "agg",
+  "name" : "sample",
+  "description" : "Collects sample values for a field.",
+  "signatures" : [
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "boolean",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "boolean"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "cartesian_point",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "cartesian_point"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "cartesian_shape",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "cartesian_shape"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "date_nanos",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "date_nanos"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "double",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "double"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "geo_point",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "geo_point"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "geo_shape",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "geo_shape"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "integer"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "ip",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "ip"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "keyword",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "keyword"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "long",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "long"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "text",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "keyword"
+    },
+    {
+      "params" : [
+        {
+          "name" : "field",
+          "type" : "version",
+          "optional" : false,
+          "description" : "The field to collect sample values for."
+        },
+        {
+          "name" : "limit",
+          "type" : "integer",
+          "optional" : false,
+          "description" : "The maximum number of values to collect."
+        }
+      ],
+      "variadic" : false,
+      "returnType" : "version"
+    }
+  ],
+  "examples" : [
+    "FROM employees\n| STATS sample = SAMPLE(gender, 5)"
+  ],
+  "preview" : false,
+  "snapshot_only" : false
+}

+ 11 - 0
docs/reference/esql/functions/kibana/docs/sample.md

@@ -0,0 +1,11 @@
+<!--
+This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+-->
+
+### SAMPLE
+Collects sample values for a field.
+
+```
+FROM employees
+| STATS sample = SAMPLE(gender, 5)
+```

+ 15 - 0
docs/reference/esql/functions/layout/sample.asciidoc

@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-sample]]
+=== `SAMPLE`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/sample.svg[Embedded,opts=inline]
+
+include::../parameters/sample.asciidoc[]
+include::../description/sample.asciidoc[]
+include::../types/sample.asciidoc[]
+include::../examples/sample.asciidoc[]

+ 9 - 0
docs/reference/esql/functions/parameters/sample.asciidoc

@@ -0,0 +1,9 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`field`::
+The field to collect sample values for.
+
+`limit`::
+The maximum number of values to collect.

+ 1 - 0
docs/reference/esql/functions/signature/sample.svg

@@ -0,0 +1 @@
+<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="408" height="46" viewbox="0 0 408 46"><defs><style type="text/css">#guide .c{fill:none;stroke:#222222;}#guide .k{fill:#000000;font-family:Roboto Mono,Sans-serif;font-size:20px;}#guide .s{fill:#e4f4ff;stroke:#222222;}#guide .syn{fill:#8D8D8D;font-family:Roboto Mono,Sans-serif;font-size:20px;}</style></defs><path class="c" d="M0 31h5m92 0h10m32 0h10m80 0h10m32 0h10m80 0h10m32 0h5"/><rect class="s" x="5" y="5" width="92" height="36"/><text class="k" x="15" y="31">SAMPLE</text><rect class="s" x="107" y="5" width="32" height="36" rx="7"/><text class="syn" x="117" y="31">(</text><rect class="s" x="149" y="5" width="80" height="36" rx="7"/><text class="k" x="159" y="31">field</text><rect class="s" x="239" y="5" width="32" height="36" rx="7"/><text class="syn" x="249" y="31">,</text><rect class="s" x="281" y="5" width="80" height="36" rx="7"/><text class="k" x="291" y="31">limit</text><rect class="s" x="371" y="5" width="32" height="36" rx="7"/><text class="syn" x="381" y="31">)</text></svg>

+ 22 - 0
docs/reference/esql/functions/types/sample.asciidoc

@@ -0,0 +1,22 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+field | limit | result
+boolean | integer | boolean
+cartesian_point | integer | cartesian_point
+cartesian_shape | integer | cartesian_shape
+date | integer | date
+date_nanos | integer | date_nanos
+double | integer | double
+geo_point | integer | geo_point
+geo_shape | integer | geo_shape
+integer | integer | integer
+ip | integer | ip
+keyword | integer | keyword
+long | integer | long
+text | integer | keyword
+version | integer | version
+|===

+ 1 - 1
server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java

@@ -81,7 +81,7 @@ public enum MurmurHash3 {
     private static long C1 = 0x87c37b91114253d5L;
     private static long C2 = 0x4cf5ad432745937fL;
 
-    protected static long fmix(long k) {
+    public static long fmix(long k) {
         k ^= k >>> 33;
         k *= 0xff51afd7ed558ccdL;
         k ^= k >>> 33;
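
fmix is MurmurHash3's 64-bit finalization mix, and since xorshifts and odd-constant multiplies are invertible, iterating it from a nonzero start walks a long cycle of distinct, well-scrambled values. Making it public lets the new MixWithIncrement helper (next file) drive exactly that iteration. A minimal sketch of the idea, assuming only the now-public method:

```java
import org.elasticsearch.common.hash.MurmurHash3;

public class FmixSequenceSketch {
    public static void main(String[] args) {
        long mix = 1L; // nonzero start: fmix(0) == 0, so 0 would be a fixed point
        for (int i = 0; i < 3; i++) {
            // fmix is a bijection on longs, so distinct inputs give distinct outputs
            mix = MurmurHash3.fmix(mix);
            System.out.printf("step %d: %016x%n", i, mix);
        }
    }
}
```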

+ 65 - 0
test/framework/src/main/java/org/elasticsearch/test/MixWithIncrement.java

@@ -0,0 +1,65 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.test;
+
+import com.carrotsearch.randomizedtesting.SeedDecorator;
+
+import org.elasticsearch.common.Randomness;
+import org.elasticsearch.common.hash.MurmurHash3;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * The {@link Randomness} class creates random generators with the same seed
+ * in every thread.
+ * <p>
+ * This means that repeatedly calling:
+ * <pre>
+ *   {@code
+ *     new Thread(() -> System.out.println(Randomness.get().nextInt())).start();
+ *   }
+ * </pre>
+ * will print the same number in every thread.
+ * <p>
+ * For some use cases, this is not desirable, e.g. when testing that the random
+ * behavior obeys certain statistical properties.
+ * <p>
+ * To fix this, annotate a test class with:
+ * <pre>
+ *   {@code
+ *     @SeedDecorators(MixWithIncrement.class)
+ *   }
+ * </pre>
+ * In this way, an additional seed is mixed into the seed of the random generators.
+ * This additional seed can be updated by calling:
+ * <pre>
+ *   {@code
+ *     MixWithIncrement.next()
+ *   }
+ * </pre>
+ * to make sure that new threads will get a different seed.
+ */
+public class MixWithIncrement implements SeedDecorator {
+
+    private static final AtomicLong mix = new AtomicLong(1);
+
+    @Override
+    public void initialize(Class<?> aClass) {
+        next();
+    }
+
+    public long decorate(long seed) {
+        return seed ^ mix.get();
+    }
+
+    public static void next() {
+        mix.updateAndGet(MurmurHash3::fmix);
+    }
+}
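
A hypothetical test sketch (class name and assertion invented for illustration) following the Javadoc's recipe, so consecutively spawned threads observe different seeds:

```java
import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;

import org.elasticsearch.common.Randomness;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.MixWithIncrement;

@SeedDecorators(MixWithIncrement.class)
public class MixWithIncrementUsageTests extends ESTestCase {
    public void testThreadsSeeDifferentSeeds() throws InterruptedException {
        long[] observed = new long[2];
        for (int i = 0; i < observed.length; i++) {
            final int slot = i;
            Thread t = new Thread(() -> observed[slot] = Randomness.get().nextLong());
            t.start();
            t.join();
            MixWithIncrement.next(); // refresh the mixed-in seed before the next thread starts
        }
        assertNotEquals(observed[0], observed[1]); // almost surely different once the mix changes
    }
}
```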

+ 1 - 0
x-pack/plugin/esql/build.gradle

@@ -183,6 +183,7 @@ pluginManager.withPlugin('com.diffplug.spotless') {
         "src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser*.java",
         "src/main/generated/**/*.java",
         "src/main/generated-src/generated/**/*.java"
+      toggleOffOn('begin generated imports', 'end generated imports')
     }
   }
 }

+ 29 - 1
x-pack/plugin/esql/compute/build.gradle

@@ -49,6 +49,7 @@ spotless {
     /*
      * Generated files go here.
      */
+    toggleOffOn('begin generated imports', 'end generated imports')
     targetExclude "src/main/generated/**/*.java"
   }
 }
@@ -92,7 +93,7 @@ tasks.named('stringTemplates').configure {
     "",
     "BYTES_REF",
     "org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF",
-    "",
+    "BytesRefArray",
     "BytesRefHash"
   )
   var ipProperties = prop("Ip", "BytesRef", "BytesRef", "", "BYTES_REF", "16", "", "")
@@ -640,6 +641,33 @@ tasks.named('stringTemplates').configure {
     it.outputFile = "org/elasticsearch/compute/aggregation/StdDevDoubleAggregator.java"
   }
 
+  File sampleAggregatorInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/aggregation/X-SampleAggregator.java.st")
+  template {
+    it.properties = booleanProperties
+    it.inputFile = sampleAggregatorInputFile
+    it.outputFile = "org/elasticsearch/compute/aggregation/SampleBooleanAggregator.java"
+  }
+  template {
+    it.properties = bytesRefProperties
+    it.inputFile = sampleAggregatorInputFile
+    it.outputFile = "org/elasticsearch/compute/aggregation/SampleBytesRefAggregator.java"
+  }
+  template {
+    it.properties = doubleProperties
+    it.inputFile = sampleAggregatorInputFile
+    it.outputFile = "org/elasticsearch/compute/aggregation/SampleDoubleAggregator.java"
+  }
+  template {
+    it.properties = intProperties
+    it.inputFile = sampleAggregatorInputFile
+    it.outputFile = "org/elasticsearch/compute/aggregation/SampleIntAggregator.java"
+  }
+  template {
+    it.properties = longProperties
+    it.inputFile = sampleAggregatorInputFile
+    it.outputFile = "org/elasticsearch/compute/aggregation/SampleLongAggregator.java"
+  }
+
   File topAggregatorInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st")
   template {
     it.properties = intProperties

+ 208 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleBooleanAggregator.java

@@ -0,0 +1,208 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.BooleanBlock;
+import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
+import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+
+import org.elasticsearch.common.Randomness;
+import java.util.random.RandomGenerator;
+// end generated imports
+
+/**
+ * Sample N field values for boolean.
+ * <p>
+ *     This class is generated. Edit `X-SampleAggregator.java.st` to edit this file.
+ * </p>
+ * <p>
+ *     This works by prepending a random long to the value, and then collecting the
+ *     top values. This gives a uniform random sample of the values. See also:
+ *     <a href="https://en.wikipedia.org/wiki/Reservoir_sampling#With_random_sort">Wikipedia Reservoir Sampling</a>
+ * </p>
+ */
+@Aggregator({ @IntermediateState(name = "sample", type = "BYTES_REF_BLOCK") })
+@GroupingAggregator
+class SampleBooleanAggregator {
+    private static final DefaultUnsortableTopNEncoder ENCODER = new DefaultUnsortableTopNEncoder();
+
+    public static SingleState initSingle(BigArrays bigArrays, int limit) {
+        return new SingleState(bigArrays, limit);
+    }
+
+    public static void combine(SingleState state, boolean value) {
+        state.add(value);
+    }
+
+    public static void combineIntermediate(SingleState state, BytesRefBlock values) {
+        int start = values.getFirstValueIndex(0);
+        int end = start + values.getValueCount(0);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.internalState.sort.collect(values.getBytesRef(i, scratch), 0);
+        }
+    }
+
+    public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory()));
+    }
+
+    public static GroupingState initGrouping(BigArrays bigArrays, int limit) {
+        return new GroupingState(bigArrays, limit);
+    }
+
+    public static void combine(GroupingState state, int groupId, boolean value) {
+        state.add(groupId, value);
+    }
+
+    public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) {
+        int start = values.getFirstValueIndex(valuesPosition);
+        int end = start + values.getValueCount(valuesPosition);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.sort.collect(values.getBytesRef(i, scratch), groupId);
+        }
+    }
+
+    public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) {
+        current.merge(groupId, state, statePosition);
+    }
+
+    public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory(), selected));
+    }
+
+    private static Block stripWeights(DriverContext driverContext, Block block) {
+        if (block.areAllValuesNull()) {
+            return block;
+        }
+        try (
+            BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
+            BooleanBlock.Builder booleanBlock = driverContext.blockFactory().newBooleanBlockBuilder(bytesRefBlock.getPositionCount())
+        ) {
+            BytesRef scratch = new BytesRef();
+            for (int position = 0; position < block.getPositionCount(); position++) {
+                if (bytesRefBlock.isNull(position)) {
+                    booleanBlock.appendNull();
+                } else {
+                    int valueCount = bytesRefBlock.getValueCount(position);
+                    if (valueCount > 1) {
+                        booleanBlock.beginPositionEntry();
+                    }
+                    int start = bytesRefBlock.getFirstValueIndex(position);
+                    int end = start + valueCount;
+                    for (int i = start; i < end; i++) {
+                        BytesRef value = bytesRefBlock.getBytesRef(i, scratch).clone();
+                        ENCODER.decodeLong(value);
+                        booleanBlock.appendBoolean(ENCODER.decodeBoolean(value));
+                    }
+                    if (valueCount > 1) {
+                        booleanBlock.endPositionEntry();
+                    }
+                }
+            }
+            return booleanBlock.build();
+        }
+    }
+
+    public static class GroupingState implements GroupingAggregatorState {
+        private final BytesRefBucketedSort sort;
+        private final BreakingBytesRefBuilder bytesRefBuilder;
+
+        private GroupingState(BigArrays bigArrays, int limit) {
+            CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
+            this.sort = new BytesRefBucketedSort(breaker, "sample", bigArrays, SortOrder.ASC, limit);
+            boolean success = false;
+            try {
+                this.bytesRefBuilder = new BreakingBytesRefBuilder(breaker, "sample");
+                success = true;
+            } finally {
+                if (success == false) {
+                    Releasables.closeExpectNoException(sort);
+                }
+            }
+        }
+
+        public void add(int groupId, boolean value) {
+            ENCODER.encodeLong(Randomness.get().nextLong(), bytesRefBuilder);
+            ENCODER.encodeBoolean(value, bytesRefBuilder);
+            sort.collect(bytesRefBuilder.bytesRefView(), groupId);
+            bytesRefBuilder.clear();
+        }
+
+        public void merge(int groupId, GroupingState other, int otherGroupId) {
+            sort.merge(groupId, other.sort, otherGroupId);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory(), selected);
+        }
+
+        Block toBlock(BlockFactory blockFactory, IntVector selected) {
+            return sort.toBlock(blockFactory, selected);
+        }
+
+        @Override
+        public void enableGroupIdTracking(SeenGroupIds seen) {
+            // we figure out seen values from nulls on the values block
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(sort, bytesRefBuilder);
+        }
+    }
+
+    public static class SingleState implements AggregatorState {
+        private final GroupingState internalState;
+
+        private SingleState(BigArrays bigArrays, int limit) {
+            this.internalState = new GroupingState(bigArrays, limit);
+        }
+
+        public void add(boolean value) {
+            internalState.add(0, value);
+        }
+
+        public void merge(GroupingState other) {
+            internalState.merge(0, other, 0);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory());
+        }
+
+        Block toBlock(BlockFactory blockFactory) {
+            try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+                return internalState.toBlock(blockFactory, intValues);
+            }
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(internalState);
+        }
+    }
+}
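
The weighting trick described in the Javadoc can be seen in isolation. Below is a standalone sketch (plain Java, no Elasticsearch types; names invented) of sampling by random sort: tag each value with a random key and keep the N entries with the smallest keys. Every value then survives with equal probability, and two partial samples merge by pouring both into the same top-N structure, which is what the grouping states above do via BytesRefBucketedSort.

```java
import java.util.Arrays;
import java.util.PriorityQueue;
import java.util.Random;

public class RandomSortSampleSketch {
    record Tagged(long key, int value) {}

    static int[] sample(int[] values, int n, Random random) {
        // Max-heap on the random key: once it holds n entries, evict the largest key,
        // leaving the n smallest keys -- a uniform random subset of the values.
        PriorityQueue<Tagged> topN = new PriorityQueue<>((a, b) -> Long.compare(b.key(), a.key()));
        for (int v : values) {
            topN.add(new Tagged(random.nextLong(), v));
            if (topN.size() > n) {
                topN.poll();
            }
        }
        return topN.stream().mapToInt(Tagged::value).toArray();
    }

    public static void main(String[] args) {
        int[] data = new int[100];
        for (int i = 0; i < data.length; i++) {
            data[i] = i;
        }
        System.out.println(Arrays.toString(sample(data, 5, new Random())));
    }
}
```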

+ 208 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleBytesRefAggregator.java

@@ -0,0 +1,208 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
+import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+
+import org.elasticsearch.common.Randomness;
+import java.util.random.RandomGenerator;
+// end generated imports
+
+/**
+ * Sample N field values for BytesRef.
+ * <p>
+ *     This class is generated. Edit `X-SampleAggregator.java.st` to edit this file.
+ * </p>
+ * <p>
+ *     This works by prepending a random long to the value, and then collecting the
+ *     top values. This gives a uniform random sample of the values. See also:
+ *     <a href="https://en.wikipedia.org/wiki/Reservoir_sampling#With_random_sort">Wikipedia Reservoir Sampling</a>
+ * </p>
+ */
+@Aggregator({ @IntermediateState(name = "sample", type = "BYTES_REF_BLOCK") })
+@GroupingAggregator
+class SampleBytesRefAggregator {
+    private static final DefaultUnsortableTopNEncoder ENCODER = new DefaultUnsortableTopNEncoder();
+
+    public static SingleState initSingle(BigArrays bigArrays, int limit) {
+        return new SingleState(bigArrays, limit);
+    }
+
+    public static void combine(SingleState state, BytesRef value) {
+        state.add(value);
+    }
+
+    public static void combineIntermediate(SingleState state, BytesRefBlock values) {
+        int start = values.getFirstValueIndex(0);
+        int end = start + values.getValueCount(0);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.internalState.sort.collect(values.getBytesRef(i, scratch), 0);
+        }
+    }
+
+    public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory()));
+    }
+
+    public static GroupingState initGrouping(BigArrays bigArrays, int limit) {
+        return new GroupingState(bigArrays, limit);
+    }
+
+    public static void combine(GroupingState state, int groupId, BytesRef value) {
+        state.add(groupId, value);
+    }
+
+    public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) {
+        int start = values.getFirstValueIndex(valuesPosition);
+        int end = start + values.getValueCount(valuesPosition);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.sort.collect(values.getBytesRef(i, scratch), groupId);
+        }
+    }
+
+    public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) {
+        current.merge(groupId, state, statePosition);
+    }
+
+    public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory(), selected));
+    }
+
+    private static Block stripWeights(DriverContext driverContext, Block block) {
+        if (block.areAllValuesNull()) {
+            return block;
+        }
+        try (
+            BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
+            BytesRefBlock.Builder BytesRefBlock = driverContext.blockFactory().newBytesRefBlockBuilder(bytesRefBlock.getPositionCount())
+        ) {
+            BytesRef scratch = new BytesRef();
+            for (int position = 0; position < block.getPositionCount(); position++) {
+                if (bytesRefBlock.isNull(position)) {
+                    BytesRefBlock.appendNull();
+                } else {
+                    int valueCount = bytesRefBlock.getValueCount(position);
+                    if (valueCount > 1) {
+                        BytesRefBlock.beginPositionEntry();
+                    }
+                    int start = bytesRefBlock.getFirstValueIndex(position);
+                    int end = start + valueCount;
+                    for (int i = start; i < end; i++) {
+                        BytesRef value = bytesRefBlock.getBytesRef(i, scratch).clone();
+                        ENCODER.decodeLong(value);
+                        BytesRefBlock.appendBytesRef(ENCODER.decodeBytesRef(value, scratch));
+                    }
+                    if (valueCount > 1) {
+                        BytesRefBlock.endPositionEntry();
+                    }
+                }
+            }
+            return BytesRefBlock.build();
+        }
+    }
+
+    public static class GroupingState implements GroupingAggregatorState {
+        private final BytesRefBucketedSort sort;
+        private final BreakingBytesRefBuilder bytesRefBuilder;
+
+        private GroupingState(BigArrays bigArrays, int limit) {
+            CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
+            this.sort = new BytesRefBucketedSort(breaker, "sample", bigArrays, SortOrder.ASC, limit);
+            boolean success = false;
+            try {
+                this.bytesRefBuilder = new BreakingBytesRefBuilder(breaker, "sample");
+                success = true;
+            } finally {
+                if (success == false) {
+                    Releasables.closeExpectNoException(sort);
+                }
+            }
+        }
+
+        public void add(int groupId, BytesRef value) {
+            ENCODER.encodeLong(Randomness.get().nextLong(), bytesRefBuilder);
+            ENCODER.encodeBytesRef(value, bytesRefBuilder);
+            sort.collect(bytesRefBuilder.bytesRefView(), groupId);
+            bytesRefBuilder.clear();
+        }
+
+        public void merge(int groupId, GroupingState other, int otherGroupId) {
+            sort.merge(groupId, other.sort, otherGroupId);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory(), selected);
+        }
+
+        Block toBlock(BlockFactory blockFactory, IntVector selected) {
+            return sort.toBlock(blockFactory, selected);
+        }
+
+        @Override
+        public void enableGroupIdTracking(SeenGroupIds seen) {
+            // we figure out seen values from nulls on the values block
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(sort, bytesRefBuilder);
+        }
+    }
+
+    public static class SingleState implements AggregatorState {
+        private final GroupingState internalState;
+
+        private SingleState(BigArrays bigArrays, int limit) {
+            this.internalState = new GroupingState(bigArrays, limit);
+        }
+
+        public void add(BytesRef value) {
+            internalState.add(0, value);
+        }
+
+        public void merge(GroupingState other) {
+            internalState.merge(0, other, 0);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory());
+        }
+
+        Block toBlock(BlockFactory blockFactory) {
+            try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+                return internalState.toBlock(blockFactory, intValues);
+            }
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(internalState);
+        }
+    }
+}

+ 208 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleDoubleAggregator.java

@@ -0,0 +1,208 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
+import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+
+import org.elasticsearch.common.Randomness;
+import java.util.random.RandomGenerator;
+// end generated imports
+
+/**
+ * Sample N field values for double.
+ * <p>
+ *     This class is generated. Edit `X-SampleAggregator.java.st` to edit this file.
+ * </p>
+ * <p>
+ *     This works by prepending a random long to the value, and then collecting the
+ *     top values. This gives a uniform random sample of the values. See also:
+ *     <a href="https://en.wikipedia.org/wiki/Reservoir_sampling#With_random_sort">Wikipedia Reservoir Sampling</a>
+ * </p>
+ */
+@Aggregator({ @IntermediateState(name = "sample", type = "BYTES_REF_BLOCK") })
+@GroupingAggregator
+class SampleDoubleAggregator {
+    private static final DefaultUnsortableTopNEncoder ENCODER = new DefaultUnsortableTopNEncoder();
+
+    public static SingleState initSingle(BigArrays bigArrays, int limit) {
+        return new SingleState(bigArrays, limit);
+    }
+
+    public static void combine(SingleState state, double value) {
+        state.add(value);
+    }
+
+    public static void combineIntermediate(SingleState state, BytesRefBlock values) {
+        int start = values.getFirstValueIndex(0);
+        int end = start + values.getValueCount(0);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.internalState.sort.collect(values.getBytesRef(i, scratch), 0);
+        }
+    }
+
+    public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory()));
+    }
+
+    public static GroupingState initGrouping(BigArrays bigArrays, int limit) {
+        return new GroupingState(bigArrays, limit);
+    }
+
+    public static void combine(GroupingState state, int groupId, double value) {
+        state.add(groupId, value);
+    }
+
+    public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) {
+        int start = values.getFirstValueIndex(valuesPosition);
+        int end = start + values.getValueCount(valuesPosition);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.sort.collect(values.getBytesRef(i, scratch), groupId);
+        }
+    }
+
+    public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) {
+        current.merge(groupId, state, statePosition);
+    }
+
+    public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory(), selected));
+    }
+
+    private static Block stripWeights(DriverContext driverContext, Block block) {
+        if (block.areAllValuesNull()) {
+            return block;
+        }
+        try (
+            BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
+            DoubleBlock.Builder doubleBlock = driverContext.blockFactory().newDoubleBlockBuilder(bytesRefBlock.getPositionCount())
+        ) {
+            BytesRef scratch = new BytesRef();
+            for (int position = 0; position < block.getPositionCount(); position++) {
+                if (bytesRefBlock.isNull(position)) {
+                    doubleBlock.appendNull();
+                } else {
+                    int valueCount = bytesRefBlock.getValueCount(position);
+                    if (valueCount > 1) {
+                        doubleBlock.beginPositionEntry();
+                    }
+                    int start = bytesRefBlock.getFirstValueIndex(position);
+                    int end = start + valueCount;
+                    for (int i = start; i < end; i++) {
+                        BytesRef value = bytesRefBlock.getBytesRef(i, scratch).clone();
+                        ENCODER.decodeLong(value);
+                        doubleBlock.appendDouble(ENCODER.decodeDouble(value));
+                    }
+                    if (valueCount > 1) {
+                        doubleBlock.endPositionEntry();
+                    }
+                }
+            }
+            return doubleBlock.build();
+        }
+    }
+
+    public static class GroupingState implements GroupingAggregatorState {
+        private final BytesRefBucketedSort sort;
+        private final BreakingBytesRefBuilder bytesRefBuilder;
+
+        private GroupingState(BigArrays bigArrays, int limit) {
+            CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
+            this.sort = new BytesRefBucketedSort(breaker, "sample", bigArrays, SortOrder.ASC, limit);
+            boolean success = false;
+            try {
+                this.bytesRefBuilder = new BreakingBytesRefBuilder(breaker, "sample");
+                success = true;
+            } finally {
+                if (success == false) {
+                    Releasables.closeExpectNoException(sort);
+                }
+            }
+        }
+
+        public void add(int groupId, double value) {
+            ENCODER.encodeLong(Randomness.get().nextLong(), bytesRefBuilder);
+            ENCODER.encodeDouble(value, bytesRefBuilder);
+            sort.collect(bytesRefBuilder.bytesRefView(), groupId);
+            bytesRefBuilder.clear();
+        }
+
+        public void merge(int groupId, GroupingState other, int otherGroupId) {
+            sort.merge(groupId, other.sort, otherGroupId);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory(), selected);
+        }
+
+        Block toBlock(BlockFactory blockFactory, IntVector selected) {
+            return sort.toBlock(blockFactory, selected);
+        }
+
+        @Override
+        public void enableGroupIdTracking(SeenGroupIds seen) {
+            // we figure out seen values from nulls on the values block
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(sort, bytesRefBuilder);
+        }
+    }
+
+    public static class SingleState implements AggregatorState {
+        private final GroupingState internalState;
+
+        private SingleState(BigArrays bigArrays, int limit) {
+            this.internalState = new GroupingState(bigArrays, limit);
+        }
+
+        public void add(double value) {
+            internalState.add(0, value);
+        }
+
+        public void merge(GroupingState other) {
+            internalState.merge(0, other, 0);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory());
+        }
+
+        Block toBlock(BlockFactory blockFactory) {
+            try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+                return internalState.toBlock(blockFactory, intValues);
+            }
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(internalState);
+        }
+    }
+}

+ 208 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleIntAggregator.java

@@ -0,0 +1,208 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
+import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+
+import org.elasticsearch.common.Randomness;
+import java.util.random.RandomGenerator;
+// end generated imports
+
+/**
+ * Sample N field values for int.
+ * <p>
+ *     This class is generated. Edit `X-SampleAggregator.java.st` instead.
+ * </p>
+ * <p>
+ *     This works by prepending a random long to the value, and then collecting the
+ *     top values. This gives a uniform random sample of the values. See also:
+ *     <a href="https://en.wikipedia.org/wiki/Reservoir_sampling#With_random_sort">Wikipedia Reservoir Sampling</a>
+ * </p>
+ */
+@Aggregator({ @IntermediateState(name = "sample", type = "BYTES_REF_BLOCK") })
+@GroupingAggregator
+class SampleIntAggregator {
+    private static final DefaultUnsortableTopNEncoder ENCODER = new DefaultUnsortableTopNEncoder();
+
+    public static SingleState initSingle(BigArrays bigArrays, int limit) {
+        return new SingleState(bigArrays, limit);
+    }
+
+    public static void combine(SingleState state, int value) {
+        state.add(value);
+    }
+
+    public static void combineIntermediate(SingleState state, BytesRefBlock values) {
+        int start = values.getFirstValueIndex(0);
+        int end = start + values.getValueCount(0);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.internalState.sort.collect(values.getBytesRef(i, scratch), 0);
+        }
+    }
+
+    public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory()));
+    }
+
+    public static GroupingState initGrouping(BigArrays bigArrays, int limit) {
+        return new GroupingState(bigArrays, limit);
+    }
+
+    public static void combine(GroupingState state, int groupId, int value) {
+        state.add(groupId, value);
+    }
+
+    public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) {
+        int start = values.getFirstValueIndex(valuesPosition);
+        int end = start + values.getValueCount(valuesPosition);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.sort.collect(values.getBytesRef(i, scratch), groupId);
+        }
+    }
+
+    public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) {
+        current.merge(groupId, state, statePosition);
+    }
+
+    public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory(), selected));
+    }
+
+    private static Block stripWeights(DriverContext driverContext, Block block) {
+        if (block.areAllValuesNull()) {
+            return block;
+        }
+        try (
+            BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
+            IntBlock.Builder intBlock = driverContext.blockFactory().newIntBlockBuilder(bytesRefBlock.getPositionCount())
+        ) {
+            BytesRef scratch = new BytesRef();
+            for (int position = 0; position < block.getPositionCount(); position++) {
+                if (bytesRefBlock.isNull(position)) {
+                    intBlock.appendNull();
+                } else {
+                    int valueCount = bytesRefBlock.getValueCount(position);
+                    if (valueCount > 1) {
+                        intBlock.beginPositionEntry();
+                    }
+                    int start = bytesRefBlock.getFirstValueIndex(position);
+                    int end = start + valueCount;
+                    for (int i = start; i < end; i++) {
+                        BytesRef value = bytesRefBlock.getBytesRef(i, scratch).clone();
+                        ENCODER.decodeLong(value);
+                        intBlock.appendInt(ENCODER.decodeInt(value));
+                    }
+                    if (valueCount > 1) {
+                        intBlock.endPositionEntry();
+                    }
+                }
+            }
+            return intBlock.build();
+        }
+    }
+
+    public static class GroupingState implements GroupingAggregatorState {
+        private final BytesRefBucketedSort sort;
+        private final BreakingBytesRefBuilder bytesRefBuilder;
+
+        private GroupingState(BigArrays bigArrays, int limit) {
+            CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
+            this.sort = new BytesRefBucketedSort(breaker, "sample", bigArrays, SortOrder.ASC, limit);
+            boolean success = false;
+            try {
+                this.bytesRefBuilder = new BreakingBytesRefBuilder(breaker, "sample");
+                success = true;
+            } finally {
+                if (success == false) {
+                    Releasables.closeExpectNoException(sort);
+                }
+            }
+        }
+
+        public void add(int groupId, int value) {
+            ENCODER.encodeLong(Randomness.get().nextLong(), bytesRefBuilder);
+            ENCODER.encodeInt(value, bytesRefBuilder);
+            sort.collect(bytesRefBuilder.bytesRefView(), groupId);
+            bytesRefBuilder.clear();
+        }
+
+        public void merge(int groupId, GroupingState other, int otherGroupId) {
+            sort.merge(groupId, other.sort, otherGroupId);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory(), selected);
+        }
+
+        Block toBlock(BlockFactory blockFactory, IntVector selected) {
+            return sort.toBlock(blockFactory, selected);
+        }
+
+        @Override
+        public void enableGroupIdTracking(SeenGroupIds seen) {
+            // we figure out seen values from nulls on the values block
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(sort, bytesRefBuilder);
+        }
+    }
+
+    public static class SingleState implements AggregatorState {
+        private final GroupingState internalState;
+
+        private SingleState(BigArrays bigArrays, int limit) {
+            this.internalState = new GroupingState(bigArrays, limit);
+        }
+
+        public void add(int value) {
+            internalState.add(0, value);
+        }
+
+        public void merge(GroupingState other) {
+            internalState.merge(0, other, 0);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory());
+        }
+
+        Block toBlock(BlockFactory blockFactory) {
+            try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+                return internalState.toBlock(blockFactory, intValues);
+            }
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(internalState);
+        }
+    }
+}
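
stripWeights above is the exact inverse of GroupingState#add: each BytesRef entry is a random long followed by the encoded value, so the decode step consumes and discards the long, then appends the value. A rough stand-in using ByteBuffer (DefaultUnsortableTopNEncoder's actual wire format is internal and, unlike this sketch, keeps entries byte-comparable; names here are illustrative):

    import java.nio.ByteBuffer;

    // Mirror of add()/stripWeights(): prepend a random weight on the way in,
    // strip it on the way out. ByteBuffer stands in for the internal encoder.
    final class WeightedEntry {
        static byte[] encode(long randomWeight, int value) {
            return ByteBuffer.allocate(Long.BYTES + Integer.BYTES)
                .putLong(randomWeight)  // the sort key; see GroupingState#add
                .putInt(value)          // the payload that survives sampling
                .array();
        }

        static int decodeValue(byte[] entry) {
            ByteBuffer buf = ByteBuffer.wrap(entry);
            buf.getLong();  // consume and discard the weight
            return buf.getInt();
        }
    }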

+ 208 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/SampleLongAggregator.java

@@ -0,0 +1,208 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
+import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+
+import org.elasticsearch.common.Randomness;
+import java.util.random.RandomGenerator;
+// end generated imports
+
+/**
+ * Sample N field values for long.
+ * <p>
+ *     This class is generated. Edit `X-SampleAggregator.java.st` instead.
+ * </p>
+ * <p>
+ *     This works by prepending a random long to the value, and then collecting the
+ *     top values. This gives a uniform random sample of the values. See also:
+ *     <a href="https://en.wikipedia.org/wiki/Reservoir_sampling#With_random_sort">Wikipedia Reservoir Sampling</a>
+ * </p>
+ */
+@Aggregator({ @IntermediateState(name = "sample", type = "BYTES_REF_BLOCK") })
+@GroupingAggregator
+class SampleLongAggregator {
+    private static final DefaultUnsortableTopNEncoder ENCODER = new DefaultUnsortableTopNEncoder();
+
+    public static SingleState initSingle(BigArrays bigArrays, int limit) {
+        return new SingleState(bigArrays, limit);
+    }
+
+    public static void combine(SingleState state, long value) {
+        state.add(value);
+    }
+
+    public static void combineIntermediate(SingleState state, BytesRefBlock values) {
+        int start = values.getFirstValueIndex(0);
+        int end = start + values.getValueCount(0);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.internalState.sort.collect(values.getBytesRef(i, scratch), 0);
+        }
+    }
+
+    public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory()));
+    }
+
+    public static GroupingState initGrouping(BigArrays bigArrays, int limit) {
+        return new GroupingState(bigArrays, limit);
+    }
+
+    public static void combine(GroupingState state, int groupId, long value) {
+        state.add(groupId, value);
+    }
+
+    public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) {
+        int start = values.getFirstValueIndex(valuesPosition);
+        int end = start + values.getValueCount(valuesPosition);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.sort.collect(values.getBytesRef(i, scratch), groupId);
+        }
+    }
+
+    public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) {
+        current.merge(groupId, state, statePosition);
+    }
+
+    public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory(), selected));
+    }
+
+    private static Block stripWeights(DriverContext driverContext, Block block) {
+        if (block.areAllValuesNull()) {
+            return block;
+        }
+        try (
+            BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
+            LongBlock.Builder longBlock = driverContext.blockFactory().newLongBlockBuilder(bytesRefBlock.getPositionCount())
+        ) {
+            BytesRef scratch = new BytesRef();
+            for (int position = 0; position < block.getPositionCount(); position++) {
+                if (bytesRefBlock.isNull(position)) {
+                    longBlock.appendNull();
+                } else {
+                    int valueCount = bytesRefBlock.getValueCount(position);
+                    if (valueCount > 1) {
+                        longBlock.beginPositionEntry();
+                    }
+                    int start = bytesRefBlock.getFirstValueIndex(position);
+                    int end = start + valueCount;
+                    for (int i = start; i < end; i++) {
+                        BytesRef value = bytesRefBlock.getBytesRef(i, scratch).clone();
+                        ENCODER.decodeLong(value);
+                        longBlock.appendLong(ENCODER.decodeLong(value));
+                    }
+                    if (valueCount > 1) {
+                        longBlock.endPositionEntry();
+                    }
+                }
+            }
+            return longBlock.build();
+        }
+    }
+
+    public static class GroupingState implements GroupingAggregatorState {
+        private final BytesRefBucketedSort sort;
+        private final BreakingBytesRefBuilder bytesRefBuilder;
+
+        private GroupingState(BigArrays bigArrays, int limit) {
+            CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
+            this.sort = new BytesRefBucketedSort(breaker, "sample", bigArrays, SortOrder.ASC, limit);
+            boolean success = false;
+            try {
+                this.bytesRefBuilder = new BreakingBytesRefBuilder(breaker, "sample");
+                success = true;
+            } finally {
+                if (success == false) {
+                    Releasables.closeExpectNoException(sort);
+                }
+            }
+        }
+
+        public void add(int groupId, long value) {
+            ENCODER.encodeLong(Randomness.get().nextLong(), bytesRefBuilder);
+            ENCODER.encodeLong(value, bytesRefBuilder);
+            sort.collect(bytesRefBuilder.bytesRefView(), groupId);
+            bytesRefBuilder.clear();
+        }
+
+        public void merge(int groupId, GroupingState other, int otherGroupId) {
+            sort.merge(groupId, other.sort, otherGroupId);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory(), selected);
+        }
+
+        Block toBlock(BlockFactory blockFactory, IntVector selected) {
+            return sort.toBlock(blockFactory, selected);
+        }
+
+        @Override
+        public void enableGroupIdTracking(SeenGroupIds seen) {
+            // we figure out seen values from nulls on the values block
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(sort, bytesRefBuilder);
+        }
+    }
+
+    public static class SingleState implements AggregatorState {
+        private final GroupingState internalState;
+
+        private SingleState(BigArrays bigArrays, int limit) {
+            this.internalState = new GroupingState(bigArrays, limit);
+        }
+
+        public void add(long value) {
+            internalState.add(0, value);
+        }
+
+        public void merge(GroupingState other) {
+            internalState.merge(0, other, 0);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory());
+        }
+
+        Block toBlock(BlockFactory blockFactory) {
+            try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+                return internalState.toBlock(blockFactory, intValues);
+            }
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(internalState);
+        }
+    }
+}
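
All three GroupingState constructors use the same allocate-then-publish pattern: the sort is created first, and if the second allocation (the BreakingBytesRefBuilder) trips the circuit breaker, the finally block releases the sort so memory accounting stays balanced. The idiom reduced to its essentials, with hypothetical Supplier-based allocations standing in for the real ones:

    import java.util.function.Supplier;
    import org.elasticsearch.core.Releasable;
    import org.elasticsearch.core.Releasables;

    // Allocate-then-publish: if a later allocation throws, release the
    // earlier ones before letting the exception propagate.
    class TwoResourceState implements Releasable {
        private final Releasable first;
        private final Releasable second;

        TwoResourceState(Supplier<Releasable> allocFirst, Supplier<Releasable> allocSecond) {
            this.first = allocFirst.get();
            boolean success = false;
            try {
                this.second = allocSecond.get();  // may throw, e.g. on breaker trip
                success = true;
            } finally {
                if (success == false) {
                    Releasables.closeExpectNoException(first);
                }
            }
        }

        @Override
        public void close() {
            Releasables.closeExpectNoException(first, second);
        }
    }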

+ 3 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java

@@ -7,14 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link BooleanArrayVector}.
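
This file and the rest of the generated-src data classes below change only in their import sections: the imports shared by the X-*.java.st templates now sit between `// begin generated imports` and `// end generated imports` markers, inside which the usual formatting checks are evidently relaxed. That is why specializations may carry imports they never use (for example `BytesRefArray` here) and why the duplicated `ReleasableIterator` import further down in ConstantBooleanVector is acceptable: Java permits repeated single-type imports, and the checks that would normally reject them are off between the markers. Schematically:

    // begin generated imports
    import org.elasticsearch.common.util.BytesRefArray;  // may be unused in this specialization
    import org.elasticsearch.core.ReleasableIterator;    // may repeat or be out of order
    // end generated imports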

+ 5 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayVector.java

@@ -7,15 +7,20 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * Vector implementation that stores an array of boolean values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -14,6 +15,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores boolean values.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlockBuilder.java

@@ -7,12 +7,16 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.core.Releasables;
 
 import java.util.Arrays;
+// end generated imports
 
 /**
 * Block builder of BooleanBlocks.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanLookup.java

@@ -7,10 +7,12 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
 * Generic {@link Block#lookup} implementation for {@link BooleanBlock}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -14,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores boolean values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java

@@ -7,9 +7,11 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link BooleanVector}. Cannot represent multi-values or nulls.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -17,6 +18,7 @@ import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link BytesRefArrayVector}.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -17,6 +18,9 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * Vector implementation that stores an array of BytesRef values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -15,6 +16,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores BytesRef values.

+ 5 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefBlockBuilder.java

@@ -7,12 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.Releasables;
 
+import java.util.Arrays;
+// end generated imports
+
 /**
 * Block builder of BytesRefBlocks.
  * This class is generated. Edit {@code X-BlockBuilder.java.st} instead.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefLookup.java

@@ -7,11 +7,13 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
 * Generic {@link Block#lookup} implementation for {@link BytesRefBlock}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -15,6 +16,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores BytesRef values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVectorBlock.java

@@ -7,10 +7,12 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link BytesRefVector}. Cannot represent multi-values or nulls.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBooleanVector.java

@@ -7,9 +7,13 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
 /**
  * Vector implementation that stores a constant boolean value.

+ 3 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBytesRefVector.java

@@ -7,11 +7,14 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
 /**
  * Vector implementation that stores a constant BytesRef value.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantDoubleVector.java

@@ -7,9 +7,13 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
 /**
  * Vector implementation that stores a constant double value.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantFloatVector.java

@@ -7,9 +7,13 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
 /**
  * Vector implementation that stores a constant float value.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantIntVector.java

@@ -7,9 +7,13 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
 /**
  * Vector implementation that stores a constant int value.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantLongVector.java

@@ -7,9 +7,13 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
 /**
  * Vector implementation that stores a constant long value.

+ 3 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java

@@ -7,14 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link DoubleArrayVector}.

+ 5 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayVector.java

@@ -7,15 +7,20 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * Vector implementation that stores an array of double values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -14,6 +15,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores double values.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBlockBuilder.java

@@ -7,12 +7,16 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.core.Releasables;
 
 import java.util.Arrays;
+// end generated imports
 
 /**
 * Block builder of DoubleBlocks.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleLookup.java

@@ -7,10 +7,12 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
 * Generic {@link Block#lookup} implementation for {@link DoubleBlock}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -14,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores double values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorBlock.java

@@ -7,9 +7,11 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link DoubleVector}. Cannot represent multi-values or nulls.

+ 3 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatArrayBlock.java

@@ -7,14 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link FloatArrayVector}.

+ 5 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatArrayVector.java

@@ -7,15 +7,20 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * Vector implementation that stores an array of float values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -14,6 +15,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores float values.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatBlockBuilder.java

@@ -7,12 +7,16 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.core.Releasables;
 
 import java.util.Arrays;
+// end generated imports
 
 /**
 * Block builder of FloatBlocks.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatLookup.java

@@ -7,10 +7,12 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
 * Generic {@link Block#lookup} implementation for {@link FloatBlock}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -14,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores float values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatVectorBlock.java

@@ -7,9 +7,11 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link FloatVector}. Cannot represent multi-values or nulls.

+ 3 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java

@@ -7,14 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link IntArrayVector}.

+ 5 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayVector.java

@@ -7,15 +7,20 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * Vector implementation that stores an array of int values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -14,6 +15,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores int values.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBlockBuilder.java

@@ -7,12 +7,16 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.core.Releasables;
 
 import java.util.Arrays;
+// end generated imports
 
 /**
 * Block builder of IntBlocks.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntLookup.java

@@ -7,10 +7,12 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
 * Generic {@link Block#lookup} implementation for {@link IntBlock}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -14,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores int values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorBlock.java

@@ -7,9 +7,11 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link IntVector}. Cannot represent multi-values or nulls.

+ 3 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java

@@ -7,14 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link LongArrayVector}.

+ 5 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayVector.java

@@ -7,15 +7,20 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
+import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * Vector implementation that stores an array of long values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBlock.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -14,6 +15,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores long values.

+ 4 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBlockBuilder.java

@@ -7,12 +7,16 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.core.Releasables;
 
 import java.util.Arrays;
+// end generated imports
 
 /**
 * Block builder of LongBlocks.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongLookup.java

@@ -7,10 +7,12 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
 * Generic {@link Block#lookup} implementation for {@link LongBlock}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVector.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -14,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores long values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorBlock.java

@@ -7,9 +7,11 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link LongVector}. Cannot represent multi-values or nulls.
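
The generated function classes that follow exchange intermediate state as a single BYTES_REF block whose entries still carry their random weights. That is what lets combineIntermediate feed serialized entries straight back into the bucketed sort and combineStates merge two sorts directly: keeping the `limit` smallest weights over the union of shards is distributed identically to sampling once over all the data, because the weights are drawn independently everywhere. A toy illustration of that merge property (names assumed; entry[0] is the weight, entry[1] the value):

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.List;

    // Merging weighted partial samples from two shards: concatenate, then
    // keep the `limit` entries with the smallest random weights.
    final class SampleMerge {
        static List<long[]> merge(List<long[]> shardA, List<long[]> shardB, int limit) {
            List<long[]> all = new ArrayList<>(shardA);
            all.addAll(shardB);
            all.sort(Comparator.comparingLong((long[] e) -> e[0]));  // ascending, like SortOrder.ASC
            return all.subList(0, Math.min(limit, all.size()));
        }
    }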

+ 167 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBooleanAggregatorFunction.java

@@ -0,0 +1,167 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanBlock;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunction} implementation for {@link SampleBooleanAggregator}.
+ * This class is generated. Edit {@code AggregatorImplementer} instead.
+ */
+public final class SampleBooleanAggregatorFunction implements AggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final DriverContext driverContext;
+
+  private final SampleBooleanAggregator.SingleState state;
+
+  private final List<Integer> channels;
+
+  private final int limit;
+
+  public SampleBooleanAggregatorFunction(DriverContext driverContext, List<Integer> channels,
+      SampleBooleanAggregator.SingleState state, int limit) {
+    this.driverContext = driverContext;
+    this.channels = channels;
+    this.state = state;
+    this.limit = limit;
+  }
+
+  public static SampleBooleanAggregatorFunction create(DriverContext driverContext,
+      List<Integer> channels, int limit) {
+    return new SampleBooleanAggregatorFunction(driverContext, channels, SampleBooleanAggregator.initSingle(driverContext.bigArrays(), limit), limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public void addRawInput(Page page, BooleanVector mask) {
+    if (mask.allFalse()) {
+      // Entire page masked away
+      return;
+    }
+    if (mask.allTrue()) {
+      // No masking
+      BooleanBlock block = page.getBlock(channels.get(0));
+      BooleanVector vector = block.asVector();
+      if (vector != null) {
+        addRawVector(vector);
+      } else {
+        addRawBlock(block);
+      }
+      return;
+    }
+    // Some positions masked away, others kept
+    BooleanBlock block = page.getBlock(channels.get(0));
+    BooleanVector vector = block.asVector();
+    if (vector != null) {
+      addRawVector(vector, mask);
+    } else {
+      addRawBlock(block, mask);
+    }
+  }
+
+  private void addRawVector(BooleanVector vector) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      SampleBooleanAggregator.combine(state, vector.getBoolean(i));
+    }
+  }
+
+  private void addRawVector(BooleanVector vector, BooleanVector mask) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      if (mask.getBoolean(i) == false) {
+        continue;
+      }
+      SampleBooleanAggregator.combine(state, vector.getBoolean(i));
+    }
+  }
+
+  private void addRawBlock(BooleanBlock block) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleBooleanAggregator.combine(state, block.getBoolean(i));
+      }
+    }
+  }
+
+  private void addRawBlock(BooleanBlock block, BooleanVector mask) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (mask.getBoolean(p) == false) {
+        continue;
+      }
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleBooleanAggregator.combine(state, block.getBoolean(i));
+      }
+    }
+  }
+
+  @Override
+  public void addIntermediateInput(Page page) {
+    assert channels.size() == intermediateBlockCount();
+    assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    assert sample.getPositionCount() == 1;
+    BytesRef scratch = new BytesRef();
+    SampleBooleanAggregator.combineIntermediate(state, sample);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+    state.toIntermediate(blocks, offset, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) {
+    blocks[offset] = SampleBooleanAggregator.evaluateFinal(state, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
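
addRawInput above decides how much mask bookkeeping to pay before touching any values: an all-false mask skips the page, an all-true mask takes the unmasked tight loops, and only a mixed mask pays a per-position check. The same three-way dispatch reduced to plain arrays (a sketch, not ES|QL code; summing stands in for the sample collection):

    // Three-way mask dispatch: trivial masks cost nothing per position.
    final class MaskedSum {
        static long sum(long[] values, boolean[] mask, boolean allTrue, boolean allFalse) {
            if (allFalse) {
                return 0L;  // entire page masked away
            }
            long sum = 0;
            if (allTrue) {
                for (long v : values) {  // no masking: tight loop
                    sum += v;
                }
                return sum;
            }
            for (int i = 0; i < values.length; i++) {
                if (mask[i]) {  // some positions masked away, others kept
                    sum += values[i];
                }
            }
            return sum;
        }
    }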

+ 50 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBooleanAggregatorFunctionSupplier.java

@@ -0,0 +1,50 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.util.List;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunctionSupplier} implementation for {@link SampleBooleanAggregator}.
+ * This class is generated. Edit {@code AggregatorFunctionSupplierImplementer} instead.
+ */
+public final class SampleBooleanAggregatorFunctionSupplier implements AggregatorFunctionSupplier {
+  private final int limit;
+
+  public SampleBooleanAggregatorFunctionSupplier(int limit) {
+    this.limit = limit;
+  }
+
+  @Override
+  public List<IntermediateStateDesc> nonGroupingIntermediateStateDesc() {
+    return SampleBooleanAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public List<IntermediateStateDesc> groupingIntermediateStateDesc() {
+    return SampleBooleanGroupingAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public SampleBooleanAggregatorFunction aggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleBooleanAggregatorFunction.create(driverContext, channels, limit);
+  }
+
+  @Override
+  public SampleBooleanGroupingAggregatorFunction groupingAggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleBooleanGroupingAggregatorFunction.create(channels, driverContext, limit);
+  }
+
+  @Override
+  public String describe() {
+    return "sample of booleans";
+  }
+}
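
A sketch of how this supplier gets wired up, using only the signatures visible above; the DriverContext and the channel list come from the surrounding compute operator and are assumed here:

    import java.util.List;
    import org.elasticsearch.compute.aggregation.SampleBooleanAggregatorFunction;
    import org.elasticsearch.compute.aggregation.SampleBooleanAggregatorFunctionSupplier;
    import org.elasticsearch.compute.operator.DriverContext;

    final class SupplierUsage {
        // Assumed: driverContext is provided by the surrounding operator and
        // channel 0 is where the boolean values arrive in each Page.
        static SampleBooleanAggregatorFunction create(DriverContext driverContext) {
            var supplier = new SampleBooleanAggregatorFunctionSupplier(10);  // keep at most 10 values
            return supplier.aggregator(driverContext, List.of(0));
        }
    }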

+ 214 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBooleanGroupingAggregatorFunction.java

@@ -0,0 +1,214 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanBlock;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link GroupingAggregatorFunction} implementation for {@link SampleBooleanAggregator}.
+ * This class is generated. Edit {@code GroupingAggregatorImplementer} instead.
+ */
+public final class SampleBooleanGroupingAggregatorFunction implements GroupingAggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final SampleBooleanAggregator.GroupingState state;
+
+  private final List<Integer> channels;
+
+  private final DriverContext driverContext;
+
+  private final int limit;
+
+  public SampleBooleanGroupingAggregatorFunction(List<Integer> channels,
+      SampleBooleanAggregator.GroupingState state, DriverContext driverContext, int limit) {
+    this.channels = channels;
+    this.state = state;
+    this.driverContext = driverContext;
+    this.limit = limit;
+  }
+
+  public static SampleBooleanGroupingAggregatorFunction create(List<Integer> channels,
+      DriverContext driverContext, int limit) {
+    return new SampleBooleanGroupingAggregatorFunction(channels, SampleBooleanAggregator.initGrouping(driverContext.bigArrays(), limit), driverContext, limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds,
+      Page page) {
+    BooleanBlock valuesBlock = page.getBlock(channels.get(0));
+    BooleanVector valuesVector = valuesBlock.asVector();
+    if (valuesVector == null) {
+      if (valuesBlock.mayHaveNulls()) {
+        state.enableGroupIdTracking(seenGroupIds);
+      }
+      return new GroupingAggregatorFunction.AddInput() {
+        @Override
+        public void add(int positionOffset, IntBlock groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void add(int positionOffset, IntVector groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void close() {
+        }
+      };
+    }
+    return new GroupingAggregatorFunction.AddInput() {
+      @Override
+      public void add(int positionOffset, IntBlock groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void add(int positionOffset, IntVector groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void close() {
+      }
+    };
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, BooleanBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      if (values.isNull(groupPosition + positionOffset)) {
+        continue;
+      }
+      int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+      int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+      for (int v = valuesStart; v < valuesEnd; v++) {
+        SampleBooleanAggregator.combine(state, groupId, values.getBoolean(v));
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, BooleanVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleBooleanAggregator.combine(state, groupId, values.getBoolean(groupPosition + positionOffset));
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, BooleanBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        if (values.isNull(groupPosition + positionOffset)) {
+          continue;
+        }
+        int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+        int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+        for (int v = valuesStart; v < valuesEnd; v++) {
+          SampleBooleanAggregator.combine(state, groupId, values.getBoolean(v));
+        }
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, BooleanVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        SampleBooleanAggregator.combine(state, groupId, values.getBoolean(groupPosition + positionOffset));
+      }
+    }
+  }
+
+  @Override
+  public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {
+    state.enableGroupIdTracking(seenGroupIds);
+  }
+
+  @Override
+  public void addIntermediateInput(int positionOffset, IntVector groups, Page page) {
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    assert channels.size() == intermediateBlockCount();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleBooleanAggregator.combineIntermediate(state, groupId, sample, groupPosition + positionOffset);
+    }
+  }
+
+  @Override
+  public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) {
+    if (input.getClass() != getClass()) {
+      throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass());
+    }
+    SampleBooleanAggregator.GroupingState inState = ((SampleBooleanGroupingAggregatorFunction) input).state;
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    SampleBooleanAggregator.combineStates(state, groupId, inState, position);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) {
+    state.toIntermediate(blocks, offset, selected, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, IntVector selected,
+      DriverContext driverContext) {
+    blocks[offset] = SampleBooleanAggregator.evaluateFinal(state, selected, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
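
This grouping function only routes values into the state; the sampling decision itself lives in the hand-written SampleBooleanAggregator. As a mental model, the per-group state behaves like a reservoir sampler. A minimal sketch, assuming classic Algorithm R; the Reservoir name and its members are illustrative, not the real BigArrays-backed GroupingState:

import java.util.ArrayList;
import java.util.List;
import java.util.SplittableRandom;

// Illustrative reservoir: keeps at most `limit` values, each of the `seen`
// inputs surviving with equal probability limit/seen.
final class Reservoir<T> {
    private final int limit;
    private final List<T> values = new ArrayList<>();
    private final SplittableRandom random = new SplittableRandom();
    private long seen = 0;

    Reservoir(int limit) {
        this.limit = limit;
    }

    void add(T value) {
        seen++;
        if (values.size() < limit) {
            values.add(value);             // not yet full: always keep
            return;
        }
        long slot = random.nextLong(seen); // uniform in [0, seen)
        if (slot < limit) {
            values.set((int) slot, value); // evict a random current element
        }
    }

    List<T> sample() {
        return List.copyOf(values);
    }
}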

+ 171 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBytesRefAggregatorFunction.java

@@ -0,0 +1,171 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunction} implementation for {@link SampleBytesRefAggregator}.
+ * This class is generated. Edit {@code AggregatorImplementer} instead.
+ */
+public final class SampleBytesRefAggregatorFunction implements AggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final DriverContext driverContext;
+
+  private final SampleBytesRefAggregator.SingleState state;
+
+  private final List<Integer> channels;
+
+  private final int limit;
+
+  public SampleBytesRefAggregatorFunction(DriverContext driverContext, List<Integer> channels,
+      SampleBytesRefAggregator.SingleState state, int limit) {
+    this.driverContext = driverContext;
+    this.channels = channels;
+    this.state = state;
+    this.limit = limit;
+  }
+
+  public static SampleBytesRefAggregatorFunction create(DriverContext driverContext,
+      List<Integer> channels, int limit) {
+    return new SampleBytesRefAggregatorFunction(driverContext, channels, SampleBytesRefAggregator.initSingle(driverContext.bigArrays(), limit), limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public void addRawInput(Page page, BooleanVector mask) {
+    if (mask.allFalse()) {
+      // Entire page masked away
+      return;
+    }
+    if (mask.allTrue()) {
+      // No masking
+      BytesRefBlock block = page.getBlock(channels.get(0));
+      BytesRefVector vector = block.asVector();
+      if (vector != null) {
+        addRawVector(vector);
+      } else {
+        addRawBlock(block);
+      }
+      return;
+    }
+    // Some positions masked away, others kept
+    BytesRefBlock block = page.getBlock(channels.get(0));
+    BytesRefVector vector = block.asVector();
+    if (vector != null) {
+      addRawVector(vector, mask);
+    } else {
+      addRawBlock(block, mask);
+    }
+  }
+
+  private void addRawVector(BytesRefVector vector) {
+    BytesRef scratch = new BytesRef();
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      SampleBytesRefAggregator.combine(state, vector.getBytesRef(i, scratch));
+    }
+  }
+
+  private void addRawVector(BytesRefVector vector, BooleanVector mask) {
+    BytesRef scratch = new BytesRef();
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      if (mask.getBoolean(i) == false) {
+        continue;
+      }
+      SampleBytesRefAggregator.combine(state, vector.getBytesRef(i, scratch));
+    }
+  }
+
+  private void addRawBlock(BytesRefBlock block) {
+    BytesRef scratch = new BytesRef();
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleBytesRefAggregator.combine(state, block.getBytesRef(i, scratch));
+      }
+    }
+  }
+
+  private void addRawBlock(BytesRefBlock block, BooleanVector mask) {
+    BytesRef scratch = new BytesRef();
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (mask.getBoolean(p) == false) {
+        continue;
+      }
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleBytesRefAggregator.combine(state, block.getBytesRef(i, scratch));
+      }
+    }
+  }
+
+  @Override
+  public void addIntermediateInput(Page page) {
+    assert channels.size() == intermediateBlockCount();
+    assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    assert sample.getPositionCount() == 1;
+    BytesRef scratch = new BytesRef();
+    SampleBytesRefAggregator.combineIntermediate(state, sample);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+    state.toIntermediate(blocks, offset, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) {
+    blocks[offset] = SampleBytesRefAggregator.evaluateFinal(state, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
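
addRawInput branches three ways on the mask: an all-false mask skips the page outright, an all-true mask takes the unmasked fast path, and a mixed mask checks positions one by one. A minimal generic sketch of that dispatch, with a stand-in Mask interface since the real BooleanVector belongs to the compute engine:

// Stand-in type, for illustration only; not the compute-engine API.
interface Mask {
    boolean allTrue();
    boolean allFalse();
    boolean get(int position);
}

final class MaskedConsumer {
    private double total = 0;

    void addRawInput(double[] column, Mask mask) {
        if (mask.allFalse()) {
            return;                        // entire page filtered out
        }
        if (mask.allTrue()) {
            for (double v : column) {
                combine(v);                // fast path: no per-position test
            }
            return;
        }
        for (int p = 0; p < column.length; p++) {
            if (mask.get(p)) {
                combine(column[p]);        // mixed mask: test each position
            }
        }
    }

    private void combine(double v) {
        total += v;
    }
}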

+ 50 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBytesRefAggregatorFunctionSupplier.java

@@ -0,0 +1,50 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.util.List;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunctionSupplier} implementation for {@link SampleBytesRefAggregator}.
+ * This class is generated. Edit {@code AggregatorFunctionSupplierImplementer} instead.
+ */
+public final class SampleBytesRefAggregatorFunctionSupplier implements AggregatorFunctionSupplier {
+  private final int limit;
+
+  public SampleBytesRefAggregatorFunctionSupplier(int limit) {
+    this.limit = limit;
+  }
+
+  @Override
+  public List<IntermediateStateDesc> nonGroupingIntermediateStateDesc() {
+    return SampleBytesRefAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public List<IntermediateStateDesc> groupingIntermediateStateDesc() {
+    return SampleBytesRefGroupingAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public SampleBytesRefAggregatorFunction aggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleBytesRefAggregatorFunction.create(driverContext, channels, limit);
+  }
+
+  @Override
+  public SampleBytesRefGroupingAggregatorFunction groupingAggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleBytesRefGroupingAggregatorFunction.create(channels, driverContext, limit);
+  }
+
+  @Override
+  public String describe() {
+    return "sample of bytes";
+  }
+}
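
The supplier is a small immutable factory that carries only the function's configuration (the sample limit) and builds one fresh aggregator per driver. A hypothetical mirror of that shape, reusing the Reservoir sketch above; the real DriverContext and channel wiring come from the ESQL planner, not user code:

// Hypothetical mirror of the supplier shape; not the compute-engine API.
record SamplerSupplier(int limit) {
    Reservoir<Long> aggregator() {
        return new Reservoir<>(limit);     // one independent instance per driver
    }

    String describe() {
        return "sample of longs";
    }
}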

+ 217 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleBytesRefGroupingAggregatorFunction.java

@@ -0,0 +1,217 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link GroupingAggregatorFunction} implementation for {@link SampleBytesRefAggregator}.
+ * This class is generated. Edit {@code GroupingAggregatorImplementer} instead.
+ */
+public final class SampleBytesRefGroupingAggregatorFunction implements GroupingAggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final SampleBytesRefAggregator.GroupingState state;
+
+  private final List<Integer> channels;
+
+  private final DriverContext driverContext;
+
+  private final int limit;
+
+  public SampleBytesRefGroupingAggregatorFunction(List<Integer> channels,
+      SampleBytesRefAggregator.GroupingState state, DriverContext driverContext, int limit) {
+    this.channels = channels;
+    this.state = state;
+    this.driverContext = driverContext;
+    this.limit = limit;
+  }
+
+  public static SampleBytesRefGroupingAggregatorFunction create(List<Integer> channels,
+      DriverContext driverContext, int limit) {
+    return new SampleBytesRefGroupingAggregatorFunction(channels, SampleBytesRefAggregator.initGrouping(driverContext.bigArrays(), limit), driverContext, limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds,
+      Page page) {
+    BytesRefBlock valuesBlock = page.getBlock(channels.get(0));
+    BytesRefVector valuesVector = valuesBlock.asVector();
+    if (valuesVector == null) {
+      if (valuesBlock.mayHaveNulls()) {
+        state.enableGroupIdTracking(seenGroupIds);
+      }
+      return new GroupingAggregatorFunction.AddInput() {
+        @Override
+        public void add(int positionOffset, IntBlock groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void add(int positionOffset, IntVector groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void close() {
+        }
+      };
+    }
+    return new GroupingAggregatorFunction.AddInput() {
+      @Override
+      public void add(int positionOffset, IntBlock groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void add(int positionOffset, IntVector groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void close() {
+      }
+    };
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, BytesRefBlock values) {
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      if (values.isNull(groupPosition + positionOffset)) {
+        continue;
+      }
+      int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+      int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+      for (int v = valuesStart; v < valuesEnd; v++) {
+        SampleBytesRefAggregator.combine(state, groupId, values.getBytesRef(v, scratch));
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, BytesRefVector values) {
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleBytesRefAggregator.combine(state, groupId, values.getBytesRef(groupPosition + positionOffset, scratch));
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, BytesRefBlock values) {
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        if (values.isNull(groupPosition + positionOffset)) {
+          continue;
+        }
+        int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+        int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+        for (int v = valuesStart; v < valuesEnd; v++) {
+          SampleBytesRefAggregator.combine(state, groupId, values.getBytesRef(v, scratch));
+        }
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, BytesRefVector values) {
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        SampleBytesRefAggregator.combine(state, groupId, values.getBytesRef(groupPosition + positionOffset, scratch));
+      }
+    }
+  }
+
+  @Override
+  public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {
+    state.enableGroupIdTracking(seenGroupIds);
+  }
+
+  @Override
+  public void addIntermediateInput(int positionOffset, IntVector groups, Page page) {
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    assert channels.size() == intermediateBlockCount();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleBytesRefAggregator.combineIntermediate(state, groupId, sample, groupPosition + positionOffset);
+    }
+  }
+
+  @Override
+  public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) {
+    if (input.getClass() != getClass()) {
+      throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass());
+    }
+    SampleBytesRefAggregator.GroupingState inState = ((SampleBytesRefGroupingAggregatorFunction) input).state;
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    SampleBytesRefAggregator.combineStates(state, groupId, inState, position);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) {
+    state.toIntermediate(blocks, offset, selected, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, IntVector selected,
+      DriverContext driverContext) {
+    blocks[offset] = SampleBytesRefAggregator.evaluateFinal(state, selected, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
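
combineStates and combineIntermediate fold another reservoir into this one, per group. One reasonable way to merge two equal-probability reservoirs is to weight each source by how many raw values it has seen; this is an illustrative sketch only, and the algorithm actually used is documented in the hand-written aggregator and may differ:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.SplittableRandom;

final class ReservoirMerge {
    // Merge reservoirs `a` (sampled from seenA raw values) and `b` (from seenB),
    // drawing from each source in proportion to how much raw data it represents.
    static <T> List<T> merge(List<T> a, long seenA, List<T> b, long seenB, int limit) {
        List<T> fromA = new ArrayList<>(a);
        List<T> fromB = new ArrayList<>(b);
        Collections.shuffle(fromA);        // uniform order within each reservoir
        Collections.shuffle(fromB);
        SplittableRandom random = new SplittableRandom();
        List<T> out = new ArrayList<>(limit);
        int ia = 0;
        int ib = 0;
        while (out.size() < limit && (ia < fromA.size() || ib < fromB.size())) {
            boolean takeA = ib >= fromB.size()
                || (ia < fromA.size() && random.nextLong(seenA + seenB) < seenA);
            out.add(takeA ? fromA.get(ia++) : fromB.get(ib++));
        }
        return out;
    }
}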

+ 168 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleDoubleAggregatorFunction.java

@@ -0,0 +1,168 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.DoubleVector;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunction} implementation for {@link SampleDoubleAggregator}.
+ * This class is generated. Edit {@code AggregatorImplementer} instead.
+ */
+public final class SampleDoubleAggregatorFunction implements AggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final DriverContext driverContext;
+
+  private final SampleDoubleAggregator.SingleState state;
+
+  private final List<Integer> channels;
+
+  private final int limit;
+
+  public SampleDoubleAggregatorFunction(DriverContext driverContext, List<Integer> channels,
+      SampleDoubleAggregator.SingleState state, int limit) {
+    this.driverContext = driverContext;
+    this.channels = channels;
+    this.state = state;
+    this.limit = limit;
+  }
+
+  public static SampleDoubleAggregatorFunction create(DriverContext driverContext,
+      List<Integer> channels, int limit) {
+    return new SampleDoubleAggregatorFunction(driverContext, channels, SampleDoubleAggregator.initSingle(driverContext.bigArrays(), limit), limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public void addRawInput(Page page, BooleanVector mask) {
+    if (mask.allFalse()) {
+      // Entire page masked away
+      return;
+    }
+    if (mask.allTrue()) {
+      // No masking
+      DoubleBlock block = page.getBlock(channels.get(0));
+      DoubleVector vector = block.asVector();
+      if (vector != null) {
+        addRawVector(vector);
+      } else {
+        addRawBlock(block);
+      }
+      return;
+    }
+    // Some positions masked away, others kept
+    DoubleBlock block = page.getBlock(channels.get(0));
+    DoubleVector vector = block.asVector();
+    if (vector != null) {
+      addRawVector(vector, mask);
+    } else {
+      addRawBlock(block, mask);
+    }
+  }
+
+  private void addRawVector(DoubleVector vector) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      SampleDoubleAggregator.combine(state, vector.getDouble(i));
+    }
+  }
+
+  private void addRawVector(DoubleVector vector, BooleanVector mask) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      if (mask.getBoolean(i) == false) {
+        continue;
+      }
+      SampleDoubleAggregator.combine(state, vector.getDouble(i));
+    }
+  }
+
+  private void addRawBlock(DoubleBlock block) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleDoubleAggregator.combine(state, block.getDouble(i));
+      }
+    }
+  }
+
+  private void addRawBlock(DoubleBlock block, BooleanVector mask) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (mask.getBoolean(p) == false) {
+        continue;
+      }
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleDoubleAggregator.combine(state, block.getDouble(i));
+      }
+    }
+  }
+
+  @Override
+  public void addIntermediateInput(Page page) {
+    assert channels.size() == intermediateBlockCount();
+    assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    assert sample.getPositionCount() == 1;
+    BytesRef scratch = new BytesRef();
+    SampleDoubleAggregator.combineIntermediate(state, sample);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+    state.toIntermediate(blocks, offset, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) {
+    blocks[offset] = SampleDoubleAggregator.evaluateFinal(state, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}

+ 50 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleDoubleAggregatorFunctionSupplier.java

@@ -0,0 +1,50 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.util.List;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunctionSupplier} implementation for {@link SampleDoubleAggregator}.
+ * This class is generated. Edit {@code AggregatorFunctionSupplierImplementer} instead.
+ */
+public final class SampleDoubleAggregatorFunctionSupplier implements AggregatorFunctionSupplier {
+  private final int limit;
+
+  public SampleDoubleAggregatorFunctionSupplier(int limit) {
+    this.limit = limit;
+  }
+
+  @Override
+  public List<IntermediateStateDesc> nonGroupingIntermediateStateDesc() {
+    return SampleDoubleAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public List<IntermediateStateDesc> groupingIntermediateStateDesc() {
+    return SampleDoubleGroupingAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public SampleDoubleAggregatorFunction aggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleDoubleAggregatorFunction.create(driverContext, channels, limit);
+  }
+
+  @Override
+  public SampleDoubleGroupingAggregatorFunction groupingAggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleDoubleGroupingAggregatorFunction.create(channels, driverContext, limit);
+  }
+
+  @Override
+  public String describe() {
+    return "sample of doubles";
+  }
+}

+ 214 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleDoubleGroupingAggregatorFunction.java

@@ -0,0 +1,214 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.DoubleVector;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link GroupingAggregatorFunction} implementation for {@link SampleDoubleAggregator}.
+ * This class is generated. Edit {@code GroupingAggregatorImplementer} instead.
+ */
+public final class SampleDoubleGroupingAggregatorFunction implements GroupingAggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final SampleDoubleAggregator.GroupingState state;
+
+  private final List<Integer> channels;
+
+  private final DriverContext driverContext;
+
+  private final int limit;
+
+  public SampleDoubleGroupingAggregatorFunction(List<Integer> channels,
+      SampleDoubleAggregator.GroupingState state, DriverContext driverContext, int limit) {
+    this.channels = channels;
+    this.state = state;
+    this.driverContext = driverContext;
+    this.limit = limit;
+  }
+
+  public static SampleDoubleGroupingAggregatorFunction create(List<Integer> channels,
+      DriverContext driverContext, int limit) {
+    return new SampleDoubleGroupingAggregatorFunction(channels, SampleDoubleAggregator.initGrouping(driverContext.bigArrays(), limit), driverContext, limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds,
+      Page page) {
+    DoubleBlock valuesBlock = page.getBlock(channels.get(0));
+    DoubleVector valuesVector = valuesBlock.asVector();
+    if (valuesVector == null) {
+      if (valuesBlock.mayHaveNulls()) {
+        state.enableGroupIdTracking(seenGroupIds);
+      }
+      return new GroupingAggregatorFunction.AddInput() {
+        @Override
+        public void add(int positionOffset, IntBlock groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void add(int positionOffset, IntVector groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void close() {
+        }
+      };
+    }
+    return new GroupingAggregatorFunction.AddInput() {
+      @Override
+      public void add(int positionOffset, IntBlock groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void add(int positionOffset, IntVector groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void close() {
+      }
+    };
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, DoubleBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      if (values.isNull(groupPosition + positionOffset)) {
+        continue;
+      }
+      int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+      int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+      for (int v = valuesStart; v < valuesEnd; v++) {
+        SampleDoubleAggregator.combine(state, groupId, values.getDouble(v));
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, DoubleVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleDoubleAggregator.combine(state, groupId, values.getDouble(groupPosition + positionOffset));
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, DoubleBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        if (values.isNull(groupPosition + positionOffset)) {
+          continue;
+        }
+        int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+        int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+        for (int v = valuesStart; v < valuesEnd; v++) {
+          SampleDoubleAggregator.combine(state, groupId, values.getDouble(v));
+        }
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, DoubleVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        SampleDoubleAggregator.combine(state, groupId, values.getDouble(groupPosition + positionOffset));
+      }
+    }
+  }
+
+  @Override
+  public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {
+    state.enableGroupIdTracking(seenGroupIds);
+  }
+
+  @Override
+  public void addIntermediateInput(int positionOffset, IntVector groups, Page page) {
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    assert channels.size() == intermediateBlockCount();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleDoubleAggregator.combineIntermediate(state, groupId, sample, groupPosition + positionOffset);
+    }
+  }
+
+  @Override
+  public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) {
+    if (input.getClass() != getClass()) {
+      throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass());
+    }
+    SampleDoubleAggregator.GroupingState inState = ((SampleDoubleGroupingAggregatorFunction) input).state;
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    SampleDoubleAggregator.combineStates(state, groupId, inState, position);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) {
+    state.toIntermediate(blocks, offset, selected, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, IntVector selected,
+      DriverContext driverContext) {
+    blocks[offset] = SampleDoubleAggregator.evaluateFinal(state, selected, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
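
Each grouping variant fans values out to one reservoir per groupId. A toy heap-backed equivalent, reusing the Reservoir sketch from earlier; the real GroupingState sits in BigArrays, so group ids index dense arrays and allocations are tracked rather than living on the untracked JVM heap:

import java.util.HashMap;
import java.util.Map;

// Toy per-group state keyed by groupId; names are illustrative only.
final class GroupingReservoirs {
    private final int limit;
    final Map<Integer, Reservoir<Double>> groups = new HashMap<>();

    GroupingReservoirs(int limit) {
        this.limit = limit;
    }

    void combine(int groupId, double value) {
        groups.computeIfAbsent(groupId, id -> new Reservoir<>(limit)).add(value);
    }
}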

+ 168 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleIntAggregatorFunction.java

@@ -0,0 +1,168 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunction} implementation for {@link SampleIntAggregator}.
+ * This class is generated. Edit {@code AggregatorImplementer} instead.
+ */
+public final class SampleIntAggregatorFunction implements AggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final DriverContext driverContext;
+
+  private final SampleIntAggregator.SingleState state;
+
+  private final List<Integer> channels;
+
+  private final int limit;
+
+  public SampleIntAggregatorFunction(DriverContext driverContext, List<Integer> channels,
+      SampleIntAggregator.SingleState state, int limit) {
+    this.driverContext = driverContext;
+    this.channels = channels;
+    this.state = state;
+    this.limit = limit;
+  }
+
+  public static SampleIntAggregatorFunction create(DriverContext driverContext,
+      List<Integer> channels, int limit) {
+    return new SampleIntAggregatorFunction(driverContext, channels, SampleIntAggregator.initSingle(driverContext.bigArrays(), limit), limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public void addRawInput(Page page, BooleanVector mask) {
+    if (mask.allFalse()) {
+      // Entire page masked away
+      return;
+    }
+    if (mask.allTrue()) {
+      // No masking
+      IntBlock block = page.getBlock(channels.get(0));
+      IntVector vector = block.asVector();
+      if (vector != null) {
+        addRawVector(vector);
+      } else {
+        addRawBlock(block);
+      }
+      return;
+    }
+    // Some positions masked away, others kept
+    IntBlock block = page.getBlock(channels.get(0));
+    IntVector vector = block.asVector();
+    if (vector != null) {
+      addRawVector(vector, mask);
+    } else {
+      addRawBlock(block, mask);
+    }
+  }
+
+  private void addRawVector(IntVector vector) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      SampleIntAggregator.combine(state, vector.getInt(i));
+    }
+  }
+
+  private void addRawVector(IntVector vector, BooleanVector mask) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      if (mask.getBoolean(i) == false) {
+        continue;
+      }
+      SampleIntAggregator.combine(state, vector.getInt(i));
+    }
+  }
+
+  private void addRawBlock(IntBlock block) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleIntAggregator.combine(state, block.getInt(i));
+      }
+    }
+  }
+
+  private void addRawBlock(IntBlock block, BooleanVector mask) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (mask.getBoolean(p) == false) {
+        continue;
+      }
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleIntAggregator.combine(state, block.getInt(i));
+      }
+    }
+  }
+
+  @Override
+  public void addIntermediateInput(Page page) {
+    assert channels.size() == intermediateBlockCount();
+    assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    assert sample.getPositionCount() == 1;
+    BytesRef scratch = new BytesRef();
+    SampleIntAggregator.combineIntermediate(state, sample);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+    state.toIntermediate(blocks, offset, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) {
+    blocks[offset] = SampleIntAggregator.evaluateFinal(state, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
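
Every specialization, including this int one, declares its intermediate state as a single BYTES_REF: the whole reservoir travels between nodes as one opaque blob per bucket, which is why addIntermediateInput asserts a single position (the unused scratch local is residue of the shared string template). A hypothetical length-prefixed encoding, purely to make the shape concrete; the real wire format belongs to the hand-written aggregators:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.List;

final class SampleStateCodec {
    // Hypothetical encoding: values seen so far, then the sampled values.
    static byte[] encode(List<Integer> reservoir, long seen) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            out.writeLong(seen);           // kept so reservoirs can merge fairly
            out.writeInt(reservoir.size());
            for (int value : reservoir) {
                out.writeInt(value);
            }
        }
        return bytes.toByteArray();
    }
}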

+ 50 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleIntAggregatorFunctionSupplier.java

@@ -0,0 +1,50 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.util.List;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunctionSupplier} implementation for {@link SampleIntAggregator}.
+ * This class is generated. Edit {@code AggregatorFunctionSupplierImplementer} instead.
+ */
+public final class SampleIntAggregatorFunctionSupplier implements AggregatorFunctionSupplier {
+  private final int limit;
+
+  public SampleIntAggregatorFunctionSupplier(int limit) {
+    this.limit = limit;
+  }
+
+  @Override
+  public List<IntermediateStateDesc> nonGroupingIntermediateStateDesc() {
+    return SampleIntAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public List<IntermediateStateDesc> groupingIntermediateStateDesc() {
+    return SampleIntGroupingAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public SampleIntAggregatorFunction aggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleIntAggregatorFunction.create(driverContext, channels, limit);
+  }
+
+  @Override
+  public SampleIntGroupingAggregatorFunction groupingAggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleIntGroupingAggregatorFunction.create(channels, driverContext, limit);
+  }
+
+  @Override
+  public String describe() {
+    return "sample of ints";
+  }
+}

+ 212 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleIntGroupingAggregatorFunction.java

@@ -0,0 +1,212 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link GroupingAggregatorFunction} implementation for {@link SampleIntAggregator}.
+ * This class is generated. Edit {@code GroupingAggregatorImplementer} instead.
+ */
+public final class SampleIntGroupingAggregatorFunction implements GroupingAggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final SampleIntAggregator.GroupingState state;
+
+  private final List<Integer> channels;
+
+  private final DriverContext driverContext;
+
+  private final int limit;
+
+  public SampleIntGroupingAggregatorFunction(List<Integer> channels,
+      SampleIntAggregator.GroupingState state, DriverContext driverContext, int limit) {
+    this.channels = channels;
+    this.state = state;
+    this.driverContext = driverContext;
+    this.limit = limit;
+  }
+
+  public static SampleIntGroupingAggregatorFunction create(List<Integer> channels,
+      DriverContext driverContext, int limit) {
+    return new SampleIntGroupingAggregatorFunction(channels, SampleIntAggregator.initGrouping(driverContext.bigArrays(), limit), driverContext, limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds,
+      Page page) {
+    IntBlock valuesBlock = page.getBlock(channels.get(0));
+    IntVector valuesVector = valuesBlock.asVector();
+    if (valuesVector == null) {
+      if (valuesBlock.mayHaveNulls()) {
+        state.enableGroupIdTracking(seenGroupIds);
+      }
+      return new GroupingAggregatorFunction.AddInput() {
+        @Override
+        public void add(int positionOffset, IntBlock groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void add(int positionOffset, IntVector groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void close() {
+        }
+      };
+    }
+    return new GroupingAggregatorFunction.AddInput() {
+      @Override
+      public void add(int positionOffset, IntBlock groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void add(int positionOffset, IntVector groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void close() {
+      }
+    };
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, IntBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      if (values.isNull(groupPosition + positionOffset)) {
+        continue;
+      }
+      int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+      int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+      for (int v = valuesStart; v < valuesEnd; v++) {
+        SampleIntAggregator.combine(state, groupId, values.getInt(v));
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, IntVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleIntAggregator.combine(state, groupId, values.getInt(groupPosition + positionOffset));
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, IntBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        if (values.isNull(groupPosition + positionOffset)) {
+          continue;
+        }
+        int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+        int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+        for (int v = valuesStart; v < valuesEnd; v++) {
+          SampleIntAggregator.combine(state, groupId, values.getInt(v));
+        }
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, IntVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        SampleIntAggregator.combine(state, groupId, values.getInt(groupPosition + positionOffset));
+      }
+    }
+  }
+
+  @Override
+  public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {
+    state.enableGroupIdTracking(seenGroupIds);
+  }
+
+  @Override
+  public void addIntermediateInput(int positionOffset, IntVector groups, Page page) {
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    assert channels.size() == intermediateBlockCount();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleIntAggregator.combineIntermediate(state, groupId, sample, groupPosition + positionOffset);
+    }
+  }
+
+  @Override
+  public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) {
+    if (input.getClass() != getClass()) {
+      throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass());
+    }
+    SampleIntAggregator.GroupingState inState = ((SampleIntGroupingAggregatorFunction) input).state;
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    SampleIntAggregator.combineStates(state, groupId, inState, position);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) {
+    state.toIntermediate(blocks, offset, selected, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, IntVector selected,
+      DriverContext driverContext) {
+    blocks[offset] = SampleIntAggregator.evaluateFinal(state, selected, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}
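
addIntermediateRowInput is the in-memory sibling of addIntermediateInput: rather than decoding a serialized block, it reaches into another instance of the same class and merges one group's state directly, hence the strict getClass() check. A toy method one could add to the GroupingReservoirs sketch above, hypothetical and deliberately naive about weighting:

// Fold one group of `other` into the same group here. Re-adding sampled values
// one by one skews probabilities; shown only for the shape of the contract.
// The real combineStates merges the reservoirs with correct weights.
void addIntermediateRowInput(int groupId, GroupingReservoirs other) {
    Reservoir<Double> in = other.groups.get(groupId);
    if (in == null) {
        return;
    }
    for (double value : in.sample()) {
        combine(groupId, value);
    }
}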

+ 168 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleLongAggregatorFunction.java

@@ -0,0 +1,168 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.LongVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunction} implementation for {@link SampleLongAggregator}.
+ * This class is generated. Edit {@code AggregatorImplementer} instead.
+ */
+public final class SampleLongAggregatorFunction implements AggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final DriverContext driverContext;
+
+  private final SampleLongAggregator.SingleState state;
+
+  private final List<Integer> channels;
+
+  private final int limit;
+
+  public SampleLongAggregatorFunction(DriverContext driverContext, List<Integer> channels,
+      SampleLongAggregator.SingleState state, int limit) {
+    this.driverContext = driverContext;
+    this.channels = channels;
+    this.state = state;
+    this.limit = limit;
+  }
+
+  public static SampleLongAggregatorFunction create(DriverContext driverContext,
+      List<Integer> channels, int limit) {
+    return new SampleLongAggregatorFunction(driverContext, channels, SampleLongAggregator.initSingle(driverContext.bigArrays(), limit), limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public void addRawInput(Page page, BooleanVector mask) {
+    if (mask.allFalse()) {
+      // Entire page masked away
+      return;
+    }
+    if (mask.allTrue()) {
+      // No masking
+      LongBlock block = page.getBlock(channels.get(0));
+      LongVector vector = block.asVector();
+      if (vector != null) {
+        addRawVector(vector);
+      } else {
+        addRawBlock(block);
+      }
+      return;
+    }
+    // Some positions masked away, others kept
+    LongBlock block = page.getBlock(channels.get(0));
+    LongVector vector = block.asVector();
+    if (vector != null) {
+      addRawVector(vector, mask);
+    } else {
+      addRawBlock(block, mask);
+    }
+  }
+
+  private void addRawVector(LongVector vector) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      SampleLongAggregator.combine(state, vector.getLong(i));
+    }
+  }
+
+  private void addRawVector(LongVector vector, BooleanVector mask) {
+    for (int i = 0; i < vector.getPositionCount(); i++) {
+      if (mask.getBoolean(i) == false) {
+        continue;
+      }
+      SampleLongAggregator.combine(state, vector.getLong(i));
+    }
+  }
+
+  private void addRawBlock(LongBlock block) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleLongAggregator.combine(state, block.getLong(i));
+      }
+    }
+  }
+
+  private void addRawBlock(LongBlock block, BooleanVector mask) {
+    for (int p = 0; p < block.getPositionCount(); p++) {
+      if (mask.getBoolean(p) == false) {
+        continue;
+      }
+      if (block.isNull(p)) {
+        continue;
+      }
+      int start = block.getFirstValueIndex(p);
+      int end = start + block.getValueCount(p);
+      for (int i = start; i < end; i++) {
+        SampleLongAggregator.combine(state, block.getLong(i));
+      }
+    }
+  }
+
+  @Override
+  public void addIntermediateInput(Page page) {
+    assert channels.size() == intermediateBlockCount();
+    assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    assert sample.getPositionCount() == 1;
+    BytesRef scratch = new BytesRef();
+    SampleLongAggregator.combineIntermediate(state, sample);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+    state.toIntermediate(blocks, offset, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) {
+    blocks[offset] = SampleLongAggregator.evaluateFinal(state, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}

+ 50 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleLongAggregatorFunctionSupplier.java

@@ -0,0 +1,50 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.util.List;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link AggregatorFunctionSupplier} implementation for {@link SampleLongAggregator}.
+ * This class is generated. Edit {@code AggregatorFunctionSupplierImplementer} instead.
+ */
+public final class SampleLongAggregatorFunctionSupplier implements AggregatorFunctionSupplier {
+  private final int limit;
+
+  public SampleLongAggregatorFunctionSupplier(int limit) {
+    this.limit = limit;
+  }
+
+  @Override
+  public List<IntermediateStateDesc> nonGroupingIntermediateStateDesc() {
+    return SampleLongAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public List<IntermediateStateDesc> groupingIntermediateStateDesc() {
+    return SampleLongGroupingAggregatorFunction.intermediateStateDesc();
+  }
+
+  @Override
+  public SampleLongAggregatorFunction aggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleLongAggregatorFunction.create(driverContext, channels, limit);
+  }
+
+  @Override
+  public SampleLongGroupingAggregatorFunction groupingAggregator(DriverContext driverContext,
+      List<Integer> channels) {
+    return SampleLongGroupingAggregatorFunction.create(channels, driverContext, limit);
+  }
+
+  @Override
+  public String describe() {
+    return "sample of longs";
+  }
+}

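The supplier above is the planner-facing entry point for this aggregator. As a minimal sketch of the wiring, assuming the compute engine provides the DriverContext, Page, and mask; the limit (10) and channel index (0) are illustrative values, not from this change:

package org.elasticsearch.compute.aggregation;

import java.util.List;

import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BooleanVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.DriverContext;

class SampleSupplierSketch {
    static Block sampleTenLongs(DriverContext driverContext, Page page, BooleanVector mask) {
        var supplier = new SampleLongAggregatorFunctionSupplier(10);
        try (var aggregator = supplier.aggregator(driverContext, List.of(0))) {
            aggregator.addRawInput(page, mask);              // raw pass over channel 0
            Block[] out = new Block[1];
            aggregator.evaluateFinal(out, 0, driverContext); // strips the random weights
            return out[0];
        }
    }
}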
+ 214 - 0
x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/SampleLongGroupingAggregatorFunction.java

@@ -0,0 +1,214 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.compute.aggregation;
+
+import java.lang.Integer;
+import java.lang.Override;
+import java.lang.String;
+import java.lang.StringBuilder;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.LongVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+
+/**
+ * {@link GroupingAggregatorFunction} implementation for {@link SampleLongAggregator}.
+ * This class is generated. Edit {@code GroupingAggregatorImplementer} instead.
+ */
+public final class SampleLongGroupingAggregatorFunction implements GroupingAggregatorFunction {
+  private static final List<IntermediateStateDesc> INTERMEDIATE_STATE_DESC = List.of(
+      new IntermediateStateDesc("sample", ElementType.BYTES_REF)  );
+
+  private final SampleLongAggregator.GroupingState state;
+
+  private final List<Integer> channels;
+
+  private final DriverContext driverContext;
+
+  private final int limit;
+
+  public SampleLongGroupingAggregatorFunction(List<Integer> channels,
+      SampleLongAggregator.GroupingState state, DriverContext driverContext, int limit) {
+    this.channels = channels;
+    this.state = state;
+    this.driverContext = driverContext;
+    this.limit = limit;
+  }
+
+  public static SampleLongGroupingAggregatorFunction create(List<Integer> channels,
+      DriverContext driverContext, int limit) {
+    return new SampleLongGroupingAggregatorFunction(channels, SampleLongAggregator.initGrouping(driverContext.bigArrays(), limit), driverContext, limit);
+  }
+
+  public static List<IntermediateStateDesc> intermediateStateDesc() {
+    return INTERMEDIATE_STATE_DESC;
+  }
+
+  @Override
+  public int intermediateBlockCount() {
+    return INTERMEDIATE_STATE_DESC.size();
+  }
+
+  @Override
+  public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds,
+      Page page) {
+    LongBlock valuesBlock = page.getBlock(channels.get(0));
+    LongVector valuesVector = valuesBlock.asVector();
+    if (valuesVector == null) {
+      if (valuesBlock.mayHaveNulls()) {
+        state.enableGroupIdTracking(seenGroupIds);
+      }
+      return new GroupingAggregatorFunction.AddInput() {
+        @Override
+        public void add(int positionOffset, IntBlock groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void add(int positionOffset, IntVector groupIds) {
+          addRawInput(positionOffset, groupIds, valuesBlock);
+        }
+
+        @Override
+        public void close() {
+        }
+      };
+    }
+    return new GroupingAggregatorFunction.AddInput() {
+      @Override
+      public void add(int positionOffset, IntBlock groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void add(int positionOffset, IntVector groupIds) {
+        addRawInput(positionOffset, groupIds, valuesVector);
+      }
+
+      @Override
+      public void close() {
+      }
+    };
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, LongBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      if (values.isNull(groupPosition + positionOffset)) {
+        continue;
+      }
+      int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+      int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+      for (int v = valuesStart; v < valuesEnd; v++) {
+        SampleLongAggregator.combine(state, groupId, values.getLong(v));
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntVector groups, LongVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleLongAggregator.combine(state, groupId, values.getLong(groupPosition + positionOffset));
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, LongBlock values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        if (values.isNull(groupPosition + positionOffset)) {
+          continue;
+        }
+        int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset);
+        int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset);
+        for (int v = valuesStart; v < valuesEnd; v++) {
+          SampleLongAggregator.combine(state, groupId, values.getLong(v));
+        }
+      }
+    }
+  }
+
+  private void addRawInput(int positionOffset, IntBlock groups, LongVector values) {
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      if (groups.isNull(groupPosition)) {
+        continue;
+      }
+      int groupStart = groups.getFirstValueIndex(groupPosition);
+      int groupEnd = groupStart + groups.getValueCount(groupPosition);
+      for (int g = groupStart; g < groupEnd; g++) {
+        int groupId = groups.getInt(g);
+        SampleLongAggregator.combine(state, groupId, values.getLong(groupPosition + positionOffset));
+      }
+    }
+  }
+
+  @Override
+  public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) {
+    state.enableGroupIdTracking(seenGroupIds);
+  }
+
+  @Override
+  public void addIntermediateInput(int positionOffset, IntVector groups, Page page) {
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    assert channels.size() == intermediateBlockCount();
+    Block sampleUncast = page.getBlock(channels.get(0));
+    if (sampleUncast.areAllValuesNull()) {
+      return;
+    }
+    BytesRefBlock sample = (BytesRefBlock) sampleUncast;
+    BytesRef scratch = new BytesRef();
+    for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) {
+      int groupId = groups.getInt(groupPosition);
+      SampleLongAggregator.combineIntermediate(state, groupId, sample, groupPosition + positionOffset);
+    }
+  }
+
+  @Override
+  public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) {
+    if (input.getClass() != getClass()) {
+      throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass());
+    }
+    SampleLongAggregator.GroupingState inState = ((SampleLongGroupingAggregatorFunction) input).state;
+    state.enableGroupIdTracking(new SeenGroupIds.Empty());
+    SampleLongAggregator.combineStates(state, groupId, inState, position);
+  }
+
+  @Override
+  public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) {
+    state.toIntermediate(blocks, offset, selected, driverContext);
+  }
+
+  @Override
+  public void evaluateFinal(Block[] blocks, int offset, IntVector selected,
+      DriverContext driverContext) {
+    blocks[offset] = SampleLongAggregator.evaluateFinal(state, selected, driverContext);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(getClass().getSimpleName()).append("[");
+    sb.append("channels=").append(channels);
+    sb.append("]");
+    return sb.toString();
+  }
+
+  @Override
+  public void close() {
+    state.close();
+  }
+}

+ 208 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-SampleAggregator.java.st

@@ -0,0 +1,208 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.$Type$Block;
+import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
+import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+
+import org.elasticsearch.common.Randomness;
+import java.util.random.RandomGenerator;
+// end generated imports
+
+/**
+ * Sample N field values for $type$.
+ * <p>
+ *     This class is generated. Edit `X-SampleAggregator.java.st` to edit this file.
+ * </p>
+ * <p>
+ *     This works by prepending a random long to the value, and then collecting the
+ *     top values. This gives a uniform random sample of the values. See also:
+ *     <a href="https://en.wikipedia.org/wiki/Reservoir_sampling#With_random_sort">Wikipedia Reservoir Sampling</a>
+ * </p>
+ */
+@Aggregator({ @IntermediateState(name = "sample", type = "BYTES_REF_BLOCK") })
+@GroupingAggregator
+class Sample$Type$Aggregator {
+    private static final DefaultUnsortableTopNEncoder ENCODER = new DefaultUnsortableTopNEncoder();
+
+    public static SingleState initSingle(BigArrays bigArrays, int limit) {
+        return new SingleState(bigArrays, limit);
+    }
+
+    public static void combine(SingleState state, $type$ value) {
+        state.add(value);
+    }
+
+    public static void combineIntermediate(SingleState state, BytesRefBlock values) {
+        int start = values.getFirstValueIndex(0);
+        int end = start + values.getValueCount(0);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.internalState.sort.collect(values.getBytesRef(i, scratch), 0);
+        }
+    }
+
+    public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory()));
+    }
+
+    public static GroupingState initGrouping(BigArrays bigArrays, int limit) {
+        return new GroupingState(bigArrays, limit);
+    }
+
+    public static void combine(GroupingState state, int groupId, $type$ value) {
+        state.add(groupId, value);
+    }
+
+    public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) {
+        int start = values.getFirstValueIndex(valuesPosition);
+        int end = start + values.getValueCount(valuesPosition);
+        BytesRef scratch = new BytesRef();
+        for (int i = start; i < end; i++) {
+            state.sort.collect(values.getBytesRef(i, scratch), groupId);
+        }
+    }
+
+    public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) {
+        current.merge(groupId, state, statePosition);
+    }
+
+    public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) {
+        return stripWeights(driverContext, state.toBlock(driverContext.blockFactory(), selected));
+    }
+
+    private static Block stripWeights(DriverContext driverContext, Block block) {
+        if (block.areAllValuesNull()) {
+            return block;
+        }
+        try (
+            BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
+            $Type$Block.Builder $type$Block = driverContext.blockFactory().new$Type$BlockBuilder(bytesRefBlock.getPositionCount())
+        ) {
+            BytesRef scratch = new BytesRef();
+            for (int position = 0; position < block.getPositionCount(); position++) {
+                if (bytesRefBlock.isNull(position)) {
+                    $type$Block.appendNull();
+                } else {
+                    int valueCount = bytesRefBlock.getValueCount(position);
+                    if (valueCount > 1) {
+                        $type$Block.beginPositionEntry();
+                    }
+                    int start = bytesRefBlock.getFirstValueIndex(position);
+                    int end = start + valueCount;
+                    for (int i = start; i < end; i++) {
+                        BytesRef value = bytesRefBlock.getBytesRef(i, scratch).clone();
+                        ENCODER.decodeLong(value);
+                        $type$Block.append$Type$(ENCODER.decode$Type$(value$if(BytesRef)$, scratch$endif$));
+                    }
+                    if (valueCount > 1) {
+                        $type$Block.endPositionEntry();
+                    }
+                }
+            }
+            return $type$Block.build();
+        }
+    }
+
+    public static class GroupingState implements GroupingAggregatorState {
+        private final BytesRefBucketedSort sort;
+        private final BreakingBytesRefBuilder bytesRefBuilder;
+
+        private GroupingState(BigArrays bigArrays, int limit) {
+            CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
+            this.sort = new BytesRefBucketedSort(breaker, "sample", bigArrays, SortOrder.ASC, limit);
+            boolean success = false;
+            try {
+                this.bytesRefBuilder = new BreakingBytesRefBuilder(breaker, "sample");
+                success = true;
+            } finally {
+                if (success == false) {
+                    Releasables.closeExpectNoException(sort);
+                }
+            }
+        }
+
+        public void add(int groupId, $type$ value) {
+            ENCODER.encodeLong(Randomness.get().nextLong(), bytesRefBuilder);
+            ENCODER.encode$Type$(value, bytesRefBuilder);
+            sort.collect(bytesRefBuilder.bytesRefView(), groupId);
+            bytesRefBuilder.clear();
+        }
+
+        public void merge(int groupId, GroupingState other, int otherGroupId) {
+            sort.merge(groupId, other.sort, otherGroupId);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory(), selected);
+        }
+
+        Block toBlock(BlockFactory blockFactory, IntVector selected) {
+            return sort.toBlock(blockFactory, selected);
+        }
+
+        @Override
+        public void enableGroupIdTracking(SeenGroupIds seen) {
+            // we figure out seen values from nulls on the values block
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(sort, bytesRefBuilder);
+        }
+    }
+
+    public static class SingleState implements AggregatorState {
+        private final GroupingState internalState;
+
+        private SingleState(BigArrays bigArrays, int limit) {
+            this.internalState = new GroupingState(bigArrays, limit);
+        }
+
+        public void add($type$ value) {
+            internalState.add(0, value);
+        }
+
+        public void merge(GroupingState other) {
+            internalState.merge(0, other, 0);
+        }
+
+        @Override
+        public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+            blocks[offset] = toBlock(driverContext.blockFactory());
+        }
+
+        Block toBlock(BlockFactory blockFactory) {
+            try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+                return internalState.toBlock(blockFactory, intValues);
+            }
+        }
+
+        @Override
+        public void close() {
+            Releasables.closeExpectNoException(internalState);
+        }
+    }
+}

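The Javadoc above describes the sampling algorithm: tag each incoming value with a random long, keep the top N tagged values, then strip the tags. A self-contained sketch of the same idea in plain Java, using a bounded PriorityQueue where the aggregator uses an encoded BytesRefBucketedSort:

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.SplittableRandom;

final class RandomSortSample {
    record Weighted(long key, long value) {}

    static List<Long> sample(long[] values, int limit, SplittableRandom random) {
        if (limit <= 0) {
            return List.of();
        }
        // Max-heap on the random key: the root is the worst of the current best `limit`.
        PriorityQueue<Weighted> best = new PriorityQueue<>((a, b) -> Long.compare(b.key(), a.key()));
        for (long value : values) {
            long key = random.nextLong();          // the random "weight" prepended to the value
            if (best.size() < limit) {
                best.add(new Weighted(key, value));
            } else if (key < best.peek().key()) {  // a smaller key beats the current worst
                best.poll();
                best.add(new Weighted(key, value));
            }
        }
        List<Long> result = new ArrayList<>(best.size());
        for (Weighted w : best) {
            result.add(w.value());                 // drop the weight, as stripWeights() does
        }
        return result;
    }
}

Keeping the weights in the intermediate state is what makes the aggregation distributable: per-shard top-N lists can be merged and re-truncated by weight (see combineIntermediate), and the weights are only stripped in evaluateFinal.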
+ 3 - 5
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st

@@ -7,22 +7,20 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 $if(BytesRef)$
 import org.apache.lucene.util.BytesRef;
+$endif$
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.BytesRefArray;
-$else$
-import org.apache.lucene.util.RamUsageEstimator;
-import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.unit.ByteSizeValue;
-$endif$
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
 import java.io.IOException;
 import java.util.BitSet;
+// end generated imports
 
 /**
  * Block implementation that stores values in a {@link $Type$ArrayVector}.

+ 2 - 11
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayVector.java.st

@@ -7,7 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
-$if(BytesRef)$
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -17,19 +17,10 @@ import org.elasticsearch.common.util.BytesRefArray;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
 
-import java.io.IOException;
-
-$else$
-import org.apache.lucene.util.RamUsageEstimator;
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.unit.ByteSizeValue;
-import org.elasticsearch.core.ReleasableIterator;
-
 import java.io.IOException;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
-$endif$
+// end generated imports
 
 /**
  * Vector implementation that stores an array of $type$ values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 $if(BytesRef)$
 import org.apache.lucene.util.BytesRef;
 $endif$
@@ -17,6 +18,7 @@ import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Block that stores $type$ values.

+ 4 - 9
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BlockBuilder.java.st

@@ -7,21 +7,16 @@
 
 package org.elasticsearch.compute.data;
 
-$if(BytesRef)$
-import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.breaker.CircuitBreakingException;
-import org.elasticsearch.common.util.BigArrays;
-import org.elasticsearch.common.util.BytesRefArray;
-import org.elasticsearch.core.Releasables;
-
-$else$
+// begin generated imports
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.$Array$;
+import org.elasticsearch.core.Releasables;
 
 import java.util.Arrays;
-$endif$
+// end generated imports
 
 /**
  * Block builder of $Type$Blocks.

+ 2 - 4
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ConstantVector.java.st

@@ -7,19 +7,17 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 $if(BytesRef)$
 import org.apache.lucene.util.BytesRef;
 $endif$
 import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.common.unit.ByteSizeValue;
-$if(BytesRef)$
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
-
-$else$
 import org.elasticsearch.core.ReleasableIterator;
+// end generated imports
 
-$endif$
 /**
  * Vector implementation that stores a constant $type$ value.
  * This class is generated. Edit {@code X-ConstantVector.java.st} instead.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Lookup.java.st

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 $if(BytesRef)$
 import org.apache.lucene.util.BytesRef;
 $endif$
@@ -14,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.compute.operator.Operator;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Generic {@link Block#lookup} implementation for {@link $Type$Block}s.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 $if(BytesRef)$
 import org.apache.lucene.util.BytesRef;
 $endif$
@@ -17,6 +18,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 
 import java.io.IOException;
+// end generated imports
 
 /**
  * Vector that stores $type$ values.

+ 2 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st

@@ -7,12 +7,14 @@
 
 package org.elasticsearch.compute.data;
 
+// begin generated imports
 $if(BytesRef)$
 import org.apache.lucene.util.BytesRef;
 $endif$
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+// end generated imports
 
 /**
  * Block view of a {@link $Type$Vector}. Cannot represent multi-values or nulls.

+ 1 - 1
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/topn/DefaultUnsortableTopNEncoder.java

@@ -18,7 +18,7 @@ import java.nio.ByteOrder;
  * A {@link TopNEncoder} that doesn't encode values so they are sortable but is
  * capable of encoding any values.
  */
-final class DefaultUnsortableTopNEncoder implements TopNEncoder {
+public final class DefaultUnsortableTopNEncoder implements TopNEncoder {
     public static final VarHandle LONG = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder());
     public static final VarHandle INT = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder());
     public static final VarHandle FLOAT = MethodHandles.byteArrayViewVarHandle(float[].class, ByteOrder.nativeOrder());

+ 92 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleBooleanAggregatorFunctionTests.java

@@ -0,0 +1,92 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BooleanBlock;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.AggregationOperator;
+import org.elasticsearch.compute.operator.SequenceBooleanBlockSourceOperator;
+import org.elasticsearch.compute.operator.SourceOperator;
+import org.elasticsearch.compute.test.CannedSourceOperator;
+import org.elasticsearch.test.MixWithIncrement;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.Matchers.arrayWithSize;
+import static org.hamcrest.Matchers.both;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.lessThan;
+
+@SeedDecorators(MixWithIncrement.class)
+public class SampleBooleanAggregatorFunctionTests extends AggregatorFunctionTestCase {
+    private static final int LIMIT = 50;
+
+    @Override
+    protected SourceOperator simpleInput(BlockFactory blockFactory, int size) {
+        return new SequenceBooleanBlockSourceOperator(blockFactory, IntStream.range(0, size).mapToObj(l -> randomBoolean()));
+    }
+
+    @Override
+    protected AggregatorFunctionSupplier aggregatorFunction() {
+        return new SampleBooleanAggregatorFunctionSupplier(LIMIT);
+    }
+
+    @Override
+    protected String expectedDescriptionOfAggregator() {
+        return "sample of booleans";
+    }
+
+    @Override
+    public void assertSimpleOutput(List<Block> input, Block result) {
+        List<Boolean> inputValues = input.stream().flatMap(AggregatorFunctionTestCase::allBooleans).collect(Collectors.toList());
+        Boolean[] resultValues = AggregatorFunctionTestCase.allBooleans(result).toArray(Boolean[]::new);
+        assertThat(resultValues, arrayWithSize(Math.min(inputValues.size(), LIMIT)));
+    }
+
+    public void testDistribution() {
+        // Sample from 50x true and 50x false.
+        int N = 100;
+        Aggregator.Factory aggregatorFactory = aggregatorFunction().aggregatorFactory(AggregatorMode.SINGLE, List.of(0));
+        AggregationOperator.AggregationOperatorFactory operatorFactory = new AggregationOperator.AggregationOperatorFactory(
+            List.of(aggregatorFactory),
+            AggregatorMode.SINGLE
+        );
+
+        // Repeat 1000x, count how often each value is sampled.
+        int trueCount = 0;
+        int falseCount = 0;
+        for (int iteration = 0; iteration < 1000; iteration++) {
+            List<Page> input = CannedSourceOperator.collectPages(
+                new SequenceBooleanBlockSourceOperator(driverContext().blockFactory(), IntStream.range(0, N).mapToObj(i -> i % 2 == 0))
+            );
+            List<Page> results = drive(operatorFactory.get(driverContext()), input.iterator(), driverContext());
+            for (Page page : results) {
+                BooleanBlock block = page.getBlock(0);
+                for (int i = 0; i < block.getTotalValueCount(); i++) {
+                    if (block.getBoolean(i)) {
+                        trueCount++;
+                    } else {
+                        falseCount++;
+                    }
+                }
+            }
+            MixWithIncrement.next();
+        }
+
+        // On average, both boolean values should be sampled 25000x.
+        // The interval [23000,27000] is at least 10 sigma, so this should never fail.
+        assertThat(trueCount, both(greaterThan(23000)).and(lessThan(27000)));
+        assertThat(falseCount, both(greaterThan(23000)).and(lessThan(27000)));
+    }
+}

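A back-of-the-envelope check of the bound in the comment above (assuming independent iterations; not part of this change): per iteration, the number of sampled true values is hypergeometric, 50 drawn from 50 true plus 50 false.

// N = 100 values, K = 50 true, n = 50 drawn, 1000 iterations.
double mean = 1000 * (50.0 * 50 / 100);                           // 25000
double varPerIteration = 50 * 0.5 * 0.5 * (100 - 50) / (100 - 1); // ~6.31
double sigma = Math.sqrt(1000 * varPerIteration);                 // ~79.4
double sigmas = 2000 / sigma;                                     // ~25, so [23000, 27000] is well past 10 sigma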
+ 99 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleBytesRefAggregatorFunctionTests.java

@@ -0,0 +1,99 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.AggregationOperator;
+import org.elasticsearch.compute.operator.SequenceBytesRefBlockSourceOperator;
+import org.elasticsearch.compute.operator.SourceOperator;
+import org.elasticsearch.compute.test.CannedSourceOperator;
+import org.elasticsearch.test.MixWithIncrement;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.Matchers.arrayWithSize;
+import static org.hamcrest.Matchers.both;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasItems;
+import static org.hamcrest.Matchers.lessThan;
+
+@SeedDecorators(MixWithIncrement.class)
+public class SampleBytesRefAggregatorFunctionTests extends AggregatorFunctionTestCase {
+    private static final int LIMIT = 50;
+
+    @Override
+    protected SourceOperator simpleInput(BlockFactory blockFactory, int size) {
+        return new SequenceBytesRefBlockSourceOperator(
+            blockFactory,
+            IntStream.range(0, size).mapToObj(l -> new BytesRef(randomAlphanumericOfLength(100)))
+        );
+    }
+
+    @Override
+    protected AggregatorFunctionSupplier aggregatorFunction() {
+        return new SampleBytesRefAggregatorFunctionSupplier(LIMIT);
+    }
+
+    @Override
+    protected String expectedDescriptionOfAggregator() {
+        return "sample of bytes";
+    }
+
+    @Override
+    public void assertSimpleOutput(List<Block> input, Block result) {
+        Set<BytesRef> inputValues = input.stream().flatMap(AggregatorFunctionTestCase::allBytesRefs).collect(Collectors.toSet());
+        BytesRef[] resultValues = AggregatorFunctionTestCase.allBytesRefs(result).toArray(BytesRef[]::new);
+        assertThat(resultValues, arrayWithSize(Math.min(inputValues.size(), LIMIT)));
+        assertThat(inputValues, hasItems(resultValues));
+    }
+
+    public void testDistribution() {
+        // Sample from the numbers 0...99.
+        int N = 100;
+        Aggregator.Factory aggregatorFactory = aggregatorFunction().aggregatorFactory(AggregatorMode.SINGLE, List.of(0));
+        AggregationOperator.AggregationOperatorFactory operatorFactory = new AggregationOperator.AggregationOperatorFactory(
+            List.of(aggregatorFactory),
+            AggregatorMode.SINGLE
+        );
+
+        // Repeat 1000x, count how often each number is sampled.
+        int[] sampledCounts = new int[N];
+        for (int iteration = 0; iteration < 1000; iteration++) {
+            List<Page> input = CannedSourceOperator.collectPages(
+                new SequenceBytesRefBlockSourceOperator(
+                    driverContext().blockFactory(),
+                    IntStream.range(0, N).mapToObj(i -> new BytesRef(Integer.toString(i)))
+                )
+            );
+            List<Page> results = drive(operatorFactory.get(driverContext()), input.iterator(), driverContext());
+            for (Page page : results) {
+                BytesRefBlock block = page.getBlock(0);
+                BytesRef scratch = new BytesRef();
+                for (int i = 0; i < block.getTotalValueCount(); i++) {
+                    sampledCounts[Integer.parseInt(block.getBytesRef(i, scratch).utf8ToString())]++;
+                }
+            }
+            MixWithIncrement.next();
+        }
+
+        // On average, each string should be sampled 500x.
+        // The interval [300,700] is approx. 10 sigma, so this should never fail.
+        for (int i = 0; i < N; i++) {
+            assertThat(sampledCounts[i], both(greaterThan(300)).and(lessThan(700)));
+        }
+    }
+}

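The per-value bound here (and in the double, int, and long tests that follow) works the same way (again a rough estimate, not from the source): each of the 100 values is included with marginal probability 50/100 per iteration, so its count over 1000 iterations is approximately Binomial(1000, 0.5).

double mean = 1000 * 0.5;                    // 500 expected samples per value
double sigma = Math.sqrt(1000 * 0.5 * 0.5);  // ~15.8
double sigmas = 200 / sigma;                 // ~12.6, matching "approx. 10 sigma" for [300, 700]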
+ 94 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleDoubleAggregatorFunctionTests.java

@@ -0,0 +1,94 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.AggregationOperator;
+import org.elasticsearch.compute.operator.SequenceDoubleBlockSourceOperator;
+import org.elasticsearch.compute.operator.SourceOperator;
+import org.elasticsearch.compute.test.CannedSourceOperator;
+import org.elasticsearch.test.MixWithIncrement;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.Matchers.arrayWithSize;
+import static org.hamcrest.Matchers.both;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasItems;
+import static org.hamcrest.Matchers.lessThan;
+
+@SeedDecorators(MixWithIncrement.class)
+public class SampleDoubleAggregatorFunctionTests extends AggregatorFunctionTestCase {
+    private static final int LIMIT = 50;
+
+    @Override
+    protected SourceOperator simpleInput(BlockFactory blockFactory, int size) {
+        return new SequenceDoubleBlockSourceOperator(blockFactory, IntStream.range(0, size).mapToDouble(l -> randomDouble()));
+    }
+
+    @Override
+    protected AggregatorFunctionSupplier aggregatorFunction() {
+        return new SampleDoubleAggregatorFunctionSupplier(LIMIT);
+    }
+
+    @Override
+    protected String expectedDescriptionOfAggregator() {
+        return "sample of doubles";
+    }
+
+    @Override
+    public void assertSimpleOutput(List<Block> input, Block result) {
+        Set<Double> inputValues = input.stream()
+            .flatMapToDouble(AggregatorFunctionTestCase::allDoubles)
+            .boxed()
+            .collect(Collectors.toSet());
+        Double[] resultValues = AggregatorFunctionTestCase.allDoubles(result).boxed().toArray(Double[]::new);
+        assertThat(resultValues, arrayWithSize(Math.min(inputValues.size(), LIMIT)));
+        assertThat(inputValues, hasItems(resultValues));
+    }
+
+    public void testDistribution() {
+        // Sample from the numbers 0...99.
+        int N = 100;
+        Aggregator.Factory aggregatorFactory = aggregatorFunction().aggregatorFactory(AggregatorMode.SINGLE, List.of(0));
+        AggregationOperator.AggregationOperatorFactory operatorFactory = new AggregationOperator.AggregationOperatorFactory(
+            List.of(aggregatorFactory),
+            AggregatorMode.SINGLE
+        );
+
+        // Repeat 1000x, count how often each number is sampled.
+        int[] sampledCounts = new int[N];
+        for (int iteration = 0; iteration < 1000; iteration++) {
+            List<Page> input = CannedSourceOperator.collectPages(
+                new SequenceDoubleBlockSourceOperator(driverContext().blockFactory(), IntStream.range(0, N).asDoubleStream())
+            );
+            List<Page> results = drive(operatorFactory.get(driverContext()), input.iterator(), driverContext());
+            for (Page page : results) {
+                DoubleBlock block = page.getBlock(0);
+                for (int i = 0; i < block.getTotalValueCount(); i++) {
+                    sampledCounts[(int) block.getDouble(i)]++;
+                }
+            }
+            MixWithIncrement.next();
+        }
+
+        // On average, each number should be sampled 500x.
+        // The interval [300,700] is approx. 10 sigma, so this should never fail.
+        for (int i = 0; i < N; i++) {
+            assertThat(sampledCounts[i], both(greaterThan(300)).and(lessThan(700)));
+        }
+    }
+}

+ 91 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleIntAggregatorFunctionTests.java

@@ -0,0 +1,91 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.AggregationOperator;
+import org.elasticsearch.compute.operator.SequenceIntBlockSourceOperator;
+import org.elasticsearch.compute.operator.SourceOperator;
+import org.elasticsearch.compute.test.CannedSourceOperator;
+import org.elasticsearch.test.MixWithIncrement;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.Matchers.arrayWithSize;
+import static org.hamcrest.Matchers.both;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasItems;
+import static org.hamcrest.Matchers.lessThan;
+
+@SeedDecorators(MixWithIncrement.class)
+public class SampleIntAggregatorFunctionTests extends AggregatorFunctionTestCase {
+    private static final int LIMIT = 50;
+
+    @Override
+    protected SourceOperator simpleInput(BlockFactory blockFactory, int size) {
+        return new SequenceIntBlockSourceOperator(blockFactory, IntStream.range(0, size).map(l -> randomInt()));
+    }
+
+    @Override
+    protected AggregatorFunctionSupplier aggregatorFunction() {
+        return new SampleIntAggregatorFunctionSupplier(LIMIT);
+    }
+
+    @Override
+    protected String expectedDescriptionOfAggregator() {
+        return "sample of ints";
+    }
+
+    @Override
+    public void assertSimpleOutput(List<Block> input, Block result) {
+        Set<Integer> inputValues = input.stream().flatMapToInt(AggregatorFunctionTestCase::allInts).boxed().collect(Collectors.toSet());
+        Integer[] resultValues = AggregatorFunctionTestCase.allInts(result).boxed().toArray(Integer[]::new);
+        assertThat(resultValues, arrayWithSize(Math.min(inputValues.size(), LIMIT)));
+        assertThat(inputValues, hasItems(resultValues));
+    }
+
+    public void testDistribution() {
+        // Sample from the numbers 0...99.
+        int N = 100;
+        Aggregator.Factory aggregatorFactory = aggregatorFunction().aggregatorFactory(AggregatorMode.SINGLE, List.of(0));
+        AggregationOperator.AggregationOperatorFactory operatorFactory = new AggregationOperator.AggregationOperatorFactory(
+            List.of(aggregatorFactory),
+            AggregatorMode.SINGLE
+        );
+
+        // Repeat 1000x, count how often each number is sampled.
+        int[] sampledCounts = new int[N];
+        for (int iteration = 0; iteration < 1000; iteration++) {
+            List<Page> input = CannedSourceOperator.collectPages(
+                new SequenceIntBlockSourceOperator(driverContext().blockFactory(), IntStream.range(0, N))
+            );
+            List<Page> results = drive(operatorFactory.get(driverContext()), input.iterator(), driverContext());
+            for (Page page : results) {
+                IntBlock block = page.getBlock(0);
+                for (int i = 0; i < block.getTotalValueCount(); i++) {
+                    sampledCounts[block.getInt(i)]++;
+                }
+            }
+            MixWithIncrement.next();
+        }
+
+        // On average, each number should be sampled 500x.
+        // The interval [300,700] is approx. 10 sigma, so this should never fail.
+        for (int i = 0; i < N; i++) {
+            assertThat(sampledCounts[i], both(greaterThan(300)).and(lessThan(700)));
+        }
+    }
+}

+ 92 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/SampleLongAggregatorFunctionTests.java

@@ -0,0 +1,92 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.AggregationOperator;
+import org.elasticsearch.compute.operator.SourceOperator;
+import org.elasticsearch.compute.test.CannedSourceOperator;
+import org.elasticsearch.compute.test.SequenceLongBlockSourceOperator;
+import org.elasticsearch.test.MixWithIncrement;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
+
+import static org.hamcrest.Matchers.arrayWithSize;
+import static org.hamcrest.Matchers.both;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasItems;
+import static org.hamcrest.Matchers.lessThan;
+
+@SeedDecorators(MixWithIncrement.class)
+public class SampleLongAggregatorFunctionTests extends AggregatorFunctionTestCase {
+
+    private static final int LIMIT = 50;
+
+    @Override
+    protected SourceOperator simpleInput(BlockFactory blockFactory, int size) {
+        return new SequenceLongBlockSourceOperator(blockFactory, LongStream.range(0, size).map(l -> randomLong()));
+    }
+
+    @Override
+    protected AggregatorFunctionSupplier aggregatorFunction() {
+        return new SampleLongAggregatorFunctionSupplier(LIMIT);
+    }
+
+    @Override
+    protected String expectedDescriptionOfAggregator() {
+        return "sample of longs";
+    }
+
+    @Override
+    public void assertSimpleOutput(List<Block> input, Block result) {
+        Set<Long> inputValues = input.stream().flatMapToLong(AggregatorFunctionTestCase::allLongs).boxed().collect(Collectors.toSet());
+        Long[] resultValues = AggregatorFunctionTestCase.allLongs(result).boxed().toArray(Long[]::new);
+        assertThat(resultValues, arrayWithSize(Math.min(inputValues.size(), LIMIT)));
+        assertThat(inputValues, hasItems(resultValues));
+    }
+
+    public void testDistribution() {
+        // Sample from the numbers 0...99.
+        int N = 100;
+        Aggregator.Factory aggregatorFactory = aggregatorFunction().aggregatorFactory(AggregatorMode.SINGLE, List.of(0));
+        AggregationOperator.AggregationOperatorFactory operatorFactory = new AggregationOperator.AggregationOperatorFactory(
+            List.of(aggregatorFactory),
+            AggregatorMode.SINGLE
+        );
+
+        // Repeat 1000x, count how often each number is sampled.
+        int[] sampledCounts = new int[N];
+        for (int iteration = 0; iteration < 1000; iteration++) {
+            List<Page> input = CannedSourceOperator.collectPages(
+                new SequenceLongBlockSourceOperator(driverContext().blockFactory(), LongStream.range(0, N))
+            );
+            List<Page> results = drive(operatorFactory.get(driverContext()), input.iterator(), driverContext());
+            for (Page page : results) {
+                LongBlock block = page.getBlock(0);
+                for (int i = 0; i < block.getTotalValueCount(); i++) {
+                    sampledCounts[(int) block.getLong(i)]++;
+                }
+            }
+            MixWithIncrement.next();
+        }
+
+        // On average, each number should be sampled 500x.
+        // The interval [300,700] is approx. 10 sigma, so this should never fail.
+        for (int i = 0; i < N; i++) {
+            assertThat(sampledCounts[i], both(greaterThan(300)).and(lessThan(700)));
+        }
+    }
+}

+ 5 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/SequenceBooleanBlockSourceOperator.java

@@ -13,6 +13,7 @@ import org.elasticsearch.compute.data.Page;
 import org.elasticsearch.compute.test.AbstractBlockSourceOperator;
 
 import java.util.List;
+import java.util.stream.Stream;
 
 /**
  * A source operator whose output is the given boolean values. This operator produces pages
@@ -24,6 +25,10 @@ public class SequenceBooleanBlockSourceOperator extends AbstractBlockSourceOpera
 
     private final boolean[] values;
 
+    public SequenceBooleanBlockSourceOperator(BlockFactory blockFactory, Stream<Boolean> values) {
+        this(blockFactory, values.toList());
+    }
+
     public SequenceBooleanBlockSourceOperator(BlockFactory blockFactory, List<Boolean> values) {
         this(blockFactory, values, DEFAULT_MAX_PAGE_POSITIONS);
     }

+ 249 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_sample.csv-spec

@@ -0,0 +1,249 @@
+// Tests focused on the SAMPLE aggregation function
+// Note: this tests only basic behavior, because of the non-deterministic
+// behavior of SAMPLE and limitations of the CSV tests.
+
+
+documentation
+required_capability: agg_sample
+
+// tag::doc[]
+FROM employees
+| STATS sample = SAMPLE(gender, 5)
+// end::doc[]
+// Hardcode the sample values to work around the limitations of the CSV tests in the 
+// presence of randomness, and be able to specify an expected result for the docs.
+| EVAL sample = ["F", "M", "M", "F", "M"]
+;
+
+// tag::doc-result[]
+sample:keyword
+[F, M, M, F, M]
+// end::doc-result[]
+;
+
+
+sample size
+required_capability: agg_sample
+
+FROM employees 
+| STATS sample_boolean  = SAMPLE(still_hired,    1),
+        sample_datetime = SAMPLE(hire_date,      2),
+        sample_double   = SAMPLE(height,         3),
+        sample_integer  = SAMPLE(emp_no,         4),
+        sample_keyword  = SAMPLE(first_name,     5),
+        sample_long     = SAMPLE(languages.long, 6)
+| EVAL  count_boolean   = MV_COUNT(sample_boolean),
+        count_datetime  = MV_COUNT(sample_datetime),
+        count_double    = MV_COUNT(sample_double),
+        count_integer   = MV_COUNT(sample_integer),
+        count_keyword   = MV_COUNT(sample_keyword),
+        count_long      = MV_COUNT(sample_long)
+| KEEP count_*
+;
+        
+count_boolean:integer | count_datetime:integer | count_double:integer | count_integer:integer | count_keyword:integer | count_long:integer
+1                     | 2                      | 3                    | 4                     | 5                     | 6
+;
+
+
+sample values (boolean, datetime, double, integer, keyword, long)
+required_capability: agg_sample
+
+FROM employees
+| SORT emp_no
+| LIMIT 3
+| STATS sample_boolean  = MV_SORT(SAMPLE(still_hired,    99)),
+        sample_datetime = MV_SORT(SAMPLE(hire_date,      99)),
+        sample_double   = MV_SORT(SAMPLE(height,         99)),
+        sample_integer  = MV_SORT(SAMPLE(emp_no,         99)),
+        sample_keyword  = MV_SORT(SAMPLE(first_name,     99)),
+        sample_long     = MV_SORT(SAMPLE(languages.long, 99))
+;
+        
+sample_boolean:boolean | sample_datetime:datetime                                                       | sample_double:double | sample_integer:integer | sample_keyword:keyword   | sample_long:long
+[false, true, true]    | [1985-11-21T00:00:00.000Z, 1986-06-26T00:00:00.000Z, 1986-08-28T00:00:00.000Z] | [1.83, 2.03, 2.08]   | [10001, 10002, 10003]  | [Bezalel, Georgi, Parto] | [2, 4, 5] 
+;
+
+
+multivalued
+required_capability: agg_sample
+
+FROM mv_sample_data 
+| STATS sample = SAMPLE(message, 20)
+| EVAL sample = MV_SORT(sample)
+;
+
+sample:keyword
+[Banana, Banana, Banana, Banana, Banana, Banana, Banana, Connected to 10.1.0.1, Connected to 10.1.0.2, Connected to 10.1.0.3, Connection error, Connection error, Connection error, Disconnected]
+;
+
+
+some null input
+required_capability: agg_sample
+
+FROM employees
+| SORT emp_no
+| LIMIT 15
+| STATS sample = MV_SORT(SAMPLE(gender, 999)) 
+;
+
+sample:keyword
+[F, F, F, F, M, M, M, M, M]
+;
+
+
+some null output
+required_capability: agg_sample
+
+FROM employees
+| WHERE emp_no >= 10008 AND emp_no <= 10011
+| STATS sample = SAMPLE(gender, 1) BY emp_no
+| SORT emp_no
+;
+
+sample:keyword | emp_no:integer
+M              | 10008
+F              | 10009
+null           | 10010
+null           | 10011
+;
+
+
+stats by
+required_capability: agg_sample
+
+FROM employees 
+| STATS sample_keyword = MV_SORT(SAMPLE(gender, 999)),
+        sample_integer = MV_SORT(SAMPLE(salary, 999)) BY job_positions 
+| SORT job_positions
+;
+
+sample_keyword:keyword                                       | sample_integer:integer                                                                                                                       | job_positions:keyword     
+[F, F, F, F, F, M, M, M, M, M, M, M, M, M, M, M]             | [25976, 31897, 35742, 37691, 39356, 39728, 39878, 43026, 43602, 47411, 47896, 48942, 50128, 57305, 58121, 61358, 66817, 74970]               | Accountant                
+[F, F, F, F, F, F, F, M, M, M, M]                            | [28941, 30404, 31120, 37716, 42716, 43889, 44307, 44817, 45797, 54518, 62233, 62405, 69904]                                                  | Architect                 
+[F, F, F, F, M, M, M, M, M, M, M]                            | [29175, 30404, 35742, 36051, 37853, 39638, 39878, 40612, 41933, 50249, 58121]                                                                | Business Analyst          
+[F, M, M, M, M, M, M, M, M, M, M]                            | [25945, 29175, 31897, 34341, 37137, 39878, 42716, 48233, 50249, 56415, 58715, 67492, 74999]                                                  | Data Scientist            
+[F, F, M, M, M, M]                                           | [25324, 27215, 36174, 37137, 39110, 48942, 49281, 50064, 56415, 58715]                                                                       | Head Human Resources      
+[F, F, F, F, F, F, M, M, M, M, M, M, M, M, M]                | [26436, 30404, 31897, 32272, 39356, 43906, 44817, 46595, 48233, 49281, 50064, 50128, 56415, 66174, 69904]                                    | Internship                
+[F, F, F, F, F, F, F, M, M, M, M, M]                         | [25324, 25976, 30404, 32272, 32568, 41933, 43026, 43602, 43906, 50064, 56760, 62233, 64675, 74970]                                           | Junior Developer          
+[F, F, F, F, F, F, M, M, M, M, M, M, M, M, M, M, M, M, M]    | [25324, 28035, 32568, 36051, 37112, 38376, 39728, 42716, 44307, 45656, 49818, 50064, 50249, 52044, 60335, 65367, 66817, 69904, 74970, 74999] | Principal Support Engineer
+[F, F, F, F, F, F, M, M, M, M, M, M, M, M]                   | [32568, 33956, 37716, 41933, 43906, 44307, 45656, 45797, 47896, 49095, 51956, 58121, 58715, 61358, 62233, 68431, 73717, 74970]               | Purchase Manager          
+[F, F, F, M, M, M, M, M, M, M, M, M]                         | [27215, 32568, 34341, 35222, 36051, 38645, 38992, 39356, 39878, 48233, 54518, 61358, 65030]                                                  | Python Developer          
+[F, M, M, M, M, M, M, M, M, M]                               | [28336, 31120, 36174, 37137, 38645, 39638, 40612, 43026, 43889, 45656, 45797, 48233, 48735, 61358, 71165]                                    | Reporting Analyst         
+[F, F, F, F, F, F, F, M, M, M, M, M, M, M, M, M, M, M, M, M] | [25945, 31897, 35222, 35742, 37691, 37716, 37853, 38992, 43906, 49281, 52833, 57305, 60781, 62233, 62405, 66174, 66817, 68547, 73851, 74999] | Senior Python Developer   
+[F, F, F, F, F, F, F, M, M, M, M, M, M, M]                   | [29175, 31120, 33370, 37716, 40612, 42716, 44307, 44817, 49095, 54518, 56371, 56415, 60335, 65030, 67492]                                    | Senior Team Lead          
+[F, F, F, F, M, M, M, M, M, M]                               | [25324, 34341, 35222, 36174, 39728, 41933, 43026, 47896, 49281, 54462, 60408]                                                                | Support Engineer          
+[F, F, F, F, M, M, M, M, M, M, M, M, M]                      | [31120, 35742, 36174, 37691, 39356, 39638, 39728, 40031, 45656, 45797, 52044, 54518, 60335, 67492, 71165]                                    | Tech Lead                 
+[F, F, F, F, M, M, M, M, M, M, M]                            | [32263, 37702, 44956, 52121, 54329, 55360, 61805, 63528, 70011, 73578, 74572]                                                                | null                      
+;
+
+
+multiple samples are different
+required_capability: agg_sample
+
+FROM employees
+| STATS sample1 = MV_SORT(SAMPLE(last_name, 50)),
+        sample2 = MV_SORT(SAMPLE(last_name, 50))
+| EVAL samples = MV_ZIP(sample1, sample2, "|")
+| KEEP samples
+| MV_EXPAND samples
+| EVAL tokens = SPLIT(samples, "|"),
+       token_different = MV_SLICE(tokens, 0) != MV_SLICE(tokens, 1)
+| WHERE token_different == true
+| STATS token_different_count = COUNT()
+| EVAL samples_different = token_different_count > 0
+| KEEP samples_different
+;
+
+samples_different:boolean
+true
+;
+
+
+sample cartesian_point
+required_capability: agg_sample
+ 
+FROM airports_web | SORT abbrev | LIMIT 3 | STATS sample = SAMPLE(location, 999) | EVAL sample = MV_SORT(sample)
+;
+
+sample:cartesian_point
+[POINT (809321.6344269889 1006514.3393965173), POINT (-1.1868515102256078E7 4170563.5012235222), POINT (-437732.64923689933 585738.5549131387)]
+;
+
+
+sample cartesian_shape
+required_capability: agg_sample
+ 
+FROM cartesian_multipolygons | SORT id | LIMIT 1 | STATS sample = SAMPLE(shape, 999) | MV_EXPAND sample
+;
+
+sample:cartesian_shape
+MULTIPOLYGON (((0.0 0.0, 1.0 0.0, 1.0 1.0, 0.0 1.0, 0.0 0.0)),((2.0 0.0, 3.0 0.0, 3.0 1.0, 2.0 1.0, 2.0 0.0)),((2.0 2.0, 3.0 2.0, 3.0 3.0, 2.0 3.0, 2.0 2.0)),((0.0 2.0, 1.0 2.0, 1.0 3.0, 0.0 3.0, 0.0 2.0)))
+;
+
+
+sample date_nanos
+required_capability: agg_sample
+
+FROM date_nanos | STATS sample = SAMPLE(nanos,999) | EVAL sample = MV_SORT(sample)
+;
+
+sample:date_nanos
+[2023-01-23T13:55:01.543123456Z, 2023-02-23T13:33:34.937193Z, 2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z, 2023-10-23T12:15:03.360103847Z, 2023-10-23T12:15:03.360103847Z, 2023-10-23T12:27:28.948Z, 2023-10-23T13:33:34.937193Z, 2023-10-23T13:51:54.732102837Z, 2023-10-23T13:52:55.015787878Z, 2023-10-23T13:53:55.832987654Z, 2023-10-23T13:55:01.543123456Z]
+;
+
+
+sample geo_point
+required_capability: agg_sample
+ 
+FROM airports | SORT abbrev | LIMIT 2 | STATS sample = SAMPLE(location, 999) | EVAL sample = MV_SORT(sample)
+;
+
+sample:geo_point
+[POINT (-106.6166851616 35.0491578018276), POINT (-3.93221929167636 5.2543984451492)]
+;
+
+
+sample geo_shape
+required_capability: agg_sample
+ 
+FROM countries_bbox | SORT id | LIMIT 1 | STATS sample = SAMPLE(shape, 999)
+;
+
+sample:geo_shape
+BBOX (-70.059664, -69.874864, 12.627773, 12.411109)
+;
+
+
+sample ip
+required_capability: agg_sample
+
+FROM k8s | SORT @timestamp | LIMIT 5 | STATS sample = SAMPLE(client.ip,999) | EVAL sample = MV_SORT(sample)
+;
+
+sample:ip
+[10.10.20.30, 10.10.20.30, 10.10.20.31, 10.10.20.34, 10.10.20.34]
+;
+
+
+sample text
+required_capability: agg_sample
+
+FROM books | SORT book_no | LIMIT 3 | STATS sample = SAMPLE(title,999) | EVAL sample = MV_SORT(sample)
+;
+
+sample:keyword
+[Realms of Tolkien: Images of Middle-earth, Selected Passages from Correspondence with Friends, The brothers Karamazov]
+;
+
+
+
+sample version
+required_capability: agg_sample
+
+FROM apps | STATS sample = SAMPLE(version,999) | EVAL sample = MV_SORT(sample)
+;
+
+sample:version
+[1, 1.2.3.4, 1.2.3.4, 1.11.0, 2.1, 2.3.4, 2.12.0, 5.2.9-SNAPSHOT, 5.2.9, 5.2.9, 5.2.9, bad]
+;

+ 2 - 0
x-pack/plugin/esql/src/main/generated-src/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceBooleanEvaluator.java

@@ -7,6 +7,7 @@
 
 package org.elasticsearch.xpack.esql.expression.function.scalar.nulls;
 
+// begin generated imports
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BooleanBlock;
 import org.elasticsearch.compute.data.Page;
@@ -20,6 +21,7 @@ import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
 
 import java.util.List;
 import java.util.stream.IntStream;
+// end generated imports
 
 /**
  * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Coalesce}.

Not all files are shown because too many files changed in this diff