Browse Source

ES|QL categorize options (#131104)

* ES|QL categorize options

* refactor options

* fix serialization

* polish

* add verifications

* better test coverage + polish code

* better test coverage + polish code
Jan Kuipers 2 months ago
parent
commit
ec7f77becb
31 changed files with 572 additions and 215 deletions
  1. 1 1
      benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java
  2. 13 0
      docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/categorize.md
  3. 3 0
      docs/reference/query-languages/esql/_snippets/functions/layout/categorize.md
  4. 3 0
      docs/reference/query-languages/esql/_snippets/functions/parameters/categorize.md
  5. 4 4
      docs/reference/query-languages/esql/_snippets/functions/types/categorize.md
  6. 1 1
      docs/reference/query-languages/esql/images/functions/categorize.svg
  7. 1 0
      server/src/main/java/org/elasticsearch/TransportVersions.java
  8. 23 7
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java
  9. 28 11
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java
  10. 9 1
      x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java
  11. 74 36
      x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java
  12. 14 5
      x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHashTests.java
  13. 1 1
      x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/TopNBlockHashTests.java
  14. 2 2
      x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java
  15. 80 1
      x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec
  16. 9 2
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
  17. 107 0
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java
  18. 0 71
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
  19. 5 7
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java
  20. 3 7
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java
  21. 2 1
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java
  22. 3 7
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java
  23. 111 8
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java
  24. 4 34
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java
  25. 4 3
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java
  26. 6 2
      x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
  27. 51 0
      x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
  28. 1 1
      x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeErrorTests.java
  29. 1 1
      x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java
  30. 1 1
      x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java
  31. 7 0
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/SerializableTokenListCategory.java

+ 1 - 1
benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java

@@ -191,7 +191,7 @@ public class AggregatorBenchmark {
                 new BlockHash.GroupSpec(2, ElementType.BYTES_REF)
             );
             case TOP_N_LONGS -> List.of(
-                new BlockHash.GroupSpec(0, ElementType.LONG, false, new BlockHash.TopNDef(0, true, true, TOP_N_LIMIT))
+                new BlockHash.GroupSpec(0, ElementType.LONG, null, new BlockHash.TopNDef(0, true, true, TOP_N_LIMIT))
             );
             default -> throw new IllegalArgumentException("unsupported grouping [" + grouping + "]");
         };

+ 13 - 0
docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/categorize.md

@@ -0,0 +1,13 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Supported function named parameters**
+
+`output_format`
+:   (keyword) The output format of the categories. Defaults to regex.
+
+`similarity_threshold`
+:   (integer) The minimum percentage of token weight that must match for text to be added to the category bucket. Must be between 1 and 100. The larger the value the narrower the categories. Larger values will increase memory usage and create narrower categories. Defaults to 70.
+
+`analyzer`
+:   (keyword) Analyzer used to convert the field into tokens for text categorization.
+

+ 3 - 0
docs/reference/query-languages/esql/_snippets/functions/layout/categorize.md

@@ -19,5 +19,8 @@
 :::{include} ../types/categorize.md
 :::
 
+:::{include} ../functionNamedParams/categorize.md
+:::
+
 :::{include} ../examples/categorize.md
 :::

+ 3 - 0
docs/reference/query-languages/esql/_snippets/functions/parameters/categorize.md

@@ -5,3 +5,6 @@
 `field`
 :   Expression to categorize
 
+`options`
+:   (Optional) Categorize additional options as [function named parameters](/reference/query-languages/esql/esql-syntax.md#esql-function-named-params).
+

+ 4 - 4
docs/reference/query-languages/esql/_snippets/functions/types/categorize.md

@@ -2,8 +2,8 @@
 
 **Supported types**
 
-| field | result |
-| --- | --- |
-| keyword | keyword |
-| text | keyword |
+| field | options | result |
+| --- | --- | --- |
+| keyword | | keyword |
+| text | | keyword |
 

+ 1 - 1
docs/reference/query-languages/esql/images/functions/categorize.svg

@@ -1 +1 @@
-<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="324" height="46" viewbox="0 0 324 46"><defs><style type="text/css">.c{fill:none;stroke:#222222;}.k{fill:#000000;font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;font-size:20px;}.s{fill:#e4f4ff;stroke:#222222;}.syn{fill:#8D8D8D;font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;font-size:20px;}</style></defs><path class="c" d="M0 31h5m140 0h10m32 0h10m80 0h10m32 0h5"/><rect class="s" x="5" y="5" width="140" height="36"/><text class="k" x="15" y="31">CATEGORIZE</text><rect class="s" x="155" y="5" width="32" height="36" rx="7"/><text class="syn" x="165" y="31">(</text><rect class="s" x="197" y="5" width="80" height="36" rx="7"/><text class="k" x="207" y="31">field</text><rect class="s" x="287" y="5" width="32" height="36" rx="7"/><text class="syn" x="297" y="31">)</text></svg>
+<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="520" height="61" viewbox="0 0 520 61"><defs><style type="text/css">.c{fill:none;stroke:#222222;}.k{fill:#000000;font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;font-size:20px;}.s{fill:#e4f4ff;stroke:#222222;}.syn{fill:#8D8D8D;font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;font-size:20px;}</style></defs><path class="c" d="M0 31h5m140 0h10m32 0h10m80 0h10m32 0h30m104 0h20m-139 0q5 0 5 5v10q0 5 5 5h114q5 0 5-5v-10q0-5 5-5m5 0h10m32 0h5"/><rect class="s" x="5" y="5" width="140" height="36"/><text class="k" x="15" y="31">CATEGORIZE</text><rect class="s" x="155" y="5" width="32" height="36" rx="7"/><text class="syn" x="165" y="31">(</text><rect class="s" x="197" y="5" width="80" height="36" rx="7"/><text class="k" x="207" y="31">field</text><rect class="s" x="287" y="5" width="32" height="36" rx="7"/><text class="syn" x="297" y="31">,</text><rect class="s" x="349" y="5" width="104" height="36" rx="7"/><text class="k" x="359" y="31">options</text><rect class="s" x="483" y="5" width="32" height="36" rx="7"/><text class="syn" x="493" y="31">)</text></svg>

+ 1 - 0
server/src/main/java/org/elasticsearch/TransportVersions.java

@@ -340,6 +340,7 @@ public class TransportVersions {
     public static final TransportVersion ESQL_FIXED_INDEX_LIKE = def(9_119_0_00);
     public static final TransportVersion LOOKUP_JOIN_CCS = def(9_120_0_00);
     public static final TransportVersion NODE_USAGE_STATS_FOR_THREAD_POOLS_IN_CLUSTER_INFO = def(9_121_0_00);
+    public static final TransportVersion ESQL_CATEGORIZE_OPTIONS = def(9_122_0_00);
 
     /*
      * STOP! READ THIS FIRST! No, really,

+ 23 - 7
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java

@@ -128,16 +128,26 @@ public abstract class BlockHash implements Releasable, SeenGroupIds {
     public record TopNDef(int order, boolean asc, boolean nullsFirst, int limit) {}
 
     /**
-     * @param isCategorize Whether this group is a CATEGORIZE() or not.
-     *                     May be changed in the future when more stateful grouping functions are added.
+     * Configuration for a BlockHash group spec that is doing text categorization.
      */
-    public record GroupSpec(int channel, ElementType elementType, boolean isCategorize, @Nullable TopNDef topNDef) {
+    public record CategorizeDef(String analyzer, OutputFormat outputFormat, int similarityThreshold) {
+        public enum OutputFormat {
+            REGEX,
+            TOKENS
+        }
+    }
+
+    public record GroupSpec(int channel, ElementType elementType, @Nullable CategorizeDef categorizeDef, @Nullable TopNDef topNDef) {
         public GroupSpec(int channel, ElementType elementType) {
-            this(channel, elementType, false, null);
+            this(channel, elementType, null, null);
+        }
+
+        public GroupSpec(int channel, ElementType elementType, CategorizeDef categorizeDef) {
+            this(channel, elementType, categorizeDef, null);
         }
 
-        public GroupSpec(int channel, ElementType elementType, boolean isCategorize) {
-            this(channel, elementType, isCategorize, null);
+        public boolean isCategorize() {
+            return categorizeDef != null;
         }
     }
 
@@ -207,7 +217,13 @@ public abstract class BlockHash implements Releasable, SeenGroupIds {
         int emitBatchSize
     ) {
         if (groups.size() == 1) {
-            return new CategorizeBlockHash(blockFactory, groups.get(0).channel, aggregatorMode, analysisRegistry);
+            return new CategorizeBlockHash(
+                blockFactory,
+                groups.get(0).channel,
+                aggregatorMode,
+                groups.get(0).categorizeDef,
+                analysisRegistry
+            );
         } else {
             assert groups.get(0).isCategorize();
             assert groups.subList(1, groups.size()).stream().noneMatch(GroupSpec::isCategorize);

+ 28 - 11
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java

@@ -18,7 +18,6 @@ import org.elasticsearch.common.util.BitArray;
 import org.elasticsearch.common.util.BytesRefHash;
 import org.elasticsearch.compute.aggregation.AggregatorMode;
 import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction;
-import org.elasticsearch.compute.aggregation.SeenGroupIds;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BlockFactory;
 import org.elasticsearch.compute.data.BytesRefBlock;
@@ -47,12 +46,13 @@ import java.util.Objects;
  */
 public class CategorizeBlockHash extends BlockHash {
 
-    private static final CategorizationAnalyzerConfig ANALYZER_CONFIG = CategorizationAnalyzerConfig
+    private static final CategorizationAnalyzerConfig DEFAULT_ANALYZER_CONFIG = CategorizationAnalyzerConfig
         .buildStandardEsqlCategorizationAnalyzer();
     private static final int NULL_ORD = 0;
 
     private final int channel;
     private final AggregatorMode aggregatorMode;
+    private final CategorizeDef categorizeDef;
     private final TokenListCategorizer.CloseableTokenListCategorizer categorizer;
     private final CategorizeEvaluator evaluator;
 
@@ -64,28 +64,38 @@ public class CategorizeBlockHash extends BlockHash {
      */
     private boolean seenNull = false;
 
-    CategorizeBlockHash(BlockFactory blockFactory, int channel, AggregatorMode aggregatorMode, AnalysisRegistry analysisRegistry) {
+    CategorizeBlockHash(
+        BlockFactory blockFactory,
+        int channel,
+        AggregatorMode aggregatorMode,
+        CategorizeDef categorizeDef,
+        AnalysisRegistry analysisRegistry
+    ) {
         super(blockFactory);
 
         this.channel = channel;
         this.aggregatorMode = aggregatorMode;
+        this.categorizeDef = categorizeDef;
 
         this.categorizer = new TokenListCategorizer.CloseableTokenListCategorizer(
             new CategorizationBytesRefHash(new BytesRefHash(2048, blockFactory.bigArrays())),
             CategorizationPartOfSpeechDictionary.getInstance(),
-            0.70f
+            categorizeDef.similarityThreshold() / 100.0f
         );
 
         if (aggregatorMode.isInputPartial() == false) {
-            CategorizationAnalyzer analyzer;
+            CategorizationAnalyzer categorizationAnalyzer;
             try {
                 Objects.requireNonNull(analysisRegistry);
-                analyzer = new CategorizationAnalyzer(analysisRegistry, ANALYZER_CONFIG);
-            } catch (Exception e) {
+                CategorizationAnalyzerConfig config = categorizeDef.analyzer() == null
+                    ? DEFAULT_ANALYZER_CONFIG
+                    : new CategorizationAnalyzerConfig.Builder().setAnalyzer(categorizeDef.analyzer()).build();
+                categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, config);
+            } catch (IOException e) {
                 categorizer.close();
                 throw new RuntimeException(e);
             }
-            this.evaluator = new CategorizeEvaluator(analyzer);
+            this.evaluator = new CategorizeEvaluator(categorizationAnalyzer);
         } else {
             this.evaluator = null;
         }
@@ -114,7 +124,7 @@ public class CategorizeBlockHash extends BlockHash {
 
     @Override
     public BitArray seenGroupIds(BigArrays bigArrays) {
-        return new SeenGroupIds.Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays);
+        return new Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays);
     }
 
     @Override
@@ -222,7 +232,7 @@ public class CategorizeBlockHash extends BlockHash {
             try (BytesRefBlock.Builder result = blockFactory.newBytesRefBlockBuilder(categorizer.getCategoryCount())) {
                 result.appendNull();
                 for (SerializableTokenListCategory category : categorizer.toCategoriesById()) {
-                    scratch.copyChars(category.getRegex());
+                    scratch.copyChars(getKeyString(category));
                     result.appendBytesRef(scratch.get());
                     scratch.clear();
                 }
@@ -232,7 +242,7 @@ public class CategorizeBlockHash extends BlockHash {
 
         try (BytesRefVector.Builder result = blockFactory.newBytesRefVectorBuilder(categorizer.getCategoryCount())) {
             for (SerializableTokenListCategory category : categorizer.toCategoriesById()) {
-                scratch.copyChars(category.getRegex());
+                scratch.copyChars(getKeyString(category));
                 result.appendBytesRef(scratch.get());
                 scratch.clear();
             }
@@ -240,6 +250,13 @@ public class CategorizeBlockHash extends BlockHash {
         }
     }
 
+    private String getKeyString(SerializableTokenListCategory category) {
+        return switch (categorizeDef.outputFormat()) {
+            case REGEX -> category.getRegex();
+            case TOKENS -> category.getKeyTokensString();
+        };
+    }
+
     /**
      * Similar implementation to an Evaluator.
      */

+ 9 - 1
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java

@@ -56,6 +56,8 @@ public class CategorizePackedValuesBlockHash extends BlockHash {
         int emitBatchSize
     ) {
         super(blockFactory);
+        assert specs.get(0).categorizeDef() != null;
+
         this.specs = specs;
         this.aggregatorMode = aggregatorMode;
         blocks = new Block[specs.size()];
@@ -68,7 +70,13 @@ public class CategorizePackedValuesBlockHash extends BlockHash {
 
         boolean success = false;
         try {
-            categorizeBlockHash = new CategorizeBlockHash(blockFactory, specs.get(0).channel(), aggregatorMode, analysisRegistry);
+            categorizeBlockHash = new CategorizeBlockHash(
+                blockFactory,
+                specs.get(0).channel(),
+                aggregatorMode,
+                specs.get(0).categorizeDef(),
+                analysisRegistry
+            );
             packedValuesBlockHash = new PackedValuesBlockHash(delegateSpecs, blockFactory, emitBatchSize);
             success = true;
         } finally {

+ 74 - 36
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java

@@ -76,7 +76,13 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
         ).getAnalysisRegistry();
     }
 
+    private BlockHash.CategorizeDef getCategorizeDef() {
+        return new BlockHash.CategorizeDef(null, randomFrom(BlockHash.CategorizeDef.OutputFormat.values()), 70);
+    }
+
     public void testCategorizeRaw() {
+        BlockHash.CategorizeDef categorizeDef = getCategorizeDef();
+
         final Page page;
         boolean withNull = randomBoolean();
         final int positions = 7 + (withNull ? 1 : 0);
@@ -98,7 +104,7 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             page = new Page(builder.build());
         }
 
-        try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, analysisRegistry)) {
+        try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, categorizeDef, analysisRegistry)) {
             for (int i = randomInt(2); i < 3; i++) {
                 hash.add(page, new GroupingAggregatorFunction.AddInput() {
                     private void addBlock(int positionOffset, IntBlock groupIds) {
@@ -137,7 +143,10 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
                     }
                 });
 
-                assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?");
+                switch (categorizeDef.outputFormat()) {
+                    case REGEX -> assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?");
+                    case TOKENS -> assertHashState(hash, withNull, "Connected to", "Connection error", "Disconnected");
+                }
             }
         } finally {
             page.releaseBlocks();
@@ -145,6 +154,8 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
     }
 
     public void testCategorizeRawMultivalue() {
+        BlockHash.CategorizeDef categorizeDef = getCategorizeDef();
+
         final Page page;
         boolean withNull = randomBoolean();
         final int positions = 3 + (withNull ? 1 : 0);
@@ -170,7 +181,7 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             page = new Page(builder.build());
         }
 
-        try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, analysisRegistry)) {
+        try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, categorizeDef, analysisRegistry)) {
             for (int i = randomInt(2); i < 3; i++) {
                 hash.add(page, new GroupingAggregatorFunction.AddInput() {
                     private void addBlock(int positionOffset, IntBlock groupIds) {
@@ -216,7 +227,10 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
                     }
                 });
 
-                assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?");
+                switch (categorizeDef.outputFormat()) {
+                    case REGEX -> assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?");
+                    case TOKENS -> assertHashState(hash, withNull, "Connected to", "Connection error", "Disconnected");
+                }
             }
         } finally {
             page.releaseBlocks();
@@ -224,6 +238,8 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
     }
 
     public void testCategorizeIntermediate() {
+        BlockHash.CategorizeDef categorizeDef = getCategorizeDef();
+
         Page page1;
         boolean withNull = randomBoolean();
         int positions1 = 7 + (withNull ? 1 : 0);
@@ -259,8 +275,8 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
 
         // Fill intermediatePages with the intermediate state from the raw hashes
         try (
-            BlockHash rawHash1 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry);
-            BlockHash rawHash2 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry);
+            BlockHash rawHash1 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, categorizeDef, analysisRegistry);
+            BlockHash rawHash2 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, categorizeDef, analysisRegistry);
         ) {
             rawHash1.add(page1, new GroupingAggregatorFunction.AddInput() {
                 private void addBlock(int positionOffset, IntBlock groupIds) {
@@ -335,7 +351,7 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             page2.releaseBlocks();
         }
 
-        try (var intermediateHash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.FINAL, null)) {
+        try (var intermediateHash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.FINAL, categorizeDef, null)) {
             intermediateHash.add(intermediatePage1, new GroupingAggregatorFunction.AddInput() {
                 private void addBlock(int positionOffset, IntBlock groupIds) {
                     List<Integer> values = IntStream.range(0, groupIds.getPositionCount())
@@ -403,14 +419,24 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
                     }
                 });
 
-                assertHashState(
-                    intermediateHash,
-                    withNull,
-                    ".*?Connected.+?to.*?",
-                    ".*?Connection.+?error.*?",
-                    ".*?Disconnected.*?",
-                    ".*?System.+?shutdown.*?"
-                );
+                switch (categorizeDef.outputFormat()) {
+                    case REGEX -> assertHashState(
+                        intermediateHash,
+                        withNull,
+                        ".*?Connected.+?to.*?",
+                        ".*?Connection.+?error.*?",
+                        ".*?Disconnected.*?",
+                        ".*?System.+?shutdown.*?"
+                    );
+                    case TOKENS -> assertHashState(
+                        intermediateHash,
+                        withNull,
+                        "Connected to",
+                        "Connection error",
+                        "Disconnected",
+                        "System shutdown"
+                    );
+                }
             }
         } finally {
             intermediatePage1.releaseBlocks();
@@ -419,6 +445,9 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
     }
 
     public void testCategorize_withDriver() {
+        BlockHash.CategorizeDef categorizeDef = getCategorizeDef();
+        BlockHash.GroupSpec groupSpec = new BlockHash.GroupSpec(0, ElementType.BYTES_REF, categorizeDef);
+
         BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking();
         CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
         DriverContext driverContext = new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays));
@@ -477,7 +506,7 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             new LocalSourceOperator(input1),
             List.of(
                 new HashAggregationOperator.HashAggregationOperatorFactory(
-                    List.of(makeGroupSpec()),
+                    List.of(groupSpec),
                     AggregatorMode.INITIAL,
                     List.of(
                         new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(AggregatorMode.INITIAL, List.of(1)),
@@ -496,7 +525,7 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             new LocalSourceOperator(input2),
             List.of(
                 new HashAggregationOperator.HashAggregationOperatorFactory(
-                    List.of(makeGroupSpec()),
+                    List.of(groupSpec),
                     AggregatorMode.INITIAL,
                     List.of(
                         new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(AggregatorMode.INITIAL, List.of(1)),
@@ -517,7 +546,7 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             new CannedSourceOperator(intermediateOutput.iterator()),
             List.of(
                 new HashAggregationOperator.HashAggregationOperatorFactory(
-                    List.of(makeGroupSpec()),
+                    List.of(groupSpec),
                     AggregatorMode.FINAL,
                     List.of(
                         new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(AggregatorMode.FINAL, List.of(1, 2)),
@@ -544,23 +573,36 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             sums.put(outputTexts.getBytesRef(i, new BytesRef()).utf8ToString(), outputSums.getLong(i));
             maxs.put(outputTexts.getBytesRef(i, new BytesRef()).utf8ToString(), outputMaxs.getLong(i));
         }
+        List<String> keys = switch (categorizeDef.outputFormat()) {
+            case REGEX -> List.of(
+                ".*?aaazz.*?",
+                ".*?bbbzz.*?",
+                ".*?ccczz.*?",
+                ".*?dddzz.*?",
+                ".*?eeezz.*?",
+                ".*?words.+?words.+?words.+?goodbye.*?",
+                ".*?words.+?words.+?words.+?hello.*?"
+            );
+            case TOKENS -> List.of("aaazz", "bbbzz", "ccczz", "dddzz", "eeezz", "words words words goodbye", "words words words hello");
+        };
+
         assertThat(
             sums,
             equalTo(
                 Map.of(
-                    ".*?aaazz.*?",
+                    keys.get(0),
                     1L,
-                    ".*?bbbzz.*?",
+                    keys.get(1),
                     2L,
-                    ".*?ccczz.*?",
+                    keys.get(2),
                     33L,
-                    ".*?dddzz.*?",
+                    keys.get(3),
                     44L,
-                    ".*?eeezz.*?",
+                    keys.get(4),
                     5L,
-                    ".*?words.+?words.+?words.+?goodbye.*?",
+                    keys.get(5),
                     8888L,
-                    ".*?words.+?words.+?words.+?hello.*?",
+                    keys.get(6),
                     999L
                 )
             )
@@ -569,19 +611,19 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
             maxs,
             equalTo(
                 Map.of(
-                    ".*?aaazz.*?",
+                    keys.get(0),
                     1L,
-                    ".*?bbbzz.*?",
+                    keys.get(1),
                     2L,
-                    ".*?ccczz.*?",
+                    keys.get(2),
                     30L,
-                    ".*?dddzz.*?",
+                    keys.get(3),
                     40L,
-                    ".*?eeezz.*?",
+                    keys.get(4),
                     5L,
-                    ".*?words.+?words.+?words.+?goodbye.*?",
+                    keys.get(5),
                     8000L,
-                    ".*?words.+?words.+?words.+?hello.*?",
+                    keys.get(6),
                     900L
                 )
             )
@@ -589,10 +631,6 @@ public class CategorizeBlockHashTests extends BlockHashTestCase {
         Releasables.close(() -> Iterators.map(finalOutput.iterator(), (Page p) -> p::releaseBlocks));
     }
 
-    private BlockHash.GroupSpec makeGroupSpec() {
-        return new BlockHash.GroupSpec(0, ElementType.BYTES_REF, true);
-    }
-
     private void assertHashState(CategorizeBlockHash hash, boolean withNull, String... expectedKeys) {
         // Check the keys
         Block[] blocks = null;

+ 14 - 5
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHashTests.java

@@ -74,10 +74,15 @@ public class CategorizePackedValuesBlockHashTests extends BlockHashTestCase {
         DriverContext driverContext = new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays));
         boolean withNull = randomBoolean();
         boolean withMultivalues = randomBoolean();
+        BlockHash.CategorizeDef categorizeDef = new BlockHash.CategorizeDef(
+            null,
+            randomFrom(BlockHash.CategorizeDef.OutputFormat.values()),
+            70
+        );
 
         List<BlockHash.GroupSpec> groupSpecs = List.of(
-            new BlockHash.GroupSpec(0, ElementType.BYTES_REF, true),
-            new BlockHash.GroupSpec(1, ElementType.INT, false)
+            new BlockHash.GroupSpec(0, ElementType.BYTES_REF, categorizeDef),
+            new BlockHash.GroupSpec(1, ElementType.INT, null)
         );
 
         LocalSourceOperator.BlockSupplier input1 = () -> {
@@ -218,8 +223,12 @@ public class CategorizePackedValuesBlockHashTests extends BlockHashTestCase {
         }
         Releasables.close(() -> Iterators.map(finalOutput.iterator(), (Page p) -> p::releaseBlocks));
 
+        List<String> keys = switch (categorizeDef.outputFormat()) {
+            case REGEX -> List.of(".*?connected.+?to.*?", ".*?connection.+?error.*?", ".*?disconnected.*?");
+            case TOKENS -> List.of("connected to", "connection error", "disconnected");
+        };
         Map<String, Map<Integer, Set<String>>> expectedResult = Map.of(
-            ".*?connected.+?to.*?",
+            keys.get(0),
             Map.of(
                 7,
                 Set.of("connected to 1.1.1", "connected to 1.1.2", "connected to 1.1.4", "connected to 2.1.2"),
@@ -228,9 +237,9 @@ public class CategorizePackedValuesBlockHashTests extends BlockHashTestCase {
                 111,
                 Set.of("connected to 2.1.1")
             ),
-            ".*?connection.+?error.*?",
+            keys.get(1),
             Map.of(7, Set.of("connection error"), 42, Set.of("connection error")),
-            ".*?disconnected.*?",
+            keys.get(2),
             Map.of(7, Set.of("disconnected"))
         );
         if (withNull) {

+ 1 - 1
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/TopNBlockHashTests.java

@@ -363,7 +363,7 @@ public class TopNBlockHashTests extends BlockHashTestCase {
     private BlockHash buildBlockHash(int emitBatchSize, Block... values) {
         List<BlockHash.GroupSpec> specs = new ArrayList<>(values.length);
         for (int c = 0; c < values.length; c++) {
-            specs.add(new BlockHash.GroupSpec(c, values[c].elementType(), false, topNDef(c)));
+            specs.add(new BlockHash.GroupSpec(c, values[c].elementType(), null, topNDef(c)));
         }
         assert forcePackedHash == false : "Packed TopN hash not implemented yet";
         /*return forcePackedHash

+ 2 - 2
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java

@@ -113,7 +113,7 @@ public class HashAggregationOperatorTests extends ForkingOperatorTestCase {
 
         try (
             var operator = new HashAggregationOperator.HashAggregationOperatorFactory(
-                List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, false, new BlockHash.TopNDef(0, ascOrder, false, 3))),
+                List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, null, new BlockHash.TopNDef(0, ascOrder, false, 3))),
                 mode,
                 List.of(
                     new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(mode, aggregatorChannels),
@@ -190,7 +190,7 @@ public class HashAggregationOperatorTests extends ForkingOperatorTestCase {
 
         try (
             var operator = new HashAggregationOperator.HashAggregationOperatorFactory(
-                List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, false, new BlockHash.TopNDef(0, ascOrder, true, 3))),
+                List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, null, new BlockHash.TopNDef(0, ascOrder, true, 3))),
                 mode,
                 List.of(
                     new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(mode, aggregatorChannels),

+ 80 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec

@@ -397,7 +397,7 @@ FROM sample_data
 ;
 
 COUNT():long | SUM(event_duration):long | category:keyword
-           7 |                 23231327 |  null
+           7 |                 23231327 | null
 ;
 
 on null row
@@ -800,3 +800,82 @@ COUNT():long | VALUES(str):keyword | category:keyword | str:keyword
            1 | [a, b, c]           | null             | b
            1 | [a, b, c]           | null             | c
 ;
+
+with option output_format regex
+required_capability: categorize_options
+
+FROM sample_data
+  | STATS count=COUNT()
+       BY category=CATEGORIZE(message, {"output_format": "regex"})
+  | SORT count DESC, category
+;
+
+count:long | category:keyword
+         3 | .*?Connected.+?to.*?
+         3 | .*?Connection.+?error.*?
+         1 | .*?Disconnected.*?
+;
+
+with option output_format tokens
+required_capability: categorize_options
+
+FROM sample_data
+  | STATS count=COUNT()
+       BY category=CATEGORIZE(message, {"output_format": "tokens"})
+  | SORT count DESC, category
+;
+
+count:long | category:keyword
+         3 | Connected to
+         3 | Connection error
+         1 | Disconnected
+;
+
+with option similarity_threshold
+required_capability: categorize_options
+
+FROM sample_data
+  | STATS count=COUNT()
+       BY category=CATEGORIZE(message, {"similarity_threshold": 99})
+  | SORT count DESC, category
+;
+
+count:long | category:keyword
+3          | .*?Connection.+?error.*?
+1          | .*?Connected.+?to.+?10\.1\.0\.1.*?
+1          | .*?Connected.+?to.+?10\.1\.0\.2.*?
+1          | .*?Connected.+?to.+?10\.1\.0\.3.*?
+1          | .*?Disconnected.*?
+;
+
+with option analyzer
+required_capability: categorize_options
+
+FROM sample_data
+  | STATS count=COUNT()
+       BY category=CATEGORIZE(message, {"analyzer": "stop"})
+  | SORT count DESC, category
+;
+
+count:long | category:keyword
+3          | .*?connected.*?
+3          | .*?connection.+?error.*?
+1          | .*?disconnected.*?
+;
+
+with all options
+required_capability: categorize_options
+
+FROM sample_data
+  | STATS count=COUNT()
+       BY category=CATEGORIZE(message, {"analyzer": "whitespace", "similarity_threshold": 100, "output_format": "tokens"})
+  | SORT count DESC, category
+;
+
+count:long | category:keyword
+3          | Connection error
+1          | Connected to 10.1.0.1
+1          | Connected to 10.1.0.2
+1          | Connected to 10.1.0.3
+1          | Disconnected
+;

+ 9 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -1254,10 +1254,12 @@ public class EsqlCapabilities {
          * FUSE command
          */
         FUSE(Build.current().isSnapshot()),
+
         /**
          * Support improved behavior for LIKE operator when used with index fields.
          */
         LIKE_ON_INDEX_FIELDS,
+
         /**
          * Support avg with aggregate metric doubles
          */
@@ -1274,10 +1276,15 @@ public class EsqlCapabilities {
          */
         FAIL_IF_ALL_SHARDS_FAIL(Build.current().isSnapshot()),
 
-        /*
+        /**
          * Cosine vector similarity function
          */
-        COSINE_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot());
+        COSINE_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()),
+
+        /**
+         * Support for the options field of CATEGORIZE.
+         */
+        CATEGORIZE_OPTIONS;
 
         private final boolean enabled;
 

+ 107 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java

@@ -0,0 +1,107 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
+import org.elasticsearch.xpack.esql.core.expression.EntryExpression;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.Literal;
+import org.elasticsearch.xpack.esql.core.expression.MapExpression;
+import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.core.type.DataTypeConverter;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Consumer;
+
+import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull;
+
+public class Options {
+
+    public static Expression.TypeResolution resolve(
+        Expression options,
+        Source source,
+        TypeResolutions.ParamOrdinal paramOrdinal,
+        Map<String, DataType> allowedOptions
+    ) {
+        return resolve(options, source, paramOrdinal, allowedOptions, null);
+    }
+
+    public static Expression.TypeResolution resolve(
+        Expression options,
+        Source source,
+        TypeResolutions.ParamOrdinal paramOrdinal,
+        Map<String, DataType> allowedOptions,
+        Consumer<Map<String, Object>> verifyOptions
+    ) {
+        if (options != null) {
+            Expression.TypeResolution resolution = isNotNull(options, source.text(), paramOrdinal);
+            if (resolution.unresolved()) {
+                return resolution;
+            }
+            // MapExpression does not have a DataType associated with it
+            resolution = isMapExpression(options, source.text(), paramOrdinal);
+            if (resolution.unresolved()) {
+                return resolution;
+            }
+            try {
+                Map<String, Object> optionsMap = new HashMap<>();
+                populateMap((MapExpression) options, optionsMap, source, paramOrdinal, allowedOptions);
+                if (verifyOptions != null) {
+                    verifyOptions.accept(optionsMap);
+                }
+            } catch (InvalidArgumentException e) {
+                return new Expression.TypeResolution(e.getMessage());
+            }
+        }
+        return Expression.TypeResolution.TYPE_RESOLVED;
+    }
+
+    public static void populateMap(
+        final MapExpression options,
+        final Map<String, Object> optionsMap,
+        final Source source,
+        final TypeResolutions.ParamOrdinal paramOrdinal,
+        final Map<String, DataType> allowedOptions
+    ) throws InvalidArgumentException {
+        for (EntryExpression entry : options.entryExpressions()) {
+            Expression optionExpr = entry.key();
+            Expression valueExpr = entry.value();
+            Expression.TypeResolution resolution = isFoldable(optionExpr, source.text(), paramOrdinal).and(
+                isFoldable(valueExpr, source.text(), paramOrdinal)
+            );
+            if (resolution.unresolved()) {
+                throw new InvalidArgumentException(resolution.message());
+            }
+            Object optionExprLiteral = ((Literal) optionExpr).value();
+            Object valueExprLiteral = ((Literal) valueExpr).value();
+            String optionName = optionExprLiteral instanceof BytesRef br ? br.utf8ToString() : optionExprLiteral.toString();
+            String optionValue = valueExprLiteral instanceof BytesRef br ? br.utf8ToString() : valueExprLiteral.toString();
+            // validate the optionExpr is supported
+            DataType dataType = allowedOptions.get(optionName);
+            if (dataType == null) {
+                throw new InvalidArgumentException(
+                    format(null, "Invalid option [{}] in [{}], expected one of {}", optionName, source.text(), allowedOptions.keySet())
+                );
+            }
+            try {
+                optionsMap.put(optionName, DataTypeConverter.convert(optionValue, dataType));
+            } catch (InvalidArgumentException e) {
+                throw new InvalidArgumentException(
+                    format(null, "Invalid option [{}] in [{}], {}", optionName, source.text(), e.getMessage())
+                );
+            }
+        }
+    }
+}

+ 0 - 71
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java

@@ -7,7 +7,6 @@
 
 package org.elasticsearch.xpack.esql.expression.function.fulltext;
 
-import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.compute.lucene.LuceneQueryEvaluator.ShardConfig;
 import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator;
@@ -20,20 +19,15 @@ import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
 import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware;
 import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
 import org.elasticsearch.xpack.esql.common.Failures;
-import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
-import org.elasticsearch.xpack.esql.core.expression.EntryExpression;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
 import org.elasticsearch.xpack.esql.core.expression.FoldContext;
-import org.elasticsearch.xpack.esql.core.expression.Literal;
-import org.elasticsearch.xpack.esql.core.expression.MapExpression;
 import org.elasticsearch.xpack.esql.core.expression.Nullability;
 import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
 import org.elasticsearch.xpack.esql.core.expression.function.Function;
 import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
-import org.elasticsearch.xpack.esql.core.type.DataTypeConverter;
 import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
 import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
 import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
@@ -55,17 +49,12 @@ import org.elasticsearch.xpack.esql.score.ExpressionScoreMapper;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.Objects;
 import java.util.function.BiConsumer;
 import java.util.function.Predicate;
 
-import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
 import static org.elasticsearch.xpack.esql.common.Failure.fail;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
 
@@ -409,66 +398,6 @@ public abstract class FullTextFunction extends Function
         return new LuceneQueryScoreEvaluator.Factory(shardConfigs);
     }
 
-    protected static void populateOptionsMap(
-        final MapExpression options,
-        final Map<String, Object> optionsMap,
-        final TypeResolutions.ParamOrdinal paramOrdinal,
-        final String sourceText,
-        final Map<String, DataType> allowedOptions
-    ) throws InvalidArgumentException {
-        for (EntryExpression entry : options.entryExpressions()) {
-            Expression optionExpr = entry.key();
-            Expression valueExpr = entry.value();
-            TypeResolution resolution = isFoldable(optionExpr, sourceText, paramOrdinal).and(
-                isFoldable(valueExpr, sourceText, paramOrdinal)
-            );
-            if (resolution.unresolved()) {
-                throw new InvalidArgumentException(resolution.message());
-            }
-            Object optionExprLiteral = ((Literal) optionExpr).value();
-            Object valueExprLiteral = ((Literal) valueExpr).value();
-            String optionName = optionExprLiteral instanceof BytesRef br ? br.utf8ToString() : optionExprLiteral.toString();
-            String optionValue = valueExprLiteral instanceof BytesRef br ? br.utf8ToString() : valueExprLiteral.toString();
-            // validate the optionExpr is supported
-            DataType dataType = allowedOptions.get(optionName);
-            if (dataType == null) {
-                throw new InvalidArgumentException(
-                    format(null, "Invalid option [{}] in [{}], expected one of {}", optionName, sourceText, allowedOptions.keySet())
-                );
-            }
-            try {
-                optionsMap.put(optionName, DataTypeConverter.convert(optionValue, dataType));
-            } catch (InvalidArgumentException e) {
-                throw new InvalidArgumentException(format(null, "Invalid option [{}] in [{}], {}", optionName, sourceText, e.getMessage()));
-            }
-        }
-    }
-
-    protected TypeResolution resolveOptions(Expression options, TypeResolutions.ParamOrdinal paramOrdinal) {
-        if (options != null) {
-            TypeResolution resolution = isNotNull(options, sourceText(), paramOrdinal);
-            if (resolution.unresolved()) {
-                return resolution;
-            }
-            // MapExpression does not have a DataType associated with it
-            resolution = isMapExpression(options, sourceText(), paramOrdinal);
-            if (resolution.unresolved()) {
-                return resolution;
-            }
-
-            try {
-                resolvedOptions();
-            } catch (InvalidArgumentException e) {
-                return new TypeResolution(e.getMessage());
-            }
-        }
-        return TypeResolution.TYPE_RESOLVED;
-    }
-
-    protected Map<String, Object> resolvedOptions() throws InvalidArgumentException {
-        return Map.of();
-    }
-
     // TODO: this should likely be replaced by calls to FieldAttribute#fieldName; the MultiTypeEsField case looks
     // wrong if `fieldAttribute` is a subfield, e.g. `parent.child` - multiTypeEsField#getName will just return `child`.
     public static String getNameFromFieldAttribute(FieldAttribute fieldAttribute) {

+ 5 - 7
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java

@@ -33,6 +33,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecyc
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.MapParam;
 import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
 import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
@@ -298,7 +299,9 @@ public class Match extends FullTextFunction implements OptionalArgument, PostAna
 
     @Override
     protected TypeResolution resolveParams() {
-        return resolveField().and(resolveQuery()).and(resolveOptions(options(), THIRD)).and(checkParamCompatibility());
+        return resolveField().and(resolveQuery())
+            .and(Options.resolve(options(), source(), THIRD, ALLOWED_OPTIONS))
+            .and(checkParamCompatibility());
     }
 
     private TypeResolution resolveField() {
@@ -342,11 +345,6 @@ public class Match extends FullTextFunction implements OptionalArgument, PostAna
         return new TypeResolution(formatIncompatibleTypesMessage(fieldType, queryType, sourceText()));
     }
 
-    @Override
-    protected Map<String, Object> resolvedOptions() {
-        return matchQueryOptions();
-    }
-
     private Map<String, Object> matchQueryOptions() throws InvalidArgumentException {
         if (options() == null) {
             return Map.of(LENIENT_FIELD.getPreferredName(), true);
@@ -356,7 +354,7 @@ public class Match extends FullTextFunction implements OptionalArgument, PostAna
         // Match is lenient by default to avoid failing on incompatible types
         matchOptions.put(LENIENT_FIELD.getPreferredName(), true);
 
-        populateOptionsMap((MapExpression) options(), matchOptions, SECOND, sourceText(), ALLOWED_OPTIONS);
+        Options.populateMap((MapExpression) options(), matchOptions, source(), SECOND, ALLOWED_OPTIONS);
         return matchOptions;
     }
 

+ 3 - 7
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java

@@ -30,6 +30,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecyc
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.MapParam;
 import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
 import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
@@ -187,7 +188,7 @@ public class MatchPhrase extends FullTextFunction implements OptionalArgument, P
 
     @Override
     protected TypeResolution resolveParams() {
-        return resolveField().and(resolveQuery()).and(resolveOptions(options(), THIRD));
+        return resolveField().and(resolveQuery()).and(Options.resolve(options(), source(), THIRD, ALLOWED_OPTIONS));
     }
 
     private TypeResolution resolveField() {
@@ -200,18 +201,13 @@ public class MatchPhrase extends FullTextFunction implements OptionalArgument, P
         );
     }
 
-    @Override
-    protected Map<String, Object> resolvedOptions() throws InvalidArgumentException {
-        return matchPhraseQueryOptions();
-    }
-
     private Map<String, Object> matchPhraseQueryOptions() throws InvalidArgumentException {
         if (options() == null) {
             return Map.of();
         }
 
         Map<String, Object> matchPhraseOptions = new HashMap<>();
-        populateOptionsMap((MapExpression) options(), matchPhraseOptions, SECOND, sourceText(), ALLOWED_OPTIONS);
+        Options.populateMap((MapExpression) options(), matchPhraseOptions, source(), SECOND, ALLOWED_OPTIONS);
         return matchPhraseOptions;
     }
 

+ 2 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java

@@ -29,6 +29,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecyc
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.MapParam;
 import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
 import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
@@ -368,7 +369,7 @@ public class MultiMatch extends FullTextFunction implements OptionalArgument, Po
             return options;
         }
 
-        Match.populateOptionsMap((MapExpression) options(), options, THIRD, sourceText(), OPTIONS);
+        Options.populateMap((MapExpression) options(), options, source(), THIRD, OPTIONS);
         return options;
     }
 

+ 3 - 7
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java

@@ -26,6 +26,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecyc
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.MapParam;
 import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
 import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
@@ -321,18 +322,13 @@ public class QueryString extends FullTextFunction implements OptionalArgument {
         }
 
         Map<String, Object> matchOptions = new HashMap<>();
-        populateOptionsMap((MapExpression) options(), matchOptions, SECOND, sourceText(), ALLOWED_OPTIONS);
+        Options.populateMap((MapExpression) options(), matchOptions, source(), SECOND, ALLOWED_OPTIONS);
         return matchOptions;
     }
 
-    @Override
-    protected Map<String, Object> resolvedOptions() {
-        return queryStringOptions();
-    }
-
     @Override
     protected TypeResolution resolveParams() {
-        return resolveQuery().and(resolveOptions(options(), SECOND));
+        return resolveQuery().and(Options.resolve(options(), source(), SECOND, ALLOWED_OPTIONS));
     }
 
     @Override

+ 111 - 8
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java

@@ -7,13 +7,18 @@
 
 package org.elasticsearch.xpack.esql.expression.function.grouping;
 
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.compute.aggregation.blockhash.BlockHash.CategorizeDef;
+import org.elasticsearch.compute.aggregation.blockhash.BlockHash.CategorizeDef.OutputFormat;
 import org.elasticsearch.license.XPackLicenseState;
 import org.elasticsearch.xpack.esql.LicenseAware;
 import org.elasticsearch.xpack.esql.SupportsObservabilityTier;
+import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.MapExpression;
 import org.elasticsearch.xpack.esql.core.expression.Nullability;
 import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
 import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -21,16 +26,29 @@ import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.expression.function.Example;
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.FunctionType;
+import org.elasticsearch.xpack.esql.expression.function.MapParam;
+import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
 import org.elasticsearch.xpack.ml.MachineLearning;
 
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
 
+import static java.util.Map.entry;
+import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
+import static org.elasticsearch.compute.aggregation.blockhash.BlockHash.CategorizeDef.OutputFormat.REGEX;
 import static org.elasticsearch.xpack.esql.SupportsObservabilityTier.ObservabilityTier.COMPLETE;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
+import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER;
+import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
 
 /**
  * Categorizes text messages.
@@ -42,14 +60,23 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isStr
  * </p>
  */
 @SupportsObservabilityTier(tier = COMPLETE)
-public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction implements LicenseAware {
+public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction implements OptionalArgument, LicenseAware {
     public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
         Expression.class,
         "Categorize",
         Categorize::new
     );
 
+    private static final String ANALYZER = "analyzer";
+    private static final String OUTPUT_FORMAT = "output_format";
+    private static final String SIMILARITY_THRESHOLD = "similarity_threshold";
+
+    private static final Map<String, DataType> ALLOWED_OPTIONS = new TreeMap<>(
+        Map.ofEntries(entry(ANALYZER, KEYWORD), entry(OUTPUT_FORMAT, KEYWORD), entry(SIMILARITY_THRESHOLD, INTEGER))
+    );
+
     private final Expression field;
+    private final Expression options;
 
     @FunctionInfo(
         returnType = "keyword",
@@ -70,21 +97,56 @@ public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction
     )
     public Categorize(
         Source source,
-        @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field
-
+        @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field,
+        @MapParam(
+            name = "options",
+            description = "(Optional) Categorize additional options as <<esql-function-named-params,function named parameters>>.",
+            params = {
+                @MapParam.MapParamEntry(
+                    name = ANALYZER,
+                    type = "keyword",
+                    valueHint = { "standard" },
+                    description = "Analyzer used to convert the field into tokens for text categorization."
+                ),
+                @MapParam.MapParamEntry(
+                    name = OUTPUT_FORMAT,
+                    type = "keyword",
+                    valueHint = { "regex", "tokens" },
+                    description = "The output format of the categories. Defaults to regex."
+                ),
+                @MapParam.MapParamEntry(
+                    name = SIMILARITY_THRESHOLD,
+                    type = "integer",
+                    valueHint = { "70" },
+                    description = "The minimum percentage of token weight that must match for text to be added to the category bucket. "
+                        + "Must be between 1 and 100. The larger the value the narrower the categories. "
+                        + "Larger values will increase memory usage and create narrower categories. Defaults to 70."
+                ), },
+            optional = true
+        ) Expression options
     ) {
-        super(source, List.of(field));
+        super(source, options == null ? List.of(field) : List.of(field, options));
         this.field = field;
+        this.options = options;
     }
 
     private Categorize(StreamInput in) throws IOException {
-        this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class));
+        this(
+            Source.readFrom((PlanStreamInput) in),
+            in.readNamedWriteable(Expression.class),
+            in.getTransportVersion().onOrAfter(TransportVersions.ESQL_CATEGORIZE_OPTIONS)
+                ? in.readOptionalNamedWriteable(Expression.class)
+                : null
+        );
     }
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         source().writeTo(out);
         out.writeNamedWriteable(field);
+        if (out.getTransportVersion().onOrAfter(TransportVersions.ESQL_CATEGORIZE_OPTIONS)) {
+            out.writeOptionalNamedWriteable(options);
+        }
     }
 
     @Override
@@ -107,7 +169,48 @@ public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction
 
     @Override
     protected TypeResolution resolveType() {
-        return isString(field(), sourceText(), DEFAULT);
+        return isString(field(), sourceText(), DEFAULT).and(
+            Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS, this::verifyOptions)
+        );
+    }
+
+    private void verifyOptions(Map<String, Object> optionsMap) {
+        if (options == null) {
+            return;
+        }
+        Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD);
+        if (similarityThreshold != null) {
+            if (similarityThreshold <= 0 || similarityThreshold > 100) {
+                throw new InvalidArgumentException(
+                    format("invalid similarity threshold [{}], expecting a number between 1 and 100, inclusive", similarityThreshold)
+                );
+            }
+        }
+        String outputFormat = (String) optionsMap.get(OUTPUT_FORMAT);
+        if (outputFormat != null) {
+            try {
+                OutputFormat.valueOf(outputFormat.toUpperCase(Locale.ROOT));
+            } catch (IllegalArgumentException e) {
+                throw new InvalidArgumentException(
+                    format(null, "invalid output format [{}], expecting one of [REGEX, TOKENS]", outputFormat)
+                );
+            }
+        }
+    }
+
+    public CategorizeDef categorizeDef() {
+        Map<String, Object> optionsMap = new HashMap<>();
+        if (options != null) {
+            Options.populateMap((MapExpression) options, optionsMap, source(), SECOND, ALLOWED_OPTIONS);
+        }
+        Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD);
+        String outputFormatString = (String) optionsMap.get(OUTPUT_FORMAT);
+        OutputFormat outputFormat = outputFormatString == null ? null : OutputFormat.valueOf(outputFormatString.toUpperCase(Locale.ROOT));
+        return new CategorizeDef(
+            (String) optionsMap.get("analyzer"),
+            outputFormat == null ? REGEX : outputFormat,
+            similarityThreshold == null ? 70 : similarityThreshold
+        );
     }
 
     @Override
@@ -117,12 +220,12 @@ public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction
 
     @Override
     public Categorize replaceChildren(List<Expression> newChildren) {
-        return new Categorize(source(), newChildren.get(0));
+        // The options child is optional: present only when the user supplied a map expression.
+        return new Categorize(source(), newChildren.get(0), newChildren.size() > 1 ? newChildren.get(1) : null);
     }
 
     @Override
     protected NodeInfo<? extends Expression> info() {
-        return NodeInfo.create(this, Categorize::new, field, options);
+        // Options must be part of the node info so tree transforms/equality see them.
+        return NodeInfo.create(this, Categorize::new, field, options);
     }
 
     public Expression field() {

+ 4 - 34
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java

@@ -30,6 +30,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecyc
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.MapParam;
 import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction;
 import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
@@ -53,10 +54,10 @@ import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD;
 import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD;
 import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
@@ -198,7 +199,7 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun
 
     @Override
     protected TypeResolution resolveParams() {
-        return resolveField().and(resolveQuery()).and(resolveK()).and(resolveOptions());
+        // Option validation is delegated to the shared Options helper (replaces the local resolveOptions()).
+        return resolveField().and(resolveQuery()).and(resolveK()).and(Options.resolve(options(), source(), FOURTH, ALLOWED_OPTIONS));
     }
 
     private TypeResolution resolveField() {
@@ -221,37 +222,6 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun
             .and(isNotNull(k(), sourceText(), THIRD));
     }
 
-    private TypeResolution resolveOptions() {
-        if (options() != null) {
-            TypeResolution resolution = isNotNull(options(), sourceText(), TypeResolutions.ParamOrdinal.FOURTH);
-            if (resolution.unresolved()) {
-                return resolution;
-            }
-            // MapExpression does not have a DataType associated with it
-            resolution = isMapExpression(options(), sourceText(), TypeResolutions.ParamOrdinal.FOURTH);
-            if (resolution.unresolved()) {
-                return resolution;
-            }
-
-            try {
-                knnQueryOptions();
-            } catch (InvalidArgumentException e) {
-                return new TypeResolution(e.getMessage());
-            }
-        }
-        return TypeResolution.TYPE_RESOLVED;
-    }
-
-    private Map<String, Object> knnQueryOptions() throws InvalidArgumentException {
-        if (options() == null) {
-            return Map.of();
-        }
-
-        Map<String, Object> matchOptions = new HashMap<>();
-        populateOptionsMap((MapExpression) options(), matchOptions, TypeResolutions.ParamOrdinal.FOURTH, sourceText(), ALLOWED_OPTIONS);
-        return matchOptions;
-    }
-
     @Override
     public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
         return new Knn(source(), field(), query(), k(), options(), queryBuilder, filterExpressions());
@@ -307,7 +277,7 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun
     private Map<String, Object> queryOptions() throws InvalidArgumentException {
         Map<String, Object> options = new HashMap<>();
         if (options() != null) {
-            populateOptionsMap((MapExpression) options(), options, TypeResolutions.ParamOrdinal.FOURTH, sourceText(), ALLOWED_OPTIONS);
+            // Shared helper validates keys against ALLOWED_OPTIONS while populating the map.
+            Options.populateMap((MapExpression) options(), options, source(), FOURTH, ALLOWED_OPTIONS);
         }
         return options;
     }

+ 4 - 3
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java

@@ -10,6 +10,7 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical;
 import org.elasticsearch.xpack.esql.core.expression.Alias;
 import org.elasticsearch.xpack.esql.core.expression.Attribute;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.MapExpression;
 import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
 import org.elasticsearch.xpack.esql.core.util.Holder;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
@@ -137,13 +138,13 @@ public final class ReplaceAggregateNestedExpressionWithEval extends OptimizerRul
         List<Expression> newChildren = new ArrayList<>(gf.children().size());
 
         for (Expression ex : gf.children()) {
-            if (ex instanceof Attribute == false) { // TODO: foldables shouldn't require eval'ing either
+            if (ex instanceof Attribute || ex instanceof MapExpression) {
+                newChildren.add(ex);
+            } else { // TODO: foldables shouldn't require eval'ing either
                 var alias = new Alias(ex.source(), syntheticName(ex, gf, counter++), ex, null, true);
                 evals.add(alias);
                 newChildren.add(alias.toAttribute());
                 childrenChanged = true;
-            } else {
-                newChildren.add(ex);
             }
         }
 

+ 6 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java

@@ -343,8 +343,12 @@ public abstract class AbstractPhysicalOperationProviders implements PhysicalOper
             if (channel == null) {
                 throw new EsqlIllegalArgumentException("planned to use ordinals but tried to use the hash instead");
             }
-
-            return new BlockHash.GroupSpec(channel, elementType(), Alias.unwrap(expression) instanceof Categorize, null);
+            return new BlockHash.GroupSpec(
+                channel,
+                elementType(),
+                Alias.unwrap(expression) instanceof Categorize categorize ? categorize.categorizeDef() : null,
+                null
+            );
         }
 
         ElementType elementType() {

+ 51 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java

@@ -1972,6 +1972,57 @@ public class VerifierTests extends ESTestCase {
         );
     }
 
+    // Non-map second argument and unknown option keys must both be rejected at verification time.
+    public void testCategorizeInvalidOptionsField() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        assertEquals(
+            "1:31: second argument of [CATEGORIZE(last_name, first_name)] must be a map expression, received [first_name]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, first_name)")
+        );
+        assertEquals(
+            "1:31: Invalid option [blah] in [CATEGORIZE(last_name, { \"blah\": 42 })], "
+                + "expected one of [analyzer, output_format, similarity_threshold]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"blah\": 42 })")
+        );
+    }
+
+    // output_format accepts REGEX/TOKENS in any casing; anything else (including non-strings) is rejected.
+    public void testCategorizeOptionOutputFormat() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"regex\" })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"REGEX\" })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"tokens\" })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"ToKeNs\" })");
+        assertEquals(
+            "1:31: invalid output format [blah], expecting one of [REGEX, TOKENS]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"blah\" })")
+        );
+        assertEquals(
+            "1:31: invalid output format [42], expecting one of [REGEX, TOKENS]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": 42 })")
+        );
+    }
+
+    // similarity_threshold is validated to the inclusive range [1, 100]; non-integer values fail the cast.
+    public void testCategorizeOptionSimilarityThreshold() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 1 })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 100 })");
+        assertEquals(
+            "1:31: invalid similarity threshold [0], expecting a number between 1 and 100, inclusive",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 0 })")
+        );
+        assertEquals(
+            "1:31: invalid similarity threshold [101], expecting a number between 1 and 100, inclusive",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 101 })")
+        );
+        assertEquals(
+            "1:31: Invalid option [similarity_threshold] in [CATEGORIZE(last_name, { \"similarity_threshold\": \"blah\" })], "
+                + "cannot cast [blah] to [integer]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": \"blah\" })")
+        );
+    }
+
     public void testChangePoint() {
         assumeTrue("change_point must be enabled", EsqlCapabilities.Cap.CHANGE_POINT.isEnabled());
         var airports = AnalyzerTestUtils.analyzer(loadMapping("mapping-airports.json", "airports"));

+ 1 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeErrorTests.java

@@ -27,7 +27,7 @@ public class CategorizeErrorTests extends ErrorsForCasesWithoutExamplesTestCase
 
     @Override
     protected Expression build(Source source, List<Expression> args) {
-        return new Categorize(source, args.get(0));
+        // Optional second argument carries the options map expression.
+        return new Categorize(source, args.get(0), args.size() > 1 ? args.get(1) : null);
     }
 
     @Override

+ 1 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java

@@ -61,7 +61,7 @@ public class CategorizeTests extends AbstractScalarFunctionTestCase {
 
     @Override
     protected Expression build(Source source, List<Expression> args) {
-        return new Categorize(source, args.get(0));
+        // Optional second argument carries the options map expression.
+        return new Categorize(source, args.get(0), args.size() > 1 ? args.get(1) : null);
     }
 
     @Override

+ 1 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java

@@ -269,7 +269,7 @@ public class FoldNullTests extends ESTestCase {
     }
 
     public void testNullCategorizeGroupingNotFolded() {
-        Categorize categorize = new Categorize(EMPTY, NULL);
+        // Passing NULL for the new options argument exercises the no-options path.
+        Categorize categorize = new Categorize(EMPTY, NULL, NULL);
         assertEquals(categorize, foldNull(categorize));
     }
 

+ 7 - 0
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/SerializableTokenListCategory.java

@@ -162,6 +162,13 @@ public class SerializableTokenListCategory implements Writeable {
         return Arrays.stream(keyTokenIndexes).mapToObj(index -> baseTokens[index]).toArray(BytesRef[]::new);
     }
 
+    /**
+     * Returns the category's key tokens decoded to UTF-8 and joined by single spaces.
+     * Presumably added to back the new CATEGORIZE "tokens" output format — confirm against callers.
+     */
+    public String getKeyTokensString() {
+        return Arrays.stream(keyTokenIndexes)
+            .mapToObj(index -> baseTokens[index])
+            .map(BytesRef::utf8ToString)
+            .collect(Collectors.joining(" "));
+    }
+
     public String getRegex() {
         if (keyTokenIndexes.length == 0 || orderedCommonTokenBeginIndex == orderedCommonTokenEndIndex) {
             return ".*";