Browse Source

ES|QL categorize docs (#117827) (#117838)

* Move ES|QL categorize out of snapshot functions

* Categorize docs

* Add experimental + fix docs

* Add experimental + fix docs
Jan Kuipers 10 months ago
parent
commit
5b220ddc88

+ 1 - 1
docs/reference/esql/functions/description/categorize.asciidoc

@@ -2,4 +2,4 @@
 
 *Description*
 
-Categorizes text messages.
+Groups text messages into categories of similarly formatted text values.

+ 14 - 0
docs/reference/esql/functions/examples/categorize.asciidoc

@@ -0,0 +1,14 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Example*
+
+This example groups server log messages into categories and aggregates their counts.
+[source.merge.styled,esql]
+----
+include::{esql-specs}/docs.csv-spec[tag=docsCategorize]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/docs.csv-spec[tag=docsCategorize-result]
+|===
+

+ 2 - 0
docs/reference/esql/functions/grouping-functions.asciidoc

@@ -9,6 +9,8 @@ The <<esql-stats-by>> command supports these grouping functions:
 
 // tag::group_list[]
 * <<esql-bucket>>
+* experimental:[] <<esql-categorize>>
 // end::group_list[]
 
 include::layout/bucket.asciidoc[]
+include::layout/categorize.asciidoc[]

+ 6 - 3
docs/reference/esql/functions/kibana/definition/categorize.json

@@ -2,7 +2,7 @@
   "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
   "type" : "eval",
   "name" : "categorize",
-  "description" : "Categorizes text messages.",
+  "description" : "Groups text messages into categories of similarly formatted text values.",
   "signatures" : [
     {
       "params" : [
@@ -29,6 +29,9 @@
       "returnType" : "keyword"
     }
   ],
-  "preview" : false,
-  "snapshot_only" : true
+  "examples" : [
+    "FROM sample_data\n| STATS count=COUNT() BY category=CATEGORIZE(message)"
+  ],
+  "preview" : true,
+  "snapshot_only" : false
 }

+ 5 - 1
docs/reference/esql/functions/kibana/docs/categorize.md

@@ -3,5 +3,9 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ
 -->
 
 ### CATEGORIZE
-Categorizes text messages.
+Groups text messages into categories of similarly formatted text values.
 
+```
+FROM sample_data
+| STATS count=COUNT() BY category=CATEGORIZE(message)
+```

+ 3 - 0
docs/reference/esql/functions/layout/categorize.asciidoc

@@ -4,6 +4,8 @@
 [[esql-categorize]]
 === `CATEGORIZE`
 
+preview::["Do not use in production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."]
+
 *Syntax*
 
 [.text-center]
@@ -12,3 +14,4 @@ image::esql/functions/signature/categorize.svg[Embedded,opts=inline]
 include::../parameters/categorize.asciidoc[]
 include::../description/categorize.asciidoc[]
 include::../types/categorize.asciidoc[]
+include::../examples/categorize.asciidoc[]

+ 17 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec

@@ -676,3 +676,20 @@ Ahmedabad     | 9                 | 72
 Bangalore     | 9                 | 72
 // end::bitLength-result[]
 ;
+
+docsCategorize
+required_capability: categorize_v4
+// tag::docsCategorize[]
+FROM sample_data
+| STATS count=COUNT() BY category=CATEGORIZE(message)
+// end::docsCategorize[]
+| SORT category
+;
+
+// tag::docsCategorize-result[]
+count:long | category:keyword
+         3 | .*?Connected.+?to.*?
+         3 | .*?Connection.+?error.*?
+         1 | .*?Disconnected.*?
+// end::docsCategorize-result[]
+;

+ 3 - 2
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

@@ -265,7 +265,9 @@ public class EsqlFunctionRegistry {
     private static FunctionDefinition[][] functions() {
         return new FunctionDefinition[][] {
             // grouping functions
-            new FunctionDefinition[] { def(Bucket.class, Bucket::new, "bucket", "bin"), },
+            new FunctionDefinition[] {
+                def(Bucket.class, Bucket::new, "bucket", "bin"),
+                def(Categorize.class, Categorize::new, "categorize") },
             // aggregate functions
             // since they declare two public constructors - one with filter (for nested where) and one without
             // use casting to disambiguate between the two
@@ -411,7 +413,6 @@ public class EsqlFunctionRegistry {
                 // The delay() function is for debug/snapshot environments only and should never be enabled in a non-snapshot build.
                 // This is an experimental function and can be removed without notice.
                 def(Delay.class, Delay::new, "delay"),
-                def(Categorize.class, Categorize::new, "categorize"),
                 def(Kql.class, Kql::new, "kql"),
                 def(Rate.class, Rate::withUnresolvedTimestamp, "rate") } };
     }

+ 13 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java

@@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.Example;
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
@@ -44,10 +45,21 @@ public class Categorize extends GroupingFunction implements Validatable {
 
     private final Expression field;
 
-    @FunctionInfo(returnType = "keyword", description = "Categorizes text messages.")
+    @FunctionInfo(
+        returnType = "keyword",
+        description = "Groups text messages into categories of similarly formatted text values.",
+        examples = {
+            @Example(
+                file = "docs",
+                tag = "docsCategorize",
+                description = "This example categorizes server logs messages into categories and aggregates their counts. "
+            ) },
+        preview = true
+    )
     public Categorize(
         Source source,
         @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field
+
     ) {
         super(source, List.of(field));
         this.field = field;