Browse Source

[DOCS] Add ES|QL 'getting started' code snippets to CSV tests (#102653)

* [DOCS] Add ES|QL 'getting started' code snippets to CSV tests

* Change dots in column names into underscores

* Add LIMIT 0 to ENRICH test

* Move code snippets out of docs.csv-spec

* Replace code snippets with includes

* Add missing semicolon
Abdon Pijpelink 1 year ago
parent
commit
89faf4497e

+ 20 - 50
docs/reference/esql/esql-get-started.asciidoc

@@ -39,7 +39,7 @@ This query returns up to 500 documents from the `sample_data` index:
 
 [source,esql]
 ----
-FROM sample_data
+include::{esql-specs}/docs.csv-spec[tag=gs-from]
 ----
 
 Each column corresponds to a field, and can be accessed by the name of that
@@ -52,7 +52,7 @@ previous one:
 
 [source,esql]
 ----
-from sample_data
+include::{esql-specs}/docs.csv-spec[tag=gs-from-lowercase]
 ----
 ====
 
@@ -73,8 +73,7 @@ that are returned, up to a maximum of 10,000 rows:
 
 [source,esql]
 ----
-FROM sample_data
-| LIMIT 3
+include::{esql-specs}/docs.csv-spec[tag=gs-limit]
 ----
 
 [TIP]
@@ -84,7 +83,7 @@ have to. The following query is identical to the previous one:
 
 [source,esql]
 ----
-FROM sample_data | LIMIT 3
+include::{esql-specs}/docs.csv-spec[tag=gs-limit-one-line]
 ----
 ====
 
@@ -100,8 +99,7 @@ sort rows on one or more columns:
 
 [source,esql]
 ----
-FROM sample_data
-| SORT @timestamp DESC
+include::{esql-specs}/docs.csv-spec[tag=gs-sort]
 ----
 
 [discrete]
@@ -113,16 +111,14 @@ events with a duration longer than 5ms:
 
 [source,esql]
 ----
-FROM sample_data
-| WHERE event.duration > 5000000
+include::{esql-specs}/where.csv-spec[tag=gs-where]
 ----
 
 `WHERE` supports several <<esql-operators,operators>>. For example, you can use <<esql-like-operator>> to run a wildcard query against the `message` column:
 
 [source,esql]
 ----
-FROM sample_data
-| WHERE message LIKE "Connected*"
+include::{esql-specs}/where-like.csv-spec[tag=gs-like]
 ----
 
 [discrete]
@@ -149,9 +145,7 @@ result set to 3 rows:
 
 [source,esql]
 ----
-FROM sample_data
-| SORT @timestamp DESC
-| LIMIT 3
+include::{esql-specs}/docs.csv-spec[tag=gs-chaining]
 ----
 
 NOTE: The order of processing commands is important. First limiting the result
@@ -169,8 +163,7 @@ other words: `event_duration` converted from nanoseconds to milliseconds.
 
 [source,esql]
 ----
-FROM sample_data
-| EVAL duration_ms = event.duration / 1000000.0
+include::{esql-specs}/eval.csv-spec[tag=gs-eval]
 ----
 
 `EVAL` supports several <<esql-functions,functions>>. For example, to round a
@@ -179,8 +172,7 @@ number to the closest number with the specified number of digits, use the
 
 [source,esql]
 ----
-FROM sample_data
-| EVAL duration_ms = ROUND(event.duration / 1000000.0, 1)
+include::{esql-specs}/eval.csv-spec[tag=gs-round]
 ----
 
 [discrete]
@@ -193,16 +185,14 @@ example, the median duration:
 
 [source,esql]
 ----
-FROM sample_data
-| STATS median_duration = MEDIAN(event.duration)
+include::{esql-specs}/stats.csv-spec[tag=gs-stats]
 ----
 
 You can calculate multiple stats with one command:
 
 [source,esql]
 ----
-FROM sample_data
-| STATS median_duration = MEDIAN(event.duration), max_duration = MAX(event.duration)
+include::{esql-specs}/stats.csv-spec[tag=gs-two-stats]
 ----
 
 Use `BY` to group calculated stats by one or more columns. For example, to
@@ -210,8 +200,7 @@ calculate the median duration per client IP:
 
 [source,esql]
 ----
-FROM sample_data
-| STATS median_duration = MEDIAN(event.duration) BY client.ip
+include::{esql-specs}/stats.csv-spec[tag=gs-stats-by]
 ----
 
 [discrete]
@@ -227,9 +216,7 @@ For example, to create hourly buckets for the data on October 23rd:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp
-| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+include::{esql-specs}/date.csv-spec[tag=gs-auto_bucket]
 ----
 
 Combine `AUTO_BUCKET` with <<esql-stats-by>> to create a histogram. For example,
@@ -237,20 +224,14 @@ to count the number of events per hour:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, event.duration
-| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
-| STATS COUNT(*) BY bucket
+include::{esql-specs}/date.csv-spec[tag=gs-auto_bucket-stats-by]
 ----
 
 Or the median duration per hour:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, event.duration
-| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
-| STATS median_duration = MEDIAN(event.duration) BY bucket
+include::{esql-specs}/date.csv-spec[tag=gs-auto_bucket-stats-by-median]
 ----
 
 [discrete]
@@ -273,10 +254,7 @@ command:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, client.ip, event.duration
-| EVAL client.ip = TO_STRING(client.ip)
-| ENRICH clientip_policy ON client.ip WITH env
+include::{esql-specs}/enrich.csv-spec[tag=gs-enrich]
 ----
 
 You can use the new `env` column that's added by the `ENRICH` command in
@@ -285,11 +263,7 @@ environment:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, client.ip, event.duration
-| EVAL client.ip = TO_STRING(client.ip)
-| ENRICH clientip_policy ON client.ip WITH env
-| STATS median_duration = MEDIAN(event.duration) BY env
+include::{esql-specs}/enrich.csv-spec[tag=gs-enrich-stats-by]
 ----
 
 For more about data enrichment with {esql}, refer to <<esql-enrich-data>>.
@@ -321,8 +295,7 @@ string, you can use the following `DISSECT` command:
 
 [source,esql]
 ----
-FROM sample_data
-| DISSECT message "Connected to %{server.ip}"
+include::{esql-specs}/dissect.csv-spec[tag=gs-dissect]
 ----
 
 This adds a `server_ip` column to those rows that have a `message` that matches
@@ -334,10 +307,7 @@ has accepted:
 
 [source,esql]
 ----
-FROM sample_data
-| WHERE STARTS_WITH(message, "Connected to")
-| DISSECT message "Connected to %{server.ip}"
-| STATS COUNT(*) BY server.ip
+include::{esql-specs}/dissect.csv-spec[tag=gs-dissect-stats-by]
 ----
 
 For more about data processing with {esql}, refer to

+ 9 - 2
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

@@ -52,6 +52,8 @@ public class CsvTestsDataLoader {
     private static final TestsDataset APPS = new TestsDataset("apps", "mapping-apps.json", "apps.csv");
     private static final TestsDataset LANGUAGES = new TestsDataset("languages", "mapping-languages.json", "languages.csv");
     private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs", "mapping-ul_logs.json", "ul_logs.csv");
+    private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data", "mapping-sample_data.json", "sample_data.csv");
+    private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv");
     private static final TestsDataset AIRPORTS = new TestsDataset("airports", "mapping-airports.json", "airports.csv");
     private static final TestsDataset AIRPORTS_WEB = new TestsDataset("airports_web", "mapping-airports_web.json", "airports_web.csv");
 
@@ -66,15 +68,20 @@ public class CsvTestsDataLoader {
         LANGUAGES,
         UL_LOGS.indexName,
         UL_LOGS,
+        SAMPLE_DATA.indexName,
+        SAMPLE_DATA,
+        CLIENT_IPS.indexName,
+        CLIENT_IPS,
         AIRPORTS.indexName,
         AIRPORTS,
         AIRPORTS_WEB.indexName,
         AIRPORTS_WEB
     );
 
-    private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enricy-policy-languages.json");
+    private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
+    private static final EnrichConfig CLIENT_IPS_ENRICH = new EnrichConfig("clientip_policy", "enrich-policy-clientips.json");
 
-    public static final List<EnrichConfig> ENRICH_POLICIES = List.of(LANGUAGES_ENRICH);
+    public static final List<EnrichConfig> ENRICH_POLICIES = List.of(LANGUAGES_ENRICH, CLIENT_IPS_ENRICH);
 
     /**
      * <p>

+ 6 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips.csv

@@ -0,0 +1,6 @@
+client_ip:keyword,env:keyword
+172.21.0.5,Development
+172.21.2.113,QA
+172.21.2.162,QA
+172.21.3.15,Production
+172.21.3.16,Production

+ 42 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec

@@ -725,3 +725,45 @@ birth_date:datetime
 1952-02-27T00:00:00.000Z
 1953-04-21T00:00:00.000Z
 ;
+
+docsGettingStartedAutoBucket
+// tag::gs-auto_bucket[]
+FROM sample_data
+| KEEP @timestamp
+| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+// end::gs-auto_bucket[]
+| LIMIT 0
+;
+
+@timestamp:date | bucket:date
+;
+
+docsGettingStartedAutoBucketStatsBy
+// tag::gs-auto_bucket-stats-by[]
+FROM sample_data
+| KEEP @timestamp, event_duration
+| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+| STATS COUNT(*) BY bucket
+// end::gs-auto_bucket-stats-by[]
+| SORT bucket
+;
+
+COUNT(*):long | bucket:date
+2              |2023-10-23T12:00:00.000Z
+5              |2023-10-23T13:00:00.000Z
+;
+
+docsGettingStartedAutoBucketStatsByMedian
+// tag::gs-auto_bucket-stats-by-median[]
+FROM sample_data
+| KEEP @timestamp, event_duration
+| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+| STATS median_duration = MEDIAN(event_duration) BY bucket
+// end::gs-auto_bucket-stats-by-median[]
+| SORT bucket
+;
+
+median_duration:double | bucket:date
+3107561.0              |2023-10-23T12:00:00.000Z
+1756467.0              |2023-10-23T13:00:00.000Z
+;

+ 27 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec

@@ -159,6 +159,33 @@ emp_no:integer | a:keyword            | b:keyword         | c:keyword
 10006          | [Principal, Senior]  | [Support, Team]   | [Engineer, Lead]
 ;
 
+docsGettingStartedDissect
+// tag::gs-dissect[]
+FROM sample_data
+| DISSECT message "Connected to %{server_ip}"
+// end::gs-dissect[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword | server_ip:keyword
+;
+
+docsGettingStartedDissectStatsBy
+// tag::gs-dissect-stats-by[]
+FROM sample_data
+| WHERE STARTS_WITH(message, "Connected to")
+| DISSECT message "Connected to %{server_ip}"
+| STATS COUNT(*) BY server_ip
+// end::gs-dissect-stats-by[]
+| SORT server_ip
+;
+
+COUNT(*):long  | server_ip:keyword
+1              |10.1.0.1       
+1              |10.1.0.2       
+1              |10.1.0.3      
+;
+
 emptyPattern#[skip:-8.11.99]
 ROW a="b c d"| DISSECT a "%{b} %{} %{d}";
 

+ 65 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec

@@ -650,4 +650,68 @@ FROM employees
 first_name:keyword  | last_name:keyword
 Alejandro      |McAlpine     
 // end::rlike-result[]
-;
+;
+
+docsGettingStartedFrom
+// tag::gs-from[]
+FROM sample_data
+// end::gs-from[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedFromLowercase
+// tag::gs-from-lowercase[]
+from sample_data
+// end::gs-from-lowercase[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedLimit
+// tag::gs-limit[]
+FROM sample_data
+| LIMIT 3
+// end::gs-limit[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedLimitOneLine
+// tag::gs-limit-one-line[]
+FROM sample_data | LIMIT 3
+// end::gs-limit-one-line[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedSort
+// tag::gs-sort[]
+FROM sample_data
+| SORT @timestamp DESC
+// end::gs-sort[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedChaining
+// tag::gs-chaining[]
+FROM sample_data
+| SORT @timestamp DESC
+| LIMIT 3
+// end::gs-chaining[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;

+ 7 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-policy-clientips.json

@@ -0,0 +1,7 @@
+{
+  "match": {
+    "indices": "clientips",
+    "match_field": "client_ip",
+    "enrich_fields": ["env"]
+  }
+}

+ 0 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/enricy-policy-languages.json → x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-policy-languages.json


+ 27 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec

@@ -3,3 +3,30 @@ from employees | eval x = 1, y = to_string(languages) | enrich languages_policy
 
 emp_no:integer | language_name:keyword
 ;
+
+docsGettingStartedEnrich
+// tag::gs-enrich[]
+FROM sample_data
+| KEEP @timestamp, client_ip, event_duration
+| EVAL client_ip = TO_STRING(client_ip)
+| ENRICH clientip_policy ON client_ip WITH env
+// end::gs-enrich[]
+| LIMIT 0
+;
+
+@timestamp:date | event_duration:long | client_ip:keyword | env:keyword
+;
+
+docsGettingStartedEnrichStatsBy
+// tag::gs-enrich-stats-by[]
+FROM sample_data
+| KEEP @timestamp, client_ip, event_duration
+| EVAL client_ip = TO_STRING(client_ip)
+| ENRICH clientip_policy ON client_ip WITH env
+| STATS median_duration = MEDIAN(event_duration) BY env
+// end::gs-enrich-stats-by[]
+| LIMIT 0
+;
+
+median_duration:double | env:keyword
+;

+ 22 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec

@@ -215,3 +215,25 @@ emp_no:integer  | foldable:keyword | folded_mv:keyword
 10001           | "foo,bar"        | [foo, bar]
 10002           | "foo,bar"        | [foo, bar]
 ;
+
+docsGettingStartedEval
+// tag::gs-eval[]
+FROM sample_data
+| EVAL duration_ms = event_duration / 1000000.0
+// end::gs-eval[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword |  duration_ms:double
+;
+
+docsGettingStartedRound
+// tag::gs-round[]
+FROM sample_data
+| EVAL duration_ms = ROUND(event_duration / 1000000.0, 1)
+// end::gs-round[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword |  duration_ms:double
+;

+ 10 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json

@@ -0,0 +1,10 @@
+{
+    "properties": {
+        "client_ip": {
+            "type": "keyword"
+        },
+        "env": {
+            "type": "keyword"
+        }
+    }
+}

+ 16 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data.json

@@ -0,0 +1,16 @@
+{
+    "properties": {
+        "@timestamp": {
+            "type": "date"
+        },
+        "client_ip": {
+            "type": "ip"
+        },
+        "event_duration": {
+            "type": "long"
+        },
+        "message": {
+            "type": "keyword"
+        }
+    }
+}

+ 8 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data.csv

@@ -0,0 +1,8 @@
+@timestamp:date,client_ip:ip,event_duration:long,message:keyword
+2023-10-23T13:55:01.543Z,172.21.3.15,1756467,Connected to 10.1.0.1
+2023-10-23T13:53:55.832Z,172.21.3.15,5033755,Connection error
+2023-10-23T13:52:55.015Z,172.21.3.15,8268153,Connection error
+2023-10-23T13:51:54.732Z,172.21.3.15,725448,Connection error
+2023-10-23T13:33:34.937Z,172.21.0.5,1232382,Disconnected
+2023-10-23T12:27:28.948Z,172.21.2.113,2764889,Connected to 10.1.0.2
+2023-10-23T12:15:03.360Z,172.21.2.162,3450233,Connected to 10.1.0.3

+ 32 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

@@ -750,3 +750,35 @@ c:long | a:long
 1      | 1
 ;
 
+docsGettingStartedStats
+// tag::gs-stats[]
+FROM sample_data
+| STATS median_duration = MEDIAN(event_duration)
+// end::gs-stats[]
+;
+
+median_duration:double
+2764889.0
+;
+
+docsGettingStartedTwoStats
+// tag::gs-two-stats[]
+FROM sample_data
+| STATS median_duration = MEDIAN(event_duration), max_duration = MAX(event_duration)
+// end::gs-two-stats[]
+;
+
+median_duration:double | max_duration:long
+2764889.0      |8268153   
+;
+
+docsGettingStartedStatsBy
+// tag::gs-stats-by[]
+FROM sample_data
+| STATS median_duration = MEDIAN(event_duration) BY client_ip
+// end::gs-stats-by[]
+| LIMIT 0
+;
+
+median_duration:double | client_ip:ip
+;

+ 11 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/where-like.csv-spec

@@ -287,3 +287,14 @@ row x = "C:\\foo\\bar.exe" | mv_expand x | where x LIKE "C:\\\\\\\\*";
 
 x:keyword
 ;
+
+docsGettingStartedLike
+// tag::gs-like[]
+FROM sample_data
+| WHERE message LIKE "Connected*"
+// end::gs-like[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;

+ 11 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/where.csv-spec

@@ -76,3 +76,14 @@ emp_no:integer | first_name:keyword
 10010          |Duangkaew      
 10011          |Mary       
 ;
+
+docsGettingStartedWhere
+// tag::gs-where[]
+FROM sample_data
+| WHERE event_duration > 5000000
+// end::gs-where[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;