Bläddra i källkod

ES|QL: improve docs about escaping for GROK, DISSECT, LIKE, RLIKE (#115320) (#115493)

Luigi Dell'Aquila 11 månader sedan
förälder
incheckning
5290630bd0

+ 20 - 11
docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc

@@ -40,7 +40,7 @@ delimiter-based pattern, and extracts the specified keys as columns.
 For example, the following pattern:
 [source,txt]
 ----
-%{clientip} [%{@timestamp}] %{status} 
+%{clientip} [%{@timestamp}] %{status}
 ----
 
 matches a log line of this format:
@@ -76,8 +76,8 @@ ignore certain fields, append fields, skip over padding, etc.
 ===== Terminology
 
 dissect pattern::
-the set of fields and delimiters describing the textual 
-format. Also known as a dissection. 
+the set of fields and delimiters describing the textual
+format. Also known as a dissection.
 The dissection is described using a set of `%{}` sections:
 `%{a} - %{b} - %{c}`
 
@@ -91,14 +91,14 @@ Any set of characters other than `%{`, `'not }'`, or `}` is a delimiter.
 key::
 +
 --
-the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes 
-and the ordinal suffix. 
+the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes
+and the ordinal suffix.
 
 Examples:
 
-* `%{?aaa}` - the key is `aaa` 
-* `%{+bbb/3}` - the key is `bbb` 
-* `%{&ccc}` - the key is `ccc` 
+* `%{?aaa}` - the key is `aaa`
+* `%{+bbb/3}` - the key is `bbb`
+* `%{&ccc}` - the key is `ccc`
 --
 
 [[esql-dissect-examples]]
@@ -218,7 +218,7 @@ Putting it together as an {esql} query:
 
 [source.merge.styled,esql]
 ----
-include::{esql-specs}/docs.csv-spec[tag=grokWithEscape]
+include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes]
 ----
 
 `GROK` adds the following columns to the input table:
@@ -239,15 +239,24 @@ with a `\`. For example, in the earlier pattern:
 %{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status}
 ----
 
-In {esql} queries, the backslash character itself is a special character that
+In {esql} queries, when using regular double-quoted strings (`"..."`), the backslash character itself is a special character that
 needs to be escaped with another `\`. For this example, the corresponding {esql}
 query becomes:
 [source.merge.styled,esql]
 ----
 include::{esql-specs}/docs.csv-spec[tag=grokWithEscape]
 ----
+
+For this reason, it is generally more convenient to use triple quotes `"""` for GROK patterns,
+which do not require backslashes to be escaped.
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes]
+----
 ====
 
+
 [[esql-grok-patterns]]
 ===== Grok patterns
 
@@ -318,4 +327,4 @@ as the `GROK` command.
 The `GROK` command does not support configuring <<custom-patterns,custom
 patterns>>, or <<trace-match,multiple patterns>>. The `GROK` command is not
 subject to <<grok-watchdog,Grok watchdog settings>>.
-// end::grok-limitations[]
+// end::grok-limitations[]

+ 1 - 1
docs/reference/esql/functions/kibana/definition/like.json

@@ -42,7 +42,7 @@
     }
   ],
   "examples" : [
-    "FROM employees\n| WHERE first_name LIKE \"?b*\"\n| KEEP first_name, last_name"
+    "FROM employees\n| WHERE first_name LIKE \"\"\"?b*\"\"\"\n| KEEP first_name, last_name"
   ],
   "preview" : false,
   "snapshot_only" : false

+ 1 - 1
docs/reference/esql/functions/kibana/definition/rlike.json

@@ -42,7 +42,7 @@
     }
   ],
   "examples" : [
-    "FROM employees\n| WHERE first_name RLIKE \".leja.*\"\n| KEEP first_name, last_name"
+    "FROM employees\n| WHERE first_name RLIKE \"\"\".leja.*\"\"\"\n| KEEP first_name, last_name"
   ],
   "preview" : false,
   "snapshot_only" : false

+ 1 - 1
docs/reference/esql/functions/kibana/docs/like.md

@@ -15,6 +15,6 @@ The following wildcard characters are supported:
 
 ```
 FROM employees
-| WHERE first_name LIKE "?b*"
+| WHERE first_name LIKE """?b*"""
 | KEEP first_name, last_name
 ```

+ 1 - 1
docs/reference/esql/functions/kibana/docs/rlike.md

@@ -10,6 +10,6 @@ expression. The right-hand side of the operator represents the pattern.
 
 ```
 FROM employees
-| WHERE first_name RLIKE ".leja.*"
+| WHERE first_name RLIKE """.leja.*"""
 | KEEP first_name, last_name
 ```

+ 16 - 0
docs/reference/esql/functions/like.asciidoc

@@ -23,4 +23,20 @@ include::{esql-specs}/docs.csv-spec[tag=like]
 |===
 include::{esql-specs}/docs.csv-spec[tag=like-result]
 |===
+
+Matching the exact characters `*` and `?` will require escaping.
+The escape character is backslash `\`. Since the backslash is also a special character in string literals,
+it will require further escaping.
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes]
+----
+
+To reduce the overhead of escaping, we suggest using triple-quoted strings (`"""`):
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes]
+----
 // end::body[]

+ 16 - 0
docs/reference/esql/functions/rlike.asciidoc

@@ -18,4 +18,20 @@ include::{esql-specs}/docs.csv-spec[tag=rlike]
 |===
 include::{esql-specs}/docs.csv-spec[tag=rlike-result]
 |===
+
+Matching special characters (e.g. `.`, `*`, `(`...) will require escaping.
+The escape character is backslash `\`. Since the backslash is also a special character in string literals,
+it will require further escaping.
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes]
+----
+
+To reduce the overhead of escaping, we suggest using triple-quoted strings (`"""`):
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes]
+----
 // end::body[]

+ 29 - 13
x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec

@@ -382,7 +382,7 @@ count:long | languages:integer
 basicGrok
 // tag::basicGrok[]
 ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" 
-| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}" 
+| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}""" 
 | KEEP date, ip, email, num
 // end::basicGrok[]
 ;
@@ -396,7 +396,7 @@ date:keyword          | ip:keyword    | email:keyword       | num:keyword
 grokWithConversionSuffix
 // tag::grokWithConversionSuffix[]
 ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" 
-| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" 
+| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" 
 | KEEP date, ip, email, num
 // end::grokWithConversionSuffix[]
 ;
@@ -410,7 +410,7 @@ date:keyword              | ip:keyword    | email:keyword       | num:integer
 grokWithToDatetime
 // tag::grokWithToDatetime[]
 ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" 
-| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" 
+| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" 
 | KEEP date, ip, email, num
 | EVAL date = TO_DATETIME(date)
 // end::grokWithToDatetime[]
@@ -436,11 +436,27 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected"
 // end::grokWithEscape-result[]
 ;
 
+
+grokWithEscapeTripleQuotes
+// tag::grokWithEscapeTripleQuotes[]
+ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected"
+| GROK a """%{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status}"""
+// end::grokWithEscapeTripleQuotes[]
+| KEEP @timestamp
+;
+
+// tag::grokWithEscapeTripleQuotes-result[]
+@timestamp:keyword
+2023-01-23T12:15:00.000Z
+// end::grokWithEscapeTripleQuotes-result[]
+;
+
+
 grokWithDuplicateFieldNames
 // tag::grokWithDuplicateFieldNames[]
 FROM addresses
 | KEEP city.name, zip_code
-| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}"
+| GROK zip_code """%{WORD:zip_parts} %{WORD:zip_parts}"""
 // end::grokWithDuplicateFieldNames[]
 | SORT city.name
 ;
@@ -456,7 +472,7 @@ Tokyo             | 100-7014         | null
 basicDissect
 // tag::basicDissect[]
 ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" 
-| DISSECT a "%{date} - %{msg} - %{ip}"
+| DISSECT a """%{date} - %{msg} - %{ip}"""
 | KEEP date, msg, ip
 // end::basicDissect[]
 ;
@@ -470,7 +486,7 @@ date:keyword             | msg:keyword  | ip:keyword
 dissectWithToDatetime
 // tag::dissectWithToDatetime[]
 ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" 
-| DISSECT a "%{date} - %{msg} - %{ip}" 
+| DISSECT a """%{date} - %{msg} - %{ip}""" 
 | KEEP date, msg, ip
 | EVAL date = TO_DATETIME(date)
 // end::dissectWithToDatetime[]
@@ -485,7 +501,7 @@ some text    | 127.0.0.1  | 2023-01-23T12:15:00.000Z
 dissectRightPaddingModifier
 // tag::dissectRightPaddingModifier[]
 ROW message="1998-08-10T17:15:42          WARN"
-| DISSECT message "%{ts->} %{level}"
+| DISSECT message """%{ts->} %{level}"""
 // end::dissectRightPaddingModifier[]
 ;
 
@@ -498,7 +514,7 @@ message:keyword  | ts:keyword | level:keyword
 dissectEmptyRightPaddingModifier#[skip:-8.11.2, reason:Support for empty right padding modifiers introduced in 8.11.2]
 // tag::dissectEmptyRightPaddingModifier[]
 ROW message="[1998-08-10T17:15:42]          [WARN]"
-| DISSECT message "[%{ts}]%{->}[%{level}]"
+| DISSECT message """[%{ts}]%{->}[%{level}]"""
 // end::dissectEmptyRightPaddingModifier[]
 ;
 
@@ -511,7 +527,7 @@ ROW message="[1998-08-10T17:15:42]          [WARN]"
 dissectAppendModifier
 // tag::dissectAppendModifier[]
 ROW message="john jacob jingleheimer schmidt"
-| DISSECT message "%{+name} %{+name} %{+name} %{+name}" APPEND_SEPARATOR=" "
+| DISSECT message """%{+name} %{+name} %{+name} %{+name}""" APPEND_SEPARATOR=" "
 // end::dissectAppendModifier[]
 ;
 
@@ -524,7 +540,7 @@ john jacob jingleheimer schmidt|john jacob jingleheimer schmidt
 dissectAppendWithOrderModifier
 // tag::dissectAppendWithOrderModifier[]
 ROW message="john jacob jingleheimer schmidt"
-| DISSECT message "%{+name/2} %{+name/4} %{+name/3} %{+name/1}" APPEND_SEPARATOR=","
+| DISSECT message """%{+name/2} %{+name/4} %{+name/3} %{+name/1}""" APPEND_SEPARATOR=","
 // end::dissectAppendWithOrderModifier[]
 ;
 
@@ -537,7 +553,7 @@ john jacob jingleheimer schmidt|schmidt,john,jingleheimer,jacob
 dissectNamedSkipKey
 // tag::dissectNamedSkipKey[]
 ROW message="1.2.3.4 - - 30/Apr/1998:22:00:52 +0000"
-| DISSECT message "%{clientip} %{?ident} %{?auth} %{@timestamp}"
+| DISSECT message """%{clientip} %{?ident} %{?auth} %{@timestamp}"""
 // end::dissectNamedSkipKey[]
 ;
 
@@ -550,7 +566,7 @@ message:keyword  | clientip:keyword  | @timestamp:keyword
 docsLike
 // tag::like[]
 FROM employees
-| WHERE first_name LIKE "?b*"
+| WHERE first_name LIKE """?b*"""
 | KEEP first_name, last_name
 // end::like[]
 | SORT first_name
@@ -566,7 +582,7 @@ Eberhardt      |Terkki
 docsRlike
 // tag::rlike[]
 FROM employees
-| WHERE first_name RLIKE ".leja.*"
+| WHERE first_name RLIKE """.leja.*"""
 | KEEP first_name, last_name
 // end::rlike[]
 ;

+ 56 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

@@ -1800,3 +1800,59 @@ warning:Line 1:29: java.lang.IllegalArgumentException: single-value function enc
 x:keyword
 null
 ;
+
+
+likeEscapingSingleQuotes
+// tag::likeEscapingSingleQuotes[]
+ROW message = "foo * bar"
+| WHERE message LIKE "foo \\* bar"
+// end::likeEscapingSingleQuotes[]
+;
+
+// tag::likeEscapingSingleQuotes-result[]
+message:keyword
+foo * bar
+// end::likeEscapingSingleQuotes-result[]
+;
+
+
+likeEscapingTripleQuotes
+// tag::likeEscapingTripleQuotes[]
+ROW message = "foo * bar"
+| WHERE message LIKE """foo \* bar"""
+// end::likeEscapingTripleQuotes[]
+;
+
+// tag::likeEscapingTripleQuotes-result[]
+message:keyword
+foo * bar
+// end::likeEscapingTripleQuotes-result[]
+;
+
+
+rlikeEscapingSingleQuotes
+// tag::rlikeEscapingSingleQuotes[]
+ROW message = "foo ( bar"
+| WHERE message RLIKE "foo \\( bar"
+// end::rlikeEscapingSingleQuotes[]
+;
+
+// tag::rlikeEscapingSingleQuotes-result[]
+message:keyword
+foo ( bar
+// end::rlikeEscapingSingleQuotes-result[]
+;
+
+
+rlikeEscapingTripleQuotes
+// tag::rlikeEscapingTripleQuotes[]
+ROW message = "foo ( bar"
+| WHERE message RLIKE """foo \( bar"""
+// end::rlikeEscapingTripleQuotes[]
+;
+
+// tag::rlikeEscapingTripleQuotes-result[]
+message:keyword
+foo ( bar
+// end::rlikeEscapingTripleQuotes-result[]
+;

+ 17 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java

@@ -33,7 +33,23 @@ public class RLike extends org.elasticsearch.xpack.esql.core.expression.predicat
         Use `RLIKE` to filter data based on string patterns using
         <<regexp-syntax,regular expressions>>. `RLIKE` usually acts on a field placed on
         the left-hand side of the operator, but it can also act on a constant (literal)
-        expression. The right-hand side of the operator represents the pattern.""", examples = @Example(file = "docs", tag = "rlike"))
+        expression. The right-hand side of the operator represents the pattern.""", detailedDescription = """
+        Matching special characters (e.g. `.`, `*`, `(`...) will require escaping.
+        The escape character is backslash `\\`. Since the backslash is also a special character in string literals,
+        it will require further escaping.
+
+        [source.merge.styled,esql]
+        ----
+        include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes]
+        ----
+
+        To reduce the overhead of escaping, we suggest using triple-quoted strings (`\"\"\"`):
+
+        [source.merge.styled,esql]
+        ----
+        include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes]
+        ----
+        """, examples = @Example(file = "docs", tag = "rlike"))
     public RLike(
         Source source,
         @Param(name = "str", type = { "keyword", "text" }, description = "A literal value.") Expression value,

+ 17 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java

@@ -43,7 +43,23 @@ public class WildcardLike extends org.elasticsearch.xpack.esql.core.expression.p
         The following wildcard characters are supported:
 
         * `*` matches zero or more characters.
-        * `?` matches one character.""", examples = @Example(file = "docs", tag = "like"))
+        * `?` matches one character.""", detailedDescription = """
+        Matching the exact characters `*` and `?` will require escaping.
+        The escape character is backslash `\\`. Since the backslash is also a special character in string literals,
+        it will require further escaping.
+
+        [source.merge.styled,esql]
+        ----
+        include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes]
+        ----
+
+        To reduce the overhead of escaping, we suggest using triple-quoted strings (`\"\"\"`):
+
+        [source.merge.styled,esql]
+        ----
+        include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes]
+        ----
+        """, examples = @Example(file = "docs", tag = "like"))
     public WildcardLike(
         Source source,
         @Param(name = "str", type = { "keyword", "text" }, description = "A literal expression.") Expression left,