Jelajahi Sumber

ES|QL: better validation for GROK patterns (#110574)

Luigi Dell'Aquila 1 tahun lalu
induk
melakukan
bfc32a2acc

+ 6 - 0
docs/changelog/110574.yaml

@@ -0,0 +1,6 @@
+pr: 110574
+summary: "ES|QL: better validation for GROK patterns"
+area: ES|QL
+type: bug
+issues:
+ - 110533

+ 7 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

@@ -111,7 +111,13 @@ public class EsqlCapabilities {
         /**
          * Fix for union-types when aggregating over an inline conversion with casting operator. Done in #110476.
          */
-        UNION_TYPES_AGG_CAST;
+        UNION_TYPES_AGG_CAST,
+
+        /**
+         * Fix to GROK validation in case of multiple fields with same name and different types
+         * https://github.com/elastic/elasticsearch/issues/110533
+         */
+        GROK_VALIDATION;
 
         private final boolean snapshotOnly;
 

+ 19 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java

@@ -146,12 +146,30 @@ public class LogicalPlanBuilder extends ExpressionBuilder {
     @Override
     public PlanFactory visitGrokCommand(EsqlBaseParser.GrokCommandContext ctx) {
         return p -> {
+            Source source = source(ctx);
             String pattern = visitString(ctx.string()).fold().toString();
-            Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), Grok.pattern(source(ctx), pattern));
+            Grok.Parser grokParser = Grok.pattern(source, pattern);
+            validateGrokPattern(source, grokParser, pattern);
+            Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), grokParser);
             return result;
         };
     }
 
+    private void validateGrokPattern(Source source, Grok.Parser grokParser, String pattern) {
+        Map<String, DataType> definedAttributes = new HashMap<>();
+        for (Attribute field : grokParser.extractedFields()) {
+            String name = field.name();
+            DataType type = field.dataType();
+            DataType prev = definedAttributes.put(name, type);
+            if (prev != null) {
+                throw new ParsingException(
+                    source,
+                    "Invalid GROK pattern [" + pattern + "]: the attribute [" + name + "] is defined multiple times with different types"
+                );
+            }
+        }
+    }
+
     @Override
     public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) {
         return p -> {

+ 1 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java

@@ -30,7 +30,7 @@ public class Grok extends RegexExtract {
 
     public record Parser(String pattern, org.elasticsearch.grok.Grok grok) {
 
-        private List<Attribute> extractedFields() {
+        public List<Attribute> extractedFields() {
             return grok.captureConfig()
                 .stream()
                 .sorted(Comparator.comparing(GrokCaptureConfig::name))

+ 15 - 3
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

@@ -758,15 +758,27 @@ public class StatementParserTests extends AbstractStatementParserTests {
     public void testGrokPattern() {
         LogicalPlan cmd = processingCommand("grok a \"%{WORD:foo}\"");
         assertEquals(Grok.class, cmd.getClass());
-        Grok dissect = (Grok) cmd;
-        assertEquals("%{WORD:foo}", dissect.parser().pattern());
-        assertEquals(List.of(referenceAttribute("foo", KEYWORD)), dissect.extractedFields());
+        Grok grok = (Grok) cmd;
+        assertEquals("%{WORD:foo}", grok.parser().pattern());
+        assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields());
 
         ParsingException pe = expectThrows(ParsingException.class, () -> statement("row a = \"foo bar\" | grok a \"%{_invalid_:x}\""));
         assertThat(
             pe.getMessage(),
             containsString("Invalid pattern [%{_invalid_:x}] for grok: Unable to find pattern [_invalid_] in Grok's pattern dictionary")
         );
+
+        cmd = processingCommand("grok a \"%{WORD:foo} %{WORD:foo}\"");
+        assertEquals(Grok.class, cmd.getClass());
+        grok = (Grok) cmd;
+        assertEquals("%{WORD:foo} %{WORD:foo}", grok.parser().pattern());
+        assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields());
+
+        expectError(
+            "row a = \"foo bar\" | GROK a \"%{NUMBER:foo} %{WORD:foo}\"",
+            "line 1:22: Invalid GROK pattern [%{NUMBER:foo} %{WORD:foo}]:"
+                + " the attribute [foo] is defined multiple times with different types"
+        );
     }
 
     public void testLikeRLike() {

+ 35 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/100_bug_fix.yml

@@ -303,3 +303,38 @@
   - match: { values.0.2: [1, 2] }
   - match: { values.0.3: [1, 2] }
   - match: { values.0.4: [1.1, 2.2] }
+
+
+---
+"grok with duplicate names and different types #110533":
+  - requires:
+      test_runner_features: [capabilities]
+      capabilities:
+        - method: POST
+          path: /_query
+          parameters: []
+          capabilities: [grok_validation]
+      reason: "fixed grok validation with patterns containing the same attribute multiple times with different types"
+  - do:
+      indices.create:
+        index: test_grok
+        body:
+          mappings:
+            properties:
+              first_name :
+                type : keyword
+              last_name:
+                type: keyword
+
+  - do:
+      bulk:
+        refresh: true
+        body:
+          - { "index": { "_index": "test_grok" } }
+          - { "first_name": "Georgi", "last_name":"Facello" }
+
+  - do:
+      catch: '/Invalid GROK pattern \[%\{NUMBER:foo\} %\{WORD:foo\}\]: the attribute \[foo\] is defined multiple times with different types/'
+      esql.query:
+        body:
+          query: 'FROM test_grok | KEEP name | WHERE last_name == "Facello" | EVAL name = concat("1 ", last_name) | GROK name "%{NUMBER:foo} %{WORD:foo}"'