فهرست منبع

New GrokPatternBank data structure (#95269)

This refactor introduces a new data structure called `PatternBank` which is an abstraction over the old `Map<String, String>` used all over the place. This data structure has handy methods to extend the pattern bank with new patterns and also centralize the validation of pattern banks into one place. Thanks to this, the repeated code to create Grok Pattern banks is 0.
---------

Co-authored-by: Joe Gallo <joe.gallo@elastic.co>
Pablo Alcantar Morales 2 سال پیش
والد
کامیت
b51951f679

+ 6 - 82
libs/grok/src/main/java/org/elasticsearch/grok/Grok.java

@@ -51,38 +51,34 @@ public final class Grok {
 
     private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit
 
-    private final Map<String, String> patternBank;
     private final boolean namedCaptures;
     private final Regex compiledExpression;
     private final MatcherWatchdog matcherWatchdog;
     private final List<GrokCaptureConfig> captureConfig;
 
-    public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
+    public Grok(PatternBank patternBank, String grokPattern, Consumer<String> logCallBack) {
         this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
     }
 
-    public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
+    public Grok(PatternBank patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
         this(patternBank, grokPattern, true, matcherWatchdog, logCallBack);
     }
 
-    Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
+    Grok(PatternBank patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
         this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack);
     }
 
     private Grok(
-        Map<String, String> patternBank,
+        PatternBank patternBank,
         String grokPattern,
         boolean namedCaptures,
         MatcherWatchdog matcherWatchdog,
         Consumer<String> logCallBack
     ) {
-        this.patternBank = patternBank;
         this.namedCaptures = namedCaptures;
         this.matcherWatchdog = matcherWatchdog;
 
-        forbidCircularReferences();
-
-        String expression = toRegex(grokPattern);
+        String expression = toRegex(patternBank, grokPattern);
         byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
         this.compiledExpression = new Regex(
             expressionBytes,
@@ -100,78 +96,6 @@ public final class Grok {
         this.captureConfig = List.copyOf(grokCaptureConfigs);
     }
 
-    /**
-     * Checks whether patterns reference each other in a circular manner and if so fail with an exception
-     *
-     * In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
-     * a reference to another named pattern. This method will navigate to all these named patterns and
-     * check for a circular reference.
-     */
-    private void forbidCircularReferences() {
-
-        // first ensure that the pattern bank contains no simple circular references (i.e., any pattern
-        // containing an immediate reference to itself) as those can cause the remainder of this algorithm
-        // to recurse infinitely
-        for (Map.Entry<String, String> entry : patternBank.entrySet()) {
-            if (patternReferencesItself(entry.getValue(), entry.getKey())) {
-                throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]");
-            }
-        }
-
-        // next, recursively check any other pattern names referenced in each pattern
-        for (Map.Entry<String, String> entry : patternBank.entrySet()) {
-            String name = entry.getKey();
-            String pattern = entry.getValue();
-            innerForbidCircularReferences(name, new ArrayList<>(), pattern);
-        }
-    }
-
-    private void innerForbidCircularReferences(String patternName, List<String> path, String pattern) {
-        if (patternReferencesItself(pattern, patternName)) {
-            String message;
-            if (path.isEmpty()) {
-                message = "circular reference in pattern [" + patternName + "][" + pattern + "]";
-            } else {
-                message = "circular reference in pattern ["
-                    + path.remove(path.size() - 1)
-                    + "]["
-                    + pattern
-                    + "] back to pattern ["
-                    + patternName
-                    + "]";
-                // add rest of the path:
-                if (path.isEmpty() == false) {
-                    message += " via patterns [" + String.join("=>", path) + "]";
-                }
-            }
-            throw new IllegalArgumentException(message);
-        }
-
-        // next check any other pattern names found in the pattern
-        for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
-            int begin = i + 2;
-            int bracketIndex = pattern.indexOf('}', begin);
-            int columnIndex = pattern.indexOf(':', begin);
-            int end;
-            if (bracketIndex != -1 && columnIndex == -1) {
-                end = bracketIndex;
-            } else if (columnIndex != -1 && bracketIndex == -1) {
-                end = columnIndex;
-            } else if (bracketIndex != -1 && columnIndex != -1) {
-                end = Math.min(bracketIndex, columnIndex);
-            } else {
-                throw new IllegalArgumentException("pattern [" + pattern + "] has circular references to other pattern definitions");
-            }
-            String otherPatternName = pattern.substring(begin, end);
-            path.add(otherPatternName);
-            innerForbidCircularReferences(patternName, path, patternBank.get(otherPatternName));
-        }
-    }
-
-    private static boolean patternReferencesItself(String pattern, String patternName) {
-        return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
-    }
-
     private String groupMatch(String name, Region region, String pattern) {
         int number = GROK_PATTERN_REGEX.nameToBackrefNumber(
             name.getBytes(StandardCharsets.UTF_8),
@@ -192,7 +116,7 @@ public final class Grok {
      *
      * @return named regex expression
      */
-    protected String toRegex(String grokPattern) {
+    protected String toRegex(PatternBank patternBank, String grokPattern) {
         StringBuilder res = new StringBuilder();
         for (int i = 0; i < MAX_TO_REGEX_ITERATIONS; i++) {
             byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8);

+ 10 - 11
libs/grok/src/main/java/org/elasticsearch/grok/GrokBuiltinPatterns.java

@@ -13,7 +13,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
-import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -27,21 +26,21 @@ public class GrokBuiltinPatterns {
     /**
      * Patterns built in to the grok library.
      */
-    private static Map<String, String> LEGACY_PATTERNS;
-    private static Map<String, String> ECS_V1_PATTERNS;
+    private static PatternBank LEGACY_PATTERNS;
+    private static PatternBank ECS_V1_PATTERNS;
 
-    public static synchronized Map<String, String> legacyPatterns() {
+    public static synchronized PatternBank legacyPatterns() {
         return get(false);
     }
 
-    public static synchronized Map<String, String> ecsV1Patterns() {
+    public static synchronized PatternBank ecsV1Patterns() {
         return get(true);
     }
 
     /**
      * Load built-in patterns.
      */
-    public static synchronized Map<String, String> get(boolean ecsCompatibility) {
+    public static synchronized PatternBank get(boolean ecsCompatibility) {
         if (ecsCompatibility) {
             if (ECS_V1_PATTERNS == null) {
                 ECS_V1_PATTERNS = loadEcsPatterns();
@@ -55,7 +54,7 @@ public class GrokBuiltinPatterns {
         }
     }
 
-    public static Map<String, String> get(String ecsCompatibility) {
+    public static PatternBank get(String ecsCompatibility) {
         if (isValidEcsCompatibilityMode(ecsCompatibility)) {
             return get(ECS_COMPATIBILITY_V1.equals(ecsCompatibility));
         } else {
@@ -67,7 +66,7 @@ public class GrokBuiltinPatterns {
         return ECS_COMPATIBILITY_MODES.contains(ecsCompatibility);
     }
 
-    private static Map<String, String> loadLegacyPatterns() {
+    private static PatternBank loadLegacyPatterns() {
         var patternNames = List.of(
             "aws",
             "bacula",
@@ -94,7 +93,7 @@ public class GrokBuiltinPatterns {
         return loadPatternsFromDirectory(patternNames, "/patterns/legacy/");
     }
 
-    private static Map<String, String> loadEcsPatterns() {
+    private static PatternBank loadEcsPatterns() {
         var patternNames = List.of(
             "aws",
             "bacula",
@@ -122,7 +121,7 @@ public class GrokBuiltinPatterns {
         return loadPatternsFromDirectory(patternNames, "/patterns/ecs-v1/");
     }
 
-    private static Map<String, String> loadPatternsFromDirectory(List<String> patternNames, String directory) {
+    private static PatternBank loadPatternsFromDirectory(List<String> patternNames, String directory) {
         Map<String, String> builtinPatterns = new LinkedHashMap<>();
         for (String pattern : patternNames) {
             try {
@@ -133,7 +132,7 @@ public class GrokBuiltinPatterns {
                 throw new RuntimeException("failed to load built-in patterns", e);
             }
         }
-        return Collections.unmodifiableMap(builtinPatterns);
+        return new PatternBank(builtinPatterns);
     }
 
     private static void loadPatternsFromFile(Map<String, String> patternBank, InputStream inputStream) throws IOException {

+ 136 - 0
libs/grok/src/main/java/org/elasticsearch/grok/PatternBank.java

@@ -0,0 +1,136 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.grok;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+public class PatternBank {
+
+    public static PatternBank EMPTY = new PatternBank(Map.of());
+
+    private final Map<String, String> bank;
+
+    public PatternBank(Map<String, String> bank) {
+        Objects.requireNonNull(bank, "bank must not be null");
+        forbidCircularReferences(bank);
+
+        // the bank reference should be unmodifiable, based on a defensive copy of the passed-in bank, and
+        // maintain the iteration order of the passed-in bank (assuming there was a meaningful order)
+        this.bank = Collections.unmodifiableMap(new LinkedHashMap<>(bank));
+    }
+
+    public String get(String patternName) {
+        return bank.get(patternName);
+    }
+
+    public Map<String, String> bank() {
+        return bank;
+    }
+
+    /**
+     * Extends a pattern bank with extra patterns, returning a new pattern bank.
+     * <p>
+     * The returned bank will be the same reference as the original pattern bank if the extra patterns map is null or empty.
+     *
+     * @param extraPatterns the patterns to extend this bank with (may be empty or null)
+     * @return the extended pattern bank
+     */
+    public PatternBank extendWith(Map<String, String> extraPatterns) {
+        if (extraPatterns == null || extraPatterns.isEmpty()) {
+            return this;
+        }
+
+        var extendedBank = new LinkedHashMap<>(bank);
+        extendedBank.putAll(extraPatterns);
+        return new PatternBank(extendedBank);
+    }
+
+    /**
+     * Checks whether patterns reference each other in a circular manner and if so fail with an exception.
+     * <p>
+     * In a pattern, anything between <code>%{</code> and <code>}</code> or <code>:</code> is considered
+     * a reference to another named pattern. This method will navigate to all these named patterns and
+     * check for a circular reference.
+     */
+    static void forbidCircularReferences(Map<String, String> bank) {
+        // first ensure that the pattern bank contains no simple circular references (i.e., any pattern
+        // containing an immediate reference to itself) as those can cause the remainder of this algorithm
+        // to recurse infinitely
+        for (Map.Entry<String, String> entry : bank.entrySet()) {
+            if (patternReferencesItself(entry.getValue(), entry.getKey())) {
+                throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]");
+            }
+        }
+
+        // next, recursively check any other pattern names referenced in each pattern
+        for (Map.Entry<String, String> entry : bank.entrySet()) {
+            String name = entry.getKey();
+            String pattern = entry.getValue();
+            innerForbidCircularReferences(bank, name, new ArrayList<>(), pattern);
+        }
+    }
+
+    private static void innerForbidCircularReferences(Map<String, String> bank, String patternName, List<String> path, String pattern) {
+        if (patternReferencesItself(pattern, patternName)) {
+            String message;
+            if (path.isEmpty()) {
+                message = "circular reference in pattern [" + patternName + "][" + pattern + "]";
+            } else {
+                message = "circular reference in pattern ["
+                    + path.remove(path.size() - 1)
+                    + "]["
+                    + pattern
+                    + "] back to pattern ["
+                    + patternName
+                    + "]";
+                // add rest of the path:
+                if (path.isEmpty() == false) {
+                    message += " via patterns [" + String.join("=>", path) + "]";
+                }
+            }
+            throw new IllegalArgumentException(message);
+        }
+
+        // next check any other pattern names found in the pattern
+        for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
+            int begin = i + 2;
+            int bracketIndex = pattern.indexOf('}', begin);
+            int columnIndex = pattern.indexOf(':', begin);
+            int end;
+            if (bracketIndex != -1 && columnIndex == -1) {
+                end = bracketIndex;
+            } else if (columnIndex != -1 && bracketIndex == -1) {
+                end = columnIndex;
+            } else if (bracketIndex != -1 && columnIndex != -1) {
+                end = Math.min(bracketIndex, columnIndex);
+            } else {
+                throw new IllegalArgumentException("pattern [" + pattern + "] has an invalid syntax");
+            }
+            String otherPatternName = pattern.substring(begin, end);
+            path.add(otherPatternName);
+            String otherPattern = bank.get(otherPatternName);
+            if (otherPattern == null) {
+                throw new IllegalArgumentException(
+                    "pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]"
+                );
+            }
+
+            innerForbidCircularReferences(bank, patternName, path, otherPattern);
+        }
+    }
+
+    private static boolean patternReferencesItself(String pattern, String patternName) {
+        return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
+    }
+}

+ 10 - 86
libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java

@@ -15,7 +15,6 @@ import org.elasticsearch.test.ESTestCase;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -91,7 +90,7 @@ public class GrokTests extends ESTestCase {
     }
 
     public void testNoMatchingPatternInDictionary() {
-        Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}", logger::warn));
+        Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(PatternBank.EMPTY, "%{NOTFOUND}", logger::warn));
         assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary"));
     }
 
@@ -409,18 +408,14 @@ public class GrokTests extends ESTestCase {
     }
 
     public void testNoNamedCaptures() {
-        Map<String, String> bank = new HashMap<>();
-
-        bank.put("NAME", "Tal");
-        bank.put("EXCITED_NAME", "!!!%{NAME:name}!!!");
-        bank.put("TEST", "hello world");
+        var bank = new PatternBank(Map.of("NAME", "Tal", "EXCITED_NAME", "!!!%{NAME:name}!!!", "TEST", "hello world"));
 
         String text = "wowza !!!Tal!!! - Tal";
         String pattern = "%{EXCITED_NAME} - %{NAME}";
         Grok g = new Grok(bank, pattern, false, logger::warn);
         assertCaptureConfig(g, Map.of("EXCITED_NAME_0", STRING, "NAME_21", STRING, "NAME_22", STRING));
 
-        assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
+        assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(bank, pattern));
         assertEquals(true, g.match(text));
 
         Object actual = g.captures(text);
@@ -431,77 +426,6 @@ public class GrokTests extends ESTestCase {
         assertEquals(expected, actual);
     }
 
-    public void testCircularReference() {
-        Exception e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new HashMap<>();
-            bank.put("NAME", "!!!%{NAME}!!!");
-            String pattern = "%{NAME}";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());
-
-        e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new HashMap<>();
-            bank.put("NAME", "!!!%{NAME:name}!!!");
-            String pattern = "%{NAME}";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage());
-
-        e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new HashMap<>();
-            bank.put("NAME", "!!!%{NAME:name:int}!!!");
-            String pattern = "%{NAME}";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage());
-
-        e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new TreeMap<>();
-            bank.put("NAME1", "!!!%{NAME2}!!!");
-            bank.put("NAME2", "!!!%{NAME1}!!!");
-            String pattern = "%{NAME1}";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage());
-
-        e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new TreeMap<>();
-            bank.put("NAME1", "!!!%{NAME2}!!!");
-            bank.put("NAME2", "!!!%{NAME3}!!!");
-            bank.put("NAME3", "!!!%{NAME1}!!!");
-            String pattern = "%{NAME1}";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]", e.getMessage());
-
-        e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new TreeMap<>();
-            bank.put("NAME1", "!!!%{NAME2}!!!");
-            bank.put("NAME2", "!!!%{NAME3}!!!");
-            bank.put("NAME3", "!!!%{NAME4}!!!");
-            bank.put("NAME4", "!!!%{NAME5}!!!");
-            bank.put("NAME5", "!!!%{NAME1}!!!");
-            String pattern = "%{NAME1}";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals(
-            "circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2=>NAME3=>NAME4]",
-            e.getMessage()
-        );
-    }
-
-    public void testCircularSelfReference() {
-        Exception e = expectThrows(IllegalArgumentException.class, () -> {
-            Map<String, String> bank = new HashMap<>();
-            bank.put("ANOTHER", "%{INT}");
-            bank.put("INT", "%{INT}");
-            String pattern = "does_not_matter";
-            new Grok(bank, pattern, false, logger::warn);
-        });
-        assertEquals("circular reference in pattern [INT][%{INT}]", e.getMessage());
-    }
-
     public void testBooleanCaptures() {
         testBooleanCaptures(false);
         testBooleanCaptures(true);
@@ -537,7 +461,7 @@ public class GrokTests extends ESTestCase {
         bank.put("NUMBER", "(?:%{BASE10NUM})");
 
         String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}";
-        Grok g = new Grok(bank, pattern, logger::warn);
+        Grok g = new Grok(new PatternBank(bank), pattern, logger::warn);
         assertCaptureConfig(g, Map.of("bytes", FLOAT, "id", LONG, "rating", DOUBLE));
 
         String text = "12009.34 20000000000 4820.092";
@@ -585,7 +509,7 @@ public class GrokTests extends ESTestCase {
         bank.put("NUMBER", "(?:%{BASE10NUM})");
 
         String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
-        Grok g = new Grok(bank, pattern, logger::warn);
+        Grok g = new Grok(new PatternBank(bank), pattern, logger::warn);
         assertCaptureConfig(g, Map.of("bytes", FLOAT, "status", STRING));
 
         String text = "12009.34 200 9032";
@@ -603,7 +527,7 @@ public class GrokTests extends ESTestCase {
         bank.put("NUMBER", "(?:%{BASE10NUM})");
 
         String pattern = "%{NUMBER:f:not_a_valid_type}";
-        Grok g = new Grok(bank, pattern, logger::warn);
+        Grok g = new Grok(new PatternBank(bank), pattern, logger::warn);
         assertCaptureConfig(g, Map.of("f", STRING));
         assertThat(g.captures("12009.34"), equalTo(Map.of("f", "12009.34")));
     }
@@ -759,7 +683,7 @@ public class GrokTests extends ESTestCase {
             %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] "%{WORD:verb} %{DATA:request} \
             HTTP/%{NUMBER:httpversion}" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}""";
 
-        Grok grok = new Grok(bank, pattern, logger::warn);
+        Grok grok = new Grok(new PatternBank(bank), pattern, logger::warn);
         assertCaptureConfig(
             grok,
             Map.ofEntries(
@@ -802,19 +726,19 @@ public class GrokTests extends ESTestCase {
     public void testNoMatch() {
         Map<String, String> bank = new HashMap<>();
         bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
-        Grok grok = new Grok(bank, "%{MONTHDAY:greatday}", logger::warn);
+        Grok grok = new Grok(new PatternBank(bank), "%{MONTHDAY:greatday}", logger::warn);
         assertThat(grok.captures("nomatch"), nullValue());
     }
 
     public void testMultipleNamedCapturesWithSameName() {
         Map<String, String> bank = new HashMap<>();
         bank.put("SINGLEDIGIT", "[0-9]");
-        Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
+        Grok grok = new Grok(new PatternBank(bank), "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
         assertCaptureConfig(grok, Map.of("num", STRING));
 
         assertThat(grok.captures("12"), equalTo(Map.of("num", List.of("1", "2"))));
 
-        grok = new Grok(bank, "%{SINGLEDIGIT:num:int}(%{SINGLEDIGIT:num:int})?", logger::warn);
+        grok = new Grok(new PatternBank(bank), "%{SINGLEDIGIT:num:int}(%{SINGLEDIGIT:num:int})?", logger::warn);
         assertCaptureConfig(grok, Map.of("num", INTEGER));
         assertEquals(grok.captures("1"), Map.of("num", 1));
         assertEquals(grok.captures("1a"), Map.of("num", 1));

+ 115 - 0
libs/grok/src/test/java/org/elasticsearch/grok/PatternBankTests.java

@@ -0,0 +1,115 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.grok;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+public class PatternBankTests extends ESTestCase {
+
+    public void testInternalBankIsUnmodifiableAndACopy() {
+        Map<String, String> bank = new HashMap<>();
+        bank.put("ONE", "1");
+        var patternBank = new PatternBank(bank);
+        assertNotSame(patternBank.bank(), bank);
+        assertEquals(patternBank.bank(), bank);
+        expectThrows(UnsupportedOperationException.class, () -> { patternBank.bank().put("some", "thing"); });
+    }
+
+    public void testBankCannotBeNull() {
+        var e = expectThrows(NullPointerException.class, () -> new PatternBank(null));
+        assertEquals("bank must not be null", e.getMessage());
+    }
+
+    public void testConstructorValidatesCircularReferences() {
+        var e = expectThrows(IllegalArgumentException.class, () -> new PatternBank(Map.of("NAME", "!!!%{NAME}!!!")));
+        assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());
+    }
+
+    public void testExtendWith() {
+        var baseBank = new PatternBank(Map.of("ONE", "1", "TWO", "2"));
+
+        assertSame(baseBank.extendWith(null), baseBank);
+        assertSame(baseBank.extendWith(Map.of()), baseBank);
+
+        var extended = baseBank.extendWith(Map.of("THREE", "3", "FOUR", "4"));
+        assertNotSame(extended, baseBank);
+        assertEquals(extended.bank(), Map.of("ONE", "1", "TWO", "2", "THREE", "3", "FOUR", "4"));
+    }
+
+    public void testCircularReference() {
+        var e = expectThrows(IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("NAME", "!!!%{NAME}!!!")));
+        assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());
+
+        e = expectThrows(IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("NAME", "!!!%{NAME:name}!!!")));
+        assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage());
+
+        e = expectThrows(
+            IllegalArgumentException.class,
+            () -> { PatternBank.forbidCircularReferences(Map.of("NAME", "!!!%{NAME:name:int}!!!")); }
+        );
+        assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage());
+
+        e = expectThrows(IllegalArgumentException.class, () -> {
+            Map<String, String> bank = new TreeMap<>();
+            bank.put("NAME1", "!!!%{NAME2}!!!");
+            bank.put("NAME2", "!!!%{NAME1}!!!");
+            PatternBank.forbidCircularReferences(bank);
+        });
+        assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage());
+
+        e = expectThrows(IllegalArgumentException.class, () -> {
+            Map<String, String> bank = new TreeMap<>();
+            bank.put("NAME1", "!!!%{NAME2}!!!");
+            bank.put("NAME2", "!!!%{NAME3}!!!");
+            bank.put("NAME3", "!!!%{NAME1}!!!");
+            PatternBank.forbidCircularReferences(bank);
+        });
+        assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]", e.getMessage());
+
+        e = expectThrows(IllegalArgumentException.class, () -> {
+            Map<String, String> bank = new TreeMap<>();
+            bank.put("NAME1", "!!!%{NAME2}!!!");
+            bank.put("NAME2", "!!!%{NAME3}!!!");
+            bank.put("NAME3", "!!!%{NAME4}!!!");
+            bank.put("NAME4", "!!!%{NAME5}!!!");
+            bank.put("NAME5", "!!!%{NAME1}!!!");
+            PatternBank.forbidCircularReferences(bank);
+        });
+        assertEquals(
+            "circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2=>NAME3=>NAME4]",
+            e.getMessage()
+        );
+    }
+
+    public void testCircularSelfReference() {
+        var e = expectThrows(
+            IllegalArgumentException.class,
+            () -> PatternBank.forbidCircularReferences(Map.of("ANOTHER", "%{INT}", "INT", "%{INT}"))
+        );
+        assertEquals("circular reference in pattern [INT][%{INT}]", e.getMessage());
+    }
+
+    public void testInvalidPatternReferences() {
+        var e = expectThrows(IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("NAME", "%{NON_EXISTENT}")));
+        assertEquals("pattern [NAME] is referencing a non-existent pattern [NON_EXISTENT]", e.getMessage());
+
+        e = expectThrows(IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("NAME", "%{NON_EXISTENT:id}")));
+        assertEquals("pattern [NAME] is referencing a non-existent pattern [NON_EXISTENT]", e.getMessage());
+
+        e = expectThrows(
+            IllegalArgumentException.class,
+            () -> PatternBank.forbidCircularReferences(Map.of("VALID", "valid", "NAME", "%{VALID"))
+        );
+        assertEquals("pattern [%{VALID] has an invalid syntax", e.getMessage());
+    }
+}

+ 3 - 6
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java

@@ -13,6 +13,7 @@ import org.apache.logging.log4j.Logger;
 import org.elasticsearch.grok.Grok;
 import org.elasticsearch.grok.GrokBuiltinPatterns;
 import org.elasticsearch.grok.MatcherWatchdog;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.ingest.AbstractProcessor;
 import org.elasticsearch.ingest.ConfigurationUtils;
 import org.elasticsearch.ingest.IngestDocument;
@@ -42,7 +43,7 @@ public final class GrokProcessor extends AbstractProcessor {
     GrokProcessor(
         String tag,
         String description,
-        Map<String, String> patternBank,
+        PatternBank patternBank,
         List<String> matchPatterns,
         String matchField,
         boolean traceMatch,
@@ -169,16 +170,12 @@ public final class GrokProcessor extends AbstractProcessor {
                 throw newConfigurationException(TYPE, processorTag, "patterns", "List of patterns must not be empty");
             }
             Map<String, String> customPatternBank = ConfigurationUtils.readOptionalMap(TYPE, processorTag, config, "pattern_definitions");
-            Map<String, String> patternBank = new HashMap<>(GrokBuiltinPatterns.get(ecsCompatibility));
-            if (customPatternBank != null) {
-                patternBank.putAll(customPatternBank);
-            }
 
             try {
                 return new GrokProcessor(
                     processorTag,
                     description,
-                    patternBank,
+                    GrokBuiltinPatterns.get(ecsCompatibility).extendWith(customPatternBank),
                     matchPatterns,
                     matchField,
                     traceMatch,

+ 5 - 4
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessorGetAction.java

@@ -20,6 +20,7 @@ import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.grok.GrokBuiltinPatterns;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.rest.Scope;
@@ -135,13 +136,13 @@ public class GrokProcessorGetAction extends ActionType<GrokProcessorGetAction.Re
         TransportAction(
             TransportService transportService,
             ActionFilters actionFilters,
-            Map<String, String> legacyGrokPatterns,
-            Map<String, String> ecsV1GrokPatterns
+            PatternBank legacyGrokPatterns,
+            PatternBank ecsV1GrokPatterns
         ) {
             super(NAME, transportService, actionFilters, Request::new);
-            this.legacyGrokPatterns = legacyGrokPatterns;
+            this.legacyGrokPatterns = legacyGrokPatterns.bank();
             this.sortedLegacyGrokPatterns = new TreeMap<>(this.legacyGrokPatterns);
-            this.ecsV1GrokPatterns = ecsV1GrokPatterns;
+            this.ecsV1GrokPatterns = ecsV1GrokPatterns.bank();
             this.sortedEcsV1GrokPatterns = new TreeMap<>(this.ecsV1GrokPatterns);
         }
 

+ 3 - 7
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/RedactProcessor.java

@@ -15,6 +15,7 @@ import org.elasticsearch.grok.Grok;
 import org.elasticsearch.grok.GrokBuiltinPatterns;
 import org.elasticsearch.grok.GrokCaptureExtracter;
 import org.elasticsearch.grok.MatcherWatchdog;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.ingest.AbstractProcessor;
 import org.elasticsearch.ingest.ConfigurationUtils;
 import org.elasticsearch.ingest.IngestDocument;
@@ -26,7 +27,6 @@ import org.joni.Region;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Comparator;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -55,7 +55,7 @@ public class RedactProcessor extends AbstractProcessor {
     RedactProcessor(
         String tag,
         String description,
-        Map<String, String> patternBank,
+        PatternBank patternBank,
         List<String> matchPatterns,
         String redactField,
         boolean ignoreMissing,
@@ -351,16 +351,12 @@ public class RedactProcessor extends AbstractProcessor {
                 throw newConfigurationException(TYPE, processorTag, "patterns", "List of patterns must not be empty");
             }
             Map<String, String> customPatternBank = ConfigurationUtils.readOptionalMap(TYPE, processorTag, config, "pattern_definitions");
-            Map<String, String> patternBank = new HashMap<>(GrokBuiltinPatterns.ecsV1Patterns());
-            if (customPatternBank != null) {
-                patternBank.putAll(customPatternBank);
-            }
 
             try {
                 return new RedactProcessor(
                     processorTag,
                     description,
-                    patternBank,
+                    GrokBuiltinPatterns.ecsV1Patterns().extendWith(customPatternBank),
                     matchPatterns,
                     matchField,
                     ignoreMissing,

+ 8 - 7
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorGetActionTests.java

@@ -14,6 +14,7 @@ import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.transport.TransportService;
 import org.elasticsearch.xcontent.ToXContent;
@@ -34,8 +35,8 @@ import static org.hamcrest.Matchers.sameInstance;
 import static org.mockito.Mockito.mock;
 
 public class GrokProcessorGetActionTests extends ESTestCase {
-    private static final Map<String, String> LEGACY_TEST_PATTERNS = Map.of("PATTERN2", "foo2", "PATTERN1", "foo1");
-    private static final Map<String, String> ECS_TEST_PATTERNS = Map.of("ECS_PATTERN2", "foo2", "ECS_PATTERN1", "foo1");
+    private static final PatternBank LEGACY_TEST_PATTERNS = new PatternBank(Map.of("PATTERN2", "foo2", "PATTERN1", "foo1"));
+    private static final PatternBank ECS_TEST_PATTERNS = new PatternBank(Map.of("ECS_PATTERN2", "foo2", "ECS_PATTERN1", "foo1"));
 
     public void testRequest() throws Exception {
         GrokProcessorGetAction.Request request = new GrokProcessorGetAction.Request(false, GrokProcessor.DEFAULT_ECS_COMPATIBILITY_MODE);
@@ -47,17 +48,17 @@ public class GrokProcessorGetActionTests extends ESTestCase {
     }
 
     public void testResponseSerialization() throws Exception {
-        GrokProcessorGetAction.Response response = new GrokProcessorGetAction.Response(LEGACY_TEST_PATTERNS);
+        GrokProcessorGetAction.Response response = new GrokProcessorGetAction.Response(LEGACY_TEST_PATTERNS.bank());
         BytesStreamOutput out = new BytesStreamOutput();
         response.writeTo(out);
         StreamInput streamInput = out.bytes().streamInput();
         GrokProcessorGetAction.Response otherResponse = new GrokProcessorGetAction.Response(streamInput);
-        assertThat(response.getGrokPatterns(), equalTo(LEGACY_TEST_PATTERNS));
+        assertThat(response.getGrokPatterns(), equalTo(LEGACY_TEST_PATTERNS.bank()));
         assertThat(response.getGrokPatterns(), equalTo(otherResponse.getGrokPatterns()));
     }
 
     public void testResponseSorting() {
-        List<String> sortedKeys = new ArrayList<>(LEGACY_TEST_PATTERNS.keySet());
+        List<String> sortedKeys = new ArrayList<>(LEGACY_TEST_PATTERNS.bank().keySet());
         Collections.sort(sortedKeys);
         GrokProcessorGetAction.TransportAction transportAction = new GrokProcessorGetAction.TransportAction(
             mock(TransportService.class),
@@ -106,7 +107,7 @@ public class GrokProcessorGetActionTests extends ESTestCase {
     }
 
     public void testEcsCompatibilityMode() {
-        List<String> sortedKeys = new ArrayList<>(ECS_TEST_PATTERNS.keySet());
+        List<String> sortedKeys = new ArrayList<>(ECS_TEST_PATTERNS.bank().keySet());
         Collections.sort(sortedKeys);
         GrokProcessorGetAction.TransportAction transportAction = new GrokProcessorGetAction.TransportAction(
             mock(TransportService.class),
@@ -132,7 +133,7 @@ public class GrokProcessorGetActionTests extends ESTestCase {
 
     @SuppressWarnings("unchecked")
     public void testResponseToXContent() throws Exception {
-        GrokProcessorGetAction.Response response = new GrokProcessorGetAction.Response(LEGACY_TEST_PATTERNS);
+        GrokProcessorGetAction.Response response = new GrokProcessorGetAction.Response(LEGACY_TEST_PATTERNS.bank());
         try (XContentBuilder builder = JsonXContent.contentBuilder()) {
             response.toXContent(builder, ToXContent.EMPTY_PARAMS);
             Map<String, Object> converted = XContentHelper.convertToMap(BytesReference.bytes(builder), false, builder.contentType()).v2();

+ 18 - 17
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java

@@ -9,6 +9,7 @@
 package org.elasticsearch.ingest.common;
 
 import org.elasticsearch.grok.MatcherWatchdog;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.ingest.IngestDocument;
 import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.ingest.TestIngestDocument;
@@ -30,7 +31,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -48,7 +49,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of(),
+            PatternBank.EMPTY,
             List.of("(?<a>(?i)A)"),
             fieldName,
             false,
@@ -66,7 +67,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -86,7 +87,7 @@ public class GrokProcessorTests extends ESTestCase {
             () -> new GrokProcessor(
                 randomAlphaOfLength(10),
                 null,
-                Map.of("ONE", "1"),
+                new PatternBank(Map.of("ONE", "1")),
                 List.of("%{NOTONE:not_one}"),
                 fieldName,
                 false,
@@ -105,7 +106,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of(),
+            PatternBank.EMPTY,
             List.of(fieldName),
             fieldName,
             false,
@@ -123,7 +124,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -142,7 +143,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -160,7 +161,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -178,7 +179,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -195,7 +196,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -213,7 +214,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            Map.of("ONE", "1"),
+            new PatternBank(Map.of("ONE", "1")),
             List.of("%{ONE:one}"),
             fieldName,
             false,
@@ -235,7 +236,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            patternBank,
+            new PatternBank(patternBank),
             List.of("%{ONE:one}", "%{TWO:two}", "%{THREE:three}"),
             fieldName,
             false,
@@ -259,7 +260,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            patternBank,
+            new PatternBank(patternBank),
             List.of("%{ONE:one}", "%{TWO:two}", "%{THREE:three}"),
             fieldName,
             true,
@@ -282,7 +283,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            patternBank,
+            new PatternBank(patternBank),
             List.of("%{ONE:one}"),
             fieldName,
             true,
@@ -321,7 +322,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            patternBank,
+            new PatternBank(patternBank),
             List.of("%{ONE:first}-%{TWO:second}", "%{ONE:first}-%{THREE:second}"),
             fieldName,
             randomBoolean(),
@@ -342,7 +343,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            patternBank,
+            new PatternBank(patternBank),
             List.of("%{ONETWO:first}%{ONETWO:first}"),
             fieldName,
             randomBoolean(),
@@ -363,7 +364,7 @@ public class GrokProcessorTests extends ESTestCase {
         GrokProcessor processor = new GrokProcessor(
             randomAlphaOfLength(10),
             null,
-            patternBank,
+            new PatternBank(patternBank),
             List.of("%{ONETWO:first}|%{THREE:second}"),
             fieldName,
             randomBoolean(),

+ 14 - 35
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/GrokPatternCreator.java

@@ -11,6 +11,7 @@ import org.apache.logging.log4j.Logger;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.grok.Grok;
 import org.elasticsearch.grok.GrokBuiltinPatterns;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.FieldStats;
 
 import java.util.ArrayList;
@@ -203,7 +204,7 @@ public final class GrokPatternCreator {
      */
     private final Map<String, Object> mappings;
     private final Map<String, FieldStats> fieldStats;
-    private final Map<String, String> grokPatternDefinitions;
+    private final PatternBank grokPatternBank;
     private final Map<String, Integer> fieldNameCountStore = new HashMap<>();
     private final StringBuilder overallGrokPatternBuilder = new StringBuilder();
     private final TimeoutChecker timeoutChecker;
@@ -233,12 +234,7 @@ public final class GrokPatternCreator {
         this.sampleMessages = Collections.unmodifiableCollection(sampleMessages);
         this.mappings = mappings;
         this.fieldStats = fieldStats;
-        if (customGrokPatternDefinitions.isEmpty()) {
-            grokPatternDefinitions = GrokBuiltinPatterns.get(ecsCompatibility);
-        } else {
-            grokPatternDefinitions = new HashMap<>(GrokBuiltinPatterns.get(ecsCompatibility));
-            grokPatternDefinitions.putAll(customGrokPatternDefinitions);
-        }
+        this.grokPatternBank = GrokBuiltinPatterns.get(ecsCompatibility).extendWith(customGrokPatternDefinitions);
         this.timeoutChecker = Objects.requireNonNull(timeoutChecker);
         this.ecsCompatibility = ecsCompatibility;
     }
@@ -274,7 +270,7 @@ public final class GrokPatternCreator {
         FullMatchGrokPatternCandidate candidate = FullMatchGrokPatternCandidate.fromGrokPattern(
             grokPattern,
             timestampField,
-            grokPatternDefinitions
+            grokPatternBank
         );
         if (candidate.matchesAll(sampleMessages, timeoutChecker)) {
             candidate.processMatch(explanation, sampleMessages, mappings, fieldStats, timeoutChecker, ecsCompatibility);
@@ -301,7 +297,7 @@ public final class GrokPatternCreator {
                 seedPatternName,
                 seedMapping,
                 seedFieldName,
-                grokPatternDefinitions
+                grokPatternBank
             );
 
             processCandidateAndSplit(seedCandidate, true, sampleMessages, false, 0, false, 0);
@@ -651,12 +647,7 @@ public final class GrokPatternCreator {
          * @param fieldName              Name of the field to extract from the match.
          * @param grokPatternDefinitions Definitions of Grok patterns to be used.
          */
-        ValueOnlyGrokPatternCandidate(
-            String grokPatternName,
-            String mappingType,
-            String fieldName,
-            Map<String, String> grokPatternDefinitions
-        ) {
+        ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName, PatternBank grokPatternDefinitions) {
             this(
                 grokPatternName,
                 Collections.singletonMap(TextStructureUtils.MAPPING_TYPE_SETTING, mappingType),
@@ -707,7 +698,7 @@ public final class GrokPatternCreator {
             String fieldName,
             String preBreak,
             String postBreak,
-            Map<String, String> grokPatternDefinitions
+            PatternBank grokPatternDefinitions
         ) {
             this.grokPatternName = Objects.requireNonNull(grokPatternName);
             this.mapping = Collections.unmodifiableMap(mapping);
@@ -886,7 +877,7 @@ public final class GrokPatternCreator {
             String grokPatternName,
             Map<String, String> mapping,
             String fieldName,
-            Map<String, String> grokPatternDefinitions
+            PatternBank grokPatternDefinitions
         ) {
             super(grokPatternName, mapping, fieldName, "\\b", "\\b", grokPatternDefinitions);
         }
@@ -927,25 +918,17 @@ public final class GrokPatternCreator {
         private final Grok grok;
 
         static FullMatchGrokPatternCandidate fromGrokPatternNameLegacy(String grokPatternName, String timeField) {
-            return new FullMatchGrokPatternCandidate(
-                "%{" + grokPatternName + "}",
-                timeField,
-                GrokBuiltinPatterns.get(ECS_COMPATIBILITY_DISABLED)
-            );
+            return new FullMatchGrokPatternCandidate("%{" + grokPatternName + "}", timeField, GrokBuiltinPatterns.legacyPatterns());
         }
 
         static FullMatchGrokPatternCandidate fromGrokPatternNameEcs(String grokPatternName, String timeField) {
-            return new FullMatchGrokPatternCandidate(
-                "%{" + grokPatternName + "}",
-                timeField,
-                GrokBuiltinPatterns.get(ECS_COMPATIBILITY_ENABLED)
-            );
+            return new FullMatchGrokPatternCandidate("%{" + grokPatternName + "}", timeField, GrokBuiltinPatterns.ecsV1Patterns());
         }
 
         static FullMatchGrokPatternCandidate fromGrokPatternName(
             String grokPatternName,
             String timeField,
-            Map<String, String> grokPatternDefinitions
+            PatternBank grokPatternDefinitions
         ) {
             return new FullMatchGrokPatternCandidate("%{" + grokPatternName + "}", timeField, grokPatternDefinitions);
         }
@@ -958,15 +941,11 @@ public final class GrokPatternCreator {
             return new FullMatchGrokPatternCandidate(grokPattern, timeField, GrokBuiltinPatterns.get(ECS_COMPATIBILITY_ENABLED));
         }
 
-        static FullMatchGrokPatternCandidate fromGrokPattern(
-            String grokPattern,
-            String timeField,
-            Map<String, String> grokPatternDefinitions
-        ) {
-            return new FullMatchGrokPatternCandidate(grokPattern, timeField, grokPatternDefinitions);
+        static FullMatchGrokPatternCandidate fromGrokPattern(String grokPattern, String timeField, PatternBank grokPatternBank) {
+            return new FullMatchGrokPatternCandidate(grokPattern, timeField, grokPatternBank);
         }
 
-        private FullMatchGrokPatternCandidate(String grokPattern, String timeField, Map<String, String> grokPatternDefinitions) {
+        private FullMatchGrokPatternCandidate(String grokPattern, String timeField, PatternBank grokPatternDefinitions) {
             this.grokPattern = grokPattern;
             this.timeField = timeField;
             grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog, logger::warn);

+ 3 - 3
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java

@@ -11,6 +11,7 @@ import org.apache.logging.log4j.Logger;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.grok.Grok;
 import org.elasticsearch.grok.GrokBuiltinPatterns;
+import org.elasticsearch.grok.PatternBank;
 import org.elasticsearch.ingest.Pipeline;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.FieldStats;
 
@@ -47,7 +48,7 @@ public final class TextStructureUtils {
 
     public static final String NULL_TIMESTAMP_FORMAT = "null";
 
-    private static final Map<String, String> EXTENDED_PATTERNS;
+    private static final PatternBank EXTENDED_PATTERNS;
     static {
         Map<String, String> patterns = new HashMap<>();
         patterns.put("GEO_POINT", "%{NUMBER} %{NUMBER}");
@@ -65,8 +66,7 @@ public final class TextStructureUtils {
             "(?:%{WKT_POINT}|%{WKT_LINESTRING}|%{WKT_MULTIPOINT}|%{WKT_POLYGON}|%{WKT_MULTILINESTRING}|%{WKT_MULTIPOLYGON}|%{WKT_BBOX})"
         );
         patterns.put("WKT_GEOMETRYCOLLECTION", "GEOMETRYCOLLECTION \\(%{WKT_ANY}(?:, %{WKT_ANY})\\)");
-        patterns.putAll(GrokBuiltinPatterns.legacyPatterns());
-        EXTENDED_PATTERNS = Collections.unmodifiableMap(patterns);
+        EXTENDED_PATTERNS = GrokBuiltinPatterns.legacyPatterns().extendWith(patterns);
     }
 
     private static final int NUM_TOP_HITS = 10;