Browse Source

Ensure Joni warning are logged at debug (#57302)

When Joni, the regex engine that powers grok emits a warning it
does so by default to System.err. System.err logs are all bucketed
together in the server log at WARN level. When Joni emits a warning,
it can be extremely verbose, logging a message for each execution
again that pattern. For ingest node that means for every document
that is run that through Grok. Fortunately, Joni provides a call
back hook to push these warnings to a custom location.

This commit implements Joni's callback hook to push the Joni warning
to the Elasticsearch server logger (logger.org.elasticsearch.ingest.common.GrokProcessor)
at debug level. Generally these warning indicate a possible issue with
the regular expression and upon creation of the Grok processor will
do a "test run" of the expression and log the result (if any) at WARN 
level. This WARN level log should only occur on pipeline creation which 
is a much lower frequency then every document. 

Additionally, the documentation is updated with instructions for how
to set the logger to debug level.
Jake Landis 5 years ago
parent
commit
f5910664b7

+ 20 - 0
docs/reference/ingest/processors/grok.asciidoc

@@ -321,3 +321,23 @@ settings:
 | `ingest.grok.watchdog.interval`           | 1s      | How often to check whether there are grok evaluations that take longer than the maximum allowed execution time.
 | `ingest.grok.watchdog.max_execution_time` | 1s      | The maximum allowed execution of a grok expression evaluation.
 |======
+
+[[grok-debugging]]
+==== Grok debugging
+
+It is advised to use the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] to debug grok patterns. From there you can test one or more
+patterns in the UI against sample data. Under the covers it uses the same engine as ingest node processor.
+
+Additionally, it is recommended to enable debug logging for Grok so that any additional messages may also be seen in the Elasticsearch
+server log.
+
+[source,js]
+--------------------------------------------------
+PUT _cluster/settings
+{
+  "transient": {
+    "logger.org.elasticsearch.ingest.common.GrokProcessor": "debug"
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE

+ 11 - 8
libs/grok/src/main/java/org/elasticsearch/grok/Grok.java

@@ -40,6 +40,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.Consumer;
 
 public final class Grok {
 
@@ -76,19 +77,20 @@ public final class Grok {
     private final Regex compiledExpression;
     private final MatcherWatchdog matcherWatchdog;
 
-    public Grok(Map<String, String> patternBank, String grokPattern) {
-        this(patternBank, grokPattern, true, MatcherWatchdog.noop());
+    public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
+        this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
     }
 
-    public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog) {
-        this(patternBank, grokPattern, true, matcherWatchdog);
+    public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
+        this(patternBank, grokPattern, true, matcherWatchdog, logCallBack);
     }
 
-    Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures) {
-        this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop());
+    Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
+        this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack);
     }
 
-    private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog) {
+    private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog,
+                 Consumer<String> logCallBack) {
         this.patternBank = patternBank;
         this.namedCaptures = namedCaptures;
         this.matcherWatchdog = matcherWatchdog;
@@ -101,7 +103,8 @@ public final class Grok {
 
         String expression = toRegex(grokPattern);
         byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
-        this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE);
+        this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE,
+            message -> logCallBack.accept(message));
     }
 
     /**

+ 42 - 31
libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java

@@ -28,8 +28,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.BiConsumer;
 
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.nullValue;
@@ -40,19 +42,19 @@ public class GrokTests extends ESTestCase {
 
     public void testMatchWithoutCaptures() {
         String line = "value";
-        Grok grok = new Grok(basePatterns, "value");
+        Grok grok = new Grok(basePatterns, "value", logger::warn);
         Map<String, Object> matches = grok.captures(line);
         assertEquals(0, matches.size());
     }
 
     public void testNoMatchingPatternInDictionary() {
-        Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}"));
+        Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}", logger::warn));
         assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary"));
     }
 
     public void testSimpleSyslogLine() {
         String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
-        Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}");
+        Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}", logger::warn);
         Map<String, Object> matches = grok.captures(line);
         assertEquals("evita", matches.get("logsource"));
         assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
@@ -64,7 +66,7 @@ public class GrokTests extends ESTestCase {
     public void testSyslog5424Line() {
         String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
                 "Hello, syslog.";
-        Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}");
+        Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}", logger::warn);
         Map<String, Object> matches = grok.captures(line);
         assertEquals("191", matches.get("syslog5424_pri"));
         assertEquals("1", matches.get("syslog5424_ver"));
@@ -79,13 +81,13 @@ public class GrokTests extends ESTestCase {
 
     public void testDatePattern() {
         String line = "fancy 12-12-12 12:12:12";
-        Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})");
+        Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
         Map<String, Object> matches = grok.captures(line);
         assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
     }
 
     public void testNilCoercedValues() {
-        Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)");
+        Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
         Map<String, Object> matches = grok.captures("test 28.4ms");
         assertEquals(28.4f, matches.get("duration"));
         matches = grok.captures("test N/A");
@@ -93,7 +95,7 @@ public class GrokTests extends ESTestCase {
     }
 
     public void testNilWithNoCoercion() {
-        Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)");
+        Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
         Map<String, Object> matches = grok.captures("test 28.4ms");
         assertEquals("28.4", matches.get("duration"));
         matches = grok.captures("test N/A");
@@ -103,7 +105,7 @@ public class GrokTests extends ESTestCase {
     public void testUnicodeSyslog() {
         Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
                 "%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
-                "%{GREEDYDATA:syslog_message}");
+                "%{GREEDYDATA:syslog_message}", logger::warn);
         Map<String, Object> matches = grok.captures("<22>Jan  4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
                 "SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: " +
                 "email@domain.no");
@@ -113,19 +115,19 @@ public class GrokTests extends ESTestCase {
     }
 
     public void testNamedFieldsWithWholeTextMatch() {
-        Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}");
+        Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}", logger::warn);
         Map<String, Object> matches = grok.captures("11/01/01");
         assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
     }
 
     public void testWithOniguramaNamedCaptures() {
-        Grok grok = new Grok(basePatterns, "(?<foo>\\w+)");
+        Grok grok = new Grok(basePatterns, "(?<foo>\\w+)", logger::warn);
         Map<String, Object> matches = grok.captures("hello world");
         assertThat(matches.get("foo"), equalTo("hello"));
     }
 
     public void testISO8601() {
-        Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
+        Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
         List<String> timeMessages = Arrays.asList(
                 "2001-01-01T00:00:00",
                 "1974-03-02T04:09:09",
@@ -149,7 +151,7 @@ public class GrokTests extends ESTestCase {
     }
 
     public void testNotISO8601() {
-        Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
+        Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
         List<String> timeMessages = Arrays.asList(
                 "2001-13-01T00:00:00", // invalid month
                 "2001-00-01T00:00:00", // invalid month
@@ -188,7 +190,7 @@ public class GrokTests extends ESTestCase {
 
         String text = "wowza !!!Tal!!! - Tal";
         String pattern = "%{EXCITED_NAME} - %{NAME}";
-        Grok g = new Grok(bank, pattern, false);
+        Grok g = new Grok(bank, pattern, false, logger::warn);
 
         assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
         assertEquals(true, g.match(text));
@@ -206,7 +208,7 @@ public class GrokTests extends ESTestCase {
             Map<String, String> bank = new HashMap<>();
             bank.put("NAME", "!!!%{NAME}!!!");
             String pattern = "%{NAME}";
-            new Grok(bank, pattern, false);
+            new Grok(bank, pattern, false, logger::warn);
         });
         assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());
 
@@ -214,7 +216,7 @@ public class GrokTests extends ESTestCase {
             Map<String, String> bank = new HashMap<>();
             bank.put("NAME", "!!!%{NAME:name}!!!");
             String pattern = "%{NAME}";
-            new Grok(bank, pattern, false);
+            new Grok(bank, pattern, false, logger::warn);
         });
         assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage());
 
@@ -222,7 +224,7 @@ public class GrokTests extends ESTestCase {
             Map<String, String> bank = new HashMap<>();
             bank.put("NAME", "!!!%{NAME:name:int}!!!");
             String pattern = "%{NAME}";
-            new Grok(bank, pattern, false);
+            new Grok(bank, pattern, false, logger::warn);
         });
         assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage());
 
@@ -231,7 +233,7 @@ public class GrokTests extends ESTestCase {
             bank.put("NAME1", "!!!%{NAME2}!!!");
             bank.put("NAME2", "!!!%{NAME1}!!!");
             String pattern = "%{NAME1}";
-            new Grok(bank, pattern, false);
+            new Grok(bank, pattern, false, logger::warn);
         });
         assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage());
 
@@ -241,7 +243,7 @@ public class GrokTests extends ESTestCase {
             bank.put("NAME2", "!!!%{NAME3}!!!");
             bank.put("NAME3", "!!!%{NAME1}!!!");
             String pattern = "%{NAME1}";
-            new Grok(bank, pattern, false);
+            new Grok(bank, pattern, false, logger::warn);
         });
         assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]",
             e.getMessage());
@@ -254,7 +256,7 @@ public class GrokTests extends ESTestCase {
             bank.put("NAME4", "!!!%{NAME5}!!!");
             bank.put("NAME5", "!!!%{NAME1}!!!");
             String pattern = "%{NAME1}";
-            new Grok(bank, pattern, false);
+            new Grok(bank, pattern, false, logger::warn );
         });
         assertEquals("circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " +
             "via patterns [NAME2=>NAME3=>NAME4]", e.getMessage());
@@ -262,7 +264,7 @@ public class GrokTests extends ESTestCase {
 
     public void testBooleanCaptures() {
         String pattern = "%{WORD:name}=%{WORD:status:boolean}";
-        Grok g = new Grok(basePatterns, pattern);
+        Grok g = new Grok(basePatterns, pattern, logger::warn);
 
         String text = "active=true";
         Map<String, Object> expected = new HashMap<>();
@@ -279,7 +281,7 @@ public class GrokTests extends ESTestCase {
         bank.put("NUMBER", "(?:%{BASE10NUM})");
 
         String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}";
-        Grok g = new Grok(bank, pattern);
+        Grok g = new Grok(bank, pattern, logger::warn);
 
         String text = "12009.34 20000000000 4820.092";
         Map<String, Object> expected = new HashMap<>();
@@ -297,7 +299,7 @@ public class GrokTests extends ESTestCase {
         bank.put("NUMBER", "(?:%{BASE10NUM})");
 
         String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
-        Grok g = new Grok(bank, pattern);
+        Grok g = new Grok(bank, pattern, logger::warn);
 
         String text = "12009.34 200 9032";
         Map<String, Object> expected = new HashMap<>();
@@ -312,7 +314,7 @@ public class GrokTests extends ESTestCase {
         String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
                 "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
                 "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
-        Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}");
+        Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}", logger::warn);
         Map<String, Object> matches = grok.captures(logLine);
 
         assertEquals("31.184.238.164", matches.get("clientip"));
@@ -372,7 +374,7 @@ public class GrokTests extends ESTestCase {
         String pattern = "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] \"%{WORD:verb} %{DATA:request} " +
                 "HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}";
 
-        Grok grok = new Grok(bank, pattern);
+        Grok grok = new Grok(bank, pattern, logger::warn);
 
         Map<String, Object> expected = new HashMap<>();
         expected.put("clientip", "83.149.9.216");
@@ -396,14 +398,14 @@ public class GrokTests extends ESTestCase {
     public void testNoMatch() {
         Map<String, String> bank = new HashMap<>();
         bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
-        Grok grok = new Grok(bank, "%{MONTHDAY:greatday}");
+        Grok grok = new Grok(bank, "%{MONTHDAY:greatday}", logger::warn);
         assertThat(grok.captures("nomatch"), nullValue());
     }
 
     public void testMultipleNamedCapturesWithSameName() {
         Map<String, String> bank = new HashMap<>();
         bank.put("SINGLEDIGIT", "[0-9]");
-        Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}");
+        Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
 
         Map<String, Object> expected = new HashMap<>();
         expected.put("num", "1");
@@ -430,7 +432,8 @@ public class GrokTests extends ESTestCase {
             });
             t.start();
         };
-        Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler));
+        Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler),
+            logger::warn);
         Exception e = expectThrows(RuntimeException.class, () -> grok.captures(logLine));
         run.set(false);
         assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms"));
@@ -470,24 +473,32 @@ public class GrokTests extends ESTestCase {
     }
 
     public void testUnsupportedBracketsInFieldName() {
-        Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}");
+        Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}", logger::warn);
         Map<String, Object> matches = grok.captures("line");
         assertNull(matches);
     }
 
     public void testJavaClassPatternWithUnderscore() {
-        Grok grok = new Grok(basePatterns, "%{JAVACLASS}");
+        Grok grok = new Grok(basePatterns, "%{JAVACLASS}", logger::warn);
         assertThat(grok.match("Test_Class.class"), is(true));
     }
 
     public void testJavaFilePatternWithSpaces() {
-        Grok grok = new Grok(basePatterns, "%{JAVAFILE}");
+        Grok grok = new Grok(basePatterns, "%{JAVAFILE}", logger::warn);
         assertThat(grok.match("Test Class.java"), is(true));
     }
 
+    public void testLogCallBack(){
+        AtomicReference<String> message = new AtomicReference<>();
+        Grok grok = new Grok(basePatterns, ".*\\[.*%{SPACE}*\\].*", message::set);
+        grok.match("[foo]");
+        //this message comes from Joni, so updates to Joni may change the expectation
+        assertThat(message.get(), containsString("regular expression has redundant nested repeat operator"));
+    }
+
     private void assertGrokedField(String fieldName) {
         String line = "foo";
-        Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}");
+        Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}", logger::warn);
         Map<String, Object> matches = grok.captures(line);
         assertEquals(line, matches.get(fieldName));
     }

+ 13 - 0
modules/ingest-common/build.gradle

@@ -40,3 +40,16 @@ testClusters.integTest {
   // (this is because the integTest node is not using default distribution, but only the minimal number of required modules)
   module project(':modules:lang-mustache').tasks.bundlePlugin.archiveFile
 }
+
+thirdPartyAudit.ignoreMissingClasses(
+  // from log4j
+  'org.osgi.framework.AdaptPermission',
+  'org.osgi.framework.AdminPermission',
+  'org.osgi.framework.Bundle',
+  'org.osgi.framework.BundleActivator',
+  'org.osgi.framework.BundleContext',
+  'org.osgi.framework.BundleEvent',
+  'org.osgi.framework.SynchronousBundleListener',
+  'org.osgi.framework.wiring.BundleWire',
+  'org.osgi.framework.wiring.BundleWiring'
+)

+ 7 - 1
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java

@@ -19,6 +19,8 @@
 
 package org.elasticsearch.ingest.common;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.elasticsearch.grok.Grok;
 import org.elasticsearch.grok.MatcherWatchdog;
 import org.elasticsearch.ingest.AbstractProcessor;
@@ -36,6 +38,7 @@ public final class GrokProcessor extends AbstractProcessor {
 
     public static final String TYPE = "grok";
     private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index";
+    private static final Logger logger = LogManager.getLogger(GrokProcessor.class);
 
     private final String matchField;
     private final List<String> matchPatterns;
@@ -48,9 +51,12 @@ public final class GrokProcessor extends AbstractProcessor {
         super(tag);
         this.matchField = matchField;
         this.matchPatterns = matchPatterns;
-        this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog);
+        this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::debug);
         this.traceMatch = traceMatch;
         this.ignoreMissing = ignoreMissing;
+        // Joni warnings are only emitted on an attempt to match, and the warning emitted for every call to match which is too verbose
+        // so here we emit a warning (if there is one) to the logfile at warn level on construction / processor creation.
+        new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::warn).match("___nomatch___");
     }
 
     @Override

+ 8 - 4
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java

@@ -5,6 +5,8 @@
  */
 package org.elasticsearch.xpack.ml.filestructurefinder;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.grok.Grok;
@@ -27,6 +29,7 @@ import java.util.stream.Stream;
 
 public final class FileStructureUtils {
 
+    private static final Logger logger = LogManager.getLogger(FileStructureUtils.class);
     public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
     public static final String MAPPING_TYPE_SETTING = "type";
     public static final String MAPPING_FORMAT_SETTING = "format";
@@ -61,10 +64,11 @@ public final class FileStructureUtils {
     private static final int NUM_TOP_HITS = 10;
     // NUMBER Grok pattern doesn't support scientific notation, so we extend it
     private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
-        TimeoutChecker.watchdog);
-    private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog);
-    private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog);
-    private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog);
+        TimeoutChecker.watchdog, logger::warn);
+    private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog, logger::warn);
+    private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog, logger::warn);
+    private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog,
+        logger::warn);
     private static final int KEYWORD_MAX_LEN = 256;
     private static final int KEYWORD_MAX_SPACES = 5;
 

+ 6 - 3
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java

@@ -5,6 +5,8 @@
  */
 package org.elasticsearch.xpack.ml.filestructurefinder;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.grok.Grok;
 import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
@@ -30,6 +32,7 @@ import java.util.stream.Collectors;
  */
 public final class GrokPatternCreator {
 
+    private static final Logger logger = LogManager.getLogger(GrokPatternCreator.class);
     private static final Map<Character, Boolean> PUNCTUATION_OR_SPACE_NEEDS_ESCAPING;
     static {
         HashMap<Character, Boolean> punctuationOrSpaceNeedsEscaping = new HashMap<>();
@@ -498,7 +501,7 @@ public final class GrokPatternCreator {
             grok = new Grok(grokPatternDefinitions,
                 "(?m)%{DATA:" + PREFACE + "}" + Objects.requireNonNull(preBreak) +
                     "%{" + grokPatternName + ":" + VALUE + "}" + Objects.requireNonNull(postBreak) + "%{GREEDYDATA:" + EPILOGUE + "}",
-                TimeoutChecker.watchdog);
+                TimeoutChecker.watchdog, logger::warn);
         }
 
         @Override
@@ -592,7 +595,7 @@ public final class GrokPatternCreator {
                 throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
             }
             Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
-                fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
+                fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
             Collection<String> values = new ArrayList<>();
             for (String snippet : snippets) {
                 Map<String, Object> captures = grok.captures(snippet);
@@ -666,7 +669,7 @@ public final class GrokPatternCreator {
         private FullMatchGrokPatternCandidate(String grokPattern, String timeField, Map<String, String> grokPatternDefinitions) {
             this.grokPattern = grokPattern;
             this.timeField = timeField;
-            grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog);
+            grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog, logger::warn);
         }
 
         public String getTimeField() {

+ 6 - 2
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java

@@ -5,6 +5,8 @@
  */
 package org.elasticsearch.xpack.ml.filestructurefinder;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.SuppressForbidden;
 import org.elasticsearch.common.collect.Tuple;
@@ -48,6 +50,7 @@ public final class TimestampFormatFinder {
     private static final String PREFACE = "preface";
     private static final String EPILOGUE = "epilogue";
 
+    private static final Logger logger = LogManager.getLogger(TimestampFormatFinder.class);
     private static final String PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX = "\\|()[]{}^$.*?";
     private static final String FRACTIONAL_SECOND_SEPARATORS = ":.,";
     private static final char INDETERMINATE_FIELD_PLACEHOLDER = '?';
@@ -1400,8 +1403,9 @@ public final class TimestampFormatFinder {
             this.strictGrokPattern = Objects.requireNonNull(strictGrokPattern);
             // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
             this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
-                "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
-            this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog);
+                "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
+            this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog,
+                logger::warn);
             this.outputGrokPatternName = Objects.requireNonNull(outputGrokPatternName);
             this.quickRuleOutBitSets = quickRuleOutPatterns.stream().map(TimestampFormatFinder::stringToNumberPosBitSet)
                 .collect(Collectors.toList());

+ 1 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java

@@ -272,7 +272,7 @@ public final class GrokPatternCreator {
             this.grokPatternName = grokPatternName;
             this.fieldName = fieldName;
             this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" +
-                    postBreak + "%{GREEDYDATA:" + EPILOGUE + "}");
+                    postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", logger::warn);
         }
     }
 }

+ 1 - 1
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java

@@ -80,7 +80,7 @@ public class TimeoutCheckerTests extends FileStructureTestCase {
     }
 
     public void testGrokCaptures() throws Exception {
-        Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog);
+        Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog, logger::warn);
         TimeValue timeout = TimeValue.timeValueMillis(1);
         try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {