7 gadi atpakaļ · 50c34b2a9b
--- a/x-pack/docs/en/rest-api/ml/get-category.asciidoc
+++ b/x-pack/docs/en/rest-api/ml/get-category.asciidoc
@@ -62,11 +62,11 @@ roles provide these privileges. For more information, see
 
				 ==== Examples
			
 
				 
			
 
				 The following example gets information about one category for the
			
 
				-`it_ops_new_logs` job:
			
 
				+`esxi_log` job:
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-GET _xpack/ml/anomaly_detectors/it_ops_new_logs/results/categories
			
 
				+GET _xpack/ml/anomaly_detectors/esxi_log/results/categories
			
 
				 {
			
 
				   "page":{
			
 
				     "size": 1
			
@@ -83,14 +83,18 @@ In this example, the API returns the following information:
 
				   "count": 11,
			
 
				   "categories": [
			
 
				     {
			
 
				-      "job_id": "it_ops_new_logs",
			
 
				-      "category_id": 1,
			
 
				-      "terms": "Actual Transaction Already Voided Reversed hostname dbserver.acme.com physicalhost esxserver1.acme.com vmhost app1.acme.com",
			
 
				-      "regex": ".*?Actual.+?Transaction.+?Already.+?Voided.+?Reversed.+?hostname.+?dbserver.acme.com.+?physicalhost.+?esxserver1.acme.com.+?vmhost.+?app1.acme.com.*",
			
 
				-      "max_matching_length": 137,
			
 
				-      "examples": [
			
 
				-        "Actual Transaction Already Voided / Reversed;hostname=dbserver.acme.com;physicalhost=esxserver1.acme.com;vmhost=app1.acme.com"
			
 
				-      ]
			
 
				+      "job_id" : "esxi_log",
			
 
				+      "category_id" : 1,
			
 
				+      "terms" : "Vpxa verbose vpxavpxaInvtVm opID VpxaInvtVmChangeListener Guest DiskInfo Changed",
			
 
				+      "regex" : ".*?Vpxa.+?verbose.+?vpxavpxaInvtVm.+?opID.+?VpxaInvtVmChangeListener.+?Guest.+?DiskInfo.+?Changed.*",
			
 
				+      "max_matching_length": 154,
			
 
				+      "examples" : [
			
 
				+        "Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed",
			
 
				+        "Oct 19 17:04:45 esxi2.acme.com Vpxa: [3CA66B90 verbose 'vpxavpxaInvtVm' opID=WFU-33927856] [VpxaInvtVmChangeListener] Guest DiskInfo Changed",
			
 
				+        "Oct 19 17:04:51 esxi1.acme.com Vpxa: [FFDBAB90 verbose 'vpxavpxaInvtVm' opID=WFU-25e0d447] [VpxaInvtVmChangeListener] Guest DiskInfo Changed",
			
 
				+        "Oct 19 17:04:58 esxi2.acme.com Vpxa: [FFDDBB90 verbose 'vpxavpxaInvtVm' opID=WFU-bbff0134] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"
			
 
				+      ],
			
 
				+      "grok_pattern" : ".*?%{SYSLOGTIMESTAMP:timestamp}.+?Vpxa.+?%{BASE16NUM:field}.+?verbose.+?vpxavpxaInvtVm.+?opID.+?VpxaInvtVmChangeListener.+?Guest.+?DiskInfo.+?Changed.*"
			
 
				     }
			
 
				   ]
			
 
				 }
			
--- a/x-pack/docs/en/rest-api/ml/resultsresource.asciidoc
+++ b/x-pack/docs/en/rest-api/ml/resultsresource.asciidoc
@@ -405,6 +405,13 @@ A category resource has the following properties:
 
				 `examples`::
			
 
				   (array) A list of examples of actual values that matched the category.
			
 
				 
			
 
				+`grok_pattern`::
			
 
				+  experimental[] (string) A Grok pattern that could be used in Logstash or an
			
 
				+  Ingest Pipeline to extract fields from messages that match the category. This
			
 
				+  field is experimental and may be changed or removed in a future release. The
			
 
				+  Grok patterns that are found are not optimal, but are often a good starting
			
 
				+  point for manual tweaking.
			
 
				+
			
 
				 `job_id`::
			
 
				   (string) The unique identifier for the job that these results belong to.
			
 
				 
			
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/CategoryDefinition.java
@@ -5,6 +5,7 @@
 
				  */
			
 
				 package org.elasticsearch.xpack.core.ml.job.results;
			
 
				 
			
 
				+import org.elasticsearch.Version;
			
 
				 import org.elasticsearch.common.ParseField;
			
 
				 import org.elasticsearch.common.io.stream.StreamInput;
			
 
				 import org.elasticsearch.common.io.stream.StreamOutput;
			
@@ -34,6 +35,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				     public static final ParseField REGEX = new ParseField("regex");
			
 
				     public static final ParseField MAX_MATCHING_LENGTH = new ParseField("max_matching_length");
			
 
				     public static final ParseField EXAMPLES = new ParseField("examples");
			
 
				+    public static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
			
 
				 
			
 
				     // Used for QueryPage
			
 
				     public static final ParseField RESULTS_FIELD = new ParseField("categories");
			
@@ -51,6 +53,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				         parser.declareString(CategoryDefinition::setRegex, REGEX);
			
 
				         parser.declareLong(CategoryDefinition::setMaxMatchingLength, MAX_MATCHING_LENGTH);
			
 
				         parser.declareStringArray(CategoryDefinition::setExamples, EXAMPLES);
			
 
				+        parser.declareString(CategoryDefinition::setGrokPattern, GROK_PATTERN);
			
 
				 
			
 
				         return parser;
			
 
				     }
			
@@ -61,6 +64,7 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				     private String regex = "";
			
 
				     private long maxMatchingLength = 0L;
			
 
				     private final Set<String> examples;
			
 
				+    private String grokPattern;
			
 
				 
			
 
				     public CategoryDefinition(String jobId) {
			
 
				         this.jobId = jobId;
			
@@ -74,6 +78,9 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				         regex = in.readString();
			
 
				         maxMatchingLength = in.readLong();
			
 
				         examples = new TreeSet<>(in.readList(StreamInput::readString));
			
 
				+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
			
 
				+            grokPattern = in.readOptionalString();
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				     @Override
			
@@ -84,6 +91,9 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				         out.writeString(regex);
			
 
				         out.writeLong(maxMatchingLength);
			
 
				         out.writeStringList(new ArrayList<>(examples));
			
 
				+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
			
 
				+            out.writeOptionalString(grokPattern);
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				     public String getJobId() {
			
@@ -139,6 +149,14 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				         examples.add(example);
			
 
				     }
			
 
				 
			
 
				+    public String getGrokPattern() { // accessor for the optional grok_pattern value
			
 
				+        return grokPattern; // may be null: the field has no initializer and toXContent skips it when null
			
 
				+    }
			
 
				+
			
 
				+    public void setGrokPattern(String grokPattern) { // also registered with the parser for the grok_pattern field
			
 
				+        this.grokPattern = grokPattern; // stored as given, no validation
			
 
				+    }
			
 
				+
			
 
				     @Override
			
 
				     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
			
 
				         builder.startObject();
			
@@ -148,6 +166,9 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				         builder.field(REGEX.getPreferredName(), regex);
			
 
				         builder.field(MAX_MATCHING_LENGTH.getPreferredName(), maxMatchingLength);
			
 
				         builder.field(EXAMPLES.getPreferredName(), examples);
			
 
				+        if (grokPattern != null) {
			
 
				+            builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
			
 
				+        }
			
 
				         builder.endObject();
			
 
				         return builder;
			
 
				     }
			
@@ -166,11 +187,12 @@ public class CategoryDefinition implements ToXContentObject, Writeable {
 
				                 && Objects.equals(this.terms, that.terms)
			
 
				                 && Objects.equals(this.regex, that.regex)
			
 
				                 && Objects.equals(this.maxMatchingLength, that.maxMatchingLength)
			
 
				-                && Objects.equals(this.examples, that.examples);
			
 
				+                && Objects.equals(this.examples, that.examples)
			
 
				+                && Objects.equals(this.grokPattern, that.grokPattern);
			
 
				     }
			
 
				 
			
 
				     @Override
			
 
				     public int hashCode() {
			
 
				-        return Objects.hash(jobId, categoryId, terms, regex, maxMatchingLength, examples);
			
 
				+        return Objects.hash(jobId, categoryId, terms, regex, maxMatchingLength, examples, grokPattern);
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/ml/build.gradle
+++ b/x-pack/plugin/ml/build.gradle
@@ -46,6 +46,7 @@ dependencies {
 
				     testCompile project(path: xpackModule('security'), configuration: 'testArtifacts')
			
 
				 
			
 
				     // ml deps
			
 
				+    compile project(':libs:grok')
			
 
				     compile 'net.sf.supercsv:super-csv:2.4.0'
			
 
				     nativeBundle "org.elasticsearch.ml:ml-cpp:${project.version}@zip"
			
 
				     testCompile 'org.ini4j:ini4j:0.5.2'
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetCategoriesAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetCategoriesAction.java
@@ -41,7 +41,7 @@ public class TransportGetCategoriesAction extends HandledTransportAction<GetCate
 
				 
			
 
				         Integer from = request.getPageParams() != null ? request.getPageParams().getFrom() : null;
			
 
				         Integer size = request.getPageParams() != null ? request.getPageParams().getSize() : null;
			
 
				-        jobProvider.categoryDefinitions(request.getJobId(), request.getCategoryId(), from, size,
			
 
				+        jobProvider.categoryDefinitions(request.getJobId(), request.getCategoryId(), true, from, size,
			
 
				                 r -> listener.onResponse(new GetCategoriesAction.Response(r)), listener::onFailure, client);
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java
@@ -0,0 +1,243 @@
 
				+/*
			
 
				+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
			
 
				+ * or more contributor license agreements. Licensed under the Elastic License;
			
 
				+ * you may not use this file except in compliance with the Elastic License.
			
 
				+ */
			
 
				+package org.elasticsearch.xpack.ml.job.categorization;
			
 
				+
			
 
				+import org.elasticsearch.common.logging.Loggers;
			
 
				+import org.elasticsearch.grok.Grok;
			
 
				+
			
 
				+import java.util.ArrayList;
			
 
				+import java.util.Arrays;
			
 
				+import java.util.Collection;
			
 
				+import java.util.HashMap;
			
 
				+import java.util.List;
			
 
				+import java.util.Map;
			
 
				+import java.util.regex.Matcher;
			
 
				+import java.util.regex.Pattern;
			
 
				+
			
 
				+
			
 
				+/**
			
 
				+ * Creates Grok patterns that will match all the examples in a given category_definition.
			
 
				+ *
			
 
				+ * The choice of field names is quite primitive.  The intention is that a human will edit these.
			
 
				+ */
			
 
				+public final class GrokPatternCreator {
			
 
				+
			
 
				+    private static String PREFACE = "preface";
			
 
				+    private static String EPILOGUE = "epilogue";
			
 
				+
			
 
				+    /**
			
 
				+     * The first match in this list will be chosen, so it needs to be ordered
			
 
				+     * such that more generic patterns come after more specific patterns.
			
 
				+     */
			
 
				+    private static final List<GrokPatternCandidate> ORDERED_CANDIDATE_GROK_PATTERNS = Arrays.asList(
			
 
				+            new GrokPatternCandidate("TIMESTAMP_ISO8601", "timestamp"),
			
 
				+            new GrokPatternCandidate("DATESTAMP_RFC822", "timestamp"),
			
 
				+            new GrokPatternCandidate("DATESTAMP_RFC2822", "timestamp"),
			
 
				+            new GrokPatternCandidate("DATESTAMP_OTHER", "timestamp"),
			
 
				+            new GrokPatternCandidate("DATESTAMP_EVENTLOG", "timestamp"),
			
 
				+            new GrokPatternCandidate("SYSLOGTIMESTAMP", "timestamp"),
			
 
				+            new GrokPatternCandidate("HTTPDATE", "timestamp"),
			
 
				+            new GrokPatternCandidate("CATALINA_DATESTAMP", "timestamp"),
			
 
				+            new GrokPatternCandidate("TOMCAT_DATESTAMP", "timestamp"),
			
 
				+            new GrokPatternCandidate("CISCOTIMESTAMP", "timestamp"),
			
 
				+            new GrokPatternCandidate("DATE", "date"),
			
 
				+            new GrokPatternCandidate("TIME", "time"),
			
 
				+            new GrokPatternCandidate("LOGLEVEL", "loglevel"),
			
 
				+            new GrokPatternCandidate("URI", "uri"),
			
 
				+            new GrokPatternCandidate("UUID", "uuid"),
			
 
				+            new GrokPatternCandidate("MAC", "macaddress"),
			
 
				+            // Can't use \b as the breaks, because slashes are not "word" characters
			
 
				+            new GrokPatternCandidate("PATH", "path", "(?<!\\w)", "(?!\\w)"),
			
 
				+            new GrokPatternCandidate("EMAILADDRESS", "email"),
			
 
				+            // TODO: would be nice to have IPORHOST here, but HOST matches almost all words
			
 
				+            new GrokPatternCandidate("IP", "ipaddress"),
			
 
				+            // This already includes pre/post break conditions
			
 
				+            new GrokPatternCandidate("QUOTEDSTRING", "field", "", ""),
			
 
				+            // Can't use \b as the break before, because it doesn't work for negative numbers (the
			
 
				+            // minus sign is not a "word" character)
			
 
				+            new GrokPatternCandidate("NUMBER", "field", "(?<!\\w)"),
			
 
				+            // Disallow +, - and . before hex numbers, otherwise this pattern will pick up base 10
			
 
				+            // numbers that NUMBER rejected due to preceding characters
			
 
				+            new GrokPatternCandidate("BASE16NUM", "field", "(?<![\\w.+-])")
			
 
				+            // TODO: also unfortunately can't have USERNAME in the list as it matches too broadly
			
 
				+            // Fixing these problems with overly broad matches would require some extra intelligence
			
 
				+            // to be added to remove inappropriate matches.  One idea would be to use a dictionary,
			
 
				+            // but that doesn't necessarily help as "jay" could be a username but is also a dictionary
			
 
				+            // word (plus there's the international headache with relying on dictionaries).  Similarly,
			
 
				+            // hostnames could also be dictionary words - I've worked on machines called "hippo" and
			
 
				+            // "scarf" in the past.  Another idea would be to look at the adjacent characters and
			
 
				+            // apply some heuristic based on those.
			
 
				+    );
			
 
				+
			
 
				+    private GrokPatternCreator() { // not instantiable: the class only exposes static methods
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Given a category definition regex and a collection of examples from the category, return
			
 
				+     * a grok pattern that will match the category and pull out any likely fields.  The extracted
			
 
				+     * fields are given pretty generic names, but unique within the grok pattern provided.  The
			
 
				+     * expectation is that a user will adjust the extracted field names based on their domain
			
 
				+     * knowledge.
			
 
				+     *
			
 
				+     * @param jobId    Only used to tag the error that is logged when an example does not match.
			
 
				+     * @param regex    The category regex: fixed tokens separated by "dot star" / "dot plus" wildcards,
			
 
				+     *                 each optionally followed by a question mark.
			
 
				+     * @param examples Example messages for the category.  An example that the regex does not match
			
 
				+     *                 contributes nothing to the result (and trips an assertion when assertions are on).
			
 
				+     * @return The fixed tokens of the regex, joined by a mix of wildcards and Grok captures.
			
 
				+     */
			
 
				+    public static String findBestGrokMatchFromExamples(String jobId, String regex, Collection<String> examples) {
			
 
				+
			
 
				+        // The first string in this array will end up being the empty string, and it doesn't correspond
			
 
				+        // to an "in between" bit.  Although it could be removed for "neatness", it actually makes the
			
 
				+        // loops below slightly neater if it's left in.
			
 
				+        //
			
 
				+        // E.g., ".*?cat.+?sat.+?mat.*" -> [ "", "cat", "sat", "mat" ]
			
 
				+        String[] fixedRegexBits = regex.split("\\.[*+]\\??");
			
 
				+
			
 
				+        // Create a pattern that will capture the bits in between the fixed parts of the regex
			
 
				+        //
			
 
				+        // E.g., ".*?cat.+?sat.+?mat.*" -> Pattern (.*?)cat(.+?)sat(.+?)mat(.*)
			
 
				+        Pattern exampleProcessor = Pattern.compile(regex.replaceAll("(\\.[*+]\\??)", "($1)"), Pattern.DOTALL);
			
 
				+
			
 
				+        List<Collection<String>> groupsMatchesFromExamples = new ArrayList<>(fixedRegexBits.length);
			
 
				+        for (int i = 0; i < fixedRegexBits.length; ++i) {
			
 
				+            groupsMatchesFromExamples.add(new ArrayList<>(examples.size()));
			
 
				+        }
			
 
				+        for (String example : examples) {
			
 
				+            Matcher matcher = exampleProcessor.matcher(example);
			
 
				+            if (matcher.matches()) {
			
 
				+                assert matcher.groupCount() == fixedRegexBits.length;
			
 
				+                // E.g., if the input regex was ".*?cat.+?sat.+?mat.*" then the example
			
 
				+                // "the cat sat on the mat" will result in "the ", " ", " on the ", and ""
			
 
				+                // being added to the 4 "in between" collections in that order
			
 
				+                for (int groupNum = 1; groupNum <= matcher.groupCount(); ++groupNum) {
			
 
				+                    groupsMatchesFromExamples.get(groupNum - 1).add(matcher.group(groupNum));
			
 
				+                }
			
 
				+            } else {
			
 
				+                // We should never get here.  If we do it implies a bug in the original categorization,
			
 
				+                // as it's produced a regex that doesn't match the examples.
			
 
				+                assert matcher.matches() : exampleProcessor.pattern() + " did not match " + example;
			
 
				+                Loggers.getLogger(GrokPatternCreator.class).error("[{}] Pattern [{}] did not match example [{}]", jobId,
			
 
				+                        exampleProcessor.pattern(), example);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+        // Finally, for each collection of "in between" bits we look for the best Grok pattern and incorporate
			
 
				+        // it into the overall Grok pattern that will match each example in its entirety
			
 
				+        for (int inBetweenBitNum = 0; inBetweenBitNum < groupsMatchesFromExamples.size(); ++inBetweenBitNum) {
			
 
				+            // Remember (from the first comment in this method) that the first element in this array is
			
 
				+            // always the empty string
			
 
				+            overallGrokPatternBuilder.append(fixedRegexBits[inBetweenBitNum]);
			
 
				+            appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, inBetweenBitNum == 0,
			
 
				+                    inBetweenBitNum == fixedRegexBits.length - 1, groupsMatchesFromExamples.get(inBetweenBitNum));
			
 
				+        }
			
 
				+        return overallGrokPatternBuilder.toString();
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Given a collection of strings, work out which (if any) of the grok patterns we're allowed
			
 
				+     * to use matches it best.  Then append the appropriate grok language to represent that finding
			
 
				+     * onto the supplied string builder.
			
 
				+     *
			
 
				+     * "Best" means the first entry of the ordered candidate list that matches every string.  When
			
 
				+     * one is found, the text before and after the match is processed by recursive calls, so a
			
 
				+     * single call can append several captures.  When none is found a wildcard is appended instead.
			
 
				+     *
			
 
				+     * @param fieldNameCountStore       Counts of field names used so far, updated to keep names unique.
			
 
				+     * @param overallGrokPatternBuilder Receives the generated pattern text.
			
 
				+     * @param isFirst                   Is this the leading section of the overall pattern?
			
 
				+     * @param isLast                    Is this the trailing section of the overall pattern?
			
 
				+     * @param mustMatchStrings          The strings that the chosen pattern must all match.
			
 
				+     */
			
 
				+    static void appendBestGrokMatchForStrings(Map<String, Integer> fieldNameCountStore, StringBuilder overallGrokPatternBuilder,
			
 
				+                                              boolean isFirst, boolean isLast, Collection<String> mustMatchStrings) {
			
 
				+
			
 
				+        GrokPatternCandidate bestCandidate = null;
			
 
				+        if (mustMatchStrings.isEmpty() == false) { // an empty collection never selects a candidate
			
 
				+            for (GrokPatternCandidate candidate : ORDERED_CANDIDATE_GROK_PATTERNS) {
			
 
				+                if (mustMatchStrings.stream().allMatch(candidate.grok::match)) {
			
 
				+                    bestCandidate = candidate;
			
 
				+                    break; // candidates are ordered most specific first, so the first hit wins
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        if (bestCandidate == null) {
			
 
				+            if (isLast) {
			
 
				+                overallGrokPatternBuilder.append(".*"); // trailing section: greedy wildcard
			
 
				+            } else if (isFirst || mustMatchStrings.stream().anyMatch(String::isEmpty)) {
			
 
				+                overallGrokPatternBuilder.append(".*?"); // zero-length text must be allowed here
			
 
				+            } else {
			
 
				+                overallGrokPatternBuilder.append(".+?"); // no string in the collection is empty
			
 
				+            }
			
 
				+        } else {
			
 
				+            Collection<String> prefaces = new ArrayList<>();
			
 
				+            Collection<String> epilogues = new ArrayList<>();
			
 
				+            populatePrefacesAndEpilogues(mustMatchStrings, bestCandidate.grok, prefaces, epilogues);
			
 
				+            appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, isFirst, false, prefaces); // text before the match is never last
			
 
				+            overallGrokPatternBuilder.append("%{").append(bestCandidate.grokPatternName).append(':')
			
 
				+                    .append(buildFieldName(fieldNameCountStore, bestCandidate.fieldName)).append('}');
			
 
				+            appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, isLast, epilogues); // text after the match is never first
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Splits each supplied string around the section the Grok pattern matches.  The text in
			
 
				+     * front of the match is appended to <code>prefaces</code> and the text behind it to
			
 
				+     * <code>epilogues</code>, one entry per input string and in input order.
			
 
				+     */
			
 
				+    static void populatePrefacesAndEpilogues(Collection<String> matchingStrings, Grok grok, Collection<String> prefaces,
			
 
				+                                             Collection<String> epilogues) {
			
 
				+        matchingStrings.forEach(text -> {
			
 
				+            Map<String, Object> captured = grok.captures(text);
			
 
				+            // A null result means "no match"; callers are expected to have checked that
			
 
				+            // the pattern matches every string before calling this method.
			
 
				+            assert captured != null;
			
 
				+            Object before = captured.getOrDefault(PREFACE, "");
			
 
				+            Object after = captured.getOrDefault(EPILOGUE, "");
			
 
				+            prefaces.add(before.toString());
			
 
				+            epilogues.add(after.toString());
			
 
				+        });
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Hands out unique field names derived from a base name.
			
 
				+     * The base name itself is returned the first time it is requested; every later
			
 
				+     * request for the same base name gets the running count appended ("2", "3", ...).
			
 
				+     */
			
 
				+    static String buildFieldName(Map<String, Integer> fieldNameCountStore, String fieldName) {
			
 
				+        int timesRequested = fieldNameCountStore.merge(fieldName, 1, Integer::sum);
			
 
				+        return timesRequested > 1 ? fieldName + timesRequested : fieldName;
			
 
				+    }
			
 
				+
			
 
				+    static class GrokPatternCandidate { // a named Grok pattern plus the compiled Grok used to test strings against it
			
 
				+
			
 
				+        final String grokPatternName;
			
 
				+        final String fieldName;
			
 
				+        final Grok grok;
			
 
				+
			
 
				+        /**
			
 
				+         * Pre/post breaks default to \b, but this may not be appropriate for Grok patterns that start or
			
 
				+         * end with a character that is not a "word" character (word characters being letters, numbers
			
 
				+         * and underscore).  For such patterns use one of the other constructors.
			
 
				+         *
			
 
				+         * In cases where the Grok pattern defined by Logstash already includes conditions on what must
			
 
				+         * come before and after the match, use one of the other constructors and specify an empty string
			
 
				+         * for the pre and/or post breaks.
			
 
				+         * @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash.
			
 
				+         * @param fieldName       Name of the field to extract from the match.
			
 
				+         */
			
 
				+        GrokPatternCandidate(String grokPatternName, String fieldName) {
			
 
				+            this(grokPatternName, fieldName, "\\b", "\\b");
			
 
				+        }
			
 
				+
			
 
				+        GrokPatternCandidate(String grokPatternName, String fieldName, String preBreak) { // custom pre-break, default \b post-break
			
 
				+            this(grokPatternName, fieldName, preBreak, "\\b");
			
 
				+        }
			
 
				+
			
 
				+        /**
			
 
				+         * Compiles an expression made of a DATA capture (the preface), the pre-break, the named
			
 
				+         * pattern, the post-break and a GREEDYDATA capture (the epilogue), so that a successful
			
 
				+         * match also exposes the text on either side of the matched section.
			
 
				+         *
			
 
				+         * @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash.
			
 
				+         * @param fieldName       Name of the field to extract from the match.
			
 
				+         * @param preBreak        Only consider the match if it's broken from the previous text by this.
			
 
				+         * @param postBreak       Only consider the match if it's broken from the following text by this.
			
 
				+         */
			
 
				+        GrokPatternCandidate(String grokPatternName, String fieldName, String preBreak, String postBreak) {
			
 
				+            this.grokPatternName = grokPatternName;
			
 
				+            this.fieldName = fieldName;
			
 
				+            this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" +
			
 
				+                    postBreak + "%{GREEDYDATA:" + EPILOGUE + "}");
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobProvider.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/persistence/JobProvider.java
@@ -98,6 +98,7 @@ import org.elasticsearch.xpack.core.ml.job.results.Result;
 
				 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
			
 
				 import org.elasticsearch.xpack.core.ml.utils.MlIndicesUtils;
			
 
				 import org.elasticsearch.xpack.core.security.support.Exceptions;
			
 
				+import org.elasticsearch.xpack.ml.job.categorization.GrokPatternCreator;
			
 
				 import org.elasticsearch.xpack.ml.job.persistence.InfluencersQueryBuilder.InfluencersQuery;
			
 
				 import org.elasticsearch.xpack.ml.job.process.autodetect.params.AutodetectParams;
			
 
				 
			
@@ -486,7 +487,7 @@ public class JobProvider {
 
				         }
			
 
				     }
			
 
				 
			
 
				-    private <T, U> T parseGetHit(GetResponse getResponse, BiFunction<XContentParser, U, T> objectParser, 
			
 
				+    private <T, U> T parseGetHit(GetResponse getResponse, BiFunction<XContentParser, U, T> objectParser,
			
 
				                                  Consumer<Exception> errorHandler) {
			
 
				         BytesReference source = getResponse.getSourceAsBytesRef();
			
 
				 
			
@@ -626,10 +627,11 @@ public class JobProvider {
 
				      * Get a page of {@linkplain CategoryDefinition}s for the given <code>jobId</code>.
			
 
				      * Uses a supplied client, so may run as the currently authenticated user
			
 
				      * @param jobId the job id
			
 
				+     * @param augment Should the category definition be augmented with a Grok pattern?
			
 
				      * @param from  Skip the first N categories. This parameter is for paging
			
 
				      * @param size  Take only this number of categories
			
 
				      */
			
 
				-    public void categoryDefinitions(String jobId, Long categoryId, Integer from, Integer size,
			
 
				+    public void categoryDefinitions(String jobId, Long categoryId, boolean augment, Integer from, Integer size,
			
 
				                                     Consumer<QueryPage<CategoryDefinition>> handler,
			
 
				                                     Consumer<Exception> errorHandler, Client client) {
			
 
				         if (categoryId != null && (from != null || size != null)) {
			
@@ -663,6 +665,9 @@ public class JobProvider {
 
				                              XContentParser parser = XContentFactory.xContent(XContentHelper.xContentType(source))
			
 
				                                      .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, stream)) {
			
 
				                             CategoryDefinition categoryDefinition = CategoryDefinition.LENIENT_PARSER.apply(parser, null);
			
 
				+                            if (augment) {
			
 
				+                                augmentWithGrokPattern(categoryDefinition);
			
 
				+                            }
			
 
				                             results.add(categoryDefinition);
			
 
				                         } catch (IOException e) {
			
 
				                             throw new ElasticsearchParseException("failed to parse category definition", e);
			
@@ -674,6 +679,17 @@ public class JobProvider {
 
				                 }, e -> errorHandler.accept(mapAuthFailure(e, jobId, GetCategoriesAction.NAME))), client::search);
			
 
				     }
			
 
				 
			
 
				+    // Attaches a Grok pattern to the category: empty when there are no examples or no regex
			
 
				+    // to work from, otherwise whatever GrokPatternCreator derives from the regex and examples.
			
 
				+    void augmentWithGrokPattern(CategoryDefinition categoryDefinition) {
			
 
				+        List<String> sampleMessages = categoryDefinition.getExamples();
			
 
				+        String categoryRegex = categoryDefinition.getRegex();
			
 
				+        boolean nothingToAnalyse = sampleMessages.isEmpty() || categoryRegex.isEmpty();
			
 
				+        String derivedPattern = nothingToAnalyse
			
 
				+            ? ""
			
 
				+            : GrokPatternCreator.findBestGrokMatchFromExamples(categoryDefinition.getJobId(), categoryRegex, sampleMessages);
			
 
				+        categoryDefinition.setGrokPattern(derivedPattern);
			
 
				+    }
			
 
				+
			
 
				     /**
			
 
				      * Search for anomaly records with the parameters in the
			
 
				      * {@link RecordsQueryBuilder}
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/AutodetectResultProcessorIT.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/integration/AutodetectResultProcessorIT.java
@@ -461,7 +461,7 @@ public class AutodetectResultProcessorIT extends MlSingleNodeTestCase {
 
				         AtomicReference<Exception> errorHolder = new AtomicReference<>();
			
 
				         AtomicReference<QueryPage<CategoryDefinition>> resultHolder = new AtomicReference<>();
			
 
				         CountDownLatch latch = new CountDownLatch(1);
			
 
				-        jobProvider.categoryDefinitions(JOB_ID, categoryId, null, null, r -> {
			
 
				+        jobProvider.categoryDefinitions(JOB_ID, categoryId, false, null, null, r -> {
			
 
				             resultHolder.set(r);
			
 
				             latch.countDown();
			
 
				         }, e -> {
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreatorTests.java
@@ -0,0 +1,232 @@
 
				+/*
			
 
				+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
			
 
				+ * or more contributor license agreements. Licensed under the Elastic License;
			
 
				+ * you may not use this file except in compliance with the Elastic License.
			
 
				+ */
			
 
				+package org.elasticsearch.xpack.ml.job.categorization;
			
 
				+
			
 
				+import org.elasticsearch.grok.Grok;
			
 
				+import org.elasticsearch.test.ESTestCase;
			
 
				+
			
 
				+import java.util.ArrayList;
			
 
				+import java.util.Arrays;
			
 
				+import java.util.Collection;
			
 
				+import java.util.HashMap;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+import static org.hamcrest.Matchers.containsInAnyOrder;
			
 
				+
			
 
				+public class GrokPatternCreatorTests extends ESTestCase {
			
 
				+
			
 
				+    public void testBuildFieldName() {
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        assertEquals("field", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field"));
			
 
				+        assertEquals("field2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field"));
			
 
				+        assertEquals("field3", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field"));
			
 
				+        assertEquals("timestamp", GrokPatternCreator.buildFieldName(fieldNameCountStore, "timestamp"));
			
 
				+        assertEquals("field4", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field"));
			
 
				+        assertEquals("uri", GrokPatternCreator.buildFieldName(fieldNameCountStore, "uri"));
			
 
				+        assertEquals("timestamp2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "timestamp"));
			
 
				+        assertEquals("field5", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field"));
			
 
				+    }
			
 
				+
			
 
				+    public void testPopulatePrefacesAndEpiloguesGivenTimestamp() {
			
 
				+
			
 
				+        Collection<String> matchingStrings = Arrays.asList("[2018-01-25T15:33:23] DEBUG ",
			
 
				+                "[2018-01-24T12:33:23] ERROR ",
			
 
				+                "junk [2018-01-22T07:33:23] INFO ",
			
 
				+                "[2018-01-21T03:33:23] DEBUG ");
			
 
				+        Grok grok = new GrokPatternCreator.GrokPatternCandidate("TIMESTAMP_ISO8601", "timestamp").grok;
			
 
				+        Collection<String> prefaces = new ArrayList<>();
			
 
				+        Collection<String> epilogues = new ArrayList<>();
			
 
				+
			
 
				+        GrokPatternCreator.populatePrefacesAndEpilogues(matchingStrings, grok, prefaces, epilogues);
			
 
				+
			
 
				+        assertThat(prefaces, containsInAnyOrder("[", "[", "junk [", "["));
			
 
				+        assertThat(epilogues, containsInAnyOrder("] DEBUG ", "] ERROR ", "] INFO ", "] DEBUG "));
			
 
				+    }
			
 
				+
			
 
				+    public void testPopulatePrefacesAndEpiloguesGivenEmailAddress() {
			
 
				+
			
 
				+        Collection<String> matchingStrings = Arrays.asList("before alice@acme.com after",
			
 
				+                "abc bob@acme.com xyz",
			
 
				+                "carol@acme.com");
			
 
				+        Grok grok = new GrokPatternCreator.GrokPatternCandidate("EMAILADDRESS", "email").grok;
			
 
				+        Collection<String> prefaces = new ArrayList<>();
			
 
				+        Collection<String> epilogues = new ArrayList<>();
			
 
				+
			
 
				+        GrokPatternCreator.populatePrefacesAndEpilogues(matchingStrings, grok, prefaces, epilogues);
			
 
				+
			
 
				+        assertThat(prefaces, containsInAnyOrder("before ", "abc ", ""));
			
 
				+        assertThat(epilogues, containsInAnyOrder(" after", " xyz", ""));
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenTimestampsAndLogLevels() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("[2018-01-25T15:33:23] DEBUG ",
			
 
				+                "[2018-01-24T12:33:23] ERROR ",
			
 
				+                "junk [2018-01-22T07:33:23] INFO ",
			
 
				+                "[2018-01-21T03:33:23] DEBUG ");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        assertEquals(".+?%{TIMESTAMP_ISO8601:timestamp}.+?%{LOGLEVEL:loglevel}.+?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenNumbersInBrackets() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("(-2)",
			
 
				+                "  (-3)",
			
 
				+                " (4)",
			
 
				+                " (-5) ");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        assertEquals(".+?%{NUMBER:field}.+?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenNegativeNumbersWithoutBreak() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("before-2 ",
			
 
				+                "prior to-3",
			
 
				+                "-4");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        // It seems sensible that we don't detect these suffixes as either base 10 or base 16 numbers
			
 
				+        assertEquals(".+?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenHexNumbers() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList(" abc",
			
 
				+                "  123",
			
 
				+                " -123",
			
 
				+                "1f is hex");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        assertEquals(".*?%{BASE16NUM:field}.*?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenHostnamesWithNumbers() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("<host1.1.p2ps:",
			
 
				+                "<host2.1.p2ps:");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        // We don't want the .1. in the middle to get detected as a hex number
			
 
				+        assertEquals(".+?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenEmailAddresses() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("before alice@acme.com after",
			
 
				+                "abc bob@acme.com xyz",
			
 
				+                "carol@acme.com");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        assertEquals(".*?%{EMAILADDRESS:email}.*?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenUris() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("main site https://www.elastic.co/ with trailing slash",
			
 
				+                "https://www.elastic.co/guide/en/x-pack/current/ml-configuring-categories.html#ml-configuring-categories is a section",
			
 
				+                "download today from https://www.elastic.co/downloads");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        assertEquals(".*?%{URI:uri}.*?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testAppendBestGrokMatchForStringsGivenPaths() {
			
 
				+
			
 
				+        Collection<String> mustMatchStrings = Arrays.asList("on Mac /Users/dave",
			
 
				+                "on Windows C:\\Users\\dave",
			
 
				+                "on Linux /home/dave");
			
 
				+
			
 
				+        Map<String, Integer> fieldNameCountStore = new HashMap<>();
			
 
				+        StringBuilder overallGrokPatternBuilder = new StringBuilder();
			
 
				+
			
 
				+        GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCountStore, overallGrokPatternBuilder, false, false, mustMatchStrings);
			
 
				+
			
 
				+        assertEquals(".+?%{PATH:path}.*?", overallGrokPatternBuilder.toString());
			
 
				+    }
			
 
				+
			
 
				+    public void testFindBestGrokMatchFromExamplesGivenNamedLogs() {
			
 
				+
			
 
				+        String regex = ".*?linux.+?named.+?error.+?unexpected.+?RCODE.+?REFUSED.+?resolving.*";
			
 
				+        Collection<String> examples = Arrays.asList(
			
 
				+                "Sep  8 11:55:06 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'elastic.slack.com/A/IN': 95.110.64.205#53",
			
 
				+                "Sep  8 11:55:08 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'slack-imgs.com/A/IN': 95.110.64.205#53",
			
 
				+                "Sep  8 11:55:35 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53",
			
 
				+                "Sep  8 11:55:42 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'b.akamaiedge.net/A/IN': 95.110.64.205#53");
			
 
				+
			
 
				+        assertEquals(".*?%{SYSLOGTIMESTAMP:timestamp}.+?linux.+?named.+?%{NUMBER:field}.+?error.+?" +
			
 
				+                "unexpected.+?RCODE.+?REFUSED.+?resolving.+?%{QUOTEDSTRING:field2}.+?%{IP:ipaddress}.+?%{NUMBER:field3}.*",
			
 
				+                GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples));
			
 
				+    }
			
 
				+
			
 
				+    public void testFindBestGrokMatchFromExamplesGivenCatalinaLogs() {
			
 
				+
			
 
				+        String regex = ".*?org\\.apache\\.tomcat\\.util\\.http\\.Parameters.+?processParameters.+?WARNING.+?Parameters.+?" +
			
 
				+                "Invalid.+?chunk.+?ignored.*";
			
 
				+        // The embedded newline verifies that the regular expressions we're using are compiled with Pattern.DOTALL
			
 
				+        Collection<String> examples = Arrays.asList(
			
 
				+                "Aug 29, 2009 12:03:33 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " +
			
 
				+                        "Invalid chunk ignored.",
			
 
				+                "Aug 29, 2009 12:03:40 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " +
			
 
				+                        "Invalid chunk ignored.",
			
 
				+                "Aug 29, 2009 12:03:45 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " +
			
 
				+                        "Invalid chunk ignored.",
			
 
				+                "Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " +
			
 
				+                        "Invalid chunk ignored.");
			
 
				+
			
 
				+        assertEquals(".*?%{CATALINA_DATESTAMP:timestamp}.+?org\\.apache\\.tomcat\\.util\\.http\\.Parameters.+?processParameters.+?" +
			
 
				+                "WARNING.+?Parameters.+?Invalid.+?chunk.+?ignored.*",
			
 
				+                GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples));
			
 
				+    }
			
 
				+
			
 
				+    public void testFindBestGrokMatchFromExamplesGivenMultiTimestampLogs() {
			
 
				+
			
 
				+        String regex = ".*?Authpriv.+?Info.+?sshd.+?subsystem.+?request.+?for.+?sftp.*";
			
 
				+        // Two timestamps: one local, one UTC
			
 
				+        Collection<String> examples = Arrays.asList(
			
 
				+                "559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\t" +
			
 
				+                        "Info\tsshd\tsubsystem request for sftp",
			
 
				+                "559550912548986880\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t9049724\tserv02nw03\t10.120.48.147\tAuthpriv\t" +
			
 
				+                        "Info\tsshd\tsubsystem request for sftp",
			
 
				+                "559550912548986887\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t884343\tserv02tw03\t192.168.121.189\tAuthpriv\t" +
			
 
				+                        "Info\tsshd\tsubsystem request for sftp",
			
 
				+                "559550912603512850\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t8907014\tserv02nw01\t192.168.118.208\tAuthpriv\t" +
			
 
				+                        "Info\tsshd\tsubsystem request for sftp");
			
 
				+
			
 
				+        assertEquals(".*?%{NUMBER:field}.+?%{TIMESTAMP_ISO8601:timestamp}.+?%{TIMESTAMP_ISO8601:timestamp2}.+?%{NUMBER:field2}.+?" +
			
 
				+                "%{IP:ipaddress}.+?Authpriv.+?Info.+?sshd.+?subsystem.+?request.+?for.+?sftp.*",
			
 
				+                GrokPatternCreator.findBestGrokMatchFromExamples("foo", regex, examples));
			
 
				+    }
			
 
				+}
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/JobProviderTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/JobProviderTests.java
@@ -61,7 +61,6 @@ import java.util.Date;
 
				 import java.util.HashMap;
			
 
				 import java.util.List;
			
 
				 import java.util.Map;
			
 
				-import java.util.concurrent.ExecutionException;
			
 
				 import java.util.concurrent.atomic.AtomicReference;
			
 
				 import java.util.function.Consumer;
			
 
				 
			
@@ -235,8 +234,7 @@ public class JobProviderTests extends ESTestCase {
 
				         });
			
 
				     }
			
 
				 
			
 
				-    public void testBuckets_OneBucketNoInterim()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testBuckets_OneBucketNoInterim() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -268,8 +266,7 @@ public class JobProviderTests extends ESTestCase {
 
				                         ".*"));
			
 
				     }
			
 
				 
			
 
				-    public void testBuckets_OneBucketInterim()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testBuckets_OneBucketInterim() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -302,8 +299,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertFalse(queryString.matches("(?s).*is_interim.*"));
			
 
				     }
			
 
				 
			
 
				-    public void testBuckets_UsingBuilder()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testBuckets_UsingBuilder() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -339,8 +335,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertFalse(queryString.matches("(?s).*is_interim.*"));
			
 
				     }
			
 
				 
			
 
				-    public void testBucket_NoBucketNoExpand()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testBucket_NoBucketNoExpand() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Long timestamp = 98765432123456789L;
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -357,8 +352,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals(ResourceNotFoundException.class, holder[0].getClass());
			
 
				     }
			
 
				 
			
 
				-    public void testBucket_OneBucketNoExpand()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testBucket_OneBucketNoExpand() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -384,7 +378,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals(now, b.getTimestamp());
			
 
				     }
			
 
				 
			
 
				-    public void testRecords() throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testRecords() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -431,8 +425,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals("irrascible", records.get(1).getFunction());
			
 
				     }
			
 
				 
			
 
				-    public void testRecords_UsingBuilder()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testRecords_UsingBuilder() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -485,7 +478,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals("irrascible", records.get(1).getFunction());
			
 
				     }
			
 
				 
			
 
				-    public void testBucketRecords() throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testBucketRecords() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         Bucket bucket = mock(Bucket.class);
			
@@ -532,7 +525,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals("irrascible", records.get(1).getFunction());
			
 
				     }
			
 
				 
			
 
				-    public void testexpandBucket() throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testexpandBucket() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         Date now = new Date();
			
 
				         Bucket bucket = new Bucket("foo", now, 22);
			
@@ -559,8 +552,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals(400L, records);
			
 
				     }
			
 
				 
			
 
				-    public void testCategoryDefinitions()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testCategoryDefinitions() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         String terms = "the terms and conditions are not valid here";
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -580,15 +572,14 @@ public class JobProviderTests extends ESTestCase {
 
				         JobProvider provider = createProvider(client);
			
 
				         @SuppressWarnings({"unchecked", "rawtypes"})
			
 
				         QueryPage<CategoryDefinition>[] holder = new QueryPage[1];
			
 
				-        provider.categoryDefinitions(jobId, null, from, size, r -> holder[0] = r,
			
 
				+        provider.categoryDefinitions(jobId, null, false, from, size, r -> holder[0] = r,
			
 
				                 e -> {throw new RuntimeException(e);}, client);
			
 
				         QueryPage<CategoryDefinition> categoryDefinitions = holder[0];
			
 
				         assertEquals(1L, categoryDefinitions.count());
			
 
				         assertEquals(terms, categoryDefinitions.results().get(0).getTerms());
			
 
				     }
			
 
				 
			
 
				-    public void testCategoryDefinition()
			
 
				-            throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testCategoryDefinition() throws IOException {
			
 
				         String jobId = "TestJobIdentification";
			
 
				         String terms = "the terms and conditions are not valid here";
			
 
				 
			
@@ -603,14 +594,14 @@ public class JobProviderTests extends ESTestCase {
 
				         JobProvider provider = createProvider(client);
			
 
				         @SuppressWarnings({"unchecked", "rawtypes"})
			
 
				         QueryPage<CategoryDefinition>[] holder = new QueryPage[1];
			
 
				-        provider.categoryDefinitions(jobId, categoryId, null, null,
			
 
				+        provider.categoryDefinitions(jobId, categoryId, false, null, null,
			
 
				                 r -> holder[0] = r, e -> {throw new RuntimeException(e);}, client);
			
 
				         QueryPage<CategoryDefinition> categoryDefinitions = holder[0];
			
 
				         assertEquals(1L, categoryDefinitions.count());
			
 
				         assertEquals(terms, categoryDefinitions.results().get(0).getTerms());
			
 
				     }
			
 
				 
			
 
				-    public void testInfluencers_NoInterim() throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testInfluencers_NoInterim() throws IOException {
			
 
				         String jobId = "TestJobIdentificationForInfluencers";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -670,7 +661,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals(5.0, records.get(1).getInitialInfluencerScore(), 0.00001);
			
 
				     }
			
 
				 
			
 
				-    public void testInfluencers_WithInterim() throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testInfluencers_WithInterim() throws IOException {
			
 
				         String jobId = "TestJobIdentificationForInfluencers";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -730,7 +721,7 @@ public class JobProviderTests extends ESTestCase {
 
				         assertEquals(5.0, records.get(1).getInitialInfluencerScore(), 0.00001);
			
 
				     }
			
 
				 
			
 
				-    public void testModelSnapshots() throws InterruptedException, ExecutionException, IOException {
			
 
				+    public void testModelSnapshots() throws IOException {
			
 
				         String jobId = "TestJobIdentificationForInfluencers";
			
 
				         Date now = new Date();
			
 
				         List<Map<String, Object>> source = new ArrayList<>();
			
@@ -851,8 +842,7 @@ public class JobProviderTests extends ESTestCase {
 
				         return getResponse;
			
 
				     }
			
 
				 
			
 
				-    private static SearchResponse createSearchResponse(List<Map<String, Object>> source)
			
 
				-            throws IOException {
			
 
				+    private static SearchResponse createSearchResponse(List<Map<String, Object>> source) throws IOException {
			
 
				         SearchResponse response = mock(SearchResponse.class);
			
 
				         List<SearchHit> list = new ArrayList<>();
			
 
				 
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/CategoryDefinitionTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/CategoryDefinitionTests.java
@@ -25,6 +25,9 @@ public class CategoryDefinitionTests extends AbstractSerializingTestCase<Categor
 
				         categoryDefinition.setRegex(randomAlphaOfLength(10));
			
 
				         categoryDefinition.setMaxMatchingLength(randomLong());
			
 
				         categoryDefinition.setExamples(Arrays.asList(generateRandomStringArray(10, 10, false)));
			
 
				+        if (randomBoolean()) {
			
 
				+            categoryDefinition.setGrokPattern(randomAlphaOfLength(50));
			
 
				+        }
			
 
				         return categoryDefinition;
			
 
				     }