Browse Source

[ML] add new default char filter `first_line_with_letters` for machine learning categorization (#77457)

The char filter replaces the previous default of `first_non_blank_line`.

`first_non_blank_line` worked well for picking the first line that contained any non-whitespace
characters at all, but log lines like the following were handled poorly:
```
--------------------------------------------------------------------------------

Alias 'foo' already exists and this prevents setting up ILM for logs

--------------------------------------------------------------------------------
```
When combined with the `ml_standard` tokenizer, the first line was used:
```
--------------------------------------------------------------------------------
```
This line contains no valid tokens for our standard tokenizer. Consequently, no tokens were found by the `ml_standard` tokenizer.


The new filter, `first_line_with_letters`, returns the first line containing any letter character (i.e. a character for which `Character#isLetter` returns true).

Given the previously poorly handled log, when combined with our `ml_standard` tokenizer, we get the following, more appropriate, tokens:

```
"tokens" : ["Alias", "foo", "already", "exists", "and", "this", "prevents", "setting", "up", "ILM", "for", "logs"]
```
Benjamin Trent 4 years ago
parent
commit
281ec58b8d

+ 2 - 2
docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc

@@ -18,7 +18,7 @@ Returns defaults and limits used by machine learning.
 [[get-ml-info-prereqs]]
 == {api-prereq-title}
 
-Requires the `monitor_ml` cluster privilege. This privilege is included in the 
+Requires the `monitor_ml` cluster privilege. This privilege is included in the
 `machine_learning_user` built-in role.
 
 [[get-ml-info-desc]]
@@ -50,7 +50,7 @@ This is a possible response:
     "anomaly_detectors" : {
       "categorization_analyzer" : {
         "char_filter" : [
-          "first_non_blank_line"
+          "first_line_with_letters"
         ],
         "tokenizer" : "ml_standard",
         "filter" : [

+ 3 - 3
docs/reference/ml/anomaly-detection/ml-configuring-categories.asciidoc

@@ -158,7 +158,7 @@ POST _ml/anomaly_detectors/_validate
   "analysis_config" : {
     "categorization_analyzer" : {
       "char_filter" : [
-        "first_non_blank_line"
+        "first_line_with_letters"
       ],
       "tokenizer" : "ml_standard",
       "filter" : [
@@ -205,7 +205,7 @@ PUT _ml/anomaly_detectors/it_ops_new_logs3
     }],
     "categorization_analyzer":{
       "char_filter" : [
-        "first_non_blank_line" <1>
+        "first_line_with_letters" <1>
       ],
       "tokenizer": {
         "type" : "simple_pattern_split",
@@ -238,7 +238,7 @@ PUT _ml/anomaly_detectors/it_ops_new_logs3
 ----------------------------------
 // TEST[skip:needs-licence]
 
-<1> Only consider the first non-blank line of the message for categorization purposes.
+<1> Only consider the first line of the message with letters for categorization purposes.
 <2> Tokens basically consist of hyphens, digits, letters, underscores, dots and slashes.
 <3> By default, categorization ignores tokens that begin with a digit.
 <4> By default, categorization also ignores tokens that are hexadecimal numbers.

+ 2 - 0
x-pack/plugin/build.gradle

@@ -158,6 +158,8 @@ tasks.named("yamlRestTestV7CompatTest").configure {
       // still this is a cat api, and we don't support them with rest api compatibility. (the test would be very hard to transform too)
       'ml/trained_model_cat_apis/Test cat trained models',
       'service_accounts/10_basic/Test get service accounts', //#76449, will remove upon backport
+      // Mute for backport https://github.com/elastic/elasticsearch/pull/77457
+      'ml/jobs_crud/Test update job'
   ].join(',')
   dependsOn "copyExtraResources"
 }

+ 1 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java

@@ -173,7 +173,7 @@ public class CategorizationAnalyzerConfig implements ToXContentFragment, Writeab
     public static CategorizationAnalyzerConfig buildStandardCategorizationAnalyzer(List<String> categorizationFilters) {
 
         return new CategorizationAnalyzerConfig.Builder()
-            .addCharFilter("first_non_blank_line")
+            .addCharFilter("first_line_with_letters")
             .addCategorizationFilters(categorizationFilters)
             .setTokenizer("ml_standard")
             .addDateWordsTokenFilter()

+ 5 - 4
x-pack/plugin/ml/qa/ml-with-security/build.gradle

@@ -20,10 +20,11 @@ tasks.named("yamlRestTest").configure {
     // Remove these tests because they don't call an ML endpoint and we don't want
     // to grant extra permissions to the users used in this test suite
     'ml/ml_classic_analyze/Test analyze API with an analyzer that does what we used to do in native code',
-    'ml/ml_standard_analyze/Test analyze API with the standard 7.14 ML analyzer',
-    'ml/ml_standard_analyze/Test 7.14 analyzer with blank lines',
-    'ml/ml_standard_analyze/Test 7.14 analyzer with multiple multiline messages',
-    'ml/ml_standard_analyze/Test 7.14 analyzer with stop words in messages',
+    'ml/ml_standard_analyze/Test analyze API with the standard 7.16 ML analyzer',
+    'ml/ml_standard_analyze/Test 7.16 analyzer with blank lines',
+    'ml/ml_standard_analyze/Test 7.16 analyzer with multiple multiline messages',
+    'ml/ml_standard_analyze/Test 7.16 analyzer with stop words in messages',
+    'ml/ml_standard_analyze/Test 7.16 analyzer with stop words in messages and strange lines without letters',
     // Remove tests that are expected to throw an exception, because we cannot then
     // know whether to expect an authorization exception or a validation exception
     'ml/3rd_party_deployment/Test start deployment fails with missing model definition',

+ 7 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java

@@ -30,6 +30,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodeRole;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.common.xcontent.ParseField;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
@@ -302,6 +303,8 @@ import org.elasticsearch.xpack.ml.job.JobManager;
 import org.elasticsearch.xpack.ml.job.JobManagerHolder;
 import org.elasticsearch.xpack.ml.job.NodeLoadDetector;
 import org.elasticsearch.xpack.ml.job.UpdateJobProcessNotifier;
+import org.elasticsearch.xpack.ml.job.categorization.FirstLineWithLettersCharFilter;
+import org.elasticsearch.xpack.ml.job.categorization.FirstLineWithLettersCharFilterFactory;
 import org.elasticsearch.xpack.ml.job.categorization.FirstNonBlankLineCharFilter;
 import org.elasticsearch.xpack.ml.job.categorization.FirstNonBlankLineCharFilterFactory;
 import org.elasticsearch.xpack.ml.job.categorization.MlClassicTokenizer;
@@ -1216,7 +1219,10 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin,
     }
 
     public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
-        return Collections.singletonMap(FirstNonBlankLineCharFilter.NAME, FirstNonBlankLineCharFilterFactory::new);
+        return MapBuilder.<String, AnalysisProvider<CharFilterFactory>>newMapBuilder()
+            .put(FirstNonBlankLineCharFilter.NAME, FirstNonBlankLineCharFilterFactory::new)
+            .put(FirstLineWithLettersCharFilter.NAME, FirstLineWithLettersCharFilterFactory::new)
+            .map();
     }
 
     @Override

+ 104 - 0
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/FirstLineWithLettersCharFilter.java

@@ -0,0 +1,104 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ml.job.categorization;
+
+import org.apache.lucene.analysis.charfilter.BaseCharFilter;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+/**
+ * A character filter that keeps the first line with alpha letters in the input, and discards everything before and after it.
+ * Treats both <code>\n</code> and <code>\r\n</code> as line endings.
+ *
+ * If there is a line ending at the end of the first valid line, this is discarded.
+ *
+ * A line is considered valid if {@link Character#isLetter} returns
+ * <code>true</code> for any of its characters.
+ *
+ * It is possible to achieve the same effect with a <code>pattern_replace</code> filter, but since this filter
+ * needs to be run on every single message to be categorized it is worth having a more performant specialization.
+ */
+public class FirstLineWithLettersCharFilter extends BaseCharFilter {
+
+    public static final String NAME = "first_line_with_letters";
+
+    private Reader transformedInput;
+
+    FirstLineWithLettersCharFilter(Reader in) {
+        super(in);
+    }
+
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        // Buffer all input on the first call.
+        if (transformedInput == null) {
+            fill();
+        }
+
+        return transformedInput.read(cbuf, off, len);
+    }
+
+    @Override
+    public int read() throws IOException {
+        if (transformedInput == null) {
+            fill();
+        }
+
+        return transformedInput.read();
+    }
+
+    private void fill() throws IOException {
+        StringBuilder buffered = new StringBuilder();
+        char[] temp = new char[1024];
+        for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp)) {
+            buffered.append(temp, 0, cnt);
+        }
+        transformedInput = new StringReader(process(buffered).toString());
+    }
+
+    private CharSequence process(CharSequence input) {
+
+        boolean seenLetter = false;
+        int prevNewlineIndex = -1;
+        int endIndex = -1;
+
+        for (int index = 0; index < input.length(); ++index) {
+            if (input.charAt(index) == '\n') {
+                if (seenLetter) {
+                    // With Windows line endings chop the \r as well as the \n
+                    endIndex = (input.charAt(index - 1) == '\r') ? (index - 1) : index;
+                    break;
+                }
+                prevNewlineIndex = index;
+            } else {
+                seenLetter = seenLetter || Character.isLetter(input.charAt(index));
+            }
+        }
+
+        if (seenLetter == false) {
+            return "";
+        }
+
+        if (endIndex == -1) {
+            if (prevNewlineIndex == -1) {
+                // This is pretty likely, as most log messages _aren't_ multiline, so worth optimising
+                // for even though the return at the end of the method would be functionally identical
+                return input;
+            }
+            endIndex = input.length();
+        }
+
+        addOffCorrectMap(0, prevNewlineIndex + 1);
+        if (endIndex < input.length()) {
+            addOffCorrectMap(endIndex - prevNewlineIndex - 1, input.length() - endIndex + prevNewlineIndex + 1);
+        }
+        return input.subSequence(prevNewlineIndex + 1, endIndex);
+    }
+}

+ 27 - 0
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/FirstLineWithLettersCharFilterFactory.java

@@ -0,0 +1,27 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ml.job.categorization;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
+
+import java.io.Reader;
+
+public class FirstLineWithLettersCharFilterFactory extends AbstractCharFilterFactory {
+
+    public FirstLineWithLettersCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        super(indexSettings, name);
+    }
+
+    @Override
+    public Reader create(Reader tokenStream) {
+        return new FirstLineWithLettersCharFilter(tokenStream);
+    }
+}

+ 134 - 0
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/FirstLineWithLettersCharFilterTests.java

@@ -0,0 +1,134 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ml.job.categorization;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class FirstLineWithLettersCharFilterTests extends ESTestCase {
+
+    public void testEmpty() throws IOException {
+
+        String input = "";
+        FirstLineWithLettersCharFilter filter = new FirstLineWithLettersCharFilter(new StringReader(input));
+
+        assertThat(filter.read(), equalTo(-1));
+    }
+
+    public void testAllBlankOneLine() throws IOException {
+
+        String input = randomFrom("!@#$%^&*()1234567890{}[]\\;':,./<>?`~", "\t", " ", "");
+        if (randomBoolean()) {
+            input = " " + input;
+        }
+        if (randomBoolean()) {
+            input = input + " ";
+        }
+        FirstLineWithLettersCharFilter filter = new FirstLineWithLettersCharFilter(new StringReader(input));
+
+        assertThat(filter.read(), equalTo(-1));
+    }
+
+    public void testNonBlankNoNewlines() throws IOException {
+
+        String input = "the quick brown fox jumped over the lazy dog";
+        if (randomBoolean()) {
+            input = " " + input;
+        }
+        if (randomBoolean()) {
+            input = input + " ";
+        }
+        FirstLineWithLettersCharFilter filter = new FirstLineWithLettersCharFilter(new StringReader(input));
+
+        char[] output = new char[input.length()];
+        assertThat(filter.read(output, 0, output.length), equalTo(input.length()));
+        assertThat(filter.read(), equalTo(-1));
+        assertThat(new String(output), equalTo(input));
+    }
+
+    public void testNonBlankMultiline() throws IOException {
+
+        StringBuilder input = new StringBuilder();
+        String lineEnding = randomBoolean() ? "\n" : "\r\n";
+        for (int lineBeforeNum = randomIntBetween(2, 5); lineBeforeNum > 0; --lineBeforeNum) {
+            for (int charNum = randomIntBetween(0, 5); charNum > 0; --charNum) {
+                input.append(randomBoolean() ? " " : "\t");
+            }
+            input.append(lineEnding);
+        }
+        String lineToKeep = "the quick brown fox jumped over the lazy dog";
+        if (randomBoolean()) {
+            lineToKeep = " " + lineToKeep;
+        }
+        if (randomBoolean()) {
+            lineToKeep = lineToKeep + " ";
+        }
+        input.append(lineToKeep).append(lineEnding);
+        for (int lineAfterNum = randomIntBetween(2, 5); lineAfterNum > 0; --lineAfterNum) {
+            for (int charNum = randomIntBetween(0, 5); charNum > 0; --charNum) {
+                input.append(randomBoolean() ? " " : "more");
+            }
+            if (lineAfterNum > 1 || randomBoolean()) {
+                input.append(lineEnding);
+            }
+        }
+
+        FirstLineWithLettersCharFilter filter = new FirstLineWithLettersCharFilter(new StringReader(input.toString()));
+
+        char[] output = new char[lineToKeep.length()];
+        assertThat(filter.read(output, 0, output.length), equalTo(lineToKeep.length()));
+        assertThat(filter.read(), equalTo(-1));
+        assertThat(new String(output), equalTo(lineToKeep));
+    }
+
+    public void testNoLinesWithLetters() throws IOException {
+        StringBuilder input = new StringBuilder();
+        String lineEnding = randomBoolean() ? "\n" : "\r\n";
+        for (int lineNum = randomIntBetween(2, 5); lineNum > 0; --lineNum) {
+            for (int charNum = randomIntBetween(0, 5); charNum > 0; --charNum) {
+                input.append(randomFrom("!@#$%^&*()1234567890{}[]\\;':,./<>?`~", "\t", " ", ""));
+            }
+            if (lineNum > 1 || randomBoolean()) {
+                input.append(lineEnding);
+            }
+        }
+
+        FirstLineWithLettersCharFilter filter = new FirstLineWithLettersCharFilter(new StringReader(input.toString()));
+
+        assertThat(filter.read(), equalTo(-1));
+    }
+
+    public void testCorrect() throws IOException {
+
+        String input = "  --------------------------------------------------------------------------------\n" +
+            "\n" +
+            "Alias 'foo' already exists and this prevents setting up ILM for logs\n" +
+            "\n" +
+            "--------------------------------------------------------------------------------";
+        FirstLineWithLettersCharFilter filter = new FirstLineWithLettersCharFilter(new StringReader(input));
+
+        String expectedOutput = "Alias 'foo' already exists and this prevents setting up ILM for logs";
+
+        char[] output = new char[expectedOutput.length()];
+        assertThat(filter.read(output, 0, output.length), equalTo(expectedOutput.length()));
+        assertThat(filter.read(), equalTo(-1));
+        assertThat(new String(output), equalTo(expectedOutput));
+
+        int expectedOutputIndex = input.indexOf(expectedOutput);
+        for (int i = 0; i < expectedOutput.length(); ++i) {
+            assertThat(filter.correctOffset(i), equalTo(expectedOutputIndex + i));
+        }
+        // When the input gets chopped by a char filter immediately after a token, that token must be reported as
+        // ending at the very end of the original input, otherwise multi-message analysis will have incorrect offsets
+        assertThat(filter.correctOffset(expectedOutput.length()), equalTo(input.length()));
+    }
+}

+ 1 - 1
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_crud.yml

@@ -388,7 +388,7 @@
   - length: { analysis_config.categorization_analyzer.filter: 1 }
   - match: { analysis_config.categorization_analyzer.tokenizer: "ml_standard" }
   - length: { analysis_config.categorization_analyzer.char_filter: 3 }
-  - match: { analysis_config.categorization_analyzer.char_filter.0: "first_non_blank_line" }
+  - match: { analysis_config.categorization_analyzer.char_filter.0: "first_line_with_letters" }
   - match: { analysis_config.categorization_analyzer.char_filter.1.pattern: "cat1.*" }
   - match: { analysis_config.categorization_analyzer.char_filter.2.pattern: "cat2.*" }
   - match: { analysis_config.bucket_span: "5m" }

+ 56 - 8
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/ml_standard_analyze.yml

@@ -1,11 +1,11 @@
 ---
-"Test analyze API with the standard 7.14 ML analyzer":
+"Test analyze API with the standard 7.16 ML analyzer":
   - do:
       indices.analyze:
         body:  >
           {
             "char_filter" : [
-              "first_non_blank_line"
+              "first_line_with_letters"
             ],
             "tokenizer" : "ml_standard",
             "filter" : [
@@ -85,13 +85,13 @@
   - match: { tokens.15.position: 22 }
 
 ---
-"Test 7.14 analyzer with blank lines":
+"Test 7.16 analyzer with blank lines":
   - do:
       indices.analyze:
         body:  >
           {
             "char_filter" : [
-              "first_non_blank_line"
+              "first_line_with_letters"
             ],
             "tokenizer" : "ml_standard",
             "filter" : [
@@ -115,13 +115,13 @@
   - match: { tokens.1.position: 1 }
 
 ---
-"Test 7.14 analyzer with multiple multiline messages":
+"Test 7.16 analyzer with multiple multiline messages":
   - do:
       indices.analyze:
         body:  >
           {
             "char_filter" : [
-              "first_non_blank_line"
+              "first_line_with_letters"
             ],
             "tokenizer" : "ml_standard",
             "filter" : [
@@ -168,13 +168,13 @@
   - match: { tokens.6.position: 106 }
 
 ---
-"Test 7.14 analyzer with stop words in messages":
+"Test 7.16 analyzer with stop words in messages":
   - do:
       indices.analyze:
         body:  >
           {
             "char_filter" : [
-              "first_non_blank_line"
+              "first_line_with_letters"
             ],
             "tokenizer" : "ml_standard",
             "filter" : [
@@ -215,3 +215,51 @@
   - match: { tokens.5.start_offset: 92 }
   - match: { tokens.5.end_offset: 95 }
   - match: { tokens.5.position: 119 }
+---
+"Test 7.16 analyzer with stop words in messages and strange lines without letters":
+  - do:
+      indices.analyze:
+        body:  >
+          {
+            "char_filter" : [
+              "first_line_with_letters"
+            ],
+            "tokenizer" : "ml_standard",
+            "filter" : [
+              { "type" : "stop", "stopwords": [
+                "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
+                "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
+                "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
+                "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+                "GMT", "UTC"
+              ] }
+            ],
+            "text" : [
+              "\n-----\nMay 27, 2021 @ 19:51:15.288 UTC log message one\n-----\n",
+              "\n-----\nMay 27, 2021 @ 19:52:25.288 UTC log message two\n-----\n"
+            ]
+          }
+  - match: { tokens.0.token: "log" }
+  - match: { tokens.0.start_offset: 39 }
+  - match: { tokens.0.end_offset: 42 }
+  - match: { tokens.0.position: 7 }
+  - match: { tokens.1.token: "message" }
+  - match: { tokens.1.start_offset: 43 }
+  - match: { tokens.1.end_offset: 50 }
+  - match: { tokens.1.position: 8 }
+  - match: { tokens.2.token: "one" }
+  - match: { tokens.2.start_offset: 51 }
+  - match: { tokens.2.end_offset: 61 }
+  - match: { tokens.2.position: 9 }
+  - match: { tokens.3.token: "log" }
+  - match: { tokens.3.start_offset: 101 }
+  - match: { tokens.3.end_offset: 104 }
+  - match: { tokens.3.position: 117 }
+  - match: { tokens.4.token: "message" }
+  - match: { tokens.4.start_offset: 105 }
+  - match: { tokens.4.end_offset: 112 }
+  - match: { tokens.4.position: 118 }
+  - match: { tokens.5.token: "two" }
+  - match: { tokens.5.start_offset: 113 }
+  - match: { tokens.5.end_offset: 123 }
+  - match: { tokens.5.position: 119 }