Bladeren bron

[ML] Improve message misformation error in file structure finder (#42175)

This change replaces the extremely unfriendly message
"Number of messages analyzed must be positive", shown in
the case where the sample lines were incorrectly grouped
into just one message, with an error that more helpfully
explains the likely root cause of the problem.
David Roberts 6 jaren geleden
bovenliggende
commit
178ebd73c8

+ 6 - 0
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java

@@ -76,6 +76,12 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
         }
         // Don't add the last message, as it might be partial and mess up subsequent pattern finding
 
+        if (sampleMessages.isEmpty()) {
+            throw new IllegalArgumentException("Failed to create more than one message from the sample lines provided. (The "
+                + "last is discarded in case the sample is incomplete.) If your sample does contain multiple messages the "
+                + "problem is probably that the primary timestamp format has been incorrectly detected, so try overriding it.");
+        }
+
         FileStructure.Builder structureBuilder = new FileStructure.Builder(FileStructure.Format.SEMI_STRUCTURED_TEXT)
             .setCharset(charsetName)
             .setHasByteOrderMarker(hasByteOrderMarker)

+ 21 - 0
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java

@@ -232,6 +232,27 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
             "\\[%{JAVACLASS:class} *\\] %{JAVALOGMESSAGE:message}] does not match sample messages", e.getMessage());
     }
 
+    public void testErrorOnIncorrectMessageFormation() {
+        // Expect createFromSample() to throw the "Failed to create more than one message" IllegalArgumentException.
+        // This sample causes problems because the (very weird) primary timestamp format at the start of every line
+        // is not detected but a secondary format that only occurs in one line (the bracketed startup date) is detected
+        String sample = "Day 21 Month 1 Year 2019 11:04 INFO [localhost] - starting\n" +
+            "Day 21 Month 1 Year 2019 11:04 INFO [localhost] - startup date [Mon Jan 21 11:04:19 CET 2019]\n" +
+            "Day 21 Month 1 Year 2019 11:04 DEBUG [localhost] - details\n" +
+            "Day 21 Month 1 Year 2019 11:04 DEBUG [localhost] - more details\n" +
+            "Day 21 Month 1 Year 2019 11:04 WARN [localhost] - something went wrong\n";
+
+        String charset = randomFrom(POSSIBLE_CHARSETS);
+        Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> factory.createFromSample(explanation, sample, charset, hasByteOrderMarker, FileStructureOverrides.EMPTY_OVERRIDES,
+                NOOP_TIMEOUT_CHECKER));
+
+        assertEquals("Failed to create more than one message from the sample lines provided. (The last is discarded in "
+            + "case the sample is incomplete.) If your sample does contain multiple messages the problem is probably that "
+            + "the primary timestamp format has been incorrectly detected, so try overriding it.", e.getMessage());
+    }
+
     public void testCreateMultiLineMessageStartRegexGivenNoPrefaces() {
         for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) {
             String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern();