5 years ago · fd554d95e4
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java
@@ -5,6 +5,7 @@
 
				  */
			
 
				 package org.elasticsearch.xpack.ml.filestructurefinder;
			
 
				 
			
 
				+import org.apache.logging.log4j.message.ParameterizedMessage;
			
 
				 import org.elasticsearch.common.collect.Tuple;
			
 
				 import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
			
 
				 import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
			
@@ -34,7 +35,6 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
 
				     private static final String REGEX_NEEDS_ESCAPE_PATTERN = "([\\\\|()\\[\\]{}^$.+*?])";
			
 
				     private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;
			
 
				     private static final int LONG_FIELD_THRESHOLD = 100;
			
 
				-
			
 
				     private final List<String> sampleMessages;
			
 
				     private final FileStructure structure;
			
 
				 
			
@@ -80,6 +80,11 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
 
				         for (int index = isHeaderInFile ? 1 : 0; index < rows.size(); ++index) {
			
 
				             List<String> row = rows.get(index);
			
 
				             int lineNumber = lineNumbers.get(index);
			
 
				+            // Indicates an ill-formatted row. We allow a certain number of these.
			
 
				+            if (row.size() != columnNames.length) {
			
 
				+                prevMessageEndLineNumber = lineNumber;
			
 
				+                continue;
			
 
				+            }
			
 
				             Map<String, String> sampleRecord = new LinkedHashMap<>();
			
 
				             Util.filterListToMap(sampleRecord, columnNames,
			
 
				                 trimFields ? row.stream().map(field -> (field == null) ? null : field.trim()).collect(Collectors.toList()) : row);
			
@@ -488,7 +493,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
 
				     }
			
 
				 
			
 
				     static boolean canCreateFromSample(List<String> explanation, String sample, int minFieldsPerRow, CsvPreference csvPreference,
			
 
				-                                       String formatName) {
			
 
				+                                       String formatName, double allowedFractionOfBadLines) {
			
 
				 
			
 
				         // Logstash's CSV parser won't tolerate fields where just part of the
			
 
				         // value is quoted, whereas SuperCSV will, hence this extra check
			
@@ -501,11 +506,13 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
 
				             }
			
 
				         }
			
 
				 
			
 
				+        int numberOfLinesInSample = sampleLines.length;
			
 
				         try (CsvListReader csvReader = new CsvListReader(new StringReader(sample), csvPreference)) {
			
 
				 
			
 
				             int fieldsInFirstRow = -1;
			
 
				             int fieldsInLastRow = -1;
			
 
				 
			
 
				+            List<Integer> illFormattedRows = new ArrayList<>();
			
 
				             int numberOfRows = 0;
			
 
				             try {
			
 
				                 List<String> row;
			
@@ -529,11 +536,27 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
 
				                         --fieldsInThisRow;
			
 
				                     }
			
 
				 
			
 
				-                    if (fieldsInLastRow != fieldsInFirstRow) {
			
 
				-                        explanation.add("Not " + formatName + " because row [" + (numberOfRows - 1) +
			
 
				-                            "] has a different number of fields to the first row: [" + fieldsInFirstRow + "] and [" +
			
 
				-                            fieldsInLastRow + "]");
			
 
				-                        return false;
			
 
				+                    // TODO: might be good one day to gather a distribution of the most common field counts
			
 
				+                    // But, this would require iterating (or at least sampling) all the lines.
			
 
				+                    if (fieldsInThisRow != fieldsInFirstRow) {
			
 
				+                        illFormattedRows.add(numberOfRows - 1);
			
 
				+                        // This calculation is complicated by the possibility of multi-lined CSV columns
			
 
				+                        // `getLineNumber` is a current count of lines, regardless of row count, so
			
 
				+                        // this formula is just an approximation, but gets more accurate the further
			
 
				+                        // through the sample you are.
			
 
				+                        double totalNumberOfRows = (numberOfRows + numberOfLinesInSample - csvReader.getLineNumber());
			
 
				+                        // We should only allow a certain percentage of ill formatted rows
			
 
				+                        // as they may have downstream effects
			
 
				+                        if (illFormattedRows.size() > Math.ceil(allowedFractionOfBadLines * totalNumberOfRows)) {
			
 
				+                            explanation.add(new ParameterizedMessage(
			
 
				+                                "Not {} because {} or more rows did not have the same number of fields as the first row ({}). Bad rows {}",
			
 
				+                                formatName,
			
 
				+                                illFormattedRows.size(),
			
 
				+                                fieldsInFirstRow,
			
 
				+                                illFormattedRows).getFormattedMessage());
			
 
				+                            return false;
			
 
				+                        }
			
 
				+                        continue;
			
 
				                     }
			
 
				 
			
 
				                     fieldsInLastRow = fieldsInThisRow;
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderFactory.java
@@ -14,6 +14,8 @@ import java.util.Locale;
 
				 
			
 
				 public class DelimitedFileStructureFinderFactory implements FileStructureFinderFactory {
			
 
				 
			
 
				+    static final double DELIMITER_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES = 0.10d;
			
 
				+    static final double FORMAT_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES = 0.05d;
			
 
				     private final CsvPreference csvPreference;
			
 
				     private final int minFieldsPerRow;
			
 
				     private final boolean trimFields;
			
@@ -44,7 +46,7 @@ public class DelimitedFileStructureFinderFactory implements FileStructureFinderF
 
				      * it could have been truncated when the file was sampled.
			
 
				      */
			
 
				     @Override
			
 
				-    public boolean canCreateFromSample(List<String> explanation, String sample) {
			
 
				+    public boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines) {
			
 
				         String formatName;
			
 
				         switch ((char) csvPreference.getDelimiterChar()) {
			
 
				             case ',':
			
@@ -57,7 +59,12 @@ public class DelimitedFileStructureFinderFactory implements FileStructureFinderF
 
				                 formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
			
 
				                 break;
			
 
				         }
			
 
				-        return DelimitedFileStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName);
			
 
				+        return DelimitedFileStructureFinder.canCreateFromSample(explanation,
			
 
				+            sample,
			
 
				+            minFieldsPerRow,
			
 
				+            csvPreference,
			
 
				+            formatName,
			
 
				+            allowedFractionOfBadLines);
			
 
				     }
			
 
				 
			
 
				     @Override
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderFactory.java
@@ -25,10 +25,12 @@ public interface FileStructureFinderFactory {
 
				      * @param explanation List of reasons for making decisions.  May contain items when passed and new reasons
			
 
				      *                    can be appended by this method.
			
 
				      * @param sample A sample from the file to be ingested.
			
 
				+     * @param allowedFractionOfBadLines The proportion of lines in the passed sample that may be considered "bad".
			
 
				+     *                                  Provided as a fraction from interval [0, 1]
			
 
				      * @return <code>true</code> if this factory can create an appropriate
			
 
				      *         file structure given the sample; otherwise <code>false</code>.
			
 
				      */
			
 
				-    boolean canCreateFromSample(List<String> explanation, String sample);
			
 
				+    boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines);
			
 
				 
			
 
				     /**
			
 
				      * Create an object representing the structure of a file.
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureFinderManager.java
@@ -11,6 +11,7 @@ import org.elasticsearch.ElasticsearchException;
 
				 import org.elasticsearch.ElasticsearchTimeoutException;
			
 
				 import org.elasticsearch.common.collect.Tuple;
			
 
				 import org.elasticsearch.common.unit.TimeValue;
			
 
				+import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
			
 
				 
			
 
				 import java.io.BufferedInputStream;
			
 
				 import java.io.BufferedReader;
			
@@ -474,14 +475,17 @@ public final class FileStructureFinderManager {
 
				         Character quote = overrides.getQuote();
			
 
				         Boolean shouldTrimFields = overrides.getShouldTrimFields();
			
 
				         List<FileStructureFinderFactory> factories;
			
 
				+        double allowedFractionOfBadLines = 0.0;
			
 
				         if (delimiter != null) {
			
 
				+            allowedFractionOfBadLines = DelimitedFileStructureFinderFactory.DELIMITER_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES;
			
 
				 
			
 
				             // If a precise delimiter is specified, we only need one structure finder
			
 
				             // factory, and we'll tolerate as little as one column in the input
			
 
				             factories = Collections.singletonList(new DelimitedFileStructureFinderFactory(delimiter, (quote == null) ? '"' : quote, 1,
			
 
				                 (shouldTrimFields == null) ? (delimiter == '|') : shouldTrimFields));
			
 
				 
			
 
				-        } else if (quote != null || shouldTrimFields != null) {
			
 
				+        } else if (quote != null || shouldTrimFields != null || FileStructure.Format.DELIMITED.equals(overrides.getFormat())) {
			
 
				+            allowedFractionOfBadLines = DelimitedFileStructureFinderFactory.FORMAT_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES;
			
 
				 
			
 
				             // The delimiter is not specified, but some other aspect of delimited files is,
			
 
				             // so clone our default delimited factories altering the overridden values
			
@@ -499,7 +503,7 @@ public final class FileStructureFinderManager {
 
				 
			
 
				         for (FileStructureFinderFactory factory : factories) {
			
 
				             timeoutChecker.check("high level format detection");
			
 
				-            if (factory.canCreateFromSample(explanation, sample)) {
			
 
				+            if (factory.canCreateFromSample(explanation, sample, allowedFractionOfBadLines)) {
			
 
				                 return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker, lineMergeSizeLimit, overrides,
			
 
				                     timeoutChecker);
			
 
				             }
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinderFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinderFactory.java
@@ -30,7 +30,7 @@ public class NdJsonFileStructureFinderFactory implements FileStructureFinderFact
 
				      * documents must be non-empty, to prevent lines containing "{}" from matching.
			
 
				      */
			
 
				     @Override
			
 
				-    public boolean canCreateFromSample(List<String> explanation, String sample) {
			
 
				+    public boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines) {
			
 
				 
			
 
				         int completeDocCount = 0;
			
 
				 
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderFactory.java
@@ -25,7 +25,7 @@ public class TextLogFileStructureFinderFactory implements FileStructureFinderFac
 
				      * non-blank lines.
			
 
				      */
			
 
				     @Override
			
 
				-    public boolean canCreateFromSample(List<String> explanation, String sample) {
			
 
				+    public boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines) {
			
 
				         if (sample.indexOf('\n') < 0) {
			
 
				             explanation.add("Not text because sample contains no newlines");
			
 
				             return false;
			
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderFactory.java
@@ -43,7 +43,7 @@ public class XmlFileStructureFinderFactory implements FileStructureFinderFactory
 
				      * necessarily have to be complete (as the sample could have truncated it).
			
 
				      */
			
 
				     @Override
			
 
				-    public boolean canCreateFromSample(List<String> explanation, String sample) {
			
 
				+    public boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines) {
			
 
				 
			
 
				         int completeDocCount = 0;
			
 
				         String commonRootElementName = null;
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderFactoryTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderFactoryTests.java
@@ -16,66 +16,66 @@ public class DelimitedFileStructureFinderFactoryTests extends FileStructureTestC
 
				 
			
 
				     public void testCanCreateCsvFromSampleGivenCsv() {
			
 
				 
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateCsvFromSampleGivenTsv() {
			
 
				 
			
 
				-        assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
			
 
				+        assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateCsvFromSampleGivenSemiColonDelimited() {
			
 
				 
			
 
				-        assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
			
 
				+        assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateCsvFromSampleGivenPipeDelimited() {
			
 
				 
			
 
				-        assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
			
 
				+        assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateCsvFromSampleGivenText() {
			
 
				 
			
 
				-        assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     // TSV - no need to check NDJSON, XML or CSV because they come earlier in the order we check formats
			
 
				 
			
 
				     public void testCanCreateTsvFromSampleGivenTsv() {
			
 
				 
			
 
				-        assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
			
 
				+        assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateTsvFromSampleGivenSemiColonDelimited() {
			
 
				 
			
 
				-        assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
			
 
				+        assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateTsvFromSampleGivenPipeDelimited() {
			
 
				 
			
 
				-        assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
			
 
				+        assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateTsvFromSampleGivenText() {
			
 
				 
			
 
				-        assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     // Semi-colon delimited - no need to check NDJSON, XML, CSV or TSV because they come earlier in the order we check formats
			
 
				 
			
 
				     public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
			
 
				 
			
 
				-        assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
			
 
				+        assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() {
			
 
				 
			
 
				-        assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
			
 
				+        assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateSemiColonDelimitedFromSampleGivenText() {
			
 
				 
			
 
				-        assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     // Pipe delimited - no need to check NDJSON, XML, CSV, TSV or semi-colon delimited
			
@@ -83,11 +83,11 @@ public class DelimitedFileStructureFinderFactoryTests extends FileStructureTestC
 
				 
			
 
				     public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {
			
 
				 
			
 
				-        assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
			
 
				+        assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreatePipeDelimitedFromSampleGivenText() {
			
 
				 
			
 
				-        assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderTests.java
@@ -37,7 +37,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				         String sample = "time,message\n" +
			
 
				             "2018-05-17T13:41:23,hello\n" +
			
 
				             "2018-05-17T13:41:32,hello again\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -65,6 +65,99 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				         assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
			
 
				     }
			
 
				 
			
 
				+    public void testCreateConfigsGivenIncompleteCsv() throws Exception {
			
 
				+        String sample = "time,message\n" +
			
 
				+            "2018-05-17T13:41:23,hello\n" +
			
 
				+            "badrow\n" + // REALLY bad row
			
 
				+            "2018-05-17T13:41:25,hello\n" +
			
 
				+            "2018-05-17T13:41:26,hello\n" +
			
 
				+            "2018-05-17T13:41:27,hello\n" +
			
 
				+            "2018-05-17T13:41:28,hello\n" +
			
 
				+            "2018-05-17T13:41:29,hello\n" +
			
 
				+            "2018-05-17T13:41:30,hello\n" +
			
 
				+            "2018-05-17T13:41:31,hello\n" +
			
 
				+            "2018-05-17T13:41:32,hello\n" +
			
 
				+            "2018-05-17T13:41:35\n" + // Just missing the column
			
 
				+            "2018-05-17T13:41:33,hello again\n";
			
 
				+        assertFalse(csvFactory.canCreateFromSample(explanation, sample, 0.05));
			
 
				+        assertTrue("assertion failed. Explanation " + explanation,
			
 
				+            csvFactory.canCreateFromSample(explanation, sample, 0.10));
			
 
				+
			
 
				+        String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				+        Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
 
				+        FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
			
 
				+            FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
			
 
				+
			
 
				+
			
 
				+        FileStructure structure = structureFinder.getStructure();
			
 
				+
			
 
				+        assertEquals(FileStructure.Format.DELIMITED, structure.getFormat());
			
 
				+        assertEquals(charset, structure.getCharset());
			
 
				+        if (hasByteOrderMarker == null) {
			
 
				+            assertNull(structure.getHasByteOrderMarker());
			
 
				+        } else {
			
 
				+            assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
			
 
				+        }
			
 
				+        assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
			
 
				+        assertEquals("time", structure.getTimestampField());
			
 
				+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
			
 
				+        assertEquals(Arrays.asList("time", "message"), structure.getColumnNames());
			
 
				+        assertEquals(Character.valueOf(','), structure.getDelimiter());
			
 
				+        assertEquals(Character.valueOf('"'), structure.getQuote());
			
 
				+        assertEquals(structure.getNumMessagesAnalyzed(), 10);
			
 
				+        assertTrue(structure.getHasHeaderRow());
			
 
				+        assertNull(structure.getMultilineStartPattern());
			
 
				+        assertNull(structure.getShouldTrimFields());
			
 
				+        assertNull(structure.getGrokPattern());
			
 
				+    }
			
 
				+
			
 
				+    public void testCreateConfigsGivenIncompleteCsvWithMultiLinedRows() throws Exception {
			
 
				+        String sample = "time,message\n" +
			
 
				+            "2018-05-17T13:41:23,\"hello\nnew line\"\n" +
			
 
				+            "\"badrow\n\n\n\n\"\n" + // REALLY bad row
			
 
				+            "2018-05-17T13:41:25,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:26,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:27,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:28,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:29,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:30,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:31,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:32,\"hello\nnew line\"\n" +
			
 
				+            "2018-05-17T13:41:35\n" + // Just missing the column
			
 
				+            "2018-05-17T13:41:33,\"hello again\nnew line\"\n";
			
 
				+        assertFalse(csvFactory.canCreateFromSample(explanation, sample, 0.05));
			
 
				+        assertTrue("assertion failed. Explanation " + explanation,
			
 
				+            csvFactory.canCreateFromSample(explanation, sample, 0.10));
			
 
				+
			
 
				+        String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				+        Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
 
				+        FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
			
 
				+            FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT,
			
 
				+            FileStructureOverrides.builder().setQuote('"').build(),
			
 
				+            NOOP_TIMEOUT_CHECKER);
			
 
				+
			
 
				+        FileStructure structure = structureFinder.getStructure();
			
 
				+
			
 
				+        assertEquals(FileStructure.Format.DELIMITED, structure.getFormat());
			
 
				+        assertEquals(charset, structure.getCharset());
			
 
				+        if (hasByteOrderMarker == null) {
			
 
				+            assertNull(structure.getHasByteOrderMarker());
			
 
				+        } else {
			
 
				+            assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
			
 
				+        }
			
 
				+        assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
			
 
				+        assertEquals("time", structure.getTimestampField());
			
 
				+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
			
 
				+        assertEquals(Arrays.asList("time", "message"), structure.getColumnNames());
			
 
				+        assertEquals(Character.valueOf(','), structure.getDelimiter());
			
 
				+        assertEquals(Character.valueOf('"'), structure.getQuote());
			
 
				+        assertEquals(structure.getNumMessagesAnalyzed(), 10);
			
 
				+        assertTrue(structure.getHasHeaderRow());
			
 
				+        assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}", structure.getMultilineStartPattern());
			
 
				+        assertNull(structure.getShouldTrimFields());
			
 
				+        assertNull(structure.getGrokPattern());
			
 
				+    }
			
 
				+
			
 
				     public void testCreateConfigsGivenCompleteCsvAndColumnNamesOverride() throws Exception {
			
 
				 
			
 
				         FileStructureOverrides overrides = FileStructureOverrides.builder().setColumnNames(Arrays.asList("my_time", "my_message")).build();
			
@@ -72,7 +165,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				         String sample = "time,message\n" +
			
 
				             "2018-05-17T13:41:23,hello\n" +
			
 
				             "2018-05-17T13:41:32,hello again\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -109,7 +202,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				         String sample = "time,message\n" +
			
 
				             "2018-05-17T13:41:23,hello\n" +
			
 
				             "2018-05-17T13:41:32,hello again\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -142,7 +235,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				             "2018-05-17T13:41:23,\"hello\n" +
			
 
				             "world\",1\n" +
			
 
				             "2019-01-18T14:46:57,\"hello again\n"; // note that this last record is truncated
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -177,7 +270,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				             "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -222,7 +315,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				             "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -262,7 +355,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				             "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -309,7 +402,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				             "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
			
 
				             "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -347,7 +440,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				         String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" +
			
 
				             "\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" +
			
 
				             "\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -387,7 +480,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				             "25.76615\t18.436565\t\"25.7661500000,18.4365650000\"\tJul 1 2019 12:06:08\n" +
			
 
				             "25.76896\t18.43586\t\"25.7689600000,18.4358600000\"\tJul 1 2019 12:13:50\n" +
			
 
				             "25.76423\t18.43705\t\"25.7642300000,18.4370500000\"\tJul 1 2019 12:39:10\n";
			
 
				-        assertTrue(tsvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(tsvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -421,7 +514,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
 
				         String sample = "time.iso8601,message\n" +
			
 
				             "2018-05-17T13:41:23,hello\n" +
			
 
				             "2018-05-17T13:41:32,hello again\n";
			
 
				-        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(csvFactory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinderTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinderTests.java
@@ -14,7 +14,7 @@ public class NdJsonFileStructureFinderTests extends FileStructureTestCase {
 
				     private FileStructureFinderFactory factory = new NdJsonFileStructureFinderFactory();
			
 
				 
			
 
				     public void testCreateConfigsGivenGoodJson() throws Exception {
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/NdNdJsonFileStructureFinderFactoryTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/NdNdJsonFileStructureFinderFactoryTests.java
@@ -11,36 +11,36 @@ public class NdNdJsonFileStructureFinderFactoryTests extends FileStructureTestCa
 
				 
			
 
				     public void testCanCreateFromSampleGivenNdJson() {
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenXml() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, XML_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenCsv() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenTsv() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenSemiColonDelimited() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenPipeDelimited() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenText() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderFactoryTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderFactoryTests.java
@@ -14,6 +14,6 @@ public class TextLogFileStructureFinderFactoryTests extends FileStructureTestCas
 
				 
			
 
				     public void testCanCreateFromSampleGivenText() {
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java
@@ -30,7 +30,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
 
				             "continuation line 2.4\n" +
			
 
				             "2019-05-16 16:56:14 line 3 abcdefghijklmnopqrstuvwxyz\n";
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -44,7 +44,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
 
				     }
			
 
				 
			
 
				     public void testCreateConfigsGivenElasticsearchLog() throws Exception {
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -85,7 +85,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
 
				 
			
 
				         FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("M/d/yyyy h:mma").build();
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, sample));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, sample, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -121,7 +121,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
 
				 
			
 
				         FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampField("my_time").build();
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -158,7 +158,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
 
				         FileStructureOverrides overrides = FileStructureOverrides.builder().setGrokPattern("\\[%{TIMESTAMP_ISO8601:timestamp}\\]" +
			
 
				             "\\[%{LOGLEVEL:loglevel} *\\]\\[%{JAVACLASS:class} *\\] \\[%{HOSTNAME:node}\\] %{JAVALOGMESSAGE:message}").build();
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
@@ -199,7 +199,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
 
				         FileStructureOverrides overrides = FileStructureOverrides.builder().setGrokPattern("\\[%{LOGLEVEL:loglevel} *\\]" +
			
 
				             "\\[%{HOSTNAME:node}\\]\\[%{TIMESTAMP_ISO8601:timestamp}\\] \\[%{JAVACLASS:class} *\\] %{JAVALOGMESSAGE:message}").build();
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderFactoryTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderFactoryTests.java
@@ -13,31 +13,31 @@ public class XmlFileStructureFinderFactoryTests extends FileStructureTestCase {
 
				 
			
 
				     public void testCanCreateFromSampleGivenXml() {
			
 
				 
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenCsv() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenTsv() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenSemiColonDelimited() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenPipeDelimited() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE, 0.0));
			
 
				     }
			
 
				 
			
 
				     public void testCanCreateFromSampleGivenText() {
			
 
				 
			
 
				-        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
			
 
				+        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderTests.java
@@ -14,7 +14,7 @@ public class XmlFileStructureFinderTests extends FileStructureTestCase {
 
				     private FileStructureFinderFactory factory = new XmlFileStructureFinderFactory();
			
 
				 
			
 
				     public void testCreateConfigsGivenGoodXml() throws Exception {
			
 
				-        assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE));
			
 
				+        assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));
			
 
				 
			
 
				         String charset = randomFrom(POSSIBLE_CHARSETS);
			
 
				         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);