Browse Source

[ML] Allow overriding timestamp field to null in file structure finder (#90764)

Use a magic value of "null" for the timestamp format override to indicate to the analysis that a timestamp is not expected in the input text. This should improve performance when analysing delimited, ndjson or xml formatted text files that don't contain timestamps. For semi-structured text files without timestamps, the magic value indicates that the text should be treated as single-line log messages.

see #55219
Ed Savage 3 years ago
parent
commit
f355787165

+ 5 - 0
docs/changelog/90764.yaml

@@ -0,0 +1,5 @@
+pr: 90764
+summary: Allow overriding timestamp field to null in file structure finder
+area: Machine Learning
+type: enhancement
+issues: []

+ 5 - 0
docs/reference/text-structure/apis/find-structure.asciidoc

@@ -219,6 +219,11 @@ structure finder does not consider by default.
 If this parameter is not specified, the structure finder chooses the best
 format from a built-in set.
 
+If the special value `null` is specified, the structure finder will not look
+for a primary timestamp in the text. When the format is semi-structured text,
+this will result in the structure finder treating the text as single-line
+messages.
+
 The following table provides the appropriate `timeformat` values for some example timestamps:
 
 |===

+ 1 - 1
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinder.java

@@ -121,7 +121,7 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
         sampleLines = null;
 
         Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats = TextStructureUtils
-            .guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
+            .guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker, overrides.getTimestampFormat());
 
         SortedMap<String, Object> fieldMappings = mappingsAndFieldStats.v1();
 

+ 11 - 7
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/GrokPatternCreator.java

@@ -293,14 +293,18 @@ public final class GrokPatternCreator {
 
         overallGrokPatternBuilder.setLength(0);
 
-        GrokPatternCandidate seedCandidate = new PrecalculatedMappingGrokPatternCandidate(
-            seedPatternName,
-            seedMapping,
-            seedFieldName,
-            grokPatternDefinitions
-        );
+        if (seedPatternName == null) {
+            appendBestGrokMatchForStrings(true, sampleMessages, false, 0);
+        } else {
+            GrokPatternCandidate seedCandidate = new PrecalculatedMappingGrokPatternCandidate(
+                seedPatternName,
+                seedMapping,
+                seedFieldName,
+                grokPatternDefinitions
+            );
 
-        processCandidateAndSplit(seedCandidate, true, sampleMessages, false, 0, false, 0);
+            processCandidateAndSplit(seedCandidate, true, sampleMessages, false, 0, false, 0);
+        }
 
         return overallGrokPatternBuilder.toString().replace("\t", "\\t").replace("\n", "\\n");
     }

+ 120 - 5
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinder.java

@@ -14,6 +14,7 @@ import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
 import org.joni.exception.SyntaxException;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
@@ -21,6 +22,7 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 public class LogTextStructureFinder implements TextStructureFinder {
 
@@ -28,21 +30,101 @@ public class LogTextStructureFinder implements TextStructureFinder {
     private final List<String> sampleMessages;
     private final TextStructure structure;
 
-    static LogTextStructureFinder makeLogTextStructureFinder(
+    private static LogTextStructureFinder makeSingleLineLogTextStructureFinder(
         List<String> explanation,
-        String sample,
+        String[] sampleLines,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        int lineMergeSizeLimit,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) {
+        // treat each line as a single message with no timestamp field
+
+        explanation.add("Timestamp format is explicitly set to \"null\"");
+
+        List<String> sampleMessages = Arrays.asList(sampleLines);
+
+        TextStructure.Builder structureBuilder = new TextStructure.Builder(TextStructure.Format.SEMI_STRUCTURED_TEXT).setCharset(
+            charsetName
+        )
+            .setSampleStart(sampleMessages.stream().limit(2).collect(Collectors.joining("\n", "", "\n")))
+            .setHasByteOrderMarker(hasByteOrderMarker)
+            .setNumLinesAnalyzed(sampleMessages.size())
+            .setNumMessagesAnalyzed(sampleMessages.size());
+
+        Map<String, String> messageMapping = Collections.singletonMap(TextStructureUtils.MAPPING_TYPE_SETTING, "text");
+        SortedMap<String, Object> fieldMappings = new TreeMap<>();
+        fieldMappings.put("message", messageMapping);
+
+        SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
+        fieldStats.put("message", TextStructureUtils.calculateFieldStats(messageMapping, sampleMessages, timeoutChecker));
+
+        Map<String, String> customGrokPatternDefinitions = Map.of();
+
+        GrokPatternCreator grokPatternCreator = new GrokPatternCreator(
+            explanation,
+            sampleMessages,
+            fieldMappings,
+            fieldStats,
+            customGrokPatternDefinitions,
+            timeoutChecker,
+            Grok.ECS_COMPATIBILITY_MODES[1].equals(overrides.getEcsCompatibility())
+        );
+
+        String grokPattern = overrides.getGrokPattern();
+        if (grokPattern != null) {
+            // Since this Grok pattern came from the end user, it might contain a syntax error
+            try {
+                grokPatternCreator.validateFullLineGrokPattern(grokPattern, "");
+            } catch (SyntaxException e) {
+                throw new IllegalArgumentException("Supplied Grok pattern [" + grokPattern + "] cannot be converted to a valid regex", e);
+            }
+        } else {
+            grokPattern = grokPatternCreator.createGrokPatternFromExamples(null, null, null);
+        }
+
+        TextStructure structure = structureBuilder.setGrokPattern(grokPattern)
+            .setEcsCompatibility(overrides.getEcsCompatibility())
+            .setIngestPipeline(
+                TextStructureUtils.makeIngestPipelineDefinition(
+                    grokPattern,
+                    customGrokPatternDefinitions,
+                    null,
+                    fieldMappings,
+                    null,
+                    null,
+                    false,
+                    false,
+                    overrides.getEcsCompatibility()
+                )
+            )
+            .setMappings(Collections.singletonMap(TextStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
+            .setFieldStats(fieldStats)
+            .setExplanation(explanation)
+            .build();
+
+        return new LogTextStructureFinder(sampleMessages, structure);
+    }
+
+    private static LogTextStructureFinder makeMultiLineLogTextStructureFinder(
+        List<String> explanation,
+        String[] sampleLines,
         String charsetName,
         Boolean hasByteOrderMarker,
         int lineMergeSizeLimit,
         TextStructureOverrides overrides,
         TimeoutChecker timeoutChecker
     ) {
-        String[] sampleLines = sample.split("\n");
         TimestampFormatFinder timestampFormatFinder = populateTimestampFormatFinder(explanation, sampleLines, overrides, timeoutChecker);
         switch (timestampFormatFinder.getNumMatchedFormats()) {
             case 0:
-                // Is it appropriate to treat text that is neither structured nor has
-                // a regular pattern of timestamps as a log? Probably not...
+                // To treat text as comprised of multi-line log messages we require the presence
+                // of at least one timestamp per message.
+                // In cases where it is desired to treat text that is neither structured nor has
+                // a regular pattern of timestamps as log messages we require the optional request
+                // argument "timestamp_format=null" to be passed, in which case the text will be
+                // treated as single line log messages.
                 throw new IllegalArgumentException(
                     "Could not find "
                         + ((overrides.getTimestampFormat() == null) ? "a timestamp" : "the specified timestamp format")
@@ -217,6 +299,39 @@ public class LogTextStructureFinder implements TextStructureFinder {
         return new LogTextStructureFinder(sampleMessages, structure);
     }
 
+    static LogTextStructureFinder makeLogTextStructureFinder(
+        List<String> explanation,
+        String sample,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        int lineMergeSizeLimit,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) {
+        String[] sampleLines = sample.split("\n");
+        if (TextStructureUtils.NULL_TIMESTAMP_FORMAT.equals(overrides.getTimestampFormat())) {
+            return makeSingleLineLogTextStructureFinder(
+                explanation,
+                sampleLines,
+                charsetName,
+                hasByteOrderMarker,
+                lineMergeSizeLimit,
+                overrides,
+                timeoutChecker
+            );
+        } else {
+            return makeMultiLineLogTextStructureFinder(
+                explanation,
+                sampleLines,
+                charsetName,
+                hasByteOrderMarker,
+                lineMergeSizeLimit,
+                overrides,
+                timeoutChecker
+            );
+        }
+    }
+
     private LogTextStructureFinder(List<String> sampleMessages, TextStructure structure) {
         this.sampleMessages = Collections.unmodifiableList(sampleMessages);
         this.structure = structure;

+ 1 - 1
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinder.java

@@ -87,7 +87,7 @@ public class NdJsonTextStructureFinder implements TextStructureFinder {
         }
 
         Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats = TextStructureUtils
-            .guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
+            .guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker, overrides.getTimestampFormat());
 
         Map<String, Object> fieldMappings = mappingsAndFieldStats.v1();
         if (timeField != null) {

+ 117 - 6
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java

@@ -41,6 +41,8 @@ public final class TextStructureUtils {
     public static final String NANOSECOND_DATE_OUTPUT_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX";
     public static final Set<String> CONVERTIBLE_TYPES = Set.of("integer", "long", "float", "double", "boolean");
 
+    public static final String NULL_TIMESTAMP_FORMAT = "null";
+
     private static final Map<String, String> EXTENDED_PATTERNS;
     static {
         Map<String, String> patterns = new HashMap<>();
@@ -114,6 +116,10 @@ public final class TextStructureUtils {
             return null;
         }
 
+        if (NULL_TIMESTAMP_FORMAT.equals(overrides.getTimestampFormat())) {
+            return null;
+        }
+
         StringBuilder exceptionMsg = null;
 
         // Accept the first match from the first sample that is compatible with all the other samples
@@ -256,14 +262,22 @@ public final class TextStructureUtils {
      *                    can be appended by this method.
      * @param sampleRecords The sampled records.
      * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param timestampFormatOverride The format of the timestamp as given in the request overrides.
      * @return A tuple comprised of the field mappings and field stats.
      */
     static Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> guessMappingsAndCalculateFieldStats(
         List<String> explanation,
         List<Map<String, ?>> sampleRecords,
-        TimeoutChecker timeoutChecker
+        TimeoutChecker timeoutChecker,
+        String timestampFormatOverride
     ) {
-        return guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker, DEFAULT_ECS_COMPATIBILITY);
+        return guessMappingsAndCalculateFieldStats(
+            explanation,
+            sampleRecords,
+            timeoutChecker,
+            DEFAULT_ECS_COMPATIBILITY,
+            timestampFormatOverride
+        );
     }
 
     /**
@@ -272,7 +286,7 @@ public final class TextStructureUtils {
      *                    can be appended by this method.
      * @param sampleRecords The sampled records.
      * @param timeoutChecker Will abort the operation if its timeout is exceeded.
-     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
      * @return A tuple comprised of the field mappings and field stats.
      */
     static Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> guessMappingsAndCalculateFieldStats(
@@ -281,6 +295,26 @@ public final class TextStructureUtils {
         TimeoutChecker timeoutChecker,
         boolean ecsCompatibility
     ) {
+        return guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker, ecsCompatibility, null);
+    }
+
+    /**
+     * Given the sampled records, guess appropriate Elasticsearch mappings.
+     * @param explanation List of reasons for making decisions.  May contain items when passed and new reasons
+     *                    can be appended by this method.
+     * @param sampleRecords The sampled records.
+     * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
+     * @param timestampFormatOverride The format of the timestamp as given in the request overrides.
+     * @return A tuple comprised of the field mappings and field stats.
+     */
+    static Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> guessMappingsAndCalculateFieldStats(
+        List<String> explanation,
+        List<Map<String, ?>> sampleRecords,
+        TimeoutChecker timeoutChecker,
+        boolean ecsCompatibility,
+        String timestampFormatOverride
+    ) {
 
         SortedMap<String, Object> mappings = new TreeMap<>();
         SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
@@ -299,7 +333,8 @@ public final class TextStructureUtils {
                 fieldName,
                 fieldValues,
                 timeoutChecker,
-                ecsCompatibility
+                ecsCompatibility,
+                timestampFormatOverride
             );
             if (mappingAndFieldStats != null) {
                 if (mappingAndFieldStats.v1() != null) {
@@ -314,12 +349,50 @@ public final class TextStructureUtils {
         return new Tuple<>(mappings, fieldStats);
     }
 
+    /**
+     * Given some sample values for a field, guess the most appropriate index mapping for the field and calculate its field stats.
+     * @param explanation List of reasons for choosing the overall text structure.  This list
+     *                    may be non-empty when the method is called, and this method may
+     *                    append to it.
+     * @param fieldName Name of the field for which mappings are to be guessed.
+     * @param fieldValues Values of the field for which mappings are to be guessed.  The guessed
+     *                    mapping will be compatible with all the provided values.  Must not be
+     *                    empty.
+     * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
+     * @return A tuple comprised of the field mappings and field stats.
+     */
     static Tuple<Map<String, String>, FieldStats> guessMappingAndCalculateFieldStats(
         List<String> explanation,
         String fieldName,
         List<Object> fieldValues,
         TimeoutChecker timeoutChecker,
         boolean ecsCompatibility
+    ) {
+        return guessMappingAndCalculateFieldStats(explanation, fieldName, fieldValues, timeoutChecker, ecsCompatibility, null);
+    }
+
+    /**
+     * Given some sample values for a field, guess the most appropriate index mapping for the field and calculate its field stats.
+     * @param explanation List of reasons for choosing the overall text structure.  This list
+     *                    may be non-empty when the method is called, and this method may
+     *                    append to it.
+     * @param fieldName Name of the field for which mappings are to be guessed.
+     * @param fieldValues Values of the field for which mappings are to be guessed.  The guessed
+     *                    mapping will be compatible with all the provided values.  Must not be
+     *                    empty.
+     * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
+     * @param timestampFormatOverride The format of the timestamp as given in the request overrides.
+     * @return A tuple comprised of the field mappings and field stats.
+     */
+    static Tuple<Map<String, String>, FieldStats> guessMappingAndCalculateFieldStats(
+        List<String> explanation,
+        String fieldName,
+        List<Object> fieldValues,
+        TimeoutChecker timeoutChecker,
+        boolean ecsCompatibility,
+        String timestampFormatOverride
     ) {
         if (fieldValues == null || fieldValues.isEmpty()) {
             // We can get here if all the records that contained a given field had a null value for it.
@@ -343,12 +416,20 @@ public final class TextStructureUtils {
                 fieldName,
                 fieldValues.stream().flatMap(TextStructureUtils::flatten).collect(Collectors.toList()),
                 timeoutChecker,
-                ecsCompatibility
+                ecsCompatibility,
+                timestampFormatOverride
             );
         }
 
         Collection<String> fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList());
-        Map<String, String> mapping = guessScalarMapping(explanation, fieldName, fieldValuesAsStrings, timeoutChecker, ecsCompatibility);
+        Map<String, String> mapping = guessScalarMapping(
+            explanation,
+            fieldName,
+            fieldValuesAsStrings,
+            timeoutChecker,
+            ecsCompatibility,
+            timestampFormatOverride
+        );
         timeoutChecker.check("mapping determination");
         return new Tuple<>(mapping, calculateFieldStats(mapping, fieldValuesAsStrings, timeoutChecker));
     }
@@ -375,6 +456,7 @@ public final class TextStructureUtils {
      *                    mapping will be compatible with all the provided values.  Must not be
      *                    empty.
      * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
      * @return The sub-section of the index mappings most appropriate for the field.
      */
     static Map<String, String> findTimestampMapping(
@@ -408,6 +490,7 @@ public final class TextStructureUtils {
      *                    mapping will be compatible with all the provided values.  Must not be
      *                    empty.
      * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
      * @return The sub-section of the index mappings most appropriate for the field,
      *         for example <code>{ "type" : "keyword" }</code>.
      */
@@ -417,6 +500,33 @@ public final class TextStructureUtils {
         Collection<String> fieldValues,
         TimeoutChecker timeoutChecker,
         boolean ecsCompatibility
+    ) {
+        return guessScalarMapping(explanation, fieldName, fieldValues, timeoutChecker, ecsCompatibility, null);
+    }
+
+    /**
+     * Given some sample values for a field, guess the most appropriate index mapping for the
+     * field.
+     * @param explanation List of reasons for choosing the overall text structure.  This list
+     *                    may be non-empty when the method is called, and this method may
+     *                    append to it.
+     * @param fieldName Name of the field for which mappings are to be guessed.
+     * @param fieldValues Values of the field for which mappings are to be guessed.  The guessed
+     *                    mapping will be compatible with all the provided values.  Must not be
+     *                    empty.
+     * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
+     * @param timestampFormatOverride The format of the timestamp as given in the request overrides.
+     * @return The sub-section of the index mappings most appropriate for the field,
+     *         for example <code>{ "type" : "keyword" }</code>.
+     */
+    static Map<String, String> guessScalarMapping(
+        List<String> explanation,
+        String fieldName,
+        Collection<String> fieldValues,
+        TimeoutChecker timeoutChecker,
+        boolean ecsCompatibility,
+        String timestampFormatOverride
     ) {
         assert fieldValues.isEmpty() == false;
 
@@ -499,6 +609,7 @@ public final class TextStructureUtils {
      *                         May be <code>null</code> if {@code timestampField} is also <code>null</code>.
      * @param needClientTimezone Is the timezone of the client supplying data to ingest required to uniquely parse the timestamp?
      * @param needNanosecondPrecision Does the timestamp have more than millisecond accuracy?
+     * @param ecsCompatibility The mode of compatibility with ECS Grok patterns.
      * @return The ingest pipeline definition, or <code>null</code> if none is required.
      */
     public static Map<String, Object> makeIngestPipelineDefinition(

+ 1 - 1
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/XmlTextStructureFinder.java

@@ -128,7 +128,7 @@ public class XmlTextStructureFinder implements TextStructureFinder {
         }
 
         Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats = TextStructureUtils
-            .guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
+            .guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker, overrides.getTimestampFormat());
 
         if (mappingsAndFieldStats.v2() != null) {
             structureBuilder.setFieldStats(mappingsAndFieldStats.v2());

+ 144 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinderTests.java

@@ -11,6 +11,7 @@ import org.elasticsearch.xpack.core.textstructure.structurefinder.FieldStats;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
 
 import java.util.Collections;
+import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -97,6 +98,135 @@ public class LogTextStructureFinderTests extends TextStructureTestCase {
             assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
         }
         assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
+        @SuppressWarnings("unchecked")
+        Set<String> keys = ((Map<String, Object>) structure.getMappings().get("properties")).keySet();
+        assertEquals(3, keys.size());
+        assertTrue(keys.contains("message"));
+        assertTrue(keys.contains("loglevel"));
+        assertTrue(keys.contains("@timestamp"));
+    }
+
+    public void testCreateConfigsGivenElasticsearchLogWithNoTimestamps() throws Exception {
+        assertTrue(factory.canCreateFromSample(explanation, TEXT_WITH_NO_TIMESTAMPS_SAMPLE, 0.0));
+
+        String charset = randomFrom(POSSIBLE_CHARSETS);
+        Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
+        IllegalArgumentException e = expectThrows(
+            IllegalArgumentException.class,
+            () -> factory.createFromSample(
+                explanation,
+                TEXT_WITH_NO_TIMESTAMPS_SAMPLE,
+                charset,
+                hasByteOrderMarker,
+                TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT,
+                TextStructureOverrides.EMPTY_OVERRIDES,
+                NOOP_TIMEOUT_CHECKER
+            )
+        );
+
+        assertEquals("Could not find a timestamp in the sample provided", e.getMessage());
+
+        // Now try to determine the format by first specifying that no timestamp is expected in the text samples,
+        // in which case we assume one log message per line.
+        TextStructureOverrides overrides = TextStructureOverrides.builder()
+            .setTimestampFormat(TextStructureUtils.NULL_TIMESTAMP_FORMAT)
+            .build();
+
+        {
+            TextStructureFinder structureFinder = factory.createFromSample(
+                explanation,
+                TEXT_WITH_NO_TIMESTAMPS_SAMPLE,
+                charset,
+                hasByteOrderMarker,
+                TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT,
+                overrides,
+                NOOP_TIMEOUT_CHECKER
+            );
+
+            TextStructure structure = structureFinder.getStructure();
+
+            assertEquals(TextStructure.Format.SEMI_STRUCTURED_TEXT, structure.getFormat());
+            assertEquals(charset, structure.getCharset());
+            if (hasByteOrderMarker == null) {
+                assertNull(structure.getHasByteOrderMarker());
+            } else {
+                assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
+            }
+            assertNull(structure.getExcludeLinesPattern());
+            assertNull(structure.getMultilineStartPattern());
+            assertNull(structure.getDelimiter());
+            assertNull(structure.getQuote());
+            assertNull(structure.getHasHeaderRow());
+            assertNull(structure.getShouldTrimFields());
+            assertEquals("\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
+            assertNull(structure.getTimestampField());
+            assertNull(structure.getJodaTimestampFormats());
+            FieldStats messageFieldStats = structure.getFieldStats().get("message");
+            assertNotNull(messageFieldStats);
+            for (String statMessage : messageFieldStats.getTopHits()
+                .stream()
+                .map(m -> (String) m.get("value"))
+                .collect(Collectors.toList())) {
+                assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
+            }
+            assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
+            @SuppressWarnings("unchecked")
+            Set<String> keys = ((Map<String, Object>) structure.getMappings().get("properties")).keySet();
+            assertEquals(2, keys.size());
+            assertTrue(keys.contains("message"));
+            assertTrue(keys.contains("loglevel"));
+            assertFalse(keys.contains("@timestamp"));
+        }
+
+        {
+            // Finally, test the behaviour when parsing single-line log messages that _do_ contain timestamps
+            // but the timestamp format override has been set to TextStructureUtils.NULL_TIMESTAMP_FORMAT ("null")
+            TextStructureFinder structureFinder = factory.createFromSample(
+                explanation,
+                TEXT_SAMPLE,
+                charset,
+                hasByteOrderMarker,
+                TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT,
+                overrides,
+                NOOP_TIMEOUT_CHECKER
+            );
+
+            TextStructure structure = structureFinder.getStructure();
+
+            assertEquals(TextStructure.Format.SEMI_STRUCTURED_TEXT, structure.getFormat());
+            assertEquals(charset, structure.getCharset());
+            if (hasByteOrderMarker == null) {
+                assertNull(structure.getHasByteOrderMarker());
+            } else {
+                assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
+            }
+            assertNull(structure.getExcludeLinesPattern());
+            assertNull(structure.getMultilineStartPattern());
+            assertNull(structure.getDelimiter());
+            assertNull(structure.getQuote());
+            assertNull(structure.getHasHeaderRow());
+            assertNull(structure.getShouldTrimFields());
+            // a timestamp field is detected but it's not set to be the primary one.
+            assertEquals("\\[%{TIMESTAMP_ISO8601:extra_timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
+            assertNull(structure.getTimestampField());
+            assertNull(structure.getJodaTimestampFormats());
+            FieldStats messageFieldStats = structure.getFieldStats().get("message");
+            assertNotNull(messageFieldStats);
+            for (String statMessage : messageFieldStats.getTopHits()
+                .stream()
+                .map(m -> (String) m.get("value"))
+                .collect(Collectors.toList())) {
+                assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
+            }
+            assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
+            @SuppressWarnings("unchecked")
+            Set<String> keys = ((Map<String, Object>) structure.getMappings().get("properties")).keySet();
+            assertEquals(3, keys.size());
+            assertTrue(keys.contains("message"));
+            assertTrue(keys.contains("loglevel"));
+            assertTrue(keys.contains("extra_timestamp"));
+            assertFalse(keys.contains("@timestamp"));
+        }
     }
 
     public void testCreateConfigsGivenElasticsearchLogAndTimestampFormatOverride() throws Exception {
@@ -192,6 +322,12 @@ public class LogTextStructureFinderTests extends TextStructureTestCase {
             assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
         }
         assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
+        @SuppressWarnings("unchecked")
+        Set<String> keys = ((Map<String, Object>) structure.getMappings().get("properties")).keySet();
+        assertEquals(3, keys.size());
+        assertTrue(keys.contains("message"));
+        assertTrue(keys.contains("loglevel"));
+        assertTrue(keys.contains("@timestamp"));
     }
 
     public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throws Exception {
@@ -247,6 +383,14 @@ public class LogTextStructureFinderTests extends TextStructureTestCase {
             assertThat(structureFinder.getSampleMessages(), not(hasItem(statMessage)));
         }
         assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
+        @SuppressWarnings("unchecked")
+        Set<String> keys = ((Map<String, Object>) structure.getMappings().get("properties")).keySet();
+        assertEquals(5, keys.size());
+        assertTrue(keys.contains("message"));
+        assertTrue(keys.contains("loglevel"));
+        assertTrue(keys.contains("class"));
+        assertTrue(keys.contains("node"));
+        assertTrue(keys.contains("@timestamp"));
     }
 
     public void testCreateConfigsGivenElasticsearchLogAndImpossibleGrokPatternOverride() {

+ 8 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java

@@ -62,6 +62,14 @@ public abstract class TextStructureTestCase extends ESTestCase {
         [2018-05-11T17:07:29,556][INFO ][o.e.n.Node               ] [node-0] node name [node-0], node ID [tJ9u8HcaTbWxRtnlfz1RQA]
         """;
 
+    protected static final String TEXT_WITH_NO_TIMESTAMPS_SAMPLE = """
+        [INFO ][o.e.n.Node               ] [node-0] initializing ...
+        [INFO ][o.e.e.NodeEnvironment    ] [node-0] using [1] data paths, mounts [[/ (/dev/disk1)]], net \
+        usable_space [223.4gb], net total_space [464.7gb], types [hfs]
+        [INFO ][o.e.e.NodeEnvironment    ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]
+        [INFO ][o.e.n.Node               ] [node-0] node name [node-0], node ID [tJ9u8HcaTbWxRtnlfz1RQA]
+        """;
+
     protected static final String TSV_SAMPLE = """
         time\tid\tvalue
         2018-05-17T16:23:40\tkey1\t42.0

+ 347 - 74
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtilsTests.java

@@ -296,9 +296,12 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
     }
 
     public void testGuessMappingGivenNothing() {
-        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> assertNull(
-            guessMapping(explanation, "foo", Collections.emptyList(), ecsCompatibility)
-        );
+        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+            assertNull(guessMapping(explanation, "foo", Collections.emptyList(), ecsCompatibility));
+            assertNull(
+                guessMapping(explanation, "foo", Collections.emptyList(), ecsCompatibility, TextStructureUtils.NULL_TIMESTAMP_FORMAT)
+            );
+        };
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
     }
 
@@ -308,18 +311,51 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
         Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("ERROR", "INFO", "DEBUG"), ecsCompatibility));
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "not a date"), ecsCompatibility));
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("ERROR", "INFO", "DEBUG"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("2018-06-11T13:26:47Z", "not a date"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
         };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
     }
 
     public void testGuessMappingGivenText() {
+
         Map<String, String> expected = Collections.singletonMap(TextStructureUtils.MAPPING_TYPE_SETTING, "text");
 
-        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> assertEquals(
-            expected,
-            guessMapping(explanation, "foo", Arrays.asList("a", "the quick brown fox jumped over the lazy dog"), ecsCompatibility)
-        );
+        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+            assertEquals(
+                expected,
+                guessMapping(explanation, "foo", Arrays.asList("a", "the quick brown fox jumped over the lazy dog"), ecsCompatibility)
+            );
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("a", "the quick brown fox jumped over the lazy dog"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+        };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
     }
@@ -327,10 +363,22 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
     public void testGuessMappingGivenIp() {
         Map<String, String> expected = Collections.singletonMap(TextStructureUtils.MAPPING_TYPE_SETTING, "ip");
 
-        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> assertEquals(
-            expected,
-            guessMapping(explanation, "foo", Arrays.asList("10.0.0.1", "172.16.0.1", "192.168.0.1"), ecsCompatibility)
-        );
+        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+            assertEquals(
+                expected,
+                guessMapping(explanation, "foo", Arrays.asList("10.0.0.1", "172.16.0.1", "192.168.0.1"), ecsCompatibility)
+            );
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("10.0.0.1", "172.16.0.1", "192.168.0.1"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+        };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
     }
@@ -344,6 +392,48 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("1", "2", "12345678901234567890"), ecsCompatibility));
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(3.14159265359, 0.0, 1e-308), ecsCompatibility));
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("-1e-1", "-1e308", "1e-308"), ecsCompatibility));
+
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("3.14159265359", "0", "-8"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+            // 12345678901234567890 is too large to fit in a long
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("1", "2", "12345678901234567890"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(3.14159265359, 0.0, 1e-308),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("-1e-1", "-1e308", "1e-308"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
         };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
@@ -355,22 +445,65 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
         Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("500", "3", "-3"), ecsCompatibility));
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(500, 6, 0), ecsCompatibility));
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("500", "3", "-3"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
+            assertEquals(
+                expected,
+                guessMapping(explanation, "foo", Arrays.asList(500, 6, 0), ecsCompatibility, TextStructureUtils.NULL_TIMESTAMP_FORMAT)
+            );
         };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
     }
 
     public void testGuessMappingGivenDate() {
-        Map<String, String> expected = new HashMap<>();
-        expected.put(TextStructureUtils.MAPPING_TYPE_SETTING, "date");
-        expected.put(TextStructureUtils.MAPPING_FORMAT_SETTING, "iso8601");
-
-        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> assertEquals(
-            expected,
-            guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "2018-06-11T13:27:12Z"), ecsCompatibility)
-        );
-
-        ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        {
+            Map<String, String> expected = new HashMap<>();
+            expected.put(TextStructureUtils.MAPPING_TYPE_SETTING, "date");
+            expected.put(TextStructureUtils.MAPPING_FORMAT_SETTING, "iso8601");
+
+            Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+                assertEquals(
+                    expected,
+                    guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "2018-06-11T13:27:12Z"), ecsCompatibility)
+                );
+            };
+
+            ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        }
+        {
+            // The special value of "null" for the timestamp format indicates that the analysis
+            // of semi-structured text should assume the absence of any timestamp.
+            // In the case of structured text, there may be timestamps present in multiple fields
+            // which we want the analysis to identify. For now we don't want the user-supplied timestamp
+            // format override to affect this behaviour, hence this check.
+            Map<String, String> expected = new HashMap<>();
+            expected.put(TextStructureUtils.MAPPING_TYPE_SETTING, "date");
+            expected.put(TextStructureUtils.MAPPING_FORMAT_SETTING, "iso8601");
+
+            Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+                assertEquals(
+                    expected,
+                    guessMapping(
+                        explanation,
+                        "foo",
+                        Arrays.asList("2018-06-11T13:26:47Z", "2018-06-11T13:27:12Z"),
+                        ecsCompatibility,
+                        TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                    )
+                );
+            };
+
+            ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        }
     }
 
     public void testGuessMappingGivenBoolean() {
@@ -379,6 +512,14 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
         Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("false", "true"), ecsCompatibility));
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(true, false), ecsCompatibility));
+            assertEquals(
+                expected,
+                guessMapping(explanation, "foo", Arrays.asList("false", "true"), ecsCompatibility, TextStructureUtils.NULL_TIMESTAMP_FORMAT)
+            );
+            assertEquals(
+                expected,
+                guessMapping(explanation, "foo", Arrays.asList(true, false), ecsCompatibility, TextStructureUtils.NULL_TIMESTAMP_FORMAT)
+            );
         };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
@@ -391,10 +532,30 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
             Map<String, String> expected = Collections.singletonMap(TextStructureUtils.MAPPING_TYPE_SETTING, "long");
 
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(42, Arrays.asList(1, -99)), ecsCompatibility));
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(42, Arrays.asList(1, -99)),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
 
             expected = Collections.singletonMap(TextStructureUtils.MAPPING_TYPE_SETTING, "keyword");
 
             assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(new String[] { "x", "y" }, "z"), ecsCompatibility));
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(new String[] { "x", "y" }, "z"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
         };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
@@ -413,6 +574,16 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
                     ecsCompatibility
                 )
             );
+            assertEquals(
+                expected,
+                guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(Collections.singletonMap("name", "value1"), Collections.singletonMap("name", "value2")),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
         };
 
         ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
@@ -429,7 +600,18 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
                     ecsCompatibility
                 )
             );
+            assertEquals("Field [foo] has both object and non-object values - this is not supported by Elasticsearch", e.getMessage());
 
+            e = expectThrows(
+                RuntimeException.class,
+                () -> guessMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(Collections.singletonMap("name", "value1"), "value2"),
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                )
+            );
             assertEquals("Field [foo] has both object and non-object values - this is not supported by Elasticsearch", e.getMessage());
         };
 
@@ -791,64 +973,155 @@ public class TextStructureUtilsTests extends TextStructureTestCase {
     }
 
     public void testGuessGeoPoint() {
-        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
-            Map<String, String> mapping = TextStructureUtils.guessScalarMapping(
-                explanation,
-                "foo",
-                Arrays.asList("POINT (-77.03653 38.897676)", "POINT (-50.03653 28.8973)"),
-                NOOP_TIMEOUT_CHECKER,
-                ecsCompatibility
-            );
-            assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("geo_point"));
-
-            mapping = TextStructureUtils.guessScalarMapping(
-                explanation,
-                "foo",
-                Arrays.asList("POINT (-77.03653 38.897676)", "bar"),
-                NOOP_TIMEOUT_CHECKER,
-                ecsCompatibility
-            );
-            assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("keyword"));
-        };
+        {
+            Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+                Map<String, String> mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("POINT (-77.03653 38.897676)", "POINT (-50.03653 28.8973)"),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("geo_point"));
 
-        ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+                mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("POINT (-77.03653 38.897676)", "bar"),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("keyword"));
+            };
+
+            ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        }
+
+        // There should be no behavioural change between not specifying a timestamp format at all
+        // and explicitly specifying it as the special string "null" (other than performance)
+        {
+            Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+                Map<String, String> mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("POINT (-77.03653 38.897676)", "POINT (-50.03653 28.8973)"),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("geo_point"));
+
+                mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("POINT (-77.03653 38.897676)", "bar"),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("keyword"));
+            };
+
+            ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        }
     }
 
     public void testGuessGeoShape() {
-        Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
-            Map<String, String> mapping = TextStructureUtils.guessScalarMapping(
-                explanation,
-                "foo",
-                Arrays.asList(
-                    "POINT (-77.03653 38.897676)",
-                    "LINESTRING (-77.03653 38.897676, -77.009051 38.889939)",
-                    "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0))",
-                    "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0), "
-                        + "(100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8, 100.2 0.2))",
-                    "MULTIPOINT (102.0 2.0, 103.0 2.0)",
-                    "MULTILINESTRING ((102.0 2.0, 103.0 2.0, 103.0 3.0, 102.0 3.0), (100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0),"
-                        + " (100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8))",
-                    "MULTIPOLYGON (((102.0 2.0, 103.0 2.0, 103.0 3.0, 102.0 3.0, 102.0 2.0)), ((100.0 0.0, 101.0 0.0, 101.0 1.0, "
-                        + "100.0 1.0, 100.0 0.0), (100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8, 100.2 0.2)))",
-                    "GEOMETRYCOLLECTION (POINT (100.0 0.0), LINESTRING (101.0 0.0, 102.0 1.0))",
-                    "BBOX (100.0, 102.0, 2.0, 0.0)"
-                ),
-                NOOP_TIMEOUT_CHECKER,
-                ecsCompatibility
-            );
-            assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("geo_shape"));
-
-            mapping = TextStructureUtils.guessScalarMapping(
-                explanation,
-                "foo",
-                Arrays.asList("POINT (-77.03653 38.897676)", "LINESTRING (-77.03653 38.897676, -77.009051 38.889939)", "bar"),
-                NOOP_TIMEOUT_CHECKER,
-                ecsCompatibility
-            );
-            assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("keyword"));
-        };
+        {
+            Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+                Map<String, String> mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(
+                        "POINT (-77.03653 38.897676)",
+                        "LINESTRING (-77.03653 38.897676, -77.009051 38.889939)",
+                        "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0))",
+                        "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0), "
+                            + "(100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8, 100.2 0.2))",
+                        "MULTIPOINT (102.0 2.0, 103.0 2.0)",
+                        "MULTILINESTRING ((102.0 2.0, 103.0 2.0, 103.0 3.0, 102.0 3.0), (100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0),"
+                            + " (100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8))",
+                        "MULTIPOLYGON (((102.0 2.0, 103.0 2.0, 103.0 3.0, 102.0 3.0, 102.0 2.0)), ((100.0 0.0, 101.0 0.0, 101.0 1.0, "
+                            + "100.0 1.0, 100.0 0.0), (100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8, 100.2 0.2)))",
+                        "GEOMETRYCOLLECTION (POINT (100.0 0.0), LINESTRING (101.0 0.0, 102.0 1.0))",
+                        "BBOX (100.0, 102.0, 2.0, 0.0)"
+                    ),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("geo_shape"));
 
-        ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+                mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("POINT (-77.03653 38.897676)", "LINESTRING (-77.03653 38.897676, -77.009051 38.889939)", "bar"),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("keyword"));
+            };
+
+            ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        }
+
+        // There should be no behavioural change between not specifying a timestamp format at all
+        // and explicitly specifying it as the special string "null" (other than performance)
+        {
+            Consumer<Boolean> testGuessMappingGivenEcsCompatibility = (ecsCompatibility) -> {
+                Map<String, String> mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList(
+                        "POINT (-77.03653 38.897676)",
+                        "LINESTRING (-77.03653 38.897676, -77.009051 38.889939)",
+                        "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0))",
+                        "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0), "
+                            + "(100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8, 100.2 0.2))",
+                        "MULTIPOINT (102.0 2.0, 103.0 2.0)",
+                        "MULTILINESTRING ((102.0 2.0, 103.0 2.0, 103.0 3.0, 102.0 3.0), (100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0),"
+                            + " (100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8))",
+                        "MULTIPOLYGON (((102.0 2.0, 103.0 2.0, 103.0 3.0, 102.0 3.0, 102.0 2.0)), ((100.0 0.0, 101.0 0.0, 101.0 1.0, "
+                            + "100.0 1.0, 100.0 0.0), (100.2 0.2, 100.8 0.2, 100.8 0.8, 100.2 0.8, 100.2 0.2)))",
+                        "GEOMETRYCOLLECTION (POINT (100.0 0.0), LINESTRING (101.0 0.0, 102.0 1.0))",
+                        "BBOX (100.0, 102.0, 2.0, 0.0)"
+                    ),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("geo_shape"));
+
+                mapping = TextStructureUtils.guessScalarMapping(
+                    explanation,
+                    "foo",
+                    Arrays.asList("POINT (-77.03653 38.897676)", "LINESTRING (-77.03653 38.897676, -77.009051 38.889939)", "bar"),
+                    NOOP_TIMEOUT_CHECKER,
+                    ecsCompatibility,
+                    TextStructureUtils.NULL_TIMESTAMP_FORMAT
+                );
+                assertThat(mapping.get(TextStructureUtils.MAPPING_TYPE_SETTING), equalTo("keyword"));
+            };
+
+            ecsCompatibilityModes.forEach(testGuessMappingGivenEcsCompatibility);
+        }
+    }
+
+    private Map<String, String> guessMapping(
+        List<String> explanation,
+        String fieldName,
+        List<Object> fieldValues,
+        boolean ecsCompatibility,
+        String timestampFormatOverride
+    ) {
+        Tuple<Map<String, String>, FieldStats> mappingAndFieldStats = TextStructureUtils.guessMappingAndCalculateFieldStats(
+            explanation,
+            fieldName,
+            fieldValues,
+            NOOP_TIMEOUT_CHECKER,
+            ecsCompatibility,
+            timestampFormatOverride
+        );
+        return (mappingAndFieldStats == null) ? null : mappingAndFieldStats.v1();
     }
 
     private Map<String, String> guessMapping(