Browse Source

[ML] Return both Joda and Java formats from structure finder (#33900)

Previously the timestamp_formats field in the response
from the find_file_structure endpoint contained Joda
timestamp formats.  This change makes that clear by
renaming the field to joda_timestamp_formats, and also
adds a java_timestamp_formats field containing the
equivalent Java time format strings.
David Roberts 7 years ago
parent
commit
dfe5af0411
17 changed files with 1254 additions and 233 deletions
  1. 790 2
      docs/reference/ml/apis/find-file-structure.asciidoc
  2. 61 23
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java
  3. 2 1
      x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java
  4. 2 1
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java
  5. 1 1
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java
  6. 2 1
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/JsonFileStructureFinder.java
  7. 4 3
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java
  8. 148 72
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java
  9. 2 1
      x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java
  10. 9 9
      x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderTests.java
  11. 9 9
      x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java
  12. 1 1
      x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/JsonFileStructureFinderTests.java
  13. 12 12
      x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java
  14. 205 93
      x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java
  15. 1 1
      x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderTests.java
  16. 1 1
      x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json
  17. 4 2
      x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml

+ 790 - 2
docs/reference/ml/apis/find-file-structure.asciidoc

@@ -159,7 +159,7 @@ format corresponds to the primary timestamp, but you do not want to specify the
 full `grok_pattern`.
 
 If this parameter is not specified, the structure finder chooses the best format from
-the formats it knows, which are:
+the formats it knows, which are these Joda formats and their Java time equivalents:
 
 * `dd/MMM/YYYY:HH:mm:ss Z`
 * `EEE MMM dd HH:mm zzz YYYY`
@@ -178,7 +178,7 @@ the formats it knows, which are:
 * `MMM dd HH:mm:ss`
 * `MMM dd HH:mm:ss,SSS`
 * `MMM dd YYYY HH:mm:ss`
-* `MMM dd, YYYY K:mm:ss a`
+* `MMM dd, YYYY h:mm:ss a`
 * `TAI64N`
 * `UNIX`
 * `UNIX_MS`
@@ -487,3 +487,791 @@ If the request does not encounter errors, you receive the following result:
     may provide clues that the data needs to be cleaned or transformed prior
     to use by other {ml} functionality.
 
+The next example shows how it's possible to find the structure of some New York
+City yellow cab trip data. The first `curl` command downloads the data, the
+first 20000 lines of which are then piped into the `find_file_structure`
+endpoint. The `lines_to_sample` query parameter of the endpoint is set to 20000
+to match what is specified in the `head` command.
+
+[source,js]
+----
+curl -s "s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2018-06.csv" | head -20000 | curl -s -H "Content-Type: application/json" -XPOST "localhost:9200/_xpack/ml/find_file_structure?pretty&lines_to_sample=20000" -T -
+----
+// NOTCONSOLE
+// Not converting to console because this shows how curl can be used
+
+--
+NOTE: The `Content-Type: application/json` header must be set even though in
+this case the data is not JSON.  (Alternatively, the `Content-Type` can be set
+to any other value supported by Elasticsearch, but it must be set.)
+--
+
+If the request does not encounter errors, you receive the following result:
+[source,js]
+----
+{
+  "num_lines_analyzed" : 20000,
+  "num_messages_analyzed" : 19998, <1>
+  "sample_start" : "VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount\n\n1,2018-06-01 00:15:40,2018-06-01 00:16:46,1,.00,1,N,145,145,2,3,0.5,0.5,0,0,0.3,4.3\n",
+  "charset" : "UTF-8",
+  "has_byte_order_marker" : false,
+  "format" : "delimited", <2>
+  "multiline_start_pattern" : "^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
+  "exclude_lines_pattern" : "^\"?VendorID\"?,\"?tpep_pickup_datetime\"?,\"?tpep_dropoff_datetime\"?,\"?passenger_count\"?,\"?trip_distance\"?,\"?RatecodeID\"?,\"?store_and_fwd_flag\"?,\"?PULocationID\"?,\"?DOLocationID\"?,\"?payment_type\"?,\"?fare_amount\"?,\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?",
+  "column_names" : [ <3>
+    "VendorID",
+    "tpep_pickup_datetime",
+    "tpep_dropoff_datetime",
+    "passenger_count",
+    "trip_distance",
+    "RatecodeID",
+    "store_and_fwd_flag",
+    "PULocationID",
+    "DOLocationID",
+    "payment_type",
+    "fare_amount",
+    "extra",
+    "mta_tax",
+    "tip_amount",
+    "tolls_amount",
+    "improvement_surcharge",
+    "total_amount"
+  ],
+  "has_header_row" : true, <4>
+  "delimiter" : ",", <5>
+  "quote" : "\"", <6>
+  "timestamp_field" : "tpep_pickup_datetime", <7>
+  "joda_timestamp_formats" : [ <8>
+    "YYYY-MM-dd HH:mm:ss"
+  ],
+  "java_timestamp_formats" : [ <9>
+    "yyyy-MM-dd HH:mm:ss"
+  ],
+  "need_client_timezone" : true, <10>
+  "mappings" : {
+    "@timestamp" : {
+      "type" : "date"
+    },
+    "DOLocationID" : {
+      "type" : "long"
+    },
+    "PULocationID" : {
+      "type" : "long"
+    },
+    "RatecodeID" : {
+      "type" : "long"
+    },
+    "VendorID" : {
+      "type" : "long"
+    },
+    "extra" : {
+      "type" : "double"
+    },
+    "fare_amount" : {
+      "type" : "double"
+    },
+    "improvement_surcharge" : {
+      "type" : "double"
+    },
+    "mta_tax" : {
+      "type" : "double"
+    },
+    "passenger_count" : {
+      "type" : "long"
+    },
+    "payment_type" : {
+      "type" : "long"
+    },
+    "store_and_fwd_flag" : {
+      "type" : "keyword"
+    },
+    "tip_amount" : {
+      "type" : "double"
+    },
+    "tolls_amount" : {
+      "type" : "double"
+    },
+    "total_amount" : {
+      "type" : "double"
+    },
+    "tpep_dropoff_datetime" : {
+      "type" : "date",
+      "format" : "YYYY-MM-dd HH:mm:ss"
+    },
+    "tpep_pickup_datetime" : {
+      "type" : "date",
+      "format" : "YYYY-MM-dd HH:mm:ss"
+    },
+    "trip_distance" : {
+      "type" : "double"
+    }
+  },
+  "field_stats" : {
+    "DOLocationID" : {
+      "count" : 19998,
+      "cardinality" : 240,
+      "min_value" : 1,
+      "max_value" : 265,
+      "mean_value" : 150.26532653265312,
+      "median_value" : 148,
+      "top_hits" : [
+        {
+          "value" : 79,
+          "count" : 760
+        },
+        {
+          "value" : 48,
+          "count" : 683
+        },
+        {
+          "value" : 68,
+          "count" : 529
+        },
+        {
+          "value" : 170,
+          "count" : 506
+        },
+        {
+          "value" : 107,
+          "count" : 468
+        },
+        {
+          "value" : 249,
+          "count" : 457
+        },
+        {
+          "value" : 230,
+          "count" : 441
+        },
+        {
+          "value" : 186,
+          "count" : 432
+        },
+        {
+          "value" : 141,
+          "count" : 409
+        },
+        {
+          "value" : 263,
+          "count" : 386
+        }
+      ]
+    },
+    "PULocationID" : {
+      "count" : 19998,
+      "cardinality" : 154,
+      "min_value" : 1,
+      "max_value" : 265,
+      "mean_value" : 153.4042404240424,
+      "median_value" : 148,
+      "top_hits" : [
+        {
+          "value" : 79,
+          "count" : 1067
+        },
+        {
+          "value" : 230,
+          "count" : 949
+        },
+        {
+          "value" : 148,
+          "count" : 940
+        },
+        {
+          "value" : 132,
+          "count" : 897
+        },
+        {
+          "value" : 48,
+          "count" : 853
+        },
+        {
+          "value" : 161,
+          "count" : 820
+        },
+        {
+          "value" : 234,
+          "count" : 750
+        },
+        {
+          "value" : 249,
+          "count" : 722
+        },
+        {
+          "value" : 164,
+          "count" : 663
+        },
+        {
+          "value" : 114,
+          "count" : 646
+        }
+      ]
+    },
+    "RatecodeID" : {
+      "count" : 19998,
+      "cardinality" : 5,
+      "min_value" : 1,
+      "max_value" : 5,
+      "mean_value" : 1.0656565656565653,
+      "median_value" : 1,
+      "top_hits" : [
+        {
+          "value" : 1,
+          "count" : 19311
+        },
+        {
+          "value" : 2,
+          "count" : 468
+        },
+        {
+          "value" : 5,
+          "count" : 195
+        },
+        {
+          "value" : 4,
+          "count" : 17
+        },
+        {
+          "value" : 3,
+          "count" : 7
+        }
+      ]
+    },
+    "VendorID" : {
+      "count" : 19998,
+      "cardinality" : 2,
+      "min_value" : 1,
+      "max_value" : 2,
+      "mean_value" : 1.59005900590059,
+      "median_value" : 2,
+      "top_hits" : [
+        {
+          "value" : 2,
+          "count" : 11800
+        },
+        {
+          "value" : 1,
+          "count" : 8198
+        }
+      ]
+    },
+    "extra" : {
+      "count" : 19998,
+      "cardinality" : 3,
+      "min_value" : -0.5,
+      "max_value" : 0.5,
+      "mean_value" : 0.4815981598159816,
+      "median_value" : 0.5,
+      "top_hits" : [
+        {
+          "value" : 0.5,
+          "count" : 19281
+        },
+        {
+          "value" : 0,
+          "count" : 698
+        },
+        {
+          "value" : -0.5,
+          "count" : 19
+        }
+      ]
+    },
+    "fare_amount" : {
+      "count" : 19998,
+      "cardinality" : 208,
+      "min_value" : -100,
+      "max_value" : 300,
+      "mean_value" : 13.937719771977209,
+      "median_value" : 9.5,
+      "top_hits" : [
+        {
+          "value" : 6,
+          "count" : 1004
+        },
+        {
+          "value" : 6.5,
+          "count" : 935
+        },
+        {
+          "value" : 5.5,
+          "count" : 909
+        },
+        {
+          "value" : 7,
+          "count" : 903
+        },
+        {
+          "value" : 5,
+          "count" : 889
+        },
+        {
+          "value" : 7.5,
+          "count" : 854
+        },
+        {
+          "value" : 4.5,
+          "count" : 802
+        },
+        {
+          "value" : 8.5,
+          "count" : 790
+        },
+        {
+          "value" : 8,
+          "count" : 789
+        },
+        {
+          "value" : 9,
+          "count" : 711
+        }
+      ]
+    },
+    "improvement_surcharge" : {
+      "count" : 19998,
+      "cardinality" : 3,
+      "min_value" : -0.3,
+      "max_value" : 0.3,
+      "mean_value" : 0.29915991599159913,
+      "median_value" : 0.3,
+      "top_hits" : [
+        {
+          "value" : 0.3,
+          "count" : 19964
+        },
+        {
+          "value" : -0.3,
+          "count" : 22
+        },
+        {
+          "value" : 0,
+          "count" : 12
+        }
+      ]
+    },
+    "mta_tax" : {
+      "count" : 19998,
+      "cardinality" : 3,
+      "min_value" : -0.5,
+      "max_value" : 0.5,
+      "mean_value" : 0.4962246224622462,
+      "median_value" : 0.5,
+      "top_hits" : [
+        {
+          "value" : 0.5,
+          "count" : 19868
+        },
+        {
+          "value" : 0,
+          "count" : 109
+        },
+        {
+          "value" : -0.5,
+          "count" : 21
+        }
+      ]
+    },
+    "passenger_count" : {
+      "count" : 19998,
+      "cardinality" : 7,
+      "min_value" : 0,
+      "max_value" : 6,
+      "mean_value" : 1.6201620162016201,
+      "median_value" : 1,
+      "top_hits" : [
+        {
+          "value" : 1,
+          "count" : 14219
+        },
+        {
+          "value" : 2,
+          "count" : 2886
+        },
+        {
+          "value" : 5,
+          "count" : 1047
+        },
+        {
+          "value" : 3,
+          "count" : 804
+        },
+        {
+          "value" : 6,
+          "count" : 523
+        },
+        {
+          "value" : 4,
+          "count" : 406
+        },
+        {
+          "value" : 0,
+          "count" : 113
+        }
+      ]
+    },
+    "payment_type" : {
+      "count" : 19998,
+      "cardinality" : 4,
+      "min_value" : 1,
+      "max_value" : 4,
+      "mean_value" : 1.315631563156316,
+      "median_value" : 1,
+      "top_hits" : [
+        {
+          "value" : 1,
+          "count" : 13936
+        },
+        {
+          "value" : 2,
+          "count" : 5857
+        },
+        {
+          "value" : 3,
+          "count" : 160
+        },
+        {
+          "value" : 4,
+          "count" : 45
+        }
+      ]
+    },
+    "store_and_fwd_flag" : {
+      "count" : 19998,
+      "cardinality" : 2,
+      "top_hits" : [
+        {
+          "value" : "N",
+          "count" : 19910
+        },
+        {
+          "value" : "Y",
+          "count" : 88
+        }
+      ]
+    },
+    "tip_amount" : {
+      "count" : 19998,
+      "cardinality" : 717,
+      "min_value" : 0,
+      "max_value" : 128,
+      "mean_value" : 2.010959095909593,
+      "median_value" : 1.45,
+      "top_hits" : [
+        {
+          "value" : 0,
+          "count" : 6917
+        },
+        {
+          "value" : 1,
+          "count" : 1178
+        },
+        {
+          "value" : 2,
+          "count" : 624
+        },
+        {
+          "value" : 3,
+          "count" : 248
+        },
+        {
+          "value" : 1.56,
+          "count" : 206
+        },
+        {
+          "value" : 1.46,
+          "count" : 205
+        },
+        {
+          "value" : 1.76,
+          "count" : 196
+        },
+        {
+          "value" : 1.45,
+          "count" : 195
+        },
+        {
+          "value" : 1.36,
+          "count" : 191
+        },
+        {
+          "value" : 1.5,
+          "count" : 187
+        }
+      ]
+    },
+    "tolls_amount" : {
+      "count" : 19998,
+      "cardinality" : 26,
+      "min_value" : 0,
+      "max_value" : 35,
+      "mean_value" : 0.2729697969796978,
+      "median_value" : 0,
+      "top_hits" : [
+        {
+          "value" : 0,
+          "count" : 19107
+        },
+        {
+          "value" : 5.76,
+          "count" : 791
+        },
+        {
+          "value" : 10.5,
+          "count" : 36
+        },
+        {
+          "value" : 2.64,
+          "count" : 21
+        },
+        {
+          "value" : 11.52,
+          "count" : 8
+        },
+        {
+          "value" : 5.54,
+          "count" : 4
+        },
+        {
+          "value" : 8.5,
+          "count" : 4
+        },
+        {
+          "value" : 17.28,
+          "count" : 4
+        },
+        {
+          "value" : 2,
+          "count" : 2
+        },
+        {
+          "value" : 2.16,
+          "count" : 2
+        }
+      ]
+    },
+    "total_amount" : {
+      "count" : 19998,
+      "cardinality" : 1267,
+      "min_value" : -100.3,
+      "max_value" : 389.12,
+      "mean_value" : 17.499898989898995,
+      "median_value" : 12.35,
+      "top_hits" : [
+        {
+          "value" : 7.3,
+          "count" : 478
+        },
+        {
+          "value" : 8.3,
+          "count" : 443
+        },
+        {
+          "value" : 8.8,
+          "count" : 420
+        },
+        {
+          "value" : 6.8,
+          "count" : 406
+        },
+        {
+          "value" : 7.8,
+          "count" : 405
+        },
+        {
+          "value" : 6.3,
+          "count" : 371
+        },
+        {
+          "value" : 9.8,
+          "count" : 368
+        },
+        {
+          "value" : 5.8,
+          "count" : 362
+        },
+        {
+          "value" : 9.3,
+          "count" : 332
+        },
+        {
+          "value" : 10.3,
+          "count" : 332
+        }
+      ]
+    },
+    "tpep_dropoff_datetime" : {
+      "count" : 19998,
+      "cardinality" : 9066,
+      "top_hits" : [
+        {
+          "value" : "2018-06-01 01:12:12",
+          "count" : 10
+        },
+        {
+          "value" : "2018-06-01 00:32:15",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 00:44:27",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 00:46:42",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 01:03:22",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 01:05:13",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 00:11:20",
+          "count" : 8
+        },
+        {
+          "value" : "2018-06-01 00:16:03",
+          "count" : 8
+        },
+        {
+          "value" : "2018-06-01 00:19:47",
+          "count" : 8
+        },
+        {
+          "value" : "2018-06-01 00:25:17",
+          "count" : 8
+        }
+      ]
+    },
+    "tpep_pickup_datetime" : {
+      "count" : 19998,
+      "cardinality" : 8760,
+      "top_hits" : [
+        {
+          "value" : "2018-06-01 00:01:23",
+          "count" : 12
+        },
+        {
+          "value" : "2018-06-01 00:04:31",
+          "count" : 10
+        },
+        {
+          "value" : "2018-06-01 00:05:38",
+          "count" : 10
+        },
+        {
+          "value" : "2018-06-01 00:09:50",
+          "count" : 10
+        },
+        {
+          "value" : "2018-06-01 00:12:01",
+          "count" : 10
+        },
+        {
+          "value" : "2018-06-01 00:14:17",
+          "count" : 10
+        },
+        {
+          "value" : "2018-06-01 00:00:34",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 00:00:40",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 00:02:53",
+          "count" : 9
+        },
+        {
+          "value" : "2018-06-01 00:05:40",
+          "count" : 9
+        }
+      ]
+    },
+    "trip_distance" : {
+      "count" : 19998,
+      "cardinality" : 1687,
+      "min_value" : 0,
+      "max_value" : 64.63,
+      "mean_value" : 3.6521062106210715,
+      "median_value" : 2.16,
+      "top_hits" : [
+        {
+          "value" : 0.9,
+          "count" : 335
+        },
+        {
+          "value" : 0.8,
+          "count" : 320
+        },
+        {
+          "value" : 1.1,
+          "count" : 316
+        },
+        {
+          "value" : 0.7,
+          "count" : 304
+        },
+        {
+          "value" : 1.2,
+          "count" : 303
+        },
+        {
+          "value" : 1,
+          "count" : 296
+        },
+        {
+          "value" : 1.3,
+          "count" : 280
+        },
+        {
+          "value" : 1.5,
+          "count" : 268
+        },
+        {
+          "value" : 1.6,
+          "count" : 268
+        },
+        {
+          "value" : 0.6,
+          "count" : 256
+        }
+      ]
+    }
+  }
+}
+----
+// NOTCONSOLE
+
+<1> `num_messages_analyzed` is 2 lower than `num_lines_analyzed` because only
+    data records count as messages. The first line contains the column names
+    and in this sample the second line is blank.
+<2> Unlike the first example, in this case the `format` has been identified as
+    `delimited`.
+<3> Because the `format` is `delimited`, the `column_names` field in the output
+    lists the column names in the order they appear in the sample.
+<4> `has_header_row` indicates that for this sample the column names were in
+    the first row of the sample. (If they had not been, it would have been
+    a good idea to specify them in the `column_names` query parameter.)
+<5> The `delimiter` for this sample is a comma, as it's a CSV file.
+<6> The `quote` character is the default double quote. (The structure finder
+    does not attempt to deduce any other quote character, so if you have a
+    delimited file that's quoted with some other character you must specify it
+    using the `quote` query parameter.)
+<7> The `timestamp_field` has been chosen to be `tpep_pickup_datetime`.
+    `tpep_dropoff_datetime` would work just as well, but `tpep_pickup_datetime`
+    was chosen because it comes first in the column order. If you prefer
+    `tpep_dropoff_datetime` then force it to be chosen using the
+    `timestamp_field` query parameter.
+<8> `joda_timestamp_formats` are used to tell Logstash and Ingest pipelines
+    how to parse timestamps.
+<9> `java_timestamp_formats` are the Java time formats recognized in the time
+    fields.  In future, Ingest pipelines will switch to using these formats.
+<10> The timestamp format in this sample doesn't specify a timezone, so to
+     accurately convert the timestamps to UTC for storage in Elasticsearch it's
+     necessary to supply the timezone they relate to. `need_client_timezone`
+     will be `false` for timestamp formats that include the timezone.
+

+ 61 - 23
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructure.java

@@ -99,7 +99,8 @@ public class FileStructure implements ToXContentObject, Writeable {
     public static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
     public static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
     public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
-    public static final ParseField TIMESTAMP_FORMATS = new ParseField("timestamp_formats");
+    public static final ParseField JODA_TIMESTAMP_FORMATS = new ParseField("joda_timestamp_formats");
+    public static final ParseField JAVA_TIMESTAMP_FORMATS = new ParseField("java_timestamp_formats");
     public static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone");
     public static final ParseField MAPPINGS = new ParseField("mappings");
     public static final ParseField FIELD_STATS = new ParseField("field_stats");
@@ -123,7 +124,8 @@ public class FileStructure implements ToXContentObject, Writeable {
         PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
         PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
         PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
-        PARSER.declareStringArray(Builder::setTimestampFormats, TIMESTAMP_FORMATS);
+        PARSER.declareStringArray(Builder::setJodaTimestampFormats, JODA_TIMESTAMP_FORMATS);
+        PARSER.declareStringArray(Builder::setJavaTimestampFormats, JAVA_TIMESTAMP_FORMATS);
         PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE);
         PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS);
         PARSER.declareObject(Builder::setFieldStats, (p, c) -> {
@@ -150,7 +152,8 @@ public class FileStructure implements ToXContentObject, Writeable {
     private final Character quote;
     private final Boolean shouldTrimFields;
     private final String grokPattern;
-    private final List<String> timestampFormats;
+    private final List<String> jodaTimestampFormats;
+    private final List<String> javaTimestampFormats;
     private final String timestampField;
     private final boolean needClientTimezone;
     private final SortedMap<String, Object> mappings;
@@ -160,8 +163,9 @@ public class FileStructure implements ToXContentObject, Writeable {
     public FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
                          Format format, String multilineStartPattern, String excludeLinesPattern, List<String> columnNames,
                          Boolean hasHeaderRow, Character delimiter, Character quote, Boolean shouldTrimFields, String grokPattern,
-                         String timestampField, List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings,
-                         Map<String, FieldStats> fieldStats, List<String> explanation) {
+                         String timestampField, List<String> jodaTimestampFormats, List<String> javaTimestampFormats,
+                         boolean needClientTimezone, Map<String, Object> mappings, Map<String, FieldStats> fieldStats,
+                         List<String> explanation) {
 
         this.numLinesAnalyzed = numLinesAnalyzed;
         this.numMessagesAnalyzed = numMessagesAnalyzed;
@@ -178,7 +182,10 @@ public class FileStructure implements ToXContentObject, Writeable {
         this.shouldTrimFields = shouldTrimFields;
         this.grokPattern = grokPattern;
         this.timestampField = timestampField;
-        this.timestampFormats = (timestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(timestampFormats));
+        this.jodaTimestampFormats =
+            (jodaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(jodaTimestampFormats));
+        this.javaTimestampFormats =
+            (javaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(javaTimestampFormats));
         this.needClientTimezone = needClientTimezone;
         this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings));
         this.fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(fieldStats));
@@ -200,7 +207,8 @@ public class FileStructure implements ToXContentObject, Writeable {
         quote = in.readBoolean() ? (char) in.readVInt() : null;
         shouldTrimFields = in.readOptionalBoolean();
         grokPattern = in.readOptionalString();
-        timestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null;
+        jodaTimestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null;
+        javaTimestampFormats = in.readBoolean() ? Collections.unmodifiableList(in.readList(StreamInput::readString)) : null;
         timestampField = in.readOptionalString();
         needClientTimezone = in.readBoolean();
         mappings = Collections.unmodifiableSortedMap(new TreeMap<>(in.readMap()));
@@ -239,11 +247,17 @@ public class FileStructure implements ToXContentObject, Writeable {
         }
         out.writeOptionalBoolean(shouldTrimFields);
         out.writeOptionalString(grokPattern);
-        if (timestampFormats == null) {
+        if (jodaTimestampFormats == null) {
             out.writeBoolean(false);
         } else {
             out.writeBoolean(true);
-            out.writeCollection(timestampFormats, StreamOutput::writeString);
+            out.writeCollection(jodaTimestampFormats, StreamOutput::writeString);
+        }
+        if (javaTimestampFormats == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeCollection(javaTimestampFormats, StreamOutput::writeString);
         }
         out.writeOptionalString(timestampField);
         out.writeBoolean(needClientTimezone);
@@ -312,8 +326,12 @@ public class FileStructure implements ToXContentObject, Writeable {
         return timestampField;
     }
 
-    public List<String> getTimestampFormats() {
-        return timestampFormats;
+    public List<String> getJodaTimestampFormats() {
+        return jodaTimestampFormats;
+    }
+
+    public List<String> getJavaTimestampFormats() {
+        return javaTimestampFormats;
     }
 
     public boolean needClientTimezone() {
@@ -371,8 +389,11 @@ public class FileStructure implements ToXContentObject, Writeable {
         if (timestampField != null && timestampField.isEmpty() == false) {
             builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
         }
-        if (timestampFormats != null && timestampFormats.isEmpty() == false) {
-            builder.field(TIMESTAMP_FORMATS.getPreferredName(), timestampFormats);
+        if (jodaTimestampFormats != null && jodaTimestampFormats.isEmpty() == false) {
+            builder.field(JODA_TIMESTAMP_FORMATS.getPreferredName(), jodaTimestampFormats);
+        }
+        if (javaTimestampFormats != null && javaTimestampFormats.isEmpty() == false) {
+            builder.field(JAVA_TIMESTAMP_FORMATS.getPreferredName(), javaTimestampFormats);
         }
         builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone);
         builder.field(MAPPINGS.getPreferredName(), mappings);
@@ -396,7 +417,7 @@ public class FileStructure implements ToXContentObject, Writeable {
 
         return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
             multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
-            timestampField, timestampFormats, needClientTimezone, mappings, fieldStats, explanation);
+            timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, fieldStats, explanation);
     }
 
     @Override
@@ -413,7 +434,6 @@ public class FileStructure implements ToXContentObject, Writeable {
         FileStructure that = (FileStructure) other;
         return this.numLinesAnalyzed == that.numLinesAnalyzed &&
             this.numMessagesAnalyzed == that.numMessagesAnalyzed &&
-            this.needClientTimezone == that.needClientTimezone &&
             Objects.equals(this.sampleStart, that.sampleStart) &&
             Objects.equals(this.charset, that.charset) &&
             Objects.equals(this.hasByteOrderMarker, that.hasByteOrderMarker) &&
@@ -427,7 +447,9 @@ public class FileStructure implements ToXContentObject, Writeable {
             Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
             Objects.equals(this.grokPattern, that.grokPattern) &&
             Objects.equals(this.timestampField, that.timestampField) &&
-            Objects.equals(this.timestampFormats, that.timestampFormats) &&
+            Objects.equals(this.jodaTimestampFormats, that.jodaTimestampFormats) &&
+            Objects.equals(this.javaTimestampFormats, that.javaTimestampFormats) &&
+            this.needClientTimezone == that.needClientTimezone &&
             Objects.equals(this.mappings, that.mappings) &&
             Objects.equals(this.fieldStats, that.fieldStats) &&
             Objects.equals(this.explanation, that.explanation);
@@ -450,7 +472,8 @@ public class FileStructure implements ToXContentObject, Writeable {
         private Boolean shouldTrimFields;
         private String grokPattern;
         private String timestampField;
-        private List<String> timestampFormats;
+        private List<String> jodaTimestampFormats;
+        private List<String> javaTimestampFormats;
         private boolean needClientTimezone;
         private Map<String, Object> mappings;
         private Map<String, FieldStats> fieldStats = Collections.emptyMap();
@@ -539,8 +562,13 @@ public class FileStructure implements ToXContentObject, Writeable {
             return this;
         }
 
-        public Builder setTimestampFormats(List<String> timestampFormats) {
-            this.timestampFormats = timestampFormats;
+        public Builder setJodaTimestampFormats(List<String> jodaTimestampFormats) {
+            this.jodaTimestampFormats = jodaTimestampFormats;
+            return this;
+        }
+
+        public Builder setJavaTimestampFormats(List<String> javaTimestampFormats) {
+            this.javaTimestampFormats = javaTimestampFormats;
             return this;
         }
 
@@ -652,11 +680,21 @@ public class FileStructure implements ToXContentObject, Writeable {
                     throw new IllegalStateException("enum value [" + format + "] missing from switch.");
             }
 
-            if ((timestampField == null) != (timestampFormats == null || timestampFormats.isEmpty())) {
-                throw new IllegalArgumentException("Timestamp field and timestamp formats must both be specified or neither be specified.");
+            boolean isTimestampFieldSpecified = (timestampField != null);
+            boolean isJodaTimestampFormatsSpecified = (jodaTimestampFormats != null && jodaTimestampFormats.isEmpty() == false);
+            boolean isJavaTimestampFormatsSpecified = (javaTimestampFormats != null && javaTimestampFormats.isEmpty() == false);
+
+            if (isTimestampFieldSpecified != isJodaTimestampFormatsSpecified) {
+                throw new IllegalArgumentException(
+                    "Timestamp field and Joda timestamp formats must both be specified or neither be specified.");
+            }
+
+            if (isTimestampFieldSpecified != isJavaTimestampFormatsSpecified) {
+                throw new IllegalArgumentException(
+                    "Timestamp field and Java timestamp formats must both be specified or neither be specified.");
             }
 
-            if (needClientTimezone && timestampField == null) {
+            if (needClientTimezone && isTimestampFieldSpecified == false) {
                 throw new IllegalArgumentException("Client timezone cannot be needed if there is no timestamp field.");
             }
 
@@ -670,7 +708,7 @@ public class FileStructure implements ToXContentObject, Writeable {
 
             return new FileStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
                 multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
-                timestampField, timestampFormats, needClientTimezone, mappings, fieldStats, explanation);
+                timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, fieldStats, explanation);
         }
     }
 }

+ 2 - 1
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FileStructureTests.java

@@ -63,7 +63,8 @@ public class FileStructureTests extends AbstractSerializingTestCase<FileStructur
 
         if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT || randomBoolean()) {
             builder.setTimestampField(randomAlphaOfLength(10));
-            builder.setTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
+            builder.setJodaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
+            builder.setJavaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
             builder.setNeedClientTimezone(randomBoolean());
         }
 

+ 2 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java

@@ -141,7 +141,8 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
             }
 
             structureBuilder.setTimestampField(timeField.v1())
-                .setTimestampFormats(timeField.v2().dateFormats)
+                .setJodaTimestampFormats(timeField.v2().jodaTimestampFormats)
+                .setJavaTimestampFormats(timeField.v2().javaTimestampFormats)
                 .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing())
                 .setMultilineStartPattern(timeLineRegex);
         }

+ 1 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java

@@ -240,7 +240,7 @@ public final class FileStructureUtils {
         Iterator<String> iter = fieldValues.iterator();
         TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next());
         while (timestampMatch != null && iter.hasNext()) {
-            // To be mapped as type date all the values must match the same date format - it is
+            // To be mapped as type date all the values must match the same timestamp format - it is
             // not acceptable for all values to be dates, but with different formats
             if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex)) == false) {
                 timestampMatch = null;

+ 2 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/JsonFileStructureFinder.java

@@ -55,7 +55,8 @@ public class JsonFileStructureFinder implements FileStructureFinder {
         Tuple<String, TimestampMatch> timeField = FileStructureUtils.guessTimestampField(explanation, sampleRecords, overrides);
         if (timeField != null) {
             structureBuilder.setTimestampField(timeField.v1())
-                .setTimestampFormats(timeField.v2().dateFormats)
+                .setJodaTimestampFormats(timeField.v2().jodaTimestampFormats)
+                .setJavaTimestampFormats(timeField.v2().javaTimestampFormats)
                 .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing());
         }
 

+ 4 - 3
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java

@@ -112,7 +112,8 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
 
         FileStructure structure = structureBuilder
             .setTimestampField(interimTimestampField)
-            .setTimestampFormats(bestTimestamp.v1().dateFormats)
+            .setJodaTimestampFormats(bestTimestamp.v1().jodaTimestampFormats)
+            .setJavaTimestampFormats(bestTimestamp.v1().javaTimestampFormats)
             .setNeedClientTimezone(bestTimestamp.v1().hasTimezoneDependentParsing())
             .setGrokPattern(grokPattern)
             .setMappings(mappings)
@@ -147,8 +148,8 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
         for (String sampleLine : sampleLines) {
             TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine, overrides.getTimestampFormat());
             if (match != null) {
-                TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.dateFormats, match.simplePattern,
-                    match.grokPatternName, "");
+                TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.jodaTimestampFormats,
+                    match.javaTimestampFormats, match.simplePattern, match.grokPatternName, "");
                 timestampMatches.compute(pureMatch, (k, v) -> {
                     if (v == null) {
                         return new Tuple<>(weightForMatch(match.preface), new HashSet<>(Collections.singletonList(match.preface)));

+ 148 - 72
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java

@@ -12,7 +12,6 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
-import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.regex.Matcher;
@@ -30,8 +29,12 @@ public final class TimestampFormatFinder {
     private static final String PREFACE = "preface";
     private static final String EPILOGUE = "epilogue";
 
-    private static final Pattern FRACTIONAL_SECOND_INTERPRETER = Pattern.compile("([:.,])(\\d{3,9})");
+    private static final String FRACTIONAL_SECOND_SEPARATORS = ":.,";
+    private static final Pattern FRACTIONAL_SECOND_INTERPRETER = Pattern.compile("([" + FRACTIONAL_SECOND_SEPARATORS + "])(\\d{3,9})");
     private static final char DEFAULT_FRACTIONAL_SECOND_SEPARATOR = ',';
+    private static final Pattern FRACTIONAL_SECOND_TIMESTAMP_FORMAT_PATTERN =
+        Pattern.compile("([" + FRACTIONAL_SECOND_SEPARATORS + "]S{3,9})");
+    private static final String DEFAULT_FRACTIONAL_SECOND_FORMAT = DEFAULT_FRACTIONAL_SECOND_SEPARATOR + "SSS";
 
     /**
      * The timestamp patterns are complex and it can be slow to prove they do not
@@ -48,7 +51,9 @@ public final class TimestampFormatFinder {
         // The end of some number (likely year or day) followed by a space then HH:mm
         Pattern.compile("\\d \\d{2}:\\d{2}\\b"),
         // HH:mm:ss surrounded by spaces
-        Pattern.compile(" \\d{2}:\\d{2}:\\d{2} ")
+        Pattern.compile(" \\d{2}:\\d{2}:\\d{2} "),
+        // Literal 'T' surrounded by numbers
+        Pattern.compile("\\dT\\d")
     );
 
     /**
@@ -59,81 +64,107 @@ public final class TimestampFormatFinder {
         // The TOMCAT_DATESTAMP format has to come before ISO8601 because it's basically ISO8601 but
         // with a space before the timezone, and because the timezone is optional in ISO8601 it will
         // be recognised as that with the timezone missed off if ISO8601 is checked first
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSS Z", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSS Z", "yyyy-MM-dd HH:mm:ss,SSS XX",
+            "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
             "\\b20\\d{2}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9} (?:Z|[+-]%{HOUR}%{MINUTE})\\b",
             "TOMCAT_DATESTAMP", Arrays.asList(0, 1)),
         // The Elasticsearch ISO8601 parser requires a literal T between the date and time, so
         // longhand formats are needed if there's a space instead
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSSZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSSZ", "yyyy-MM-dd HH:mm:ss,SSSXX",
+            "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
             "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}(?:Z|[+-]%{HOUR}%{MINUTE})\\b",
             "TIMESTAMP_ISO8601", Arrays.asList(0, 1)),
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSSZZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSSZZ", "yyyy-MM-dd HH:mm:ss,SSSXXX",
+            "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
             "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}[+-]%{HOUR}:%{MINUTE}\\b",
             "TIMESTAMP_ISO8601", Arrays.asList(0, 1)),
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSS", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss,SSS",
+            "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
             "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}\\b", "TIMESTAMP_ISO8601",
             Arrays.asList(0, 1)),
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ssZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ssZ", "yyyy-MM-dd HH:mm:ssXX", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
             "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)(?:Z|[+-]%{HOUR}%{MINUTE})\\b", "TIMESTAMP_ISO8601",
             Arrays.asList(0, 1)),
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ssZZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ssZZ", "yyyy-MM-dd HH:mm:ssXXX", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
             "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[+-]%{HOUR}:%{MINUTE}\\b", "TIMESTAMP_ISO8601",
             Arrays.asList(0, 1)),
-        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
+        new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
             "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)\\b", "TIMESTAMP_ISO8601",
             Arrays.asList(0, 1)),
-        new CandidateTimestampFormat("ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "\\b%{TIMESTAMP_ISO8601}\\b",
-            "TIMESTAMP_ISO8601"),
-        new CandidateTimestampFormat("EEE MMM dd YYYY HH:mm:ss zzz",
+        // When using Java time the Elasticsearch ISO8601 parser for fractional time requires that the fractional
+        // separator match the current JVM locale, which is too restrictive for arbitrary log file parsing
+        new CandidateTimestampFormat("ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSSXX",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}",
+            "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY}T%{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}(?:Z|[+-]%{HOUR}%{MINUTE})\\b",
+            "TIMESTAMP_ISO8601", Collections.singletonList(3)),
+        new CandidateTimestampFormat("ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSSXXX",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}",
+            "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY}T%{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}[+-]%{HOUR}:%{MINUTE}\\b",
+            "TIMESTAMP_ISO8601", Collections.singletonList(3)),
+        new CandidateTimestampFormat("ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSS",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}",
+            "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY}T%{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}\\b", "TIMESTAMP_ISO8601",
+            Collections.singletonList(3)),
+        new CandidateTimestampFormat("ISO8601", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "\\b%{TIMESTAMP_ISO8601}\\b",
+            "TIMESTAMP_ISO8601", Collections.singletonList(3)),
+        new CandidateTimestampFormat("EEE MMM dd YYYY HH:mm:ss zzz", "EEE MMM dd yyyy HH:mm:ss zzz",
             "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2} ",
             "\\b%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) %{TZ}\\b", "DATESTAMP_RFC822", Arrays.asList(1, 2)),
-        new CandidateTimestampFormat("EEE MMM dd YYYY HH:mm zzz", "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2} ",
+        new CandidateTimestampFormat("EEE MMM dd YYYY HH:mm zzz", "EEE MMM dd yyyy HH:mm zzz",
+            "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2} ",
             "\\b%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{HOUR}:%{MINUTE} %{TZ}\\b", "DATESTAMP_RFC822", Collections.singletonList(1)),
-        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm:ss ZZ",
+        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm:ss ZZ", "EEE, dd MMM yyyy HH:mm:ss XXX",
             "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ",
             "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) (?:Z|[+-]%{HOUR}:%{MINUTE})\\b",
             "DATESTAMP_RFC2822", Arrays.asList(1, 2)),
-        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm:ss Z",
+        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm:ss Z", "EEE, dd MMM yyyy HH:mm:ss XX",
             "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ",
             "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) (?:Z|[+-]%{HOUR}%{MINUTE})\\b",
             "DATESTAMP_RFC2822", Arrays.asList(1, 2)),
-        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm ZZ", "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ",
+        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm ZZ", "EEE, dd MMM yyyy HH:mm XXX",
+            "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ",
             "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE} (?:Z|[+-]%{HOUR}:%{MINUTE})\\b", "DATESTAMP_RFC2822",
             Collections.singletonList(1)),
-        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm Z", "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ",
+        new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm Z", "EEE, dd MMM yyyy HH:mm XX",
+            "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ",
             "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE} (?:Z|[+-]%{HOUR}%{MINUTE})\\b", "DATESTAMP_RFC2822",
             Collections.singletonList(1)),
-        new CandidateTimestampFormat("EEE MMM dd HH:mm:ss zzz YYYY",
+        new CandidateTimestampFormat("EEE MMM dd HH:mm:ss zzz YYYY", "EEE MMM dd HH:mm:ss zzz yyyy",
             "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b",
             "\\b%{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) %{TZ} %{YEAR}\\b", "DATESTAMP_OTHER",
             Arrays.asList(1, 2)),
-        new CandidateTimestampFormat("EEE MMM dd HH:mm zzz YYYY",
+        new CandidateTimestampFormat("EEE MMM dd HH:mm zzz YYYY", "EEE MMM dd HH:mm zzz yyyy",
             "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b",
             "\\b%{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE} %{TZ} %{YEAR}\\b", "DATESTAMP_OTHER", Collections.singletonList(1)),
-        new CandidateTimestampFormat("YYYYMMddHHmmss", "\\b\\d{14}\\b",
+        new CandidateTimestampFormat("YYYYMMddHHmmss", "yyyyMMddHHmmss", "\\b\\d{14}\\b",
             "\\b20\\d{2}%{MONTHNUM2}(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01]))(?:2[0123]|[01][0-9])%{MINUTE}(?:[0-5][0-9]|60)\\b",
             "DATESTAMP_EVENTLOG"),
-        new CandidateTimestampFormat("EEE MMM dd HH:mm:ss YYYY",
+        new CandidateTimestampFormat("EEE MMM dd HH:mm:ss YYYY", "EEE MMM dd HH:mm:ss yyyy",
             "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4}\\b",
             "\\b%{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) %{YEAR}\\b", "HTTPDERROR_DATE", Arrays.asList(1, 2)),
         new CandidateTimestampFormat(Arrays.asList("MMM dd HH:mm:ss,SSS", "MMM  d HH:mm:ss,SSS"),
+            Arrays.asList("MMM dd HH:mm:ss,SSS", "MMM  d HH:mm:ss,SSS"),
             "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
             "%{MONTH} +%{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}\\b", "SYSLOGTIMESTAMP",
             Collections.singletonList(1)),
         new CandidateTimestampFormat(Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+            Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
             "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "%{MONTH} +%{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60)\\b",
             "SYSLOGTIMESTAMP", Collections.singletonList(1)),
-        new CandidateTimestampFormat("dd/MMM/YYYY:HH:mm:ss Z", "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ",
+        new CandidateTimestampFormat("dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX",
+            "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ",
             "\\b%{MONTHDAY}/%{MONTH}/%{YEAR}:%{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) [+-]?%{HOUR}%{MINUTE}\\b", "HTTPDATE"),
-        new CandidateTimestampFormat("MMM dd, YYYY K:mm:ss a", "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b",
+        new CandidateTimestampFormat("MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a",
+            "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b",
             "%{MONTH} %{MONTHDAY}, 20\\d{2} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) (?:AM|PM)\\b", "CATALINA_DATESTAMP"),
         new CandidateTimestampFormat(Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"),
+            Arrays.asList("MMM dd yyyy HH:mm:ss", "MMM  d yyyy HH:mm:ss"),
             "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2}\\b",
             "%{MONTH} +%{MONTHDAY} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60)\\b", "CISCOTIMESTAMP", Collections.singletonList(1)),
-        new CandidateTimestampFormat("UNIX_MS", "\\b\\d{13}\\b", "\\b\\d{13}\\b", "POSINT"),
-        new CandidateTimestampFormat("UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "\\b\\d{10}\\.(?:\\d{3}){1,3}\\b", "NUMBER"),
-        new CandidateTimestampFormat("UNIX", "\\b\\d{10}\\b", "\\b\\d{10}\\b", "POSINT"),
-        new CandidateTimestampFormat("TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM")
+        new CandidateTimestampFormat("UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "\\b\\d{13}\\b", "POSINT"),
+        new CandidateTimestampFormat("UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "\\b\\d{10}\\.(?:\\d{3}){1,3}\\b", "NUMBER"),
+        new CandidateTimestampFormat("UNIX", "UNIX", "\\b\\d{10}\\b", "\\b\\d{10}\\b", "POSINT"),
+        new CandidateTimestampFormat("TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM")
     );
 
     private TimestampFormatFinder() {
@@ -151,7 +182,7 @@ public final class TimestampFormatFinder {
     /**
      * Find the first timestamp format that matches part of the supplied value.
      * @param text The value that the returned timestamp format must exist within.
-     * @param requiredFormat A date format that any returned match must support.
+     * @param requiredFormat A timestamp format that any returned match must support.
      * @return The timestamp format, or <code>null</code> if none matches.
      */
     public static TimestampMatch findFirstMatch(String text, String requiredFormat) {
@@ -174,14 +205,19 @@ public final class TimestampFormatFinder {
      * excluding a specified number of candidate formats.
      * @param text             The value that the returned timestamp format must exist within.
      * @param ignoreCandidates The number of candidate formats to exclude from the search.
-     * @param requiredFormat A date format that any returned match must support.
+     * @param requiredFormat A timestamp format that any returned match must support.
      * @return The timestamp format, or <code>null</code> if none matches.
      */
     public static TimestampMatch findFirstMatch(String text, int ignoreCandidates, String requiredFormat) {
+        if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
+            return null;
+        }
         Boolean[] quickRuleoutMatches = new Boolean[QUICK_RULE_OUT_PATTERNS.size()];
         int index = ignoreCandidates;
+        String adjustedRequiredFormat = adjustRequiredFormat(requiredFormat);
         for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) {
-            if (requiredFormat == null || candidate.dateFormats.contains(requiredFormat)) {
+            if (adjustedRequiredFormat == null || candidate.jodaTimestampFormats.contains(adjustedRequiredFormat) ||
+                candidate.javaTimestampFormats.contains(adjustedRequiredFormat)) {
                 boolean quicklyRuledOut = false;
                 for (Integer quickRuleOutIndex : candidate.quickRuleOutIndices) {
                     if (quickRuleoutMatches[quickRuleOutIndex] == null) {
@@ -219,7 +255,7 @@ public final class TimestampFormatFinder {
     /**
      * Find the best timestamp format for matching an entire field value.
      * @param text The value that the returned timestamp format must match in its entirety.
-     * @param requiredFormat A date format that any returned match must support.
+     * @param requiredFormat A timestamp format that any returned match must support.
      * @return The timestamp format, or <code>null</code> if none matches.
      */
     public static TimestampMatch findFirstFullMatch(String text, String requiredFormat) {
@@ -242,13 +278,18 @@ public final class TimestampFormatFinder {
      * excluding a specified number of candidate formats.
      * @param text The value that the returned timestamp format must match in its entirety.
      * @param ignoreCandidates The number of candidate formats to exclude from the search.
-     * @param requiredFormat A date format that any returned match must support.
+     * @param requiredFormat A timestamp format that any returned match must support.
      * @return The timestamp format, or <code>null</code> if none matches.
      */
     public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates, String requiredFormat) {
+        if (ignoreCandidates >= ORDERED_CANDIDATE_FORMATS.size()) {
+            return null;
+        }
         int index = ignoreCandidates;
+        String adjustedRequiredFormat = adjustRequiredFormat(requiredFormat);
         for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) {
-            if (requiredFormat == null || candidate.dateFormats.contains(requiredFormat)) {
+            if (adjustedRequiredFormat == null || candidate.jodaTimestampFormats.contains(adjustedRequiredFormat) ||
+                candidate.javaTimestampFormats.contains(adjustedRequiredFormat)) {
                 Map<String, Object> captures = candidate.strictFullMatchGrok.captures(text);
                 if (captures != null) {
                     return makeTimestampMatch(candidate, index, "", text, "");
@@ -259,16 +300,32 @@ public final class TimestampFormatFinder {
         return null;
     }
 
+    /**
+     * If a required timestamp format contains a fractional seconds component, adjust it to the
+     * fractional seconds format that's in the candidate timestamp formats, i.e. ",SSS".  So, for
+     * example, "YYYY-MM-dd HH:mm:ss.SSSSSSSSS Z" would get adjusted to "YYYY-MM-dd HH:mm:ss,SSS Z".
+     */
+    static String adjustRequiredFormat(String requiredFormat) {
+
+        return (requiredFormat == null) ? null :
+            FRACTIONAL_SECOND_TIMESTAMP_FORMAT_PATTERN.matcher(requiredFormat).replaceFirst(DEFAULT_FRACTIONAL_SECOND_FORMAT);
+    }
+
     private static TimestampMatch makeTimestampMatch(CandidateTimestampFormat chosenTimestampFormat, int chosenIndex,
                                                      String preface, String matchedDate, String epilogue) {
         Tuple<Character, Integer> fractionalSecondsInterpretation = interpretFractionalSeconds(matchedDate);
-        List<String> dateFormats = chosenTimestampFormat.dateFormats;
+        List<String> jodaTimestampFormats = chosenTimestampFormat.jodaTimestampFormats;
+        List<String> javaTimestampFormats = chosenTimestampFormat.javaTimestampFormats;
         Pattern simplePattern = chosenTimestampFormat.simplePattern;
         char separator = fractionalSecondsInterpretation.v1();
         if (separator != DEFAULT_FRACTIONAL_SECOND_SEPARATOR) {
-            dateFormats = dateFormats.stream().map(dateFormat -> dateFormat.replace(DEFAULT_FRACTIONAL_SECOND_SEPARATOR, separator))
+            jodaTimestampFormats = jodaTimestampFormats.stream()
+                .map(jodaTimestampFormat -> jodaTimestampFormat.replace(DEFAULT_FRACTIONAL_SECOND_SEPARATOR, separator))
+                .collect(Collectors.toList());
+            javaTimestampFormats = javaTimestampFormats.stream()
+                .map(javaTimestampFormat -> javaTimestampFormat.replace(DEFAULT_FRACTIONAL_SECOND_SEPARATOR, separator))
                 .collect(Collectors.toList());
-            if (dateFormats.stream().noneMatch(dateFormat -> dateFormat.startsWith("UNIX"))) {
+            if (jodaTimestampFormats.stream().noneMatch(jodaTimestampFormat -> jodaTimestampFormat.startsWith("UNIX"))) {
                 String patternStr = simplePattern.pattern();
                 int separatorPos = patternStr.lastIndexOf(DEFAULT_FRACTIONAL_SECOND_SEPARATOR);
                 if (separatorPos >= 0) {
@@ -281,11 +338,15 @@ public final class TimestampFormatFinder {
         int numberOfDigitsInFractionalComponent = fractionalSecondsInterpretation.v2();
         if (numberOfDigitsInFractionalComponent > 3) {
             String fractionalSecondsFormat = "SSSSSSSSS".substring(0, numberOfDigitsInFractionalComponent);
-            dateFormats = dateFormats.stream().map(dateFormat -> dateFormat.replace("SSS", fractionalSecondsFormat))
+            jodaTimestampFormats = jodaTimestampFormats.stream()
+                .map(jodaTimestampFormat -> jodaTimestampFormat.replace("SSS", fractionalSecondsFormat))
+                .collect(Collectors.toList());
+            javaTimestampFormats = javaTimestampFormats.stream()
+                .map(javaTimestampFormat -> javaTimestampFormat.replace("SSS", fractionalSecondsFormat))
                 .collect(Collectors.toList());
         }
-        return new TimestampMatch(chosenIndex, preface, dateFormats, simplePattern, chosenTimestampFormat.standardGrokPatternName,
-            epilogue);
+        return new TimestampMatch(chosenIndex, preface, jodaTimestampFormats, javaTimestampFormats, simplePattern,
+            chosenTimestampFormat.standardGrokPatternName, epilogue);
     }
 
     /**
@@ -323,7 +384,12 @@ public final class TimestampFormatFinder {
         /**
          * Time format specifier(s) that will work with Logstash and Ingest pipeline date parsers.
          */
-        public final List<String> dateFormats;
+        public final List<String> jodaTimestampFormats;
+
+        /**
+         * Java time format specifier(s) equivalent to the Joda specifier(s) in {@link #jodaTimestampFormats}.
+         */
+        public final List<String> javaTimestampFormats;
 
         /**
          * A simple regex that will work in many languages to detect whether the timestamp format
@@ -341,25 +407,25 @@ public final class TimestampFormatFinder {
          */
         public final String epilogue;
 
-        TimestampMatch(int candidateIndex, String preface, String dateFormat, String simpleRegex, String grokPatternName, String epilogue) {
-            this(candidateIndex, preface, Collections.singletonList(dateFormat), simpleRegex, grokPatternName, epilogue);
-        }
-
-        TimestampMatch(int candidateIndex, String preface, String dateFormat, String simpleRegex, String grokPatternName, String epilogue,
-                       boolean hasFractionalComponentSmallerThanMillisecond) {
-            this(candidateIndex, preface, Collections.singletonList(dateFormat), simpleRegex, grokPatternName, epilogue);
+        TimestampMatch(int candidateIndex, String preface, String jodaTimestampFormat, String javaTimestampFormat, String simpleRegex,
+                       String grokPatternName, String epilogue) {
+            this(candidateIndex, preface, Collections.singletonList(jodaTimestampFormat), Collections.singletonList(javaTimestampFormat),
+                simpleRegex, grokPatternName, epilogue);
         }
 
-        TimestampMatch(int candidateIndex, String preface, List<String> dateFormats, String simpleRegex, String grokPatternName,
-                       String epilogue) {
-            this(candidateIndex, preface, dateFormats, Pattern.compile(simpleRegex), grokPatternName, epilogue);
+        TimestampMatch(int candidateIndex, String preface, List<String> jodaTimestampFormats, List<String> javaTimestampFormats,
+                       String simpleRegex, String grokPatternName, String epilogue) {
+            this(candidateIndex, preface, jodaTimestampFormats, javaTimestampFormats, Pattern.compile(simpleRegex), grokPatternName,
+                epilogue);
         }
 
-        TimestampMatch(int candidateIndex, String preface, List<String> dateFormats, Pattern simplePattern, String grokPatternName,
+        TimestampMatch(int candidateIndex, String preface, List<String> jodaTimestampFormats, List<String> javaTimestampFormats,
+                       Pattern simplePattern, String grokPatternName,
                        String epilogue) {
             this.candidateIndex = candidateIndex;
             this.preface = preface;
-            this.dateFormats = dateFormats;
+            this.jodaTimestampFormats = Collections.unmodifiableList(jodaTimestampFormats);
+            this.javaTimestampFormats = Collections.unmodifiableList(javaTimestampFormats);
             this.simplePattern = simplePattern;
             this.grokPatternName = grokPatternName;
             this.epilogue = epilogue;
@@ -370,8 +436,8 @@ public final class TimestampFormatFinder {
          * I.e., does the textual representation NOT define the timezone?
          */
         public boolean hasTimezoneDependentParsing() {
-            return dateFormats.stream()
-                .anyMatch(dateFormat -> dateFormat.contains("HH") && dateFormat.toLowerCase(Locale.ROOT).indexOf('z') == -1);
+            return javaTimestampFormats.stream().anyMatch(javaTimestampFormat ->
+                javaTimestampFormat.indexOf('X') == -1 && javaTimestampFormat.indexOf('z') == -1 && javaTimestampFormat.contains("mm"));
         }
 
         /**
@@ -380,13 +446,13 @@ public final class TimestampFormatFinder {
          * and possibly also a "format" setting.
          */
         public Map<String, String> getEsDateMappingTypeWithFormat() {
-            if (dateFormats.contains("TAI64N")) {
-                // There's no format for TAI64N in the date formats used in mappings
+            if (jodaTimestampFormats.contains("TAI64N")) {
+                // There's no format for TAI64N in the timestamp formats used in mappings
                 return Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword");
             }
             Map<String, String> mapping = new LinkedHashMap<>();
             mapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date");
-            String formats = dateFormats.stream().flatMap(format -> {
+            String formats = jodaTimestampFormats.stream().flatMap(format -> {
                 switch (format) {
                     case "ISO8601":
                         return Stream.empty();
@@ -406,7 +472,8 @@ public final class TimestampFormatFinder {
 
         @Override
         public int hashCode() {
-            return Objects.hash(candidateIndex, preface, dateFormats, simplePattern.pattern(), grokPatternName, epilogue);
+            return Objects.hash(candidateIndex, preface, jodaTimestampFormats, javaTimestampFormats, simplePattern.pattern(),
+                grokPatternName, epilogue);
         }
 
         @Override
@@ -421,7 +488,8 @@ public final class TimestampFormatFinder {
             TimestampMatch that = (TimestampMatch) other;
             return this.candidateIndex == that.candidateIndex &&
                 Objects.equals(this.preface, that.preface) &&
-                Objects.equals(this.dateFormats, that.dateFormats) &&
+                Objects.equals(this.jodaTimestampFormats, that.jodaTimestampFormats) &&
+                Objects.equals(this.javaTimestampFormats, that.javaTimestampFormats) &&
                 Objects.equals(this.simplePattern.pattern(), that.simplePattern.pattern()) &&
                 Objects.equals(this.grokPatternName, that.grokPatternName) &&
                 Objects.equals(this.epilogue, that.epilogue);
@@ -430,7 +498,8 @@ public final class TimestampFormatFinder {
         @Override
         public String toString() {
             return "index = " + candidateIndex + (preface.isEmpty() ? "" : ", preface = '" + preface + "'") +
-                ", date formats = " + dateFormats.stream().collect(Collectors.joining("', '", "[ '", "' ]")) +
+                ", Joda timestamp formats = " + jodaTimestampFormats.stream().collect(Collectors.joining("', '", "[ '", "' ]")) +
+                ", Java timestamp formats = " + javaTimestampFormats.stream().collect(Collectors.joining("', '", "[ '", "' ]")) +
                 ", simple pattern = '" + simplePattern.pattern() + "', grok pattern = '" + grokPatternName + "'" +
                 (epilogue.isEmpty() ? "" : ", epilogue = '" + epilogue + "'");
         }
@@ -438,29 +507,36 @@ public final class TimestampFormatFinder {
 
     static final class CandidateTimestampFormat {
 
-        final List<String> dateFormats;
+        final List<String> jodaTimestampFormats;
+        final List<String> javaTimestampFormats;
         final Pattern simplePattern;
         final Grok strictSearchGrok;
         final Grok strictFullMatchGrok;
         final String standardGrokPatternName;
         final List<Integer> quickRuleOutIndices;
 
-        CandidateTimestampFormat(String dateFormat, String simpleRegex, String strictGrokPattern, String standardGrokPatternName) {
-            this(Collections.singletonList(dateFormat), simpleRegex, strictGrokPattern, standardGrokPatternName);
+        CandidateTimestampFormat(String jodaTimestampFormat, String javaTimestampFormat, String simpleRegex, String strictGrokPattern,
+                                 String standardGrokPatternName) {
+            this(Collections.singletonList(jodaTimestampFormat), Collections.singletonList(javaTimestampFormat), simpleRegex,
+                strictGrokPattern, standardGrokPatternName);
         }
 
-        CandidateTimestampFormat(String dateFormat, String simpleRegex, String strictGrokPattern, String standardGrokPatternName,
-                                 List<Integer> quickRuleOutIndices) {
-            this(Collections.singletonList(dateFormat), simpleRegex, strictGrokPattern, standardGrokPatternName, quickRuleOutIndices);
+        CandidateTimestampFormat(String jodaTimestampFormat, String javaTimestampFormat, String simpleRegex, String strictGrokPattern,
+                                 String standardGrokPatternName, List<Integer> quickRuleOutIndices) {
+            this(Collections.singletonList(jodaTimestampFormat), Collections.singletonList(javaTimestampFormat), simpleRegex,
+                strictGrokPattern, standardGrokPatternName, quickRuleOutIndices);
         }
 
-        CandidateTimestampFormat(List<String> dateFormats, String simpleRegex, String strictGrokPattern, String standardGrokPatternName) {
-            this(dateFormats, simpleRegex, strictGrokPattern, standardGrokPatternName, Collections.emptyList());
+        CandidateTimestampFormat(List<String> jodaTimestampFormats, List<String> javaTimestampFormats, String simpleRegex,
+                                 String strictGrokPattern, String standardGrokPatternName) {
+            this(jodaTimestampFormats, javaTimestampFormats, simpleRegex, strictGrokPattern, standardGrokPatternName,
+                Collections.emptyList());
         }
 
-        CandidateTimestampFormat(List<String> dateFormats, String simpleRegex, String strictGrokPattern, String standardGrokPatternName,
-                                 List<Integer> quickRuleOutIndices) {
-            this.dateFormats = dateFormats;
+        CandidateTimestampFormat(List<String> jodaTimestampFormats, List<String> javaTimestampFormats, String simpleRegex,
+                                 String strictGrokPattern, String standardGrokPatternName, List<Integer> quickRuleOutIndices) {
+            this.jodaTimestampFormats = jodaTimestampFormats;
+            this.javaTimestampFormats = javaTimestampFormats;
             this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE);
             // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
             this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +

+ 2 - 1
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java

@@ -93,7 +93,8 @@ public class XmlFileStructureFinder implements FileStructureFinder {
         Tuple<String, TimestampMatch> timeField = FileStructureUtils.guessTimestampField(explanation, sampleRecords, overrides);
         if (timeField != null) {
             structureBuilder.setTimestampField(timeField.v1())
-                .setTimestampFormats(timeField.v2().dateFormats)
+                .setJodaTimestampFormats(timeField.v2().jodaTimestampFormats)
+                .setJavaTimestampFormats(timeField.v2().javaTimestampFormats)
                 .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing());
         }
 

+ 9 - 9
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinderTests.java

@@ -50,7 +50,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
         assertEquals(Arrays.asList("time", "message"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("time", structure.getTimestampField());
-        assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCompleteCsvAndColumnNamesOverride() throws Exception {
@@ -84,7 +84,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
         assertEquals(Arrays.asList("my_time", "my_message"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("my_time", structure.getTimestampField());
-        assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCompleteCsvAndHasHeaderRowOverride() throws Exception {
@@ -120,7 +120,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
         assertEquals(Arrays.asList("column1", "column2"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertNull(structure.getTimestampField());
-        assertNull(structure.getTimestampFormats());
+        assertNull(structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCsvWithIncompleteLastRecord() throws Exception {
@@ -153,7 +153,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
         assertEquals(Arrays.asList("message", "time", "count"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("time", structure.getTimestampField());
-        assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCsvWithTrailingNulls() throws Exception {
@@ -193,7 +193,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
             "tip_amount", "tolls_amount", "improvement_surcharge", "total_amount", "column18", "column19"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("tpep_pickup_datetime", structure.getTimestampField());
-        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCsvWithTrailingNullsAndOverriddenTimeField() throws Exception {
@@ -237,7 +237,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
             "tip_amount", "tolls_amount", "improvement_surcharge", "total_amount", "column18", "column19"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("tpep_dropoff_datetime", structure.getTimestampField());
-        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeader() throws Exception {
@@ -277,7 +277,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
             "tip_amount", "tolls_amount", "improvement_surcharge", "total_amount"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("tpep_pickup_datetime", structure.getTimestampField());
-        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeaderAndColumnNamesOverride() throws Exception {
@@ -324,7 +324,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
             structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("my_tpep_pickup_datetime", structure.getTimestampField());
-        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenCsvWithTimeLastColumn() throws Exception {
@@ -357,7 +357,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
         assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getColumnNames());
         assertNull(structure.getGrokPattern());
         assertEquals("timestamp", structure.getTimestampField());
-        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss.SSSSSS"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss.SSSSSS"), structure.getJodaTimestampFormats());
     }
 
     public void testFindHeaderFromSampleGivenHeaderInSample() throws IOException {

+ 9 - 9
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java

@@ -39,7 +39,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("field1", match.v1());
-        assertThat(match.v2().dateFormats, contains("ISO8601"));
+        assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -52,7 +52,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample), overrides);
         assertNotNull(match);
         assertEquals("field1", match.v1());
-        assertThat(match.v2().dateFormats, contains("ISO8601"));
+        assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -76,7 +76,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample), overrides);
         assertNotNull(match);
         assertEquals("field1", match.v1());
-        assertThat(match.v2().dateFormats, contains("ISO8601"));
+        assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -99,7 +99,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("field1", match.v1());
-        assertThat(match.v2().dateFormats, contains("ISO8601"));
+        assertThat(match.v2().jodaTimestampFormats, contains("ISO8601"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -128,7 +128,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("time", match.v1());
-        assertThat(match.v2().dateFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
+        assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -145,7 +145,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("time", match.v1());
-        assertThat(match.v2().dateFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
+        assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -176,7 +176,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("time", match.v1());
-        assertThat(match.v2().dateFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
+        assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS"));
         assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName);
     }
 
@@ -193,7 +193,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("time", match.v1());
-        assertThat(match.v2().dateFormats, contains("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"));
+        assertThat(match.v2().jodaTimestampFormats, contains("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"));
         assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName);
     }
 
@@ -226,7 +226,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
             FileStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2), EMPTY_OVERRIDES);
         assertNotNull(match);
         assertEquals("time2", match.v1());
-        assertThat(match.v2().dateFormats, contains("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"));
+        assertThat(match.v2().jodaTimestampFormats, contains("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"));
         assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName);
     }
 

+ 1 - 1
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/JsonFileStructureFinderTests.java

@@ -38,6 +38,6 @@ public class JsonFileStructureFinderTests extends FileStructureTestCase {
         assertNull(structure.getShouldTrimFields());
         assertNull(structure.getGrokPattern());
         assertEquals("timestamp", structure.getTimestampField());
-        assertEquals(Collections.singletonList("UNIX_MS"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
     }
 }

+ 12 - 12
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java

@@ -119,14 +119,14 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
             assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
         }
         assertNull(structure.getExcludeLinesPattern());
-        assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
+        assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", structure.getMultilineStartPattern());
         assertNull(structure.getDelimiter());
         assertNull(structure.getQuote());
         assertNull(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
         assertEquals("timestamp", structure.getTimestampField());
-        assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() throws Exception {
@@ -149,14 +149,14 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
             assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
         }
         assertNull(structure.getExcludeLinesPattern());
-        assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
+        assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", structure.getMultilineStartPattern());
         assertNull(structure.getDelimiter());
         assertNull(structure.getQuote());
         assertNull(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals("\\[%{TIMESTAMP_ISO8601:my_time}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern());
         assertEquals("my_time", structure.getTimestampField());
-        assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throws Exception {
@@ -180,7 +180,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
             assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker());
         }
         assertNull(structure.getExcludeLinesPattern());
-        assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
+        assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", structure.getMultilineStartPattern());
         assertNull(structure.getDelimiter());
         assertNull(structure.getQuote());
         assertNull(structure.getHasHeaderRow());
@@ -188,7 +188,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
         assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} *\\]" +
             "\\[%{JAVACLASS:class} *\\] \\[%{HOSTNAME:node}\\] %{JAVALOGMESSAGE:message}", structure.getGrokPattern());
         assertEquals("timestamp", structure.getTimestampField());
-        assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
     }
 
     public void testCreateConfigsGivenElasticsearchLogAndImpossibleGrokPatternOverride() {
@@ -312,8 +312,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
         Tuple<TimestampMatch, Set<String>> mostLikelyMatch =
             TextLogFileStructureFinder.mostLikelyTimestamp(sample.split("\n"), FileStructureOverrides.EMPTY_OVERRIDES);
         assertNotNull(mostLikelyMatch);
-        assertEquals(new TimestampMatch(7, "", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""),
-            mostLikelyMatch.v1());
+        assertEquals(new TimestampMatch(9, "", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSS",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), mostLikelyMatch.v1());
     }
 
     public void testMostLikelyTimestampGivenExceptionTrace() {
@@ -325,8 +325,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
         // Even though many lines have a timestamp near the end (in the Lucene version information),
         // these are so far along the lines that the weight of the timestamp near the beginning of the
         // first line should take precedence
-        assertEquals(new TimestampMatch(7, "", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""),
-            mostLikelyMatch.v1());
+        assertEquals(new TimestampMatch(9, "", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSS",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), mostLikelyMatch.v1());
     }
 
     public void testMostLikelyTimestampGivenExceptionTraceAndTimestampFormatOverride() {
@@ -338,8 +338,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
         assertNotNull(mostLikelyMatch);
 
         // The override should force the seemingly inferior choice of timestamp
-        assertEquals(new TimestampMatch(6, "", "YYYY-MM-dd HH:mm:ss", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601",
-                ""), mostLikelyMatch.v1());
+        assertEquals(new TimestampMatch(6, "", "YYYY-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}",
+            "TIMESTAMP_ISO8601", ""), mostLikelyMatch.v1());
     }
 
     public void testMostLikelyTimestampGivenExceptionTraceAndImpossibleTimestampFormatOverride() {

+ 205 - 93
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java

@@ -6,14 +6,11 @@
 package org.elasticsearch.xpack.ml.filestructurefinder;
 
 import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.common.time.DateFormatters;
 import org.elasticsearch.xpack.ml.filestructurefinder.TimestampFormatFinder.TimestampMatch;
-import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-import org.joda.time.format.ISODateTimeFormat;
 
 import java.util.Arrays;
+import java.util.List;
 import java.util.Locale;
 
 public class TimestampFormatFinderTests extends FileStructureTestCase {
@@ -28,199 +25,206 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
 
     public void testFindFirstMatchGivenOnlyIso8601() {
 
-        TimestampMatch expected = new TimestampMatch(7, "", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601",
-            "");
+        validateTimestampMatch(new TimestampMatch(7, "", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSSXX",
+                "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15T16:14:56,374Z",
+            1526400896374L);
+        validateTimestampMatch(new TimestampMatch(7, "", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSSXX",
+                "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15T17:14:56,374+0100",
+            1526400896374L);
+        validateTimestampMatch(new TimestampMatch(8, "", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSSXXX",
+                "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15T17:14:56,374+01:00",
+            1526400896374L);
+        validateTimestampMatch(new TimestampMatch(9, "", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSS",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15T17:14:56,374", 1526400896374L);
 
-        checkAndValidateDateFormat(expected, "2018-05-15T16:14:56,374Z", 1526400896374L);
-        checkAndValidateDateFormat(expected, "2018-05-15T17:14:56,374+0100", 1526400896374L);
-        checkAndValidateDateFormat(expected, "2018-05-15T17:14:56,374+01:00", 1526400896374L);
-        checkAndValidateDateFormat(expected, "2018-05-15T17:14:56,374", 1526400896374L);
-        checkAndValidateDateFormat(expected, "2018-05-15T16:14:56Z", 1526400896000L);
-        checkAndValidateDateFormat(expected, "2018-05-15T17:14:56+0100", 1526400896000L);
-        checkAndValidateDateFormat(expected, "2018-05-15T17:14:56+01:00", 1526400896000L);
-        checkAndValidateDateFormat(expected, "2018-05-15T17:14:56", 1526400896000L);
+        TimestampMatch pureIso8601Expected = new TimestampMatch(10, "", "ISO8601", "ISO8601",
+            "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", "");
 
-        checkAndValidateDateFormat(new TimestampMatch(1, "", "YYYY-MM-dd HH:mm:ss,SSSZ",
+        validateTimestampMatch(pureIso8601Expected, "2018-05-15T16:14:56Z", 1526400896000L);
+        validateTimestampMatch(pureIso8601Expected, "2018-05-15T17:14:56+0100", 1526400896000L);
+        validateTimestampMatch(pureIso8601Expected, "2018-05-15T17:14:56+01:00", 1526400896000L);
+        validateTimestampMatch(pureIso8601Expected, "2018-05-15T17:14:56", 1526400896000L);
+
+        validateTimestampMatch(new TimestampMatch(1, "", "YYYY-MM-dd HH:mm:ss,SSSZ", "yyyy-MM-dd HH:mm:ss,SSSXX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 16:14:56,374Z",
             1526400896374L);
-        checkAndValidateDateFormat(new TimestampMatch(1, "", "YYYY-MM-dd HH:mm:ss,SSSZ",
+        validateTimestampMatch(new TimestampMatch(1, "", "YYYY-MM-dd HH:mm:ss,SSSZ", "yyyy-MM-dd HH:mm:ss,SSSXX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56,374+0100",
             1526400896374L);
-        checkAndValidateDateFormat(new TimestampMatch(2, "", "YYYY-MM-dd HH:mm:ss,SSSZZ",
+        validateTimestampMatch(new TimestampMatch(2, "", "YYYY-MM-dd HH:mm:ss,SSSZZ", "yyyy-MM-dd HH:mm:ss,SSSXXX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56,374+01:00",
             1526400896374L);
-        checkAndValidateDateFormat(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss,SSS",
+        validateTimestampMatch(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss,SSS",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56,374", 1526400896374L);
-        checkAndValidateDateFormat(new TimestampMatch(4, "", "YYYY-MM-dd HH:mm:ssZ",
+        validateTimestampMatch(new TimestampMatch(4, "", "YYYY-MM-dd HH:mm:ssZ", "yyyy-MM-dd HH:mm:ssXX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 16:14:56Z", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(4, "", "YYYY-MM-dd HH:mm:ssZ",
+        validateTimestampMatch(new TimestampMatch(4, "", "YYYY-MM-dd HH:mm:ssZ", "yyyy-MM-dd HH:mm:ssXX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56+0100", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(5, "", "YYYY-MM-dd HH:mm:ssZZ",
+        validateTimestampMatch(new TimestampMatch(5, "", "YYYY-MM-dd HH:mm:ssZZ", "yyyy-MM-dd HH:mm:ssXXX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56+01:00", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(6, "", "YYYY-MM-dd HH:mm:ss",
+        validateTimestampMatch(new TimestampMatch(6, "", "YYYY-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56", 1526400896000L);
     }
 
-    public void testFindFirstMatchGivenOnlyKnownDateFormat() {
+    public void testFindFirstMatchGivenOnlyKnownTimestampFormat() {
 
         // Note: some of the time formats give millisecond accuracy, some second accuracy and some minute accuracy
 
-        checkAndValidateDateFormat(new TimestampMatch(0, "", "YYYY-MM-dd HH:mm:ss,SSS Z",
+        validateTimestampMatch(new TimestampMatch(0, "", "YYYY-MM-dd HH:mm:ss,SSS Z", "yyyy-MM-dd HH:mm:ss,SSS XX",
                 "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TOMCAT_DATESTAMP", ""), "2018-05-15 17:14:56,374 +0100",
             1526400896374L);
 
-        checkAndValidateDateFormat(new TimestampMatch(8, "", "EEE MMM dd YYYY HH:mm:ss zzz",
+        validateTimestampMatch(new TimestampMatch(11, "", "EEE MMM dd YYYY HH:mm:ss zzz", "EEE MMM dd yyyy HH:mm:ss zzz",
                 "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2} ", "DATESTAMP_RFC822", ""),
             "Tue May 15 2018 16:14:56 UTC", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(9, "", "EEE MMM dd YYYY HH:mm zzz",
+        validateTimestampMatch(new TimestampMatch(12, "", "EEE MMM dd YYYY HH:mm zzz", "EEE MMM dd yyyy HH:mm zzz",
                 "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2} ", "DATESTAMP_RFC822", ""),
             "Tue May 15 2018 16:14 UTC", 1526400840000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(10, "", "EEE, dd MMM YYYY HH:mm:ss ZZ",
+        validateTimestampMatch(new TimestampMatch(13, "", "EEE, dd MMM YYYY HH:mm:ss ZZ", "EEE, dd MMM yyyy HH:mm:ss XXX",
                 "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""),
             "Tue, 15 May 2018 17:14:56 +01:00", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(11, "", "EEE, dd MMM YYYY HH:mm:ss Z",
+        validateTimestampMatch(new TimestampMatch(14, "", "EEE, dd MMM YYYY HH:mm:ss Z", "EEE, dd MMM yyyy HH:mm:ss XX",
                 "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""),
             "Tue, 15 May 2018 17:14:56 +0100", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(12, "", "EEE, dd MMM YYYY HH:mm ZZ",
+        validateTimestampMatch(new TimestampMatch(15, "", "EEE, dd MMM YYYY HH:mm ZZ", "EEE, dd MMM yyyy HH:mm XXX",
                 "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""),
             "Tue, 15 May 2018 17:14 +01:00", 1526400840000L);
-        checkAndValidateDateFormat(new TimestampMatch(13, "", "EEE, dd MMM YYYY HH:mm Z",
+        validateTimestampMatch(new TimestampMatch(16, "", "EEE, dd MMM YYYY HH:mm Z", "EEE, dd MMM yyyy HH:mm XX",
                 "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""), "Tue, 15 May 2018 17:14 +0100",
             1526400840000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(14, "", "EEE MMM dd HH:mm:ss zzz YYYY",
+        validateTimestampMatch(new TimestampMatch(17, "", "EEE MMM dd HH:mm:ss zzz YYYY", "EEE MMM dd HH:mm:ss zzz yyyy",
                 "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b", "DATESTAMP_OTHER", ""),
             "Tue May 15 16:14:56 UTC 2018", 1526400896000L);
-        checkAndValidateDateFormat(new TimestampMatch(15, "", "EEE MMM dd HH:mm zzz YYYY",
+        validateTimestampMatch(new TimestampMatch(18, "", "EEE MMM dd HH:mm zzz YYYY", "EEE MMM dd HH:mm zzz yyyy",
                 "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b", "DATESTAMP_OTHER", ""),
             "Tue May 15 16:14 UTC 2018", 1526400840000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(16, "", "YYYYMMddHHmmss", "\\b\\d{14}\\b", "DATESTAMP_EVENTLOG", ""),
+        validateTimestampMatch(new TimestampMatch(19, "", "YYYYMMddHHmmss", "yyyyMMddHHmmss", "\\b\\d{14}\\b",
+                "DATESTAMP_EVENTLOG", ""),
             "20180515171456", 1526400896000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(17, "", "EEE MMM dd HH:mm:ss YYYY",
+        validateTimestampMatch(new TimestampMatch(20, "", "EEE MMM dd HH:mm:ss YYYY", "EEE MMM dd HH:mm:ss yyyy",
                 "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4}\\b", "HTTPDERROR_DATE", ""),
             "Tue May 15 17:14:56 2018", 1526400896000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(18, "", Arrays.asList("MMM dd HH:mm:ss.SSS", "MMM  d HH:mm:ss.SSS"),
+        validateTimestampMatch(new TimestampMatch(21, "", Arrays.asList("MMM dd HH:mm:ss.SSS", "MMM  d HH:mm:ss.SSS"),
+            Arrays.asList("MMM dd HH:mm:ss.SSS", "MMM  d HH:mm:ss.SSS"),
             "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "SYSLOGTIMESTAMP", ""), "May 15 17:14:56.725", 1526400896725L);
-        checkAndValidateDateFormat(new TimestampMatch(19, "", Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+        validateTimestampMatch(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+            Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
             "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", ""), "May 15 17:14:56", 1526400896000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(20, "", "dd/MMM/YYYY:HH:mm:ss Z",
+        validateTimestampMatch(new TimestampMatch(23, "", "dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX",
                 "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE", ""), "15/May/2018:17:14:56 +0100", 1526400896000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(21, "", "MMM dd, YYYY K:mm:ss a",
+        validateTimestampMatch(new TimestampMatch(24, "", "MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a",
                 "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP", ""), "May 15, 2018 5:14:56 PM",
             1526400896000L);
 
-        checkAndValidateDateFormat(new TimestampMatch(22, "", Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"),
+        validateTimestampMatch(new TimestampMatch(25, "", Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM  d YYYY HH:mm:ss"),
+                Arrays.asList("MMM dd yyyy HH:mm:ss", "MMM  d yyyy HH:mm:ss"),
                 "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2}\\b", "CISCOTIMESTAMP", ""), "May 15 2018 17:14:56",
             1526400896000L);
     }
 
     public void testFindFirstMatchGivenOnlySystemDate() {
 
-        assertEquals(new TimestampMatch(23, "", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
+        assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
             TimestampFormatFinder.findFirstMatch("1526400896374"));
-        assertEquals(new TimestampMatch(23, "", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
+        assertEquals(new TimestampMatch(26, "", "UNIX_MS", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""),
             TimestampFormatFinder.findFirstFullMatch("1526400896374"));
 
-        assertEquals(new TimestampMatch(24, "", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
+        assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
             TimestampFormatFinder.findFirstMatch("1526400896.736"));
-        assertEquals(new TimestampMatch(24, "", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
+        assertEquals(new TimestampMatch(27, "", "UNIX", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""),
             TimestampFormatFinder.findFirstFullMatch("1526400896.736"));
-        assertEquals(new TimestampMatch(25, "", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
+        assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
             TimestampFormatFinder.findFirstMatch("1526400896"));
-        assertEquals(new TimestampMatch(25, "", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
+        assertEquals(new TimestampMatch(28, "", "UNIX", "UNIX", "\\b\\d{10}\\b", "POSINT", ""),
             TimestampFormatFinder.findFirstFullMatch("1526400896"));
 
-        assertEquals(new TimestampMatch(26, "", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
+        assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
             TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980"));
-        assertEquals(new TimestampMatch(26, "", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
+        assertEquals(new TimestampMatch(29, "", "TAI64N", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""),
             TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980"));
     }
 
-    private void checkAndValidateDateFormat(TimestampMatch expected, String text, long expectedEpochMs) {
-
-        assertEquals(expected, TimestampFormatFinder.findFirstMatch(text));
-        assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text));
-
-        // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London
-        DateTimeZone zone = DateTimeZone.forID("Europe/London");
-        DateTime parsed;
-        for (int i = 0; i < expected.dateFormats.size(); ++i) {
-            try {
-                String dateFormat = expected.dateFormats.get(i);
-                switch (dateFormat) {
-                    case "ISO8601":
-                        parsed = ISODateTimeFormat.dateTimeParser().withZone(zone).withDefaultYear(2018).parseDateTime(text);
-                        break;
-                    default:
-                        DateTimeFormatter parser = DateTimeFormat.forPattern(dateFormat).withZone(zone).withLocale(Locale.UK);
-                        parsed = parser.withDefaultYear(2018).parseDateTime(text);
-                        break;
-                }
-                if (expectedEpochMs == parsed.getMillis()) {
-                    break;
-                }
-                // If the last one isn't right then propagate
-                if (i == expected.dateFormats.size() - 1) {
-                    assertEquals(expectedEpochMs, parsed.getMillis());
-                }
-            } catch (RuntimeException e) {
-                // If the last one throws then propagate
-                if (i == expected.dateFormats.size() - 1) {
-                    throw e;
-                }
-            }
-        }
-        assertTrue(expected.simplePattern.matcher(text).find());
-    }
-
     public void testFindFirstMatchGivenRealLogMessages() {
 
-        assertEquals(new TimestampMatch(7, "[", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601",
+        assertEquals(new TimestampMatch(9, "[", "ISO8601", "yyyy-MM-dd'T'HH:mm:ss,SSS",
+                "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601",
                 "][INFO ][o.e.e.NodeEnvironment    ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]"),
             TimestampFormatFinder.findFirstMatch("[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment    ] [node-0] " +
                 "heap size [3.9gb], compressed ordinary object pointers [true]"));
 
-        assertEquals(new TimestampMatch(20, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z",
+        assertEquals(new TimestampMatch(23, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z", "dd/MMM/yyyy:HH:mm:ss XX",
                 "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE",
                 "] \"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"),
             TimestampFormatFinder.findFirstMatch("192.168.62.101 - - [29/Jun/2016:12:11:31 +0000] " +
                 "\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"));
 
-        assertEquals(new TimestampMatch(21, "", "MMM dd, YYYY K:mm:ss a",
+        assertEquals(new TimestampMatch(24, "", "MMM dd, YYYY h:mm:ss a", "MMM dd, yyyy h:mm:ss a",
                 "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP",
                 " org.apache.tomcat.util.http.Parameters processParameters"),
             TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters"));
 
-        assertEquals(new TimestampMatch(19, "", Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+        assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+                Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
                 "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " esxi1.acme.com Vpxa: " +
                     "[3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"),
             TimestampFormatFinder.findFirstMatch("Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' " +
                 "opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"));
 
-        assertEquals(new TimestampMatch(7, "559550912540598297\t", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}",
+        assertEquals(new TimestampMatch(10, "559550912540598297\t", "ISO8601", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}",
                 "TIMESTAMP_ISO8601",
                 "\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"),
             TimestampFormatFinder.findFirstMatch("559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t" +
                 "192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"));
 
-        assertEquals(new TimestampMatch(19, "", Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+        assertEquals(new TimestampMatch(22, "", Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
+                Arrays.asList("MMM dd HH:mm:ss", "MMM  d HH:mm:ss"),
                 "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP",
                 " dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53"),
             TimestampFormatFinder.findFirstMatch("Sep  8 11:55:35 dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving " +
                 "'www.elastic.co/A/IN': 95.110.68.206#53"));
 
-        assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}",
-                "TIMESTAMP_ISO8601",
+        assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
+                "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "TIMESTAMP_ISO8601",
                 "|INFO    |VirtualServer |1  |client  'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " +
                     "'User1'(id:2) in channel '3er Instanz'(id:2)"),
             TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO    |VirtualServer |1  |client " +
                 " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)"));
+
+        // Differs from the above as the required format is specified
+        assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS",
+                "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "TIMESTAMP_ISO8601",
+                "|INFO    |VirtualServer |1  |client  'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " +
+                    "'User1'(id:2) in channel '3er Instanz'(id:2)"),
+            TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO    |VirtualServer |1  |client " +
+                " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
+                randomFrom("YYYY-MM-dd HH:mm:ss.SSSSSS", "yyyy-MM-dd HH:mm:ss.SSSSSS")));
+
+        // Non-matching required format specified
+        assertNull(TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO    |VirtualServer |1  |client " +
+                " 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)",
+            randomFrom("UNIX", "EEE MMM dd YYYY HH:mm zzz")));
+    }
+
+    public void testAdjustRequiredFormat() {
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS Z", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss,SSS Z"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS Z", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss,SSSSSS Z"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS Z", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss,SSSSSSSSS Z"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS Z", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss.SSS Z"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS Z", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss.SSSSSS Z"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS Z", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss.SSSSSSSSS Z"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss,SSS"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss,SSSSSS"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss,SSSSSSSSS"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss.SSS"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss.SSSSSS"));
+        assertEquals("YYYY-MM-dd HH:mm:ss,SSS", TimestampFormatFinder.adjustRequiredFormat("YYYY-MM-dd HH:mm:ss.SSSSSSSSS"));
     }
 
     public void testInterpretFractionalSeconds() {
@@ -239,4 +243,112 @@ public class TimestampFormatFinderTests extends FileStructureTestCase {
         assertEquals(new Tuple<>(',', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764 Z"));
         assertEquals(new Tuple<>('.', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25.764 Z"));
     }
+
+    private void validateTimestampMatch(TimestampMatch expected, String text, long expectedEpochMs) {
+        // All lookup variants (no hint, candidate index, required format) must return the expected match; bogus hints must return null
+        assertEquals(expected, TimestampFormatFinder.findFirstMatch(text));
+        assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text));
+        assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, expected.candidateIndex));
+        assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, expected.candidateIndex));
+        assertNull(TimestampFormatFinder.findFirstMatch(text, Integer.MAX_VALUE));
+        assertNull(TimestampFormatFinder.findFirstFullMatch(text, Integer.MAX_VALUE));
+        assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.jodaTimestampFormats)));
+        assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.jodaTimestampFormats)));
+        assertEquals(expected, TimestampFormatFinder.findFirstMatch(text, randomFrom(expected.javaTimestampFormats)));
+        assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text, randomFrom(expected.javaTimestampFormats)));
+        assertNull(TimestampFormatFinder.findFirstMatch(text, "wrong format"));
+        assertNull(TimestampFormatFinder.findFirstFullMatch(text, "wrong format"));
+        // At least one format in each of the Joda and Java lists must parse the text to expectedEpochMs
+        validateJodaTimestampFormats(expected.jodaTimestampFormats, text, expectedEpochMs);
+        validateJavaTimestampFormats(expected.javaTimestampFormats, text, expectedEpochMs);
+        // The match's simplePattern must also find something in the text
+        assertTrue(expected.simplePattern.matcher(text).find());
+    }
+
+    private void validateJodaTimestampFormats(List<String> jodaTimestampFormats, String text, long expectedEpochMs) {
+        // Passes if at least one of the given Joda formats parses the text to expectedEpochMs; the last format's failure is propagated
+        // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London.
+        // This is the timezone that will be used for any text representations that don't include it.
+        org.joda.time.DateTimeZone defaultZone = org.joda.time.DateTimeZone.forID("Europe/London");
+        org.joda.time.DateTime parsed;
+        for (int i = 0; i < jodaTimestampFormats.size(); ++i) {
+            try {
+                String timestampFormat = jodaTimestampFormats.get(i);
+                switch (timestampFormat) {
+                    case "ISO8601":
+                        parsed = org.joda.time.format.ISODateTimeFormat.dateTimeParser()
+                            .withZone(defaultZone).withDefaultYear(2018).parseDateTime(text);
+                        break;
+                    default:
+                        org.joda.time.format.DateTimeFormatter parser =
+                            org.joda.time.format.DateTimeFormat.forPattern(timestampFormat).withZone(defaultZone).withLocale(Locale.ROOT);
+                        parsed = parser.withDefaultYear(2018).parseDateTime(text);
+                        break;
+                }
+                if (expectedEpochMs == parsed.getMillis()) {
+                    break;
+                }
+                // If the last format parses to the wrong time then fail with both values reported
+                if (i == jodaTimestampFormats.size() - 1) {
+                    assertEquals(expectedEpochMs, parsed.getMillis());
+                }
+            } catch (RuntimeException e) {
+                // If the last format throws then propagate; earlier failures just move on to the next format
+                if (i == jodaTimestampFormats.size() - 1) {
+                    throw e;
+                }
+            }
+        }
+    }
+
+    private void validateJavaTimestampFormats(List<String> javaTimestampFormats, String text, long expectedEpochMs) {
+        // Passes if at least one of the given java.time formats parses the text to expectedEpochMs; the last format's failure is propagated
+        // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London.
+        // This is the timezone that will be used for any text representations that don't include it.
+        java.time.ZoneId defaultZone = java.time.ZoneId.of("Europe/London");
+        java.time.temporal.TemporalAccessor parsed;
+        for (int i = 0; i < javaTimestampFormats.size(); ++i) {
+            try {
+                String timestampFormat = javaTimestampFormats.get(i);
+                switch (timestampFormat) {
+                    case "ISO8601":
+                        parsed = DateFormatters.forPattern("strict_date_optional_time_nanos").withZone(defaultZone).parse(text);
+                        break;
+                    default:
+                        java.time.format.DateTimeFormatter parser = new java.time.format.DateTimeFormatterBuilder()
+                            .appendPattern(timestampFormat).parseDefaulting(java.time.temporal.ChronoField.YEAR_OF_ERA, 2018)
+                            .toFormatter(Locale.ROOT);
+                        // This next line parses the textual date without any default timezone, so if
+                        // the text doesn't contain the timezone then the resulting temporal accessor
+                        // will be incomplete (i.e. impossible to convert to an Instant).  You would
+                        // hope that it would be possible to specify a timezone to be used only in this
+                        // case, and in Java 9 and 10 it is, by adding withZone(defaultZone) before the
+                        // parse(text) call.  However, with Java 8 this overrides any timezone parsed
+                        // from the text.  The solution is to parse twice, once without a default
+                        // timezone and then again with a default timezone if the first parse didn't
+                        // find one in the text.
+                        parsed = parser.parse(text);
+                        if (parsed.query(java.time.temporal.TemporalQueries.zone()) == null) {
+                            // TODO: when Java 8 is no longer supported, remove the unzoned parse and the null check
+                            // (the two lines above), the comment above them and the closing brace below
+                            parsed = parser.withZone(defaultZone).parse(text);
+                        }
+                        break;
+                }
+                long actualEpochMs = java.time.Instant.from(parsed).toEpochMilli();
+                if (expectedEpochMs == actualEpochMs) {
+                    break;
+                }
+                // If the last format parses to the wrong time then fail with both values reported
+                if (i == javaTimestampFormats.size() - 1) {
+                    assertEquals(expectedEpochMs, actualEpochMs);
+                }
+            } catch (RuntimeException e) {
+                // If the last format throws then propagate; earlier failures just move on to the next format
+                if (i == javaTimestampFormats.size() - 1) {
+                    throw e;
+                }
+            }
+        }
+    }
 }

+ 1 - 1
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinderTests.java

@@ -38,6 +38,6 @@ public class XmlFileStructureFinderTests extends FileStructureTestCase {
         assertNull(structure.getShouldTrimFields());
         assertNull(structure.getGrokPattern());
         assertEquals("timestamp", structure.getTimestampField());
-        assertEquals(Collections.singletonList("UNIX_MS"), structure.getTimestampFormats());
+        assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
     }
 }

+ 1 - 1
x-pack/plugin/src/test/resources/rest-api-spec/api/xpack.ml.find_file_structure.json

@@ -49,7 +49,7 @@
         },
         "timestamp_format": {
           "type": "string",
-          "description": "Optional parameter to specify the timestamp format in the file"
+          "description": "Optional parameter to specify the timestamp format in the file - may be either a Joda or Java time format"
         },
         "explain": {
           "type": "boolean",

+ 4 - 2
x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml

@@ -27,7 +27,8 @@
   - match: { has_byte_order_marker: false }
   - match: { format: json }
   - match: { timestamp_field: time }
-  - match: { timestamp_formats.0: UNIX }
+  - match: { joda_timestamp_formats.0: UNIX }
+  - match: { java_timestamp_formats.0: UNIX }
   - match: { need_client_timezone: false }
   - match: { mappings.airline.type: keyword }
   - match: { mappings.responsetime.type: double }
@@ -83,7 +84,8 @@
   - match: { has_byte_order_marker: false }
   - match: { format: json }
   - match: { timestamp_field: time }
-  - match: { timestamp_formats.0: UNIX }
+  - match: { joda_timestamp_formats.0: UNIX }
+  - match: { java_timestamp_formats.0: UNIX }
   - match: { need_client_timezone: false }
   - match: { mappings.airline.type: keyword }
   - match: { mappings.responsetime.type: double }