Browse Source

[text structure] Find field and message structure endpoints (#105660)

* Extract AbstractFindStructureRequest

* Extract FindStructureResponse

* Extract RestFindStructureRequestParser

* FindFieldStructure endpoint

* FindMessageStructure endpoint

* Improve FindTextStructureResponseTests

* REST API spec + YAML REST tests

* Lint fixes

* Remove POST find_field_structure

* Update docs/changelog/105660.yaml

* Update changelog

* Fix text_structure.find_field_structure.json

* Fix find_field_structure yaml rest test

* Fix FindTextStructureResponseTests

* Fix YAML tests with security

* Remove unreachable code

* DelimitedTextStructureFinder::createFromMessages

* NdJsonTextStructureFinderFactory::createFromMessages

* XmlTextStructureFinderFactory::createFromMessages

* LogTextStructureFinderFactory::createFromMessages

* Lint fixes

* Add createFromMessages to TextStructureFinderFactory interface

* Wire createFromMessages in the endpoints

* Uppercase UTF-8

* REST test for semi-structured messages

* Restrict query params to applicable endpoints

* typo

* Polish thread scheduling

* Propagate parent task in search request

* No header row for find message/field structure

* Expose findTextStructure more consistently

* Move text structure query params to shared doc

* Rename "find structure API" -> "find text structure API"

* Find message structure API docs

* Find field structure docs

* Maybe fix docs error?

* bugfix

* Fix docs?

* Fix find-field-structure test from docs

* Improve docs

* Add param documents_to_sample to docs

* improve docs
Jan Kuipers 1 year ago
parent
commit
c70956ac16
42 changed files with 2640 additions and 734 deletions
  1. 5 0
      docs/changelog/105660.yaml
  2. 316 0
      docs/reference/text-structure/apis/find-field-structure.asciidoc
  3. 292 0
      docs/reference/text-structure/apis/find-message-structure.asciidoc
  4. 215 0
      docs/reference/text-structure/apis/find-structure-shared.asciidoc
  5. 16 185
      docs/reference/text-structure/apis/find-structure.asciidoc
  6. 4 0
      docs/reference/text-structure/apis/index.asciidoc
  7. 90 0
      rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.find_field_structure.json
  8. 80 0
      rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.find_message_structure.json
  9. 377 0
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/AbstractFindStructureRequest.java
  10. 98 0
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindFieldStructureAction.java
  11. 97 0
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindMessageStructureAction.java
  12. 5 384
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindStructureAction.java
  13. 61 0
      x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindStructureResponse.java
  14. 0 29
      x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/textstructure/action/FindTextStructureActionResponseTests.java
  15. 33 0
      x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/textstructure/action/FindTextStructureResponseTests.java
  16. 2 0
      x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java
  17. 63 0
      x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/find_field_structure.yml
  18. 56 0
      x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/find_message_structure.yml
  19. 1 1
      x-pack/plugin/text-structure/qa/text-structure-with-security/build.gradle
  20. 12 0
      x-pack/plugin/text-structure/qa/text-structure-with-security/roles.yml
  21. 14 1
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/TextStructurePlugin.java
  22. 51 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindFieldStructureAction.java
  23. 55 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindMessageStructureAction.java
  24. 2 36
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java
  25. 73 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureArgumentsParser.java
  26. 31 23
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinder.java
  27. 39 1
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinderFactory.java
  28. 125 16
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinder.java
  29. 13 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinderFactory.java
  30. 23 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderFactory.java
  31. 9 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureFinderFactory.java
  32. 72 19
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureFinderManager.java
  33. 2 1
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureOverrides.java
  34. 53 11
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/XmlTextStructureFinderFactory.java
  35. 94 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportFindFieldStructureAction.java
  36. 56 0
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportFindMessageStructureAction.java
  37. 11 27
      x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportFindStructureAction.java
  38. 18 0
      x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinderFactoryTests.java
  39. 24 0
      x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinderTests.java
  40. 16 0
      x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinderTests.java
  41. 18 0
      x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderFactoryTests.java
  42. 18 0
      x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/XmlTextStructureFinderFactoryTests.java

+ 5 - 0
docs/changelog/105660.yaml

@@ -0,0 +1,5 @@
+pr: 105660
+summary: "Text structure endpoints to determine the structure of a list of messages and of an indexed field"
+area: Machine Learning
+type: feature
+issues: []

+ 316 - 0
docs/reference/text-structure/apis/find-field-structure.asciidoc

@@ -0,0 +1,316 @@
+[role="xpack"]
+[[find-field-structure]]
+= Find field structure API
+
+Finds the structure of a field in an Elasticsearch index.
+
+[discrete]
+[[find-field-structure-request]]
+== {api-request-title}
+
+`GET _text_structure/find_field_structure`
+
+[discrete]
+[[find-field-structure-prereqs]]
+== {api-prereq-title}
+
+* If the {es} {security-features} are enabled, you must have `monitor_text_structure` or
+`monitor` cluster privileges to use this API. See
+<<security-privileges>>.
+
+[discrete]
+[[find-field-structure-desc]]
+== {api-description-title}
+
+This API provides a starting point for extracting further information from log messages
+already ingested into {es}. For example, if you have ingested data into a very simple
+index that has just `@timestamp` and `message` fields, you can use this API to
+see what common structure exists in the `message` field.
+
+The response from the API contains:
+
+* Sample messages.
+* Statistics that reveal the most common values for all fields detected within
+the text and basic numeric statistics for numeric fields.
+* Information about the structure of the text, which is useful when you write
+ingest configurations to index it or similarly formatted text.
+* Appropriate mappings for an {es} index, which you could use to ingest the text.
+
+All this information can be calculated by the structure finder with no guidance.
+However, you can optionally override some of the decisions about the text
+structure by specifying one or more query parameters.
+
+Details of the output can be seen in the <<find-field-structure-examples,examples>>.
+
+If the structure finder produces unexpected results,
+specify the `explain` query parameter and an `explanation` will appear in
+the response. It helps determine why the returned structure was
+chosen.
+
+[discrete]
+[[find-field-structure-query-parms]]
+== {api-query-parms-title}
+
+`index`::
+(Required, string) The name of the index containing the field.
+
+`field`::
+(Required, string) The name of the field that's analyzed.
+
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-column-names]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-delimiter]
+
+`documents_to_sample`::
+(Optional, unsigned integer) The number of documents to include in the structural
+analysis. The minimum is 2; the default is 1000.
+
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-explain]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-format]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-grok-pattern]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-ecs-compatibility]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-quote]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-should-trim-fields]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timeout]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-field]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-format]
+
+[discrete]
+[[find-field-structure-examples]]
+== {api-examples-title}
+
+[discrete]
+[[find-field-structure-example]]
+=== Analyzing Elasticsearch log files
+
+Suppose you have a list of {es} log messages in an index.
+You can analyze them with the `find_field_structure` endpoint as follows:
+
+[source,console]
+----
+POST _bulk?refresh=true
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-url]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,042][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [rest-root]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-core]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-redact]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [ingest-user-agent]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-monitoring]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-s3]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-analytics]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-ent-search]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-autoscaling]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [lang-painless]]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,059][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [lang-expression]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:41,059][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-eql]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:43,291][INFO ][o.e.e.NodeEnvironment    ] [laptop] heap size [16gb], compressed ordinary object pointers [true]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:46,098][INFO ][o.e.x.s.Security         ] [laptop] Security is enabled"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:47,227][INFO ][o.e.x.p.ProfilingPlugin  ] [laptop] Profiling is enabled"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:47,259][INFO ][o.e.x.p.ProfilingPlugin  ] [laptop] profiling index templates will not be installed or reinstalled"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:47,755][INFO ][o.e.i.r.RecoverySettings ] [laptop] using rate limit [40mb] with [default=40mb, read=0b, write=0b, max=0b]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:47,787][INFO ][o.e.d.DiscoveryModule    ] [laptop] using discovery type [multi-node] and seed hosts providers [settings]"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:49,188][INFO ][o.e.n.Node               ] [laptop] initialized"}
+{"index":{"_index":"test-logs"}}
+{"message":"[2024-03-05T10:52:49,199][INFO ][o.e.n.Node               ] [laptop] starting ..."}
+
+GET _text_structure/find_field_structure?index=test-logs&field=message
+----
+// TEST
+
+If the request does not encounter errors, you receive the following result:
+
+[source,console-result]
+----
+{
+  "num_lines_analyzed" : 22,
+  "num_messages_analyzed" : 22,
+  "sample_start" : "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128\n[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-url]\n",
+  "charset" : "UTF-8",
+  "format" : "semi_structured_text",
+  "multiline_start_pattern" : "^\\[\\b\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}",
+  "grok_pattern" : "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*",
+  "ecs_compatibility" : "disabled",
+  "timestamp_field" : "timestamp",
+  "joda_timestamp_formats" : [
+    "ISO8601"
+  ],
+  "java_timestamp_formats" : [
+    "ISO8601"
+  ],
+  "need_client_timezone" : true,
+  "mappings" : {
+    "properties" : {
+      "@timestamp" : {
+        "type" : "date"
+      },
+      "loglevel" : {
+        "type" : "keyword"
+      },
+      "message" : {
+        "type" : "text"
+      }
+    }
+  },
+  "ingest_pipeline" : {
+    "description" : "Ingest pipeline created by text structure finder",
+    "processors" : [
+      {
+        "grok" : {
+          "field" : "message",
+          "patterns" : [
+            "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*"
+          ],
+          "ecs_compatibility" : "disabled"
+        }
+      },
+      {
+        "date" : {
+          "field" : "timestamp",
+          "timezone" : "{{ event.timezone }}",
+          "formats" : [
+            "ISO8601"
+          ]
+        }
+      },
+      {
+        "remove" : {
+          "field" : "timestamp"
+        }
+      }
+    ]
+  },
+  "field_stats" : {
+    "loglevel" : {
+      "count" : 22,
+      "cardinality" : 1,
+      "top_hits" : [
+        {
+          "value" : "INFO",
+          "count" : 22
+        }
+      ]
+    },
+    "message" : {
+      "count" : 22,
+      "cardinality" : 22,
+      "top_hits" : [
+        {
+          "value" : "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-url]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,042][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [rest-root]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [ingest-user-agent]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-core]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-redact]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [lang-painless]]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-s3]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-analytics]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-autoscaling]",
+          "count" : 1
+        }
+      ]
+    },
+    "timestamp" : {
+      "count" : 22,
+      "cardinality" : 14,
+      "earliest" : "2024-03-05T10:52:36,256",
+      "latest" : "2024-03-05T10:52:49,199",
+      "top_hits" : [
+        {
+          "value" : "2024-03-05T10:52:41,044",
+          "count" : 6
+        },
+        {
+          "value" : "2024-03-05T10:52:41,043",
+          "count" : 3
+        },
+        {
+          "value" : "2024-03-05T10:52:41,059",
+          "count" : 2
+        },
+        {
+          "value" : "2024-03-05T10:52:36,256",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:41,038",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:41,042",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:43,291",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:46,098",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:47,227",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:47,259",
+          "count" : 1
+        }
+      ]
+    }
+  }
+}
+----
+// TESTRESPONSE[s/"sample_start" : ".*",/"sample_start" : "$body.sample_start",/]
+// The substitution is because the text is pre-processed by the test harness,
+// so the fields may get reordered in the JSON the endpoint sees
+
+For a detailed description of the response format, or for additional examples
+on ingesting delimited text (such as CSV) or newline-delimited JSON, refer to the
+<<find-structure-examples,examples of the find text structure endpoint>>.

+ 292 - 0
docs/reference/text-structure/apis/find-message-structure.asciidoc

@@ -0,0 +1,292 @@
+[role="xpack"]
+[[find-message-structure]]
+= Find message structure API
+
+Finds the structure of a list of text messages.
+
+[discrete]
+[[find-message-structure-request]]
+== {api-request-title}
+
+`GET _text_structure/find_message_structure` +
+`POST _text_structure/find_message_structure`
+
+[discrete]
+[[find-message-structure-prereqs]]
+== {api-prereq-title}
+
+* If the {es} {security-features} are enabled, you must have `monitor_text_structure` or
+`monitor` cluster privileges to use this API. See
+<<security-privileges>>.
+
+[discrete]
+[[find-message-structure-desc]]
+== {api-description-title}
+
+This API provides a starting point for ingesting data into {es} in a format that
+is suitable for subsequent use with other {stack} functionality. Use this
+API in preference to `find_structure` when your input text has already been
+split up into separate messages by some other process.
+
+The response from the API contains:
+
+* Sample messages.
+* Statistics that reveal the most common values for all fields detected within
+the text and basic numeric statistics for numeric fields.
+* Information about the structure of the text, which is useful when you write
+ingest configurations to index it or similarly formatted text.
+* Appropriate mappings for an {es} index, which you could use to ingest the text.
+
+All this information can be calculated by the structure finder with no guidance.
+However, you can optionally override some of the decisions about the text
+structure by specifying one or more query parameters.
+
+Details of the output can be seen in the <<find-message-structure-examples,examples>>.
+
+If the structure finder produces unexpected results,
+specify the `explain` query parameter and an `explanation` will appear in
+the response. It helps determine why the returned structure was
+chosen.
+
+[discrete]
+[[find-message-structure-query-parms]]
+== {api-query-parms-title}
+
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-column-names]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-delimiter]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-explain]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-format]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-grok-pattern]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-ecs-compatibility]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-quote]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-should-trim-fields]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timeout]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-field]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-format]
+
+[discrete]
+[[find-message-structure-request-body]]
+== {api-request-body-title}
+
+`messages`::
+(Required, array of strings)
+The list of messages you want to analyze.
+
+[discrete]
+[[find-message-structure-examples]]
+== {api-examples-title}
+
+[discrete]
+[[find-message-structure-example]]
+=== Analyzing Elasticsearch log files
+
+Suppose you have a list of {es} log messages.
+You can send them to the `find_message_structure` endpoint as follows:
+
+[source,console]
+----
+POST _text_structure/find_message_structure
+{
+  "messages": [
+    "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128",
+    "[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-url]",
+    "[2024-03-05T10:52:41,042][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [rest-root]",
+    "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-core]",
+    "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-redact]",
+    "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [ingest-user-agent]",
+    "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-monitoring]",
+    "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-s3]",
+    "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-analytics]",
+    "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-ent-search]",
+    "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-autoscaling]",
+    "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [lang-painless]]",
+    "[2024-03-05T10:52:41,059][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [lang-expression]",
+    "[2024-03-05T10:52:41,059][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-eql]",
+    "[2024-03-05T10:52:43,291][INFO ][o.e.e.NodeEnvironment    ] [laptop] heap size [16gb], compressed ordinary object pointers [true]",
+    "[2024-03-05T10:52:46,098][INFO ][o.e.x.s.Security         ] [laptop] Security is enabled",
+    "[2024-03-05T10:52:47,227][INFO ][o.e.x.p.ProfilingPlugin  ] [laptop] Profiling is enabled",
+    "[2024-03-05T10:52:47,259][INFO ][o.e.x.p.ProfilingPlugin  ] [laptop] profiling index templates will not be installed or reinstalled",
+    "[2024-03-05T10:52:47,755][INFO ][o.e.i.r.RecoverySettings ] [laptop] using rate limit [40mb] with [default=40mb, read=0b, write=0b, max=0b]",
+    "[2024-03-05T10:52:47,787][INFO ][o.e.d.DiscoveryModule    ] [laptop] using discovery type [multi-node] and seed hosts providers [settings]",
+    "[2024-03-05T10:52:49,188][INFO ][o.e.n.Node               ] [laptop] initialized",
+    "[2024-03-05T10:52:49,199][INFO ][o.e.n.Node               ] [laptop] starting ..."
+  ]
+}
+----
+// TEST
+
+If the request does not encounter errors, you receive the following result:
+
+[source,console-result]
+----
+{
+  "num_lines_analyzed" : 22,
+  "num_messages_analyzed" : 22,
+  "sample_start" : "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128\n[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-url]\n",
+  "charset" : "UTF-8",
+  "format" : "semi_structured_text",
+  "multiline_start_pattern" : "^\\[\\b\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}",
+  "grok_pattern" : "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*",
+  "ecs_compatibility" : "disabled",
+  "timestamp_field" : "timestamp",
+  "joda_timestamp_formats" : [
+    "ISO8601"
+  ],
+  "java_timestamp_formats" : [
+    "ISO8601"
+  ],
+  "need_client_timezone" : true,
+  "mappings" : {
+    "properties" : {
+      "@timestamp" : {
+        "type" : "date"
+      },
+      "loglevel" : {
+        "type" : "keyword"
+      },
+      "message" : {
+        "type" : "text"
+      }
+    }
+  },
+  "ingest_pipeline" : {
+    "description" : "Ingest pipeline created by text structure finder",
+    "processors" : [
+      {
+        "grok" : {
+          "field" : "message",
+          "patterns" : [
+            "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*"
+          ],
+          "ecs_compatibility" : "disabled"
+        }
+      },
+      {
+        "date" : {
+          "field" : "timestamp",
+          "timezone" : "{{ event.timezone }}",
+          "formats" : [
+            "ISO8601"
+          ]
+        }
+      },
+      {
+        "remove" : {
+          "field" : "timestamp"
+        }
+      }
+    ]
+  },
+  "field_stats" : {
+    "loglevel" : {
+      "count" : 22,
+      "cardinality" : 1,
+      "top_hits" : [
+        {
+          "value" : "INFO",
+          "count" : 22
+        }
+      ]
+    },
+    "message" : {
+      "count" : 22,
+      "cardinality" : 22,
+      "top_hits" : [
+        {
+          "value" : "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-url]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,042][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [rest-root]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [ingest-user-agent]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-core]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-redact]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [lang-painless]]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [repository-s3]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-analytics]",
+          "count" : 1
+        },
+        {
+          "value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService     ] [laptop] loaded module [x-pack-autoscaling]",
+          "count" : 1
+        }
+      ]
+    },
+    "timestamp" : {
+      "count" : 22,
+      "cardinality" : 14,
+      "earliest" : "2024-03-05T10:52:36,256",
+      "latest" : "2024-03-05T10:52:49,199",
+      "top_hits" : [
+        {
+          "value" : "2024-03-05T10:52:41,044",
+          "count" : 6
+        },
+        {
+          "value" : "2024-03-05T10:52:41,043",
+          "count" : 3
+        },
+        {
+          "value" : "2024-03-05T10:52:41,059",
+          "count" : 2
+        },
+        {
+          "value" : "2024-03-05T10:52:36,256",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:41,038",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:41,042",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:43,291",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:46,098",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:47,227",
+          "count" : 1
+        },
+        {
+          "value" : "2024-03-05T10:52:47,259",
+          "count" : 1
+        }
+      ]
+    }
+  }
+}
+----
+// TESTRESPONSE
+
+For a detailed description of the response format, or for additional examples
+on ingesting delimited text (such as CSV) or newline-delimited JSON, refer to the
+<<find-structure-examples,examples of the find text structure endpoint>>.

+ 215 - 0
docs/reference/text-structure/apis/find-structure-shared.asciidoc

@@ -0,0 +1,215 @@
+tag::param-charset[]
+`charset`::
+(Optional, string) The text's character set. It must be a character set that is
+supported by the JVM that {es} uses. For example, `UTF-8`, `UTF-16LE`,
+`windows-1252`, or `EUC-JP`. If this parameter is not specified, the structure
+finder chooses an appropriate character set.
+end::param-charset[]
+
+tag::param-column-names[]
+`column_names`::
+(Optional, string) If you have set `format` to `delimited`, you can specify the
+column names in a comma-separated list. If this parameter is not specified, the
+structure finder uses the column names from the header row of the text. If the
+text does not have a header row, columns are named "column1", "column2",
+"column3", etc.
+end::param-column-names[]
+
+tag::param-delimiter[]
+`delimiter`::
+(Optional, string) If you have set `format` to `delimited`, you can specify the
+character used to delimit the values in each row. Only a single character is
+supported; the delimiter cannot have multiple characters. By default, the API
+considers the following possibilities: comma, tab, semi-colon, and pipe (`|`).
+In this default scenario, all rows must have the same number of fields for the
+delimited format to be detected. If you specify a delimiter, up to 10% of the
+rows can have a different number of columns than the first row.
+end::param-delimiter[]
+
+tag::param-explain[]
+`explain`::
+(Optional, Boolean) If `true`, the response includes a
+field named `explanation`, which is an array of strings that indicate how the
+structure finder produced its result. The default value is `false`.
+end::param-explain[]
+
+tag::param-format[]
+`format`::
+(Optional, string) The high level structure of the text. Valid values are
+`ndjson`, `xml`, `delimited`, and `semi_structured_text`. By default, the API
+chooses the format. In this default scenario, all rows must have the same number
+of fields for a delimited format to be detected. If the `format` is set to
+`delimited` and the `delimiter` is not set, however, the API tolerates up to 5%
+of rows that have a different number of columns than the first row.
+end::param-format[]
+
+tag::param-grok-pattern[]
+`grok_pattern`::
+(Optional, string) If you have set `format` to `semi_structured_text`, you can
+specify a Grok pattern that is used to extract fields from every message in the
+text. The name of the timestamp field in the Grok pattern must match what is
+specified in the `timestamp_field` parameter. If that parameter is not
+specified, the name of the timestamp field in the Grok pattern must match
+"timestamp". If `grok_pattern` is not specified, the structure finder creates a
+Grok pattern.
+end::param-grok-pattern[]
+
+tag::param-ecs-compatibility[]
+`ecs_compatibility`::
+(Optional, string) The mode of compatibility with ECS compliant Grok patterns.
+Use this parameter to specify whether to use ECS Grok patterns instead of
+legacy ones when the structure finder creates a Grok pattern. Valid values
+are `disabled` and `v1`. The default value is `disabled`. This setting primarily
+has an impact when a whole message Grok pattern such as `%{CATALINALOG}`
+matches the input. If the structure finder identifies a common structure but
+has no idea of the meaning, then generic field names such as `path`, `ipaddress`,
+`field1` and `field2` are used in the `grok_pattern` output, with the intention
+that a user who knows the meanings renames these fields before using the pattern.
+end::param-ecs-compatibility[]
+
+tag::param-has-header-row[]
+`has_header_row`::
+(Optional, Boolean) If you have set `format` to `delimited`, you can use this
+parameter to indicate whether the column names are in the first row of the text.
+If this parameter is not specified, the structure finder guesses based on the
+similarity of the first row of the text to other rows.
+end::param-has-header-row[]
+
+tag::param-line-merge-size-limit[]
+`line_merge_size_limit`::
+(Optional, unsigned integer) The maximum number of characters in a message when
+lines are merged to form messages while analyzing semi-structured text. The
+default is `10000`. If you have extremely long messages you may need to increase
+this, but be aware that this may lead to very long processing times if the way
+to group lines into messages is misdetected.
+end::param-line-merge-size-limit[]
+
+tag::param-lines-to-sample[]
+`lines_to_sample`::
+(Optional, unsigned integer) The number of lines to include in the structural
+analysis, starting from the beginning of the text. The minimum is 2; the default
+is `1000`. If the value of this parameter is greater than the number of lines in
+the text, the analysis proceeds (as long as there are at least two lines in the
+text) for all of the lines.
++
+--
+NOTE: The number of lines and the variation of the lines affect the speed of
+the analysis. For example, if you upload text where the first 1000 lines
+are all variations on the same message, the analysis will find more commonality
+than would be seen with a bigger sample. If possible, however, it is more
+efficient to upload sample text with more variety in the first 1000 lines than
+to request analysis of 100000 lines to achieve some variety.
+
+--
+end::param-lines-to-sample[]
+
+tag::param-quote[]
+`quote`::
+(Optional, string) If you have set `format` to `delimited`, you can specify the
+character used to quote the values in each row if they contain newlines or the
+delimiter character. Only a single character is supported. If this parameter is
+not specified, the default value is a double quote (`"`). If your delimited text
+format does not use quoting, a workaround is to set this argument to a character
+that does not appear anywhere in the sample.
+end::param-quote[]
+
+tag::param-should-trim-fields[]
+`should_trim_fields`::
+(Optional, Boolean) If you have set `format` to `delimited`, you can specify
+whether values between delimiters should have whitespace trimmed from them. If
+this parameter is not specified and the delimiter is pipe (`|`), the default
+value is `true`. Otherwise, the default value is `false`.
+end::param-should-trim-fields[]
+
+tag::param-timeout[]
+`timeout`::
+(Optional, <<time-units,time units>>) Sets the maximum amount of time that the
+structure analysis may take. If the analysis is still running when the timeout
+expires then it will be stopped. The default value is 25 seconds.
+end::param-timeout[]
+
+tag::param-timestamp-field[]
+`timestamp_field`::
+(Optional, string) The name of the field that contains the primary timestamp of
+each record in the text. In particular, if the text were ingested into an index,
+this is the field that would be used to populate the `@timestamp` field.
++
+--
+If the `format` is `semi_structured_text`, this field must match the name of the
+appropriate extraction in the `grok_pattern`. Therefore, for semi-structured
+text, it is best not to specify this parameter unless `grok_pattern` is
+also specified.
+
+For structured text, if you specify this parameter, the field must exist
+within the text.
+
+If this parameter is not specified, the structure finder makes a decision about
+which field (if any) is the primary timestamp field. For structured text,
+it is not compulsory to have a timestamp in the text.
+--
+end::param-timestamp-field[]
+
+tag::param-timestamp-format[]
+`timestamp_format`::
+(Optional, string) The Java time format of the timestamp field in the text.
++
+--
+Only a subset of Java time format letter groups are supported:
+
+* `a`
+* `d`
+* `dd`
+* `EEE`
+* `EEEE`
+* `H`
+* `HH`
+* `h`
+* `M`
+* `MM`
+* `MMM`
+* `MMMM`
+* `mm`
+* `ss`
+* `XX`
+* `XXX`
+* `yy`
+* `yyyy`
+* `zzz`
+
+Additionally, `S` letter groups (fractional seconds) of length one to nine are
+supported provided they occur after `ss` and are separated from the `ss` by a `.`,
+`,` or `:`. Spacing and punctuation are also permitted with the exception of `?`,
+newline and carriage return, together with literal text enclosed in single
+quotes. For example, `MM/dd HH.mm.ss,SSSSSS 'in' yyyy` is a valid override
+format.
+
+One valuable use case for this parameter is when the format is semi-structured
+text, there are multiple timestamp formats in the text, and you know which
+format corresponds to the primary timestamp, but you do not want to specify the
+full `grok_pattern`. Another is when the timestamp format is one that the
+structure finder does not consider by default.
+
+If this parameter is not specified, the structure finder chooses the best
+format from a built-in set.
+
+If the special value `null` is specified the structure finder will not look
+for a primary timestamp in the text. When the format is semi-structured text
+this will result in the structure finder treating the text as single-line
+messages.
+
+The following table provides the appropriate `timestamp_format` values for some example timestamps:
+
+|===
+| Timeformat                 | Presentation
+
+| yyyy-MM-dd HH:mm:ssZ       | 2019-04-20 13:15:22+0000
+| EEE, d MMM yyyy HH:mm:ss Z | Sat, 20 Apr 2019 13:15:22 +0000
+| dd.MM.yy HH:mm:ss.SSS      | 20.04.19 13:15:22.285
+|===
+
+Refer to
+https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html[the Java date/time format documentation]
+for more information about date and time format syntax.
+
+--
+end::param-timestamp-format[]

+ 16 - 185
docs/reference/text-structure/apis/find-structure.asciidoc

@@ -1,6 +1,6 @@
 [role="xpack"]
 [[find-structure]]
-= Find structure API
+= Find text structure API
 
 Finds the structure of text. The text must
 contain data that is suitable to be ingested into the
@@ -55,190 +55,21 @@ chosen.
 [[find-structure-query-parms]]
 == {api-query-parms-title}
 
-`charset`::
-(Optional, string) The text's character set. It must be a character set that is
-supported by the JVM that {es} uses. For example, `UTF-8`, `UTF-16LE`,
-`windows-1252`, or `EUC-JP`. If this parameter is not specified, the structure
-finder chooses an appropriate character set.
-
-`column_names`::
-(Optional, string) If you have set `format` to `delimited`, you can specify the
-column names in a comma-separated list. If this parameter is not specified, the
-structure finder uses the column names from the header row of the text. If the
-text does not have a header role, columns are named "column1", "column2",
-"column3", etc.
-
-`delimiter`::
-(Optional, string) If you have set `format` to `delimited`, you can specify the
-character used to delimit the values in each row. Only a single character is
-supported; the delimiter cannot have multiple characters. By default, the API
-considers the following possibilities: comma, tab, semi-colon, and pipe (`|`).
-In this default scenario, all rows must have the same number of fields for the
-delimited format to be detected. If you specify a delimiter, up to 10% of the
-rows can have a different number of columns than the first row.
-
-`explain`::
-(Optional, Boolean) If this parameter is set to `true`, the response includes a
-field named `explanation`, which is an array of strings that indicate how the
-structure finder produced its result. The default value is `false`.
-
-`format`::
-(Optional, string) The high level structure of the text. Valid values are
-`ndjson`, `xml`, `delimited`, and `semi_structured_text`. By default, the API
-chooses the format. In this default scenario, all rows must have the same number
-of fields for a delimited format to be detected. If the `format` is set to
-`delimited` and the `delimiter` is not set, however, the API tolerates up to 5%
-of rows that have a different number of columns than the first row.
-
-`grok_pattern`::
-(Optional, string) If you have set `format` to `semi_structured_text`, you can
-specify a Grok pattern that is used to extract fields from every message in the
-text. The name of the timestamp field in the Grok pattern must match what is
-specified in the `timestamp_field` parameter. If that parameter is not
-specified, the name of the timestamp field in the Grok pattern must match
-"timestamp". If `grok_pattern` is not specified, the structure finder creates a
-Grok pattern.
-
-`ecs_compatibility`::
-(Optional, string) The mode of compatibility with ECS compliant Grok patterns.
-Use this parameter to specify whether to use ECS Grok patterns instead of
-legacy ones when the structure finder creates a Grok pattern. Valid values
-are `disabled` and `v1`. The default value is `disabled`. This setting primarily
-has an impact when a whole message Grok pattern such as `%{CATALINALOG}`
-matches the input. If the structure finder identifies a common structure but
-has no idea of meaning then generic field names such as `path`, `ipaddress`,
-`field1` and `field2` are used in the `grok_pattern` output, with the intention
-that a user who knows the meanings rename these fields before using it.
-`has_header_row`::
-(Optional, Boolean) If you have set `format` to `delimited`, you can use this
-parameter to indicate whether the column names are in the first row of the text.
-If this parameter is not specified, the structure finder guesses based on the
-similarity of the first row of the text to other rows.
-
-`line_merge_size_limit`::
-(Optional, unsigned integer) The maximum number of characters in a message when
-lines are merged to form messages while analyzing semi-structured text. The
-default is `10000`. If you have extremely long messages you may need to increase
-this, but be aware that this may lead to very long processing times if the way
-to group lines into messages is misdetected.
-
-`lines_to_sample`::
-(Optional, unsigned integer) The number of lines to include in the structural
-analysis, starting from the beginning of the text. The minimum is 2; the default
-is `1000`. If the value of this parameter is greater than the number of lines in
-the text, the analysis proceeds (as long as there are at least two lines in the
-text) for all of the lines.
-+
---
-NOTE: The number of lines and the variation of the lines affects the speed of
-the analysis. For example, if you upload text where the first 1000 lines
-are all variations on the same message, the analysis will find more commonality
-than would be seen with a bigger sample. If possible, however, it is more
-efficient to upload sample text with more variety in the first 1000 lines than
-to request analysis of 100000 lines to achieve some variety.
-
---
-
-`quote`::
-(Optional, string) If you have set `format` to `delimited`, you can specify the
-character used to quote the values in each row if they contain newlines or the
-delimiter character. Only a single character is supported. If this parameter is
-not specified, the default value is a double quote (`"`). If your delimited text
-format does not use quoting, a workaround is to set this argument to a character
-that does not appear anywhere in the sample.
-
-`should_trim_fields`::
-(Optional, Boolean) If you have set `format` to `delimited`, you can specify
-whether values between delimiters should have whitespace trimmed from them. If
-this parameter is not specified and the delimiter is pipe (`|`), the default
-value is `true`. Otherwise, the default value is `false`.
-
-`timeout`::
-(Optional, <<time-units,time units>>) Sets the maximum amount of time that the
-structure analysis make take. If the analysis is still running when the timeout
-expires then it will be aborted. The default value is 25 seconds.
-
-`timestamp_field`::
-(Optional, string) The name of the field that contains the primary timestamp of
-each record in the text. In particular, if the text were ingested into an index,
-this is the field that would be used to populate the `@timestamp` field.
-+
---
-If the `format` is `semi_structured_text`, this field must match the name of the
-appropriate extraction in the `grok_pattern`. Therefore, for semi-structured
-text, it is best not to specify this parameter unless `grok_pattern` is
-also specified.
-
-For structured text, if you specify this parameter, the field must exist
-within the text.
-
-If this parameter is not specified, the structure finder makes a decision about
-which field (if any) is the primary timestamp field. For structured text,
-it is not compulsory to have a timestamp in the text.
---
-
-`timestamp_format`::
-(Optional, string) The Java time format of the timestamp field in the text.
-+
---
-Only a subset of Java time format letter groups are supported:
-
-* `a`
-* `d`
-* `dd`
-* `EEE`
-* `EEEE`
-* `H`
-* `HH`
-* `h`
-* `M`
-* `MM`
-* `MMM`
-* `MMMM`
-* `mm`
-* `ss`
-* `XX`
-* `XXX`
-* `yy`
-* `yyyy`
-* `zzz`
-
-Additionally `S` letter groups (fractional seconds) of length one to nine are
-supported providing they occur after `ss` and separated from the `ss` by a `.`,
-`,` or `:`. Spacing and punctuation is also permitted with the exception of `?`,
-newline and carriage return, together with literal text enclosed in single
-quotes. For example, `MM/dd HH.mm.ss,SSSSSS 'in' yyyy` is a valid override
-format.
-
-One valuable use case for this parameter is when the format is semi-structured
-text, there are multiple timestamp formats in the text, and you know which
-format corresponds to the primary timestamp, but you do not want to specify the
-full `grok_pattern`. Another is when the timestamp format is one that the
-structure finder does not consider by default.
-
-If this parameter is not specified, the structure finder chooses the best
-format from a built-in set.
-
-If the special value `null` is specified the structure finder will not look
-for a primary timestamp in the text. When the format is semi-structured text
-this will result in the structure finder treating the text as single-line
-messages.
-
-The following table provides the appropriate `timeformat` values for some example timestamps:
-
-|===
-| Timeformat                 | Presentation
-
-| yyyy-MM-dd HH:mm:ssZ       | 2019-04-20 13:15:22+0000
-| EEE, d MMM yyyy HH:mm:ss Z | Sat, 20 Apr 2019 13:15:22 +0000
-| dd.MM.yy HH:mm:ss.SSS      | 20.04.19 13:15:22.285
-|===
-
-See
-https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html[the Java date/time format documentation]
-for more information about date and time format syntax.
-
---
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-charset]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-column-names]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-delimiter]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-explain]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-format]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-grok-pattern]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-ecs-compatibility]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-has-header-row]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-line-merge-size-limit]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-lines-to-sample]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-quote]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-should-trim-fields]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timeout]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-field]
+include::{es-repo-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-format]
 
 [discrete]
 [[find-structure-request-body]]

+ 4 - 0
docs/reference/text-structure/apis/index.asciidoc

@@ -4,8 +4,12 @@
 
 You can use the following APIs to find text structures:
 
+* <<find-field-structure>>
+* <<find-message-structure>>
 * <<find-structure>>
 * <<test-grok-pattern>>
 
+include::find-field-structure.asciidoc[leveloffset=+2]
+include::find-message-structure.asciidoc[leveloffset=+2]
 include::find-structure.asciidoc[leveloffset=+2]
 include::test-grok-pattern.asciidoc[leveloffset=+2]

+ 90 - 0
rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.find_field_structure.json

@@ -0,0 +1,90 @@
+{
+  "text_structure.find_field_structure":{
+    "documentation":{
+      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/find-field-structure.html",
+      "description":"Finds the structure of a text field in an index."
+    },
+    "stability":"stable",
+    "visibility":"public",
+    "headers":{
+      "accept": ["application/json"]
+    },
+    "url":{
+      "paths":[
+        {
+          "path":"/_text_structure/find_field_structure",
+          "methods":["GET"]
+        }
+      ]
+    },
+    "params":{
+      "index":{
+        "type":"string",
+        "description":"The index containing the analyzed field",
+        "required":true
+      },
+      "field":{
+        "type":"string",
+        "description":"The field that should be analyzed",
+        "required":true
+      },
+      "documents_to_sample":{
+        "type":"int",
+        "description":"How many documents should be included in the analysis",
+        "default":1000
+      },
+      "timeout":{
+        "type":"time",
+        "description":"Timeout after which the analysis will be aborted",
+        "default":"25s"
+      },
+      "format":{
+        "type":"enum",
+        "options":[
+          "ndjson",
+          "xml",
+          "delimited",
+          "semi_structured_text"
+        ],
+        "description":"Optional parameter to specify the high level file format"
+      },
+      "column_names":{
+        "type":"list",
+        "description":"Optional parameter containing a comma separated list of the column names for a delimited file"
+      },
+      "delimiter":{
+        "type":"string",
+        "description":"Optional parameter to specify the delimiter character for a delimited file - must be a single character"
+      },
+      "quote":{
+        "type":"string",
+        "description":"Optional parameter to specify the quote character for a delimited file - must be a single character"
+      },
+      "should_trim_fields":{
+        "type":"boolean",
+        "description":"Optional parameter to specify whether the values between delimiters in a delimited file should have whitespace trimmed from them"
+      },
+      "grok_pattern":{
+        "type":"string",
+        "description":"Optional parameter to specify the Grok pattern that should be used to extract fields from messages in a semi-structured text file"
+      },
+      "ecs_compatibility":{
+        "type":"string",
+        "description":"Optional parameter to specify the compatibility mode with ECS Grok patterns - may be either 'v1' or 'disabled'"
+      },
+      "timestamp_field":{
+        "type":"string",
+        "description":"Optional parameter to specify the timestamp field in the file"
+      },
+      "timestamp_format":{
+        "type":"string",
+        "description":"Optional parameter to specify the timestamp format in the file - may be either a Joda or Java time format"
+      },
+      "explain":{
+        "type":"boolean",
+        "description":"Whether to include a commentary on how the structure was derived",
+        "default":false
+      }
+    }
+  }
+}

+ 80 - 0
rest-api-spec/src/main/resources/rest-api-spec/api/text_structure.find_message_structure.json

@@ -0,0 +1,80 @@
+{
+  "text_structure.find_message_structure":{
+    "documentation":{
+      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/find-message-structure.html",
+      "description":"Finds the structure of a list of messages. The messages must contain data that is suitable to be ingested into Elasticsearch."
+    },
+    "stability":"stable",
+    "visibility":"public",
+    "headers":{
+      "accept": [ "application/json"],
+      "content_type": ["application/json"]
+    },
+    "url":{
+      "paths":[
+        {
+          "path":"/_text_structure/find_message_structure",
+          "methods":["GET", "POST"]
+        }
+      ]
+    },
+    "params":{
+      "timeout":{
+        "type":"time",
+        "description":"Timeout after which the analysis will be aborted",
+        "default":"25s"
+      },
+      "format":{
+        "type":"enum",
+        "options":[
+          "ndjson",
+          "xml",
+          "delimited",
+          "semi_structured_text"
+        ],
+        "description":"Optional parameter to specify the high level file format"
+      },
+      "column_names":{
+        "type":"list",
+        "description":"Optional parameter containing a comma separated list of the column names for a delimited file"
+      },
+      "delimiter":{
+        "type":"string",
+        "description":"Optional parameter to specify the delimiter character for a delimited file - must be a single character"
+      },
+      "quote":{
+        "type":"string",
+        "description":"Optional parameter to specify the quote character for a delimited file - must be a single character"
+      },
+      "should_trim_fields":{
+        "type":"boolean",
+        "description":"Optional parameter to specify whether the values between delimiters in a delimited file should have whitespace trimmed from them"
+      },
+      "grok_pattern":{
+        "type":"string",
+        "description":"Optional parameter to specify the Grok pattern that should be used to extract fields from messages in a semi-structured text file"
+      },
+      "ecs_compatibility":{
+        "type":"string",
+        "description":"Optional parameter to specify the compatibility mode with ECS Grok patterns - may be either 'v1' or 'disabled'"
+      },
+      "timestamp_field":{
+        "type":"string",
+        "description":"Optional parameter to specify the timestamp field in the file"
+      },
+      "timestamp_format":{
+        "type":"string",
+        "description":"Optional parameter to specify the timestamp format in the file - may be either a Joda or Java time format"
+      },
+      "explain":{
+        "type":"boolean",
+        "description":"Whether to include a commentary on how the structure was derived",
+        "default":false
+      }
+    },
+    "body":{
+      "description":"JSON object with one field [messages], containing an array of messages to be analyzed",
+      "required":true
+    }
+  }
+}

+ 377 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/AbstractFindStructureRequest.java

@@ -0,0 +1,377 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.textstructure.action;
+
+import org.elasticsearch.TransportVersions;
+import org.elasticsearch.action.ActionRequest;
+import org.elasticsearch.action.ActionRequestValidationException;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.grok.GrokBuiltinPatterns;
+import org.elasticsearch.xcontent.ParseField;
+import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+
+import static org.elasticsearch.action.ValidateActions.addValidationError;
+
+public abstract class AbstractFindStructureRequest extends ActionRequest {
+
+    public static final int MIN_SAMPLE_LINE_COUNT = 2;
+
+    public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
+    public static final ParseField DOCUMENTS_TO_SAMPLE = new ParseField("documents_to_sample");
+    public static final ParseField LINE_MERGE_SIZE_LIMIT = new ParseField("line_merge_size_limit");
+    public static final ParseField TIMEOUT = new ParseField("timeout");
+    public static final ParseField CHARSET = TextStructure.CHARSET;
+    public static final ParseField FORMAT = TextStructure.FORMAT;
+    public static final ParseField COLUMN_NAMES = TextStructure.COLUMN_NAMES;
+    public static final ParseField HAS_HEADER_ROW = TextStructure.HAS_HEADER_ROW;
+    public static final ParseField DELIMITER = TextStructure.DELIMITER;
+    public static final ParseField QUOTE = TextStructure.QUOTE;
+    public static final ParseField SHOULD_TRIM_FIELDS = TextStructure.SHOULD_TRIM_FIELDS;
+    public static final ParseField GROK_PATTERN = TextStructure.GROK_PATTERN;
+    // This one is plural in FileStructure, but singular in FileStructureOverrides
+    public static final ParseField TIMESTAMP_FORMAT = new ParseField("timestamp_format");
+    public static final ParseField TIMESTAMP_FIELD = TextStructure.TIMESTAMP_FIELD;
+
+    public static final ParseField ECS_COMPATIBILITY = TextStructure.ECS_COMPATIBILITY;
+
+    private static final String ARG_INCOMPATIBLE_WITH_FORMAT_TEMPLATE = "[%s] may only be specified if ["
+        + FORMAT.getPreferredName()
+        + "] is [%s]";
+
+    private Integer linesToSample;
+    private Integer lineMergeSizeLimit;
+    private TimeValue timeout;
+    private String charset;
+    private TextStructure.Format format;
+    private List<String> columnNames;
+    private Boolean hasHeaderRow;
+    private Character delimiter;
+    private Character quote;
+    private Boolean shouldTrimFields;
+    private String grokPattern;
+    private String ecsCompatibility;
+    private String timestampFormat;
+    private String timestampField;
+
+    AbstractFindStructureRequest() {}
+
+    AbstractFindStructureRequest(StreamInput in) throws IOException {
+        super(in);
+        linesToSample = in.readOptionalVInt();
+        lineMergeSizeLimit = in.readOptionalVInt();
+        timeout = in.readOptionalTimeValue();
+        charset = in.readOptionalString();
+        format = in.readBoolean() ? in.readEnum(TextStructure.Format.class) : null;
+        columnNames = in.readBoolean() ? in.readStringCollectionAsList() : null;
+        hasHeaderRow = in.readOptionalBoolean();
+        delimiter = in.readBoolean() ? (char) in.readVInt() : null;
+        quote = in.readBoolean() ? (char) in.readVInt() : null;
+        shouldTrimFields = in.readOptionalBoolean();
+        grokPattern = in.readOptionalString();
+        if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_5_0)) {
+            ecsCompatibility = in.readOptionalString();
+        } else {
+            ecsCompatibility = null;
+        }
+        timestampFormat = in.readOptionalString();
+        timestampField = in.readOptionalString();
+    }
+
+    public Integer getLinesToSample() {
+        return linesToSample;
+    }
+
+    public void setLinesToSample(Integer linesToSample) {
+        this.linesToSample = linesToSample;
+    }
+
+    public Integer getLineMergeSizeLimit() {
+        return lineMergeSizeLimit;
+    }
+
+    public void setLineMergeSizeLimit(Integer lineMergeSizeLimit) {
+        this.lineMergeSizeLimit = lineMergeSizeLimit;
+    }
+
+    public TimeValue getTimeout() {
+        return timeout;
+    }
+
+    public void setTimeout(TimeValue timeout) {
+        this.timeout = timeout;
+    }
+
+    public String getCharset() {
+        return charset;
+    }
+
+    public void setCharset(String charset) {
+        this.charset = (charset == null || charset.isEmpty()) ? null : charset;
+    }
+
+    public TextStructure.Format getFormat() {
+        return format;
+    }
+
+    public void setFormat(TextStructure.Format format) {
+        this.format = format;
+    }
+
+    public void setFormat(String format) {
+        this.format = (format == null || format.isEmpty()) ? null : TextStructure.Format.fromString(format);
+    }
+
+    public List<String> getColumnNames() {
+        return columnNames;
+    }
+
+    public void setColumnNames(List<String> columnNames) {
+        this.columnNames = (columnNames == null || columnNames.isEmpty()) ? null : columnNames;
+    }
+
+    public void setColumnNames(String[] columnNames) {
+        this.columnNames = (columnNames == null || columnNames.length == 0) ? null : Arrays.asList(columnNames);
+    }
+
+    public Boolean getHasHeaderRow() {
+        return hasHeaderRow;
+    }
+
+    public void setHasHeaderRow(Boolean hasHeaderRow) {
+        this.hasHeaderRow = hasHeaderRow;
+    }
+
+    public Character getDelimiter() {
+        return delimiter;
+    }
+
+    public void setDelimiter(Character delimiter) {
+        this.delimiter = delimiter;
+    }
+
+    public void setDelimiter(String delimiter) {
+        if (delimiter == null || delimiter.isEmpty()) {
+            this.delimiter = null;
+        } else if (delimiter.length() == 1) {
+            this.delimiter = delimiter.charAt(0);
+        } else {
+            throw new IllegalArgumentException(DELIMITER.getPreferredName() + " must be a single character");
+        }
+    }
+
+    public Character getQuote() {
+        return quote;
+    }
+
+    public void setQuote(Character quote) {
+        this.quote = quote;
+    }
+
+    public void setQuote(String quote) {
+        if (quote == null || quote.isEmpty()) {
+            this.quote = null;
+        } else if (quote.length() == 1) {
+            this.quote = quote.charAt(0);
+        } else {
+            throw new IllegalArgumentException(QUOTE.getPreferredName() + " must be a single character");
+        }
+    }
+
+    public Boolean getShouldTrimFields() {
+        return shouldTrimFields;
+    }
+
+    public void setShouldTrimFields(Boolean shouldTrimFields) {
+        this.shouldTrimFields = shouldTrimFields;
+    }
+
+    public String getGrokPattern() {
+        return grokPattern;
+    }
+
+    public void setGrokPattern(String grokPattern) {
+        this.grokPattern = (grokPattern == null || grokPattern.isEmpty()) ? null : grokPattern;
+    }
+
+    public String getEcsCompatibility() {
+        return ecsCompatibility;
+    }
+
+    public void setEcsCompatibility(String ecsCompatibility) {
+        this.ecsCompatibility = (ecsCompatibility == null || ecsCompatibility.isEmpty()) ? null : ecsCompatibility;
+    }
+
+    public String getTimestampFormat() {
+        return timestampFormat;
+    }
+
+    public void setTimestampFormat(String timestampFormat) {
+        this.timestampFormat = (timestampFormat == null || timestampFormat.isEmpty()) ? null : timestampFormat;
+    }
+
+    public String getTimestampField() {
+        return timestampField;
+    }
+
+    public void setTimestampField(String timestampField) {
+        this.timestampField = (timestampField == null || timestampField.isEmpty()) ? null : timestampField;
+    }
+
+    private static ActionRequestValidationException addIncompatibleArgError(
+        ParseField arg,
+        TextStructure.Format format,
+        ActionRequestValidationException validationException
+    ) {
+        return addValidationError(
+            String.format(Locale.ROOT, ARG_INCOMPATIBLE_WITH_FORMAT_TEMPLATE, arg.getPreferredName(), format),
+            validationException
+        );
+    }
+
+    @Override
+    public ActionRequestValidationException validate() {
+        ActionRequestValidationException validationException = null;
+        if (linesToSample != null && linesToSample < MIN_SAMPLE_LINE_COUNT) {
+            validationException = addValidationError(
+                "[" + LINES_TO_SAMPLE.getPreferredName() + "] must be at least [" + MIN_SAMPLE_LINE_COUNT + "] if specified",
+                validationException
+            );
+        }
+        if (lineMergeSizeLimit != null && lineMergeSizeLimit <= 0) {
+            validationException = addValidationError(
+                "[" + LINE_MERGE_SIZE_LIMIT.getPreferredName() + "] must be positive if specified",
+                validationException
+            );
+        }
+        if (format != TextStructure.Format.DELIMITED) {
+            if (columnNames != null) {
+                validationException = addIncompatibleArgError(COLUMN_NAMES, TextStructure.Format.DELIMITED, validationException);
+            }
+            if (hasHeaderRow != null) {
+                validationException = addIncompatibleArgError(HAS_HEADER_ROW, TextStructure.Format.DELIMITED, validationException);
+            }
+            if (delimiter != null) {
+                validationException = addIncompatibleArgError(DELIMITER, TextStructure.Format.DELIMITED, validationException);
+            }
+            if (quote != null) {
+                validationException = addIncompatibleArgError(QUOTE, TextStructure.Format.DELIMITED, validationException);
+            }
+            if (shouldTrimFields != null) {
+                validationException = addIncompatibleArgError(SHOULD_TRIM_FIELDS, TextStructure.Format.DELIMITED, validationException);
+            }
+        }
+        if (format != TextStructure.Format.SEMI_STRUCTURED_TEXT) {
+            if (grokPattern != null) {
+                validationException = addIncompatibleArgError(GROK_PATTERN, TextStructure.Format.SEMI_STRUCTURED_TEXT, validationException);
+            }
+        }
+
+        if (ecsCompatibility != null && GrokBuiltinPatterns.isValidEcsCompatibilityMode(ecsCompatibility) == false) {
+            validationException = addValidationError(
+                "["
+                    + ECS_COMPATIBILITY.getPreferredName()
+                    + "] must be one of ["
+                    + String.join(", ", GrokBuiltinPatterns.ECS_COMPATIBILITY_MODES)
+                    + "] if specified",
+                validationException
+            );
+        }
+
+        return validationException;
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        super.writeTo(out);
+        out.writeOptionalVInt(linesToSample);
+        out.writeOptionalVInt(lineMergeSizeLimit);
+        out.writeOptionalTimeValue(timeout);
+        out.writeOptionalString(charset);
+        if (format == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeEnum(format);
+        }
+        if (columnNames == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeStringCollection(columnNames);
+        }
+        out.writeOptionalBoolean(hasHeaderRow);
+        if (delimiter == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeVInt(delimiter);
+        }
+        if (quote == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeVInt(quote);
+        }
+        out.writeOptionalBoolean(shouldTrimFields);
+        out.writeOptionalString(grokPattern);
+        if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_5_0)) {
+            out.writeOptionalString(ecsCompatibility);
+        }
+        out.writeOptionalString(timestampFormat);
+        out.writeOptionalString(timestampField);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(
+            linesToSample,
+            lineMergeSizeLimit,
+            timeout,
+            charset,
+            format,
+            columnNames,
+            hasHeaderRow,
+            delimiter,
+            grokPattern,
+            ecsCompatibility,
+            timestampFormat,
+            timestampField
+        );
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+        AbstractFindStructureRequest that = (AbstractFindStructureRequest) other;
+        return Objects.equals(this.linesToSample, that.linesToSample)
+            && Objects.equals(this.lineMergeSizeLimit, that.lineMergeSizeLimit)
+            && Objects.equals(this.timeout, that.timeout)
+            && Objects.equals(this.charset, that.charset)
+            && Objects.equals(this.format, that.format)
+            && Objects.equals(this.columnNames, that.columnNames)
+            && Objects.equals(this.hasHeaderRow, that.hasHeaderRow)
+            && Objects.equals(this.delimiter, that.delimiter)
+            && Objects.equals(this.grokPattern, that.grokPattern)
+            && Objects.equals(this.ecsCompatibility, that.ecsCompatibility)
+            && Objects.equals(this.timestampFormat, that.timestampFormat)
+            && Objects.equals(this.timestampField, that.timestampField);
+    }
+}

+ 98 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindFieldStructureAction.java

@@ -0,0 +1,98 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.core.textstructure.action;
+
+import org.elasticsearch.action.ActionRequestValidationException;
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.xcontent.ParseField;
+
+import java.io.IOException;
+import java.util.Objects;
+
+import static org.elasticsearch.action.ValidateActions.addValidationError;
+
+/**
+ * Action type registered under {@link #NAME} whose response is a {@link FindStructureResponse}.
+ * Its {@link Request} names an index and a field on top of the options inherited from
+ * {@link AbstractFindStructureRequest}.
+ */
+public class FindFieldStructureAction extends ActionType<FindStructureResponse> {
+
+    public static final FindFieldStructureAction INSTANCE = new FindFieldStructureAction();
+    public static final String NAME = "cluster:monitor/text_structure/find_field_structure";
+
+    private FindFieldStructureAction() {
+        super(NAME);
+    }
+
+    /** Request carrying the mandatory {@code index} and {@code field} plus the inherited options. */
+    public static class Request extends AbstractFindStructureRequest {
+
+        public static final ParseField INDEX = new ParseField("index");
+        public static final ParseField FIELD = new ParseField("field");
+
+        private String index;
+        private String field;
+
+        public Request() {}
+
+        /**
+         * Reads a request from the wire: the inherited options first, then index, then field.
+         *
+         * @param in the stream to read from
+         * @throws IOException if reading from the stream fails
+         */
+        public Request(StreamInput in) throws IOException {
+            super(in);
+            this.index = in.readString();
+            this.field = in.readString();
+        }
+
+        public String getIndex() {
+            return this.index;
+        }
+
+        public void setIndex(String index) {
+            this.index = index;
+        }
+
+        public String getField() {
+            return this.field;
+        }
+
+        public void setField(String field) {
+            this.field = field;
+        }
+
+        /**
+         * Runs the inherited validation, then requires both index and field to be non-empty.
+         *
+         * @return the accumulated validation errors, or {@code null} if the request is valid
+         */
+        @Override
+        public ActionRequestValidationException validate() {
+            ActionRequestValidationException errors = super.validate();
+            final boolean indexMissing = Strings.isNullOrEmpty(index);
+            if (indexMissing) {
+                errors = addValidationError("index must be specified", errors);
+            }
+            final boolean fieldMissing = Strings.isNullOrEmpty(field);
+            if (fieldMissing) {
+                errors = addValidationError("field must be specified", errors);
+            }
+            return errors;
+        }
+
+        /** Writes the inherited options followed by index and field, mirroring the stream constructor. */
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            super.writeTo(out);
+            out.writeString(this.index);
+            out.writeString(this.field);
+        }
+
+        @Override
+        public int hashCode() {
+            final int inheritedHash = super.hashCode();
+            return Objects.hash(inheritedHash, field, index);
+        }
+
+        /** Equal when the other object has the same class, equal inherited options, index and field. */
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (other == null || other.getClass() != getClass()) {
+                return false;
+            }
+            if (super.equals(other) == false) {
+                return false;
+            }
+            final Request request = (Request) other;
+            return Objects.equals(index, request.index) && Objects.equals(field, request.field);
+        }
+    }
+}

+ 97 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindMessageStructureAction.java

@@ -0,0 +1,97 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.core.textstructure.action;
+
+import org.elasticsearch.action.ActionRequestValidationException;
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.xcontent.ObjectParser;
+import org.elasticsearch.xcontent.ParseField;
+import org.elasticsearch.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+import static org.elasticsearch.action.ValidateActions.addValidationError;
+
+/**
+ * Action type registered under {@link #NAME} whose response is a {@link FindStructureResponse}.
+ * Its {@link Request} carries a list of messages on top of the options inherited from
+ * {@link AbstractFindStructureRequest}.
+ */
+public class FindMessageStructureAction extends ActionType<FindStructureResponse> {
+
+    public static final FindMessageStructureAction INSTANCE = new FindMessageStructureAction();
+    public static final String NAME = "cluster:monitor/text_structure/find_message_structure";
+
+    private FindMessageStructureAction() {
+        super(NAME);
+    }
+
+    /** Request carrying the mandatory {@code messages} list plus the inherited options. */
+    public static class Request extends AbstractFindStructureRequest {
+
+        public static final ParseField MESSAGES = new ParseField("messages");
+
+        private List<String> messages;
+
+        // Parser for the request body; it only knows the [messages] string array.
+        private static final ObjectParser<Request, Void> PARSER = new ObjectParser<>(
+            "text_structure/find_message_structure",
+            false,
+            Request::new
+        );
+
+        static {
+            PARSER.declareStringArray(Request::setMessages, MESSAGES);
+        }
+
+        public Request() {}
+
+        /**
+         * Reads a request from the wire: the inherited options first, then the messages.
+         *
+         * @param in the stream to read from
+         * @throws IOException if reading from the stream fails
+         */
+        public Request(StreamInput in) throws IOException {
+            super(in);
+            this.messages = in.readStringCollectionAsList();
+        }
+
+        /**
+         * Builds a request from a body containing the {@code messages} array.
+         *
+         * @param parser the parser positioned on the request body
+         * @return a new request with the messages taken from the body
+         * @throws IOException if the body cannot be parsed
+         */
+        public static Request parseRequest(XContentParser parser) throws IOException {
+            return PARSER.parse(parser, null);
+        }
+
+        public List<String> getMessages() {
+            return this.messages;
+        }
+
+        public void setMessages(List<String> messages) {
+            this.messages = messages;
+        }
+
+        /**
+         * Runs the inherited validation, then requires a non-empty list of messages.
+         *
+         * @return the accumulated validation errors, or {@code null} if the request is valid
+         */
+        @Override
+        public ActionRequestValidationException validate() {
+            ActionRequestValidationException errors = super.validate();
+            final boolean messagesMissing = messages == null || messages.isEmpty();
+            if (messagesMissing) {
+                errors = addValidationError("messages must be specified", errors);
+            }
+            return errors;
+        }
+
+        /** Writes the inherited options followed by the messages, mirroring the stream constructor. */
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            super.writeTo(out);
+            out.writeStringCollection(this.messages);
+        }
+
+        @Override
+        public int hashCode() {
+            final int inheritedHash = super.hashCode();
+            return Objects.hash(inheritedHash, messages);
+        }
+
+        /** Equal when the other object has the same class, equal inherited options and equal messages. */
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (other == null || other.getClass() != getClass()) {
+                return false;
+            }
+            if (super.equals(other) == false) {
+                return false;
+            }
+            final Request request = (Request) other;
+            return Objects.equals(messages, request.messages);
+        }
+    }
+}

+ 5 - 384
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindStructureAction.java

@@ -6,290 +6,37 @@
  */
 package org.elasticsearch.xpack.core.textstructure.action;
 
-import org.elasticsearch.TransportVersions;
-import org.elasticsearch.action.ActionRequest;
 import org.elasticsearch.action.ActionRequestValidationException;
-import org.elasticsearch.action.ActionResponse;
 import org.elasticsearch.action.ActionType;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.io.stream.Writeable;
-import org.elasticsearch.core.TimeValue;
-import org.elasticsearch.grok.GrokBuiltinPatterns;
-import org.elasticsearch.xcontent.ParseField;
-import org.elasticsearch.xcontent.ToXContentObject;
-import org.elasticsearch.xcontent.XContentBuilder;
-import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
 
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Locale;
 import java.util.Objects;
 
 import static org.elasticsearch.action.ValidateActions.addValidationError;
 
-public class FindStructureAction extends ActionType<FindStructureAction.Response> {
+public class FindStructureAction extends ActionType<FindStructureResponse> {
 
     public static final FindStructureAction INSTANCE = new FindStructureAction();
     public static final String NAME = "cluster:monitor/text_structure/findstructure";
 
-    public static final int MIN_SAMPLE_LINE_COUNT = 2;
-
     private FindStructureAction() {
         super(NAME);
     }
 
-    public static class Response extends ActionResponse implements ToXContentObject, Writeable {
-
-        private final TextStructure textStructure;
-
-        public Response(TextStructure textStructure) {
-            this.textStructure = textStructure;
-        }
-
-        Response(StreamInput in) throws IOException {
-            super(in);
-            textStructure = new TextStructure(in);
-        }
-
-        @Override
-        public void writeTo(StreamOutput out) throws IOException {
-            textStructure.writeTo(out);
-        }
-
-        @Override
-        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
-            textStructure.toXContent(builder, params);
-            return builder;
-        }
-
-        @Override
-        public int hashCode() {
-            return Objects.hash(textStructure);
-        }
-
-        @Override
-        public boolean equals(Object other) {
-
-            if (this == other) {
-                return true;
-            }
-
-            if (other == null || getClass() != other.getClass()) {
-                return false;
-            }
-
-            FindStructureAction.Response that = (FindStructureAction.Response) other;
-            return Objects.equals(textStructure, that.textStructure);
-        }
-    }
-
-    public static class Request extends ActionRequest {
-
-        public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
-        public static final ParseField LINE_MERGE_SIZE_LIMIT = new ParseField("line_merge_size_limit");
-        public static final ParseField TIMEOUT = new ParseField("timeout");
-        public static final ParseField CHARSET = TextStructure.CHARSET;
-        public static final ParseField FORMAT = TextStructure.FORMAT;
-        public static final ParseField COLUMN_NAMES = TextStructure.COLUMN_NAMES;
-        public static final ParseField HAS_HEADER_ROW = TextStructure.HAS_HEADER_ROW;
-        public static final ParseField DELIMITER = TextStructure.DELIMITER;
-        public static final ParseField QUOTE = TextStructure.QUOTE;
-        public static final ParseField SHOULD_TRIM_FIELDS = TextStructure.SHOULD_TRIM_FIELDS;
-        public static final ParseField GROK_PATTERN = TextStructure.GROK_PATTERN;
-        // This one is plural in FileStructure, but singular in FileStructureOverrides
-        public static final ParseField TIMESTAMP_FORMAT = new ParseField("timestamp_format");
-        public static final ParseField TIMESTAMP_FIELD = TextStructure.TIMESTAMP_FIELD;
+    public static class Request extends AbstractFindStructureRequest {
 
-        public static final ParseField ECS_COMPATIBILITY = TextStructure.ECS_COMPATIBILITY;
-
-        private static final String ARG_INCOMPATIBLE_WITH_FORMAT_TEMPLATE = "[%s] may only be specified if ["
-            + FORMAT.getPreferredName()
-            + "] is [%s]";
-
-        private Integer linesToSample;
-        private Integer lineMergeSizeLimit;
-        private TimeValue timeout;
-        private String charset;
-        private TextStructure.Format format;
-        private List<String> columnNames;
-        private Boolean hasHeaderRow;
-        private Character delimiter;
-        private Character quote;
-        private Boolean shouldTrimFields;
-        private String grokPattern;
-        private String ecsCompatibility;
-        private String timestampFormat;
-        private String timestampField;
         private BytesReference sample;
 
         public Request() {}
 
         public Request(StreamInput in) throws IOException {
             super(in);
-            linesToSample = in.readOptionalVInt();
-            lineMergeSizeLimit = in.readOptionalVInt();
-            timeout = in.readOptionalTimeValue();
-            charset = in.readOptionalString();
-            format = in.readBoolean() ? in.readEnum(TextStructure.Format.class) : null;
-            columnNames = in.readBoolean() ? in.readStringCollectionAsList() : null;
-            hasHeaderRow = in.readOptionalBoolean();
-            delimiter = in.readBoolean() ? (char) in.readVInt() : null;
-            quote = in.readBoolean() ? (char) in.readVInt() : null;
-            shouldTrimFields = in.readOptionalBoolean();
-            grokPattern = in.readOptionalString();
-            if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_5_0)) {
-                ecsCompatibility = in.readOptionalString();
-            } else {
-                ecsCompatibility = null;
-            }
-            timestampFormat = in.readOptionalString();
-            timestampField = in.readOptionalString();
             sample = in.readBytesReference();
         }
 
-        public Integer getLinesToSample() {
-            return linesToSample;
-        }
-
-        public void setLinesToSample(Integer linesToSample) {
-            this.linesToSample = linesToSample;
-        }
-
-        public Integer getLineMergeSizeLimit() {
-            return lineMergeSizeLimit;
-        }
-
-        public void setLineMergeSizeLimit(Integer lineMergeSizeLimit) {
-            this.lineMergeSizeLimit = lineMergeSizeLimit;
-        }
-
-        public TimeValue getTimeout() {
-            return timeout;
-        }
-
-        public void setTimeout(TimeValue timeout) {
-            this.timeout = timeout;
-        }
-
-        public String getCharset() {
-            return charset;
-        }
-
-        public void setCharset(String charset) {
-            this.charset = (charset == null || charset.isEmpty()) ? null : charset;
-        }
-
-        public TextStructure.Format getFormat() {
-            return format;
-        }
-
-        public void setFormat(TextStructure.Format format) {
-            this.format = format;
-        }
-
-        public void setFormat(String format) {
-            this.format = (format == null || format.isEmpty()) ? null : TextStructure.Format.fromString(format);
-        }
-
-        public List<String> getColumnNames() {
-            return columnNames;
-        }
-
-        public void setColumnNames(List<String> columnNames) {
-            this.columnNames = (columnNames == null || columnNames.isEmpty()) ? null : columnNames;
-        }
-
-        public void setColumnNames(String[] columnNames) {
-            this.columnNames = (columnNames == null || columnNames.length == 0) ? null : Arrays.asList(columnNames);
-        }
-
-        public Boolean getHasHeaderRow() {
-            return hasHeaderRow;
-        }
-
-        public void setHasHeaderRow(Boolean hasHeaderRow) {
-            this.hasHeaderRow = hasHeaderRow;
-        }
-
-        public Character getDelimiter() {
-            return delimiter;
-        }
-
-        public void setDelimiter(Character delimiter) {
-            this.delimiter = delimiter;
-        }
-
-        public void setDelimiter(String delimiter) {
-            if (delimiter == null || delimiter.isEmpty()) {
-                this.delimiter = null;
-            } else if (delimiter.length() == 1) {
-                this.delimiter = delimiter.charAt(0);
-            } else {
-                throw new IllegalArgumentException(DELIMITER.getPreferredName() + " must be a single character");
-            }
-        }
-
-        public Character getQuote() {
-            return quote;
-        }
-
-        public void setQuote(Character quote) {
-            this.quote = quote;
-        }
-
-        public void setQuote(String quote) {
-            if (quote == null || quote.isEmpty()) {
-                this.quote = null;
-            } else if (quote.length() == 1) {
-                this.quote = quote.charAt(0);
-            } else {
-                throw new IllegalArgumentException(QUOTE.getPreferredName() + " must be a single character");
-            }
-        }
-
-        public Boolean getShouldTrimFields() {
-            return shouldTrimFields;
-        }
-
-        public void setShouldTrimFields(Boolean shouldTrimFields) {
-            this.shouldTrimFields = shouldTrimFields;
-        }
-
-        public String getGrokPattern() {
-            return grokPattern;
-        }
-
-        public void setGrokPattern(String grokPattern) {
-            this.grokPattern = (grokPattern == null || grokPattern.isEmpty()) ? null : grokPattern;
-        }
-
-        public String getEcsCompatibility() {
-            return ecsCompatibility;
-        }
-
-        public void setEcsCompatibility(String ecsCompatibility) {
-            this.ecsCompatibility = (ecsCompatibility == null || ecsCompatibility.isEmpty()) ? null : ecsCompatibility;
-        }
-
-        public String getTimestampFormat() {
-            return timestampFormat;
-        }
-
-        public void setTimestampFormat(String timestampFormat) {
-            this.timestampFormat = (timestampFormat == null || timestampFormat.isEmpty()) ? null : timestampFormat;
-        }
-
-        public String getTimestampField() {
-            return timestampField;
-        }
-
-        public void setTimestampField(String timestampField) {
-            this.timestampField = (timestampField == null || timestampField.isEmpty()) ? null : timestampField;
-        }
-
         public BytesReference getSample() {
             return sample;
         }
@@ -298,70 +45,9 @@ public class FindStructureAction extends ActionType<FindStructureAction.Response
             this.sample = sample;
         }
 
-        private static ActionRequestValidationException addIncompatibleArgError(
-            ParseField arg,
-            TextStructure.Format format,
-            ActionRequestValidationException validationException
-        ) {
-            return addValidationError(
-                String.format(Locale.ROOT, ARG_INCOMPATIBLE_WITH_FORMAT_TEMPLATE, arg.getPreferredName(), format),
-                validationException
-            );
-        }
-
         @Override
         public ActionRequestValidationException validate() {
-            ActionRequestValidationException validationException = null;
-            if (linesToSample != null && linesToSample < MIN_SAMPLE_LINE_COUNT) {
-                validationException = addValidationError(
-                    "[" + LINES_TO_SAMPLE.getPreferredName() + "] must be at least [" + MIN_SAMPLE_LINE_COUNT + "] if specified",
-                    validationException
-                );
-            }
-            if (lineMergeSizeLimit != null && lineMergeSizeLimit <= 0) {
-                validationException = addValidationError(
-                    "[" + LINE_MERGE_SIZE_LIMIT.getPreferredName() + "] must be positive if specified",
-                    validationException
-                );
-            }
-            if (format != TextStructure.Format.DELIMITED) {
-                if (columnNames != null) {
-                    validationException = addIncompatibleArgError(COLUMN_NAMES, TextStructure.Format.DELIMITED, validationException);
-                }
-                if (hasHeaderRow != null) {
-                    validationException = addIncompatibleArgError(HAS_HEADER_ROW, TextStructure.Format.DELIMITED, validationException);
-                }
-                if (delimiter != null) {
-                    validationException = addIncompatibleArgError(DELIMITER, TextStructure.Format.DELIMITED, validationException);
-                }
-                if (quote != null) {
-                    validationException = addIncompatibleArgError(QUOTE, TextStructure.Format.DELIMITED, validationException);
-                }
-                if (shouldTrimFields != null) {
-                    validationException = addIncompatibleArgError(SHOULD_TRIM_FIELDS, TextStructure.Format.DELIMITED, validationException);
-                }
-            }
-            if (format != TextStructure.Format.SEMI_STRUCTURED_TEXT) {
-                if (grokPattern != null) {
-                    validationException = addIncompatibleArgError(
-                        GROK_PATTERN,
-                        TextStructure.Format.SEMI_STRUCTURED_TEXT,
-                        validationException
-                    );
-                }
-            }
-
-            if (ecsCompatibility != null && GrokBuiltinPatterns.isValidEcsCompatibilityMode(ecsCompatibility) == false) {
-                validationException = addValidationError(
-                    "["
-                        + ECS_COMPATIBILITY.getPreferredName()
-                        + "] must be one of ["
-                        + String.join(", ", GrokBuiltinPatterns.ECS_COMPATIBILITY_MODES)
-                        + "] if specified",
-                    validationException
-                );
-            }
-
+            ActionRequestValidationException validationException = super.validate();
             if (sample == null || sample.length() == 0) {
                 validationException = addValidationError("sample must be specified", validationException);
             }
@@ -371,89 +57,24 @@ public class FindStructureAction extends ActionType<FindStructureAction.Response
         @Override
         public void writeTo(StreamOutput out) throws IOException {
             super.writeTo(out);
-            out.writeOptionalVInt(linesToSample);
-            out.writeOptionalVInt(lineMergeSizeLimit);
-            out.writeOptionalTimeValue(timeout);
-            out.writeOptionalString(charset);
-            if (format == null) {
-                out.writeBoolean(false);
-            } else {
-                out.writeBoolean(true);
-                out.writeEnum(format);
-            }
-            if (columnNames == null) {
-                out.writeBoolean(false);
-            } else {
-                out.writeBoolean(true);
-                out.writeStringCollection(columnNames);
-            }
-            out.writeOptionalBoolean(hasHeaderRow);
-            if (delimiter == null) {
-                out.writeBoolean(false);
-            } else {
-                out.writeBoolean(true);
-                out.writeVInt(delimiter);
-            }
-            if (quote == null) {
-                out.writeBoolean(false);
-            } else {
-                out.writeBoolean(true);
-                out.writeVInt(quote);
-            }
-            out.writeOptionalBoolean(shouldTrimFields);
-            out.writeOptionalString(grokPattern);
-            if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_5_0)) {
-                out.writeOptionalString(ecsCompatibility);
-            }
-            out.writeOptionalString(timestampFormat);
-            out.writeOptionalString(timestampField);
             out.writeBytesReference(sample);
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(
-                linesToSample,
-                lineMergeSizeLimit,
-                timeout,
-                charset,
-                format,
-                columnNames,
-                hasHeaderRow,
-                delimiter,
-                grokPattern,
-                ecsCompatibility,
-                timestampFormat,
-                timestampField,
-                sample
-            );
+            return Objects.hash(super.hashCode(), sample); // presumably super hashes the shared options
         }
 
         @Override
         public boolean equals(Object other) {
-
             if (this == other) {
                 return true;
             }
-
             if (other == null || getClass() != other.getClass()) {
                 return false;
             }
-
             Request that = (Request) other;
-            return Objects.equals(this.linesToSample, that.linesToSample)
-                && Objects.equals(this.lineMergeSizeLimit, that.lineMergeSizeLimit)
-                && Objects.equals(this.timeout, that.timeout)
-                && Objects.equals(this.charset, that.charset)
-                && Objects.equals(this.format, that.format)
-                && Objects.equals(this.columnNames, that.columnNames)
-                && Objects.equals(this.hasHeaderRow, that.hasHeaderRow)
-                && Objects.equals(this.delimiter, that.delimiter)
-                && Objects.equals(this.grokPattern, that.grokPattern)
-                && Objects.equals(this.ecsCompatibility, that.ecsCompatibility)
-                && Objects.equals(this.timestampFormat, that.timestampFormat)
-                && Objects.equals(this.timestampField, that.timestampField)
-                && Objects.equals(this.sample, that.sample);
+            return super.equals(other) && Objects.equals(this.sample, that.sample); // only sample is compared locally
         }
     }
 }

+ 61 - 0
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/textstructure/action/FindStructureResponse.java

@@ -0,0 +1,61 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.textstructure.action;
+
+import org.elasticsearch.action.ActionResponse;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.xcontent.ToXContentObject;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
+
+import java.io.IOException;
+import java.util.Objects;
+
+public class FindStructureResponse extends ActionResponse implements ToXContentObject, Writeable {
+    // Action response whose only state is a single TextStructure.
+    private final TextStructure textStructure;
+
+    public FindStructureResponse(TextStructure textStructure) {
+        this.textStructure = textStructure;
+    }
+
+    FindStructureResponse(StreamInput in) throws IOException { // stream-reading constructor
+        super(in);
+        textStructure = new TextStructure(in);
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        textStructure.writeTo(out); // the wrapped structure is the only thing written here
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        textStructure.toXContent(builder, params); // rendering is delegated to TextStructure
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(textStructure);
+    }
+
+    @Override
+    public boolean equals(Object other) { // equality depends only on the wrapped TextStructure
+        if (this == other) {
+            return true;
+        }
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+        FindStructureResponse that = (FindStructureResponse) other;
+        return Objects.equals(textStructure, that.textStructure);
+    }
+}

+ 0 - 29
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/textstructure/action/FindTextStructureActionResponseTests.java

@@ -1,29 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-package org.elasticsearch.xpack.core.textstructure.action;
-
-import org.elasticsearch.common.io.stream.Writeable;
-import org.elasticsearch.test.AbstractWireSerializingTestCase;
-import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructureTests;
-
-public class FindTextStructureActionResponseTests extends AbstractWireSerializingTestCase<FindStructureAction.Response> {
-
-    @Override
-    protected FindStructureAction.Response createTestInstance() {
-        return new FindStructureAction.Response(TextStructureTests.createTestFileStructure());
-    }
-
-    @Override
-    protected FindStructureAction.Response mutateInstance(FindStructureAction.Response instance) {
-        return null;// TODO implement https://github.com/elastic/elasticsearch/issues/25929
-    }
-
-    @Override
-    protected Writeable.Reader<FindStructureAction.Response> instanceReader() {
-        return FindStructureAction.Response::new;
-    }
-}

+ 33 - 0
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/textstructure/action/FindTextStructureResponseTests.java

@@ -0,0 +1,33 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.core.textstructure.action;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.test.AbstractWireSerializingTestCase;
+import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructureTests;
+
+public class FindTextStructureResponseTests extends AbstractWireSerializingTestCase<FindStructureResponse> {
+    // Serialization test case for FindStructureResponse, using TextStructureTests fixtures.
+    @Override
+    protected FindStructureResponse createTestInstance() {
+        return new FindStructureResponse(TextStructureTests.createTestFileStructure());
+    }
+
+    @Override
+    protected FindStructureResponse mutateInstance(FindStructureResponse response) {
+        FindStructureResponse newResponse;
+        do { // NOTE(review): only terminates if createTestFileStructure() varies between calls - confirm
+            newResponse = createTestInstance();
+        } while (response.equals(newResponse)); // retry until the new instance differs from the input
+        return newResponse;
+    }
+
+    @Override
+    protected Writeable.Reader<FindStructureResponse> instanceReader() {
+        return FindStructureResponse::new; // the StreamInput constructor
+    }
+}

+ 2 - 0
x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java

@@ -348,6 +348,8 @@ public class Constants {
         "cluster:monitor/task",
         "cluster:monitor/task/get",
         "cluster:monitor/tasks/lists",
+        "cluster:monitor/text_structure/find_field_structure",
+        "cluster:monitor/text_structure/find_message_structure",
         "cluster:monitor/text_structure/findstructure",
         "cluster:monitor/text_structure/test_grok_pattern",
         "cluster:monitor/transform/get",

+ 63 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/find_field_structure.yml

@@ -0,0 +1,63 @@
+setup:
+  - do:
+      indices.create:
+        index: airlines
+        body:
+          mappings:
+            properties:
+              message:
+                type: text
+  - do:
+      bulk:
+        refresh: true
+        body:
+          - index:
+              _index: airlines
+          - message: "{\"airline\": \"AAL\", \"responsetime\": 132.2046, \"sourcetype\": \"text-structure-test\", \"time\": 1403481600}"
+          - index:
+              _index: airlines
+          - message: "{\"airline\": \"JZA\", \"responsetime\": 990.4628, \"sourcetype\": \"text-structure-test\", \"time\": 1403481700}"
+          - index:
+              _index: airlines
+          - message: "{\"airline\": \"AAL\", \"responsetime\": 134.2046, \"sourcetype\": \"text-structure-test\", \"time\": 1403481800}"
+---
+"Field structure finder with JSON messages":
+  - do:
+      text_structure.find_field_structure:
+        index: airlines
+        field: message
+        documents_to_sample: 3
+        timeout: 10s
+  - match: { num_lines_analyzed: 3 }
+  - match: { num_messages_analyzed: 3 }
+  - match: { charset: "UTF-8" }
+  - match: { has_byte_order_marker: null }
+  - match: { format: ndjson }
+  - match: { timestamp_field: time }
+  - match: { joda_timestamp_formats.0: UNIX }
+  - match: { java_timestamp_formats.0: UNIX }
+  - match: { need_client_timezone: false }
+  - match: { mappings.properties.airline.type: keyword }
+  - match: { mappings.properties.responsetime.type: double }
+  - match: { mappings.properties.sourcetype.type: keyword }
+  - match: { mappings.properties.time.type: date }
+  - match: { mappings.properties.time.format: epoch_second }
+  - match: { ingest_pipeline.description: "Ingest pipeline created by text structure finder" }
+  - match: { ingest_pipeline.processors.0.date.field: time }
+  - match: { ingest_pipeline.processors.0.date.formats.0: UNIX }
+  - match: { field_stats.airline.count: 3 }
+  - match: { field_stats.airline.cardinality: 2 }
+  - match: { field_stats.responsetime.count: 3 }
+  - match: { field_stats.responsetime.cardinality: 3 }
+  - match: { field_stats.responsetime.min_value: 132.2046 }
+  - match: { field_stats.responsetime.max_value: 990.4628 }
+  # Not asserting on field_stats.responsetime.mean as it's a recurring decimal
+  # so its representation in the response could cause spurious failures
+  - match: { field_stats.responsetime.median_value: 134.2046 }
+  - match: { field_stats.sourcetype.count: 3 }
+  - match: { field_stats.sourcetype.cardinality: 1 }
+  - match: { field_stats.time.count: 3 }
+  - match: { field_stats.time.cardinality: 3 }
+  - match: { field_stats.time.earliest: "1403481600" }
+  - match: { field_stats.time.latest: "1403481800" }
+  - is_false: explanation

+ 56 - 0
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/text_structure/find_message_structure.yml

@@ -0,0 +1,56 @@
+"Messages structure finder with JSON messages":
+  - do:
+      text_structure.find_message_structure:
+        timeout: 10s
+        body:
+          messages:
+            - "{\"airline\": \"AAL\", \"responsetime\": 132.2046, \"sourcetype\": \"text-structure-test\", \"time\": 1403481600}"
+            - "{\"airline\": \"JZA\", \"responsetime\": 990.4628, \"sourcetype\": \"text-structure-test\", \"time\": 1403481700}"
+            - "{\"airline\": \"AAL\", \"responsetime\": 134.2046, \"sourcetype\": \"text-structure-test\", \"time\": 1403481800}"
+  - match: { num_lines_analyzed: 3 }
+  - match: { num_messages_analyzed: 3 }
+  - match: { charset: "UTF-8" }
+  - match: { has_byte_order_marker: null }
+  - match: { format: ndjson }
+  - match: { timestamp_field: time }
+  - match: { joda_timestamp_formats.0: UNIX }
+  - match: { java_timestamp_formats.0: UNIX }
+  - match: { need_client_timezone: false }
+  - match: { mappings.properties.airline.type: keyword }
+  - match: { mappings.properties.responsetime.type: double }
+  - match: { mappings.properties.sourcetype.type: keyword }
+  - match: { mappings.properties.time.type: date }
+  - match: { mappings.properties.time.format: epoch_second }
+  - match: { ingest_pipeline.description: "Ingest pipeline created by text structure finder" }
+  - match: { ingest_pipeline.processors.0.date.field: time }
+  - match: { ingest_pipeline.processors.0.date.formats.0: UNIX }
+  - match: { field_stats.airline.count: 3 }
+  - match: { field_stats.airline.cardinality: 2 }
+  - match: { field_stats.responsetime.count: 3 }
+  - match: { field_stats.responsetime.cardinality: 3 }
+  - match: { field_stats.responsetime.min_value: 132.2046 }
+  - match: { field_stats.responsetime.max_value: 990.4628 }
+  # Not asserting on field_stats.responsetime.mean as it's a recurring decimal
+  # so its representation in the response could cause spurious failures
+  - match: { field_stats.responsetime.median_value: 134.2046 }
+  - match: { field_stats.sourcetype.count: 3 }
+  - match: { field_stats.sourcetype.cardinality: 1 }
+  - match: { field_stats.time.count: 3 }
+  - match: { field_stats.time.cardinality: 3 }
+  - match: { field_stats.time.earliest: "1403481600" }
+  - match: { field_stats.time.latest: "1403481800" }
+  - is_false: explanation
+---
+"Messages structure finder with log messages":
+  - do:
+      text_structure.find_message_structure:
+        timeout: 10s
+        body:
+          messages:
+            - "2019-05-16 16:56:14 line 1 abcdefghijklmnopqrstuvwxyz"
+            - "2019-05-16 16:56:14 line 2 abcdefghijklmnopqrstuvwxyz\ncontinuation...\ncontinuation...\n"
+            - "2019-05-16 16:56:14 line 3 abcdefghijklmnopqrstuvwxyz"
+  - match: { num_lines_analyzed: 3 }
+  - match: { num_messages_analyzed: 3 }
+  - match: { format: semi_structured_text }
+  - match: { grok_pattern: "%{TIMESTAMP_ISO8601:timestamp} .*? %{INT:field} .*" }

+ 1 - 1
x-pack/plugin/text-structure/qa/text-structure-with-security/build.gradle

@@ -9,7 +9,7 @@ dependencies {
 restResources {
   restApi {
     // needed for template installation, etc.
-    include '_common', 'indices', 'text_structure'
+    include '_common', 'bulk', 'indices', 'text_structure'
   }
   restTests {
     includeXpack 'text_structure'

+ 12 - 0
x-pack/plugin/text-structure/qa/text-structure-with-security/roles.yml

@@ -6,3 +6,15 @@ minimal:
     # This is always required because the REST client uses it to find the version of
     # Elasticsearch it's talking to
     - cluster:monitor/main
+  indices:
+    # Give all users involved in these tests access to the indices where the data to
+    # be analyzed is stored.
+    - names: [ 'airlines' ]
+      privileges:
+        - create_index
+        - indices:admin/refresh
+        - read
+        - write
+        - view_index_metadata
+        - indices:data/write/bulk
+        - indices:data/write/index

+ 14 - 1
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/TextStructurePlugin.java

@@ -21,10 +21,16 @@ import org.elasticsearch.plugins.ActionPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.rest.RestController;
 import org.elasticsearch.rest.RestHandler;
+import org.elasticsearch.xpack.core.textstructure.action.FindFieldStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.FindMessageStructureAction;
 import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction;
 import org.elasticsearch.xpack.core.textstructure.action.TestGrokPatternAction;
+import org.elasticsearch.xpack.textstructure.rest.RestFindFieldStructureAction;
+import org.elasticsearch.xpack.textstructure.rest.RestFindMessageStructureAction;
 import org.elasticsearch.xpack.textstructure.rest.RestFindStructureAction;
 import org.elasticsearch.xpack.textstructure.rest.RestTestGrokPatternAction;
+import org.elasticsearch.xpack.textstructure.transport.TransportFindFieldStructureAction;
+import org.elasticsearch.xpack.textstructure.transport.TransportFindMessageStructureAction;
 import org.elasticsearch.xpack.textstructure.transport.TransportFindStructureAction;
 import org.elasticsearch.xpack.textstructure.transport.TransportTestGrokPatternAction;
 
@@ -53,12 +59,19 @@ public class TextStructurePlugin extends Plugin implements ActionPlugin {
         Supplier<DiscoveryNodes> nodesInCluster,
         Predicate<NodeFeature> clusterSupportsFeature
     ) {
-        return Arrays.asList(new RestFindStructureAction(), new RestTestGrokPatternAction());
+        return Arrays.asList(
+            new RestFindFieldStructureAction(),
+            new RestFindMessageStructureAction(),
+            new RestFindStructureAction(),
+            new RestTestGrokPatternAction()
+        );
     }
 
     @Override
     public List<ActionHandler<? extends ActionRequest, ? extends ActionResponse>> getActions() {
         return Arrays.asList(
+            new ActionHandler<>(FindFieldStructureAction.INSTANCE, TransportFindFieldStructureAction.class),
+            new ActionHandler<>(FindMessageStructureAction.INSTANCE, TransportFindMessageStructureAction.class),
             new ActionHandler<>(FindStructureAction.INSTANCE, TransportFindStructureAction.class),
             new ActionHandler<>(TestGrokPatternAction.INSTANCE, TransportTestGrokPatternAction.class)
         );

+ 51 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindFieldStructureAction.java

@@ -0,0 +1,51 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.textstructure.rest;
+
+import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.rest.BaseRestHandler;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.Scope;
+import org.elasticsearch.rest.ServerlessScope;
+import org.elasticsearch.rest.action.RestToXContentListener;
+import org.elasticsearch.xpack.core.textstructure.action.FindFieldStructureAction;
+import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import static org.elasticsearch.rest.RestRequest.Method.GET;
+import static org.elasticsearch.xpack.textstructure.TextStructurePlugin.BASE_PATH;
+
+@ServerlessScope(Scope.INTERNAL)
+public class RestFindFieldStructureAction extends BaseRestHandler {
+    // REST handler that builds a FindFieldStructureAction.Request from URL parameters and executes it.
+    @Override
+    public List<Route> routes() {
+        return List.of(new Route(GET, BASE_PATH + "find_field_structure")); // GET is the only route
+    }
+
+    @Override
+    public String getName() {
+        return "text_structure_find_field_structure_action";
+    }
+
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) {
+        FindFieldStructureAction.Request request = new FindFieldStructureAction.Request();
+        RestFindStructureArgumentsParser.parse(restRequest, request); // copies the common arguments from the REST request
+        request.setIndex(restRequest.param(FindFieldStructureAction.Request.INDEX.getPreferredName()));
+        request.setField(restRequest.param(FindFieldStructureAction.Request.FIELD.getPreferredName()));
+        return channel -> client.execute(FindFieldStructureAction.INSTANCE, request, new RestToXContentListener<>(channel));
+    }
+
+    @Override
+    protected Set<String> responseParams() {
+        return Collections.singleton(TextStructure.EXPLAIN); // presumably consumed when the response is rendered
+    }
+}

+ 55 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindMessageStructureAction.java

@@ -0,0 +1,55 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.textstructure.rest;
+
+import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.rest.BaseRestHandler;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.Scope;
+import org.elasticsearch.rest.ServerlessScope;
+import org.elasticsearch.rest.action.RestToXContentListener;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xpack.core.textstructure.action.FindMessageStructureAction;
+import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import static org.elasticsearch.rest.RestRequest.Method.GET;
+import static org.elasticsearch.rest.RestRequest.Method.POST;
+import static org.elasticsearch.xpack.textstructure.TextStructurePlugin.BASE_PATH;
+
+@ServerlessScope(Scope.INTERNAL)
+public class RestFindMessageStructureAction extends BaseRestHandler {
+    // REST handler that parses a FindMessageStructureAction.Request from the request content and executes it.
+    @Override
+    public List<Route> routes() { // the same path is registered for both GET and POST
+        return List.of(new Route(GET, BASE_PATH + "find_message_structure"), new Route(POST, BASE_PATH + "find_message_structure"));
+    }
+
+    @Override
+    public String getName() {
+        return "text_structure_find_message_structure_action";
+    }
+
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
+        FindMessageStructureAction.Request request;
+        try (XContentParser parser = restRequest.contentOrSourceParamParser()) { // parser is closed once the request is built
+            request = FindMessageStructureAction.Request.parseRequest(parser);
+        }
+        RestFindStructureArgumentsParser.parse(restRequest, request); // URL arguments are applied after the content is parsed
+        return channel -> client.execute(FindMessageStructureAction.INSTANCE, request, new RestToXContentListener<>(channel));
+    }
+
+    @Override
+    protected Set<String> responseParams() {
+        return Collections.singleton(TextStructure.EXPLAIN); // presumably consumed when the response is rendered
+    }
+}

+ 2 - 36
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java

@@ -9,7 +9,6 @@ package org.elasticsearch.xpack.textstructure.rest;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.client.internal.node.NodeClient;
 import org.elasticsearch.core.RestApiVersion;
-import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.rest.Scope;
@@ -17,12 +16,10 @@ import org.elasticsearch.rest.ServerlessScope;
 import org.elasticsearch.rest.action.RestToXContentListener;
 import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
-import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinderManager;
 
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
-import java.util.concurrent.TimeUnit;
 
 import static org.elasticsearch.rest.RestRequest.Method.POST;
 import static org.elasticsearch.xpack.textstructure.TextStructurePlugin.BASE_PATH;
@@ -30,8 +27,6 @@ import static org.elasticsearch.xpack.textstructure.TextStructurePlugin.BASE_PAT
 @ServerlessScope(Scope.INTERNAL)
 public class RestFindStructureAction extends BaseRestHandler {
 
-    private static final TimeValue DEFAULT_TIMEOUT = new TimeValue(25, TimeUnit.SECONDS);
-
     @Override
     public List<Route> routes() {
         return List.of(
@@ -46,38 +41,9 @@ public class RestFindStructureAction extends BaseRestHandler {
 
     @Override
     protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) {
-
         FindStructureAction.Request request = new FindStructureAction.Request();
-        request.setLinesToSample(
-            restRequest.paramAsInt(
-                FindStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(),
-                TextStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT
-            )
-        );
-        request.setLineMergeSizeLimit(
-            restRequest.paramAsInt(
-                FindStructureAction.Request.LINE_MERGE_SIZE_LIMIT.getPreferredName(),
-                TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT
-            )
-        );
-        request.setTimeout(
-            TimeValue.parseTimeValue(
-                restRequest.param(FindStructureAction.Request.TIMEOUT.getPreferredName()),
-                DEFAULT_TIMEOUT,
-                FindStructureAction.Request.TIMEOUT.getPreferredName()
-            )
-        );
-        request.setCharset(restRequest.param(FindStructureAction.Request.CHARSET.getPreferredName()));
-        request.setFormat(restRequest.param(FindStructureAction.Request.FORMAT.getPreferredName()));
-        request.setColumnNames(restRequest.paramAsStringArray(FindStructureAction.Request.COLUMN_NAMES.getPreferredName(), null));
-        request.setHasHeaderRow(restRequest.paramAsBoolean(FindStructureAction.Request.HAS_HEADER_ROW.getPreferredName(), null));
-        request.setDelimiter(restRequest.param(FindStructureAction.Request.DELIMITER.getPreferredName()));
-        request.setQuote(restRequest.param(FindStructureAction.Request.QUOTE.getPreferredName()));
-        request.setShouldTrimFields(restRequest.paramAsBoolean(FindStructureAction.Request.SHOULD_TRIM_FIELDS.getPreferredName(), null));
-        request.setGrokPattern(restRequest.param(FindStructureAction.Request.GROK_PATTERN.getPreferredName()));
-        request.setEcsCompatibility(restRequest.param(FindStructureAction.Request.ECS_COMPATIBILITY.getPreferredName()));
-        request.setTimestampFormat(restRequest.param(FindStructureAction.Request.TIMESTAMP_FORMAT.getPreferredName()));
-        request.setTimestampField(restRequest.param(FindStructureAction.Request.TIMESTAMP_FIELD.getPreferredName()));
+        RestFindStructureArgumentsParser.parse(restRequest, request);
+
         if (restRequest.hasContent()) {
             request.setSample(restRequest.content());
         } else {

+ 73 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureArgumentsParser.java

@@ -0,0 +1,73 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.textstructure.rest;
+
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.xpack.core.textstructure.action.AbstractFindStructureRequest;
+import org.elasticsearch.xpack.core.textstructure.action.FindFieldStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.FindMessageStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction;
+import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinderManager;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Copies the query parameters of a text structure REST request onto the
+ * matching transport request. Which parameters are read depends on the
+ * concrete type of the request being populated.
+ */
+public class RestFindStructureArgumentsParser {
+
+    private static final TimeValue DEFAULT_TIMEOUT = new TimeValue(25, TimeUnit.SECONDS);
+
+    /**
+     * Populates {@code request} from the parameters of {@code restRequest}.
+     *
+     * @param restRequest the REST request whose parameters are read
+     * @param request     the request object that receives the parsed values
+     */
+    static void parse(RestRequest restRequest, AbstractFindStructureRequest request) {
+        parseTypeSpecificArguments(restRequest, request);
+        parseCommonArguments(restRequest, request);
+        disableHeaderRowForDelimitedMessages(request);
+    }
+
+    /** Reads the parameters that are only consulted for particular request types. */
+    private static void parseTypeSpecificArguments(RestRequest restRequest, AbstractFindStructureRequest request) {
+        if (request instanceof FindStructureAction.Request) {
+            int linesToSample = restRequest.paramAsInt(
+                FindStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(),
+                TextStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT
+            );
+            request.setLinesToSample(linesToSample);
+
+            int lineMergeSizeLimit = restRequest.paramAsInt(
+                FindStructureAction.Request.LINE_MERGE_SIZE_LIMIT.getPreferredName(),
+                TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT
+            );
+            request.setLineMergeSizeLimit(lineMergeSizeLimit);
+
+            String charset = restRequest.param(FindStructureAction.Request.CHARSET.getPreferredName());
+            request.setCharset(charset);
+
+            Boolean hasHeaderRow = restRequest.paramAsBoolean(FindStructureAction.Request.HAS_HEADER_ROW.getPreferredName(), null);
+            request.setHasHeaderRow(hasHeaderRow);
+        } else if (request instanceof FindFieldStructureAction.Request) {
+            // For the field structure request the sample size comes from the "documents to sample" parameter
+            int documentsToSample = restRequest.paramAsInt(
+                FindStructureAction.Request.DOCUMENTS_TO_SAMPLE.getPreferredName(),
+                TextStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT
+            );
+            request.setLinesToSample(documentsToSample);
+        }
+    }
+
+    /** Reads the parameters that are consulted for every request type. */
+    private static void parseCommonArguments(RestRequest restRequest, AbstractFindStructureRequest request) {
+        String timeoutName = FindStructureAction.Request.TIMEOUT.getPreferredName();
+        TimeValue timeout = TimeValue.parseTimeValue(restRequest.param(timeoutName), DEFAULT_TIMEOUT, timeoutName);
+        request.setTimeout(timeout);
+
+        request.setFormat(restRequest.param(FindStructureAction.Request.FORMAT.getPreferredName()));
+        request.setColumnNames(restRequest.paramAsStringArray(FindStructureAction.Request.COLUMN_NAMES.getPreferredName(), null));
+        request.setDelimiter(restRequest.param(FindStructureAction.Request.DELIMITER.getPreferredName()));
+        request.setQuote(restRequest.param(FindStructureAction.Request.QUOTE.getPreferredName()));
+        request.setShouldTrimFields(restRequest.paramAsBoolean(FindStructureAction.Request.SHOULD_TRIM_FIELDS.getPreferredName(), null));
+        request.setGrokPattern(restRequest.param(FindStructureAction.Request.GROK_PATTERN.getPreferredName()));
+        request.setEcsCompatibility(restRequest.param(FindStructureAction.Request.ECS_COMPATIBILITY.getPreferredName()));
+        request.setTimestampFormat(restRequest.param(FindStructureAction.Request.TIMESTAMP_FORMAT.getPreferredName()));
+        request.setTimestampField(restRequest.param(FindStructureAction.Request.TIMESTAMP_FIELD.getPreferredName()));
+    }
+
+    /** Message and field structure requests with the delimited format are always marked as having no header row. */
+    private static void disableHeaderRowForDelimitedMessages(AbstractFindStructureRequest request) {
+        boolean messageBased = request instanceof FindMessageStructureAction.Request
+            || request instanceof FindFieldStructureAction.Request;
+        if (messageBased && TextStructure.Format.DELIMITED.equals(request.getFormat())) {
+            request.setHasHeaderRow(false);
+        }
+    }
+}

+ 31 - 23
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinder.java

@@ -44,7 +44,7 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
     private final List<String> sampleMessages;
     private final TextStructure structure;
 
-    static DelimitedTextStructureFinder makeDelimitedTextStructureFinder(
+    static DelimitedTextStructureFinder createFromSample(
         List<String> explanation,
         String sample,
         String charsetName,
@@ -590,6 +590,36 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
         return false;
     }
 
+    /**
+     * Checks whether a list of messages can be interpreted as delimited text.
+     * <p>
+     * Every message must parse to exactly one delimited row. If that holds, the messages are
+     * joined with newlines and the combined text is checked by the sample-based logic.
+     *
+     * @param explanation List of reasons for making decisions; a reason is appended when a message is rejected.
+     * @param messages The messages to check, one list element per message.
+     * @param minFieldsPerRow Passed through unchanged to the sample-based check.
+     * @param csvPreference The delimited-format settings used to parse each message.
+     * @param formatName Human readable name of the format, used in explanation messages.
+     * @param allowedFractionOfBadLines Passed through unchanged to the sample-based check.
+     * @return <code>true</code> if every message is a single delimited row and the joined
+     *         sample passes the sample-based check, otherwise <code>false</code>.
+     */
+    static boolean canCreateFromMessages(
+        List<String> explanation,
+        List<String> messages,
+        int minFieldsPerRow,
+        CsvPreference csvPreference,
+        String formatName,
+        double allowedFractionOfBadLines
+    ) {
+        for (String message : messages) {
+            try (CsvListReader csvReader = new CsvListReader(new StringReader(message), csvPreference)) {
+                if (csvReader.read() == null) {
+                    explanation.add(format("Not %s because message with no lines: [%s]", formatName, message));
+                    return false;
+                }
+                if (csvReader.read() != null) {
+                    explanation.add(format("Not %s because message with multiple lines: [%s]", formatName, message));
+                    return false;
+                }
+            } catch (IOException | SuperCsvException e) {
+                // SuperCsvException is unchecked, so it must be caught explicitly: a message that is
+                // not well-formed for this format should rule the format out rather than abort detection
+                explanation.add(format("Not %s because there was a parsing exception: [%s]", formatName, e.getMessage()));
+                return false;
+            }
+        }
+
+        // Every line contains a single valid delimited message, so
+        // we can safely concatenate and run the logic for a sample.
+        String sample = String.join("\n", messages);
+        return canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName, allowedFractionOfBadLines);
+    }
+
     static boolean canCreateFromSample(
         List<String> explanation,
         String sample,
@@ -598,7 +628,6 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
         String formatName,
         double allowedFractionOfBadLines
     ) {
-
         // Logstash's CSV parser won't tolerate fields where just part of the
         // value is quoted, whereas SuperCSV will, hence this extra check
         String[] sampleLines = sample.split("\n");
@@ -619,7 +648,6 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
         try (CsvListReader csvReader = new CsvListReader(new StringReader(sample), csvPreference)) {
 
             int fieldsInFirstRow = -1;
-            int fieldsInLastRow = -1;
 
             List<Integer> illFormattedRows = new ArrayList<>();
             int numberOfRows = 0;
@@ -643,7 +671,6 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
                             );
                             return false;
                         }
-                        fieldsInLastRow = fieldsInFirstRow;
                         continue;
                     }
 
@@ -676,26 +703,7 @@ public class DelimitedTextStructureFinder implements TextStructureFinder {
                             );
                             return false;
                         }
-                        continue;
                     }
-
-                    fieldsInLastRow = fieldsInThisRow;
-                }
-
-                if (fieldsInLastRow > fieldsInFirstRow) {
-                    explanation.add(
-                        "Not "
-                            + formatName
-                            + " because last row has more fields than first row: ["
-                            + fieldsInFirstRow
-                            + "] and ["
-                            + fieldsInLastRow
-                            + "]"
-                    );
-                    return false;
-                }
-                if (fieldsInLastRow < fieldsInFirstRow) {
-                    --numberOfRows;
                 }
             } catch (SuperCsvException e) {
                 // Tolerate an incomplete last row

+ 39 - 1
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinderFactory.java

@@ -67,6 +67,22 @@ public class DelimitedTextStructureFinderFactory implements TextStructureFinderF
         );
     }
 
+    /**
+     * Checks whether the supplied messages match the delimited format this factory is configured for.
+     * The work is delegated to {@link DelimitedTextStructureFinder}; this method only derives the
+     * human readable format name from the configured delimiter.
+     *
+     * @param explanation List of reasons for making decisions; passed on to the delegate.
+     * @param messages The messages to check.
+     * @param allowedFractionOfBadLines Passed through unchanged to the delegate.
+     * @return the delegate's verdict.
+     */
+    @Override
+    public boolean canCreateFromMessages(List<String> explanation, List<String> messages, double allowedFractionOfBadLines) {
+        String formatName = switch ((char) csvPreference.getDelimiterChar()) {
+            case ',' -> "CSV";
+            case '\t' -> "TSV";
+            default -> Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
+        };
+        return DelimitedTextStructureFinder.canCreateFromMessages(
+            explanation,
+            messages,
+            minFieldsPerRow,
+            csvPreference,
+            formatName,
+            allowedFractionOfBadLines
+        );
+    }
+
     @Override
     public TextStructureFinder createFromSample(
         List<String> explanation,
@@ -78,7 +94,7 @@ public class DelimitedTextStructureFinderFactory implements TextStructureFinderF
         TimeoutChecker timeoutChecker
     ) throws IOException {
         CsvPreference adjustedCsvPreference = new CsvPreference.Builder(csvPreference).maxLinesPerRow(lineMergeSizeLimit).build();
-        return DelimitedTextStructureFinder.makeDelimitedTextStructureFinder(
+        return DelimitedTextStructureFinder.createFromSample(
             explanation,
             sample,
             charsetName,
@@ -89,4 +105,26 @@ public class DelimitedTextStructureFinderFactory implements TextStructureFinderF
             timeoutChecker
         );
     }
+
+    /**
+     * Creates a delimited structure finder from a list of messages by joining them with
+     * newlines and running the sample-based creation logic on the result. The charset
+     * is reported as UTF-8 and no byte order marker information is supplied.
+     *
+     * @param explanation List of reasons for making decisions; passed on to the finder.
+     * @param messages The messages to analyse, one list element per message.
+     * @param overrides User supplied overrides; passed on to the finder.
+     * @param timeoutChecker Passed on to the finder.
+     * @return a structure finder built from the joined messages.
+     */
+    @Override
+    public TextStructureFinder createFromMessages(
+        List<String> explanation,
+        List<String> messages,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) throws IOException {
+        // DelimitedTextStructureFinderFactory::canCreateFromMessages already
+        // checked that every line contains a single valid delimited message,
+        // so we can safely concatenate and run the logic for a sample.
+        String sample = String.join("\n", messages);
+        return DelimitedTextStructureFinder.createFromSample(
+            explanation,
+            sample,
+            "UTF-8",
+            null,
+            csvPreference,
+            trimFields,
+            overrides,
+            timeoutChecker
+        );
+    }
 }

+ 125 - 16
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinder.java

@@ -36,7 +36,6 @@ public class LogTextStructureFinder implements TextStructureFinder {
         String[] sampleLines,
         String charsetName,
         Boolean hasByteOrderMarker,
-        int lineMergeSizeLimit,
         TextStructureOverrides overrides,
         TimeoutChecker timeoutChecker
     ) {
@@ -108,12 +107,9 @@ public class LogTextStructureFinder implements TextStructureFinder {
         return new LogTextStructureFinder(sampleMessages, structure);
     }
 
-    private static LogTextStructureFinder makeMultiLineLogTextStructureFinder(
+    private static TimestampFormatFinder getTimestampFormatFinder(
         List<String> explanation,
         String[] sampleLines,
-        String charsetName,
-        Boolean hasByteOrderMarker,
-        int lineMergeSizeLimit,
         TextStructureOverrides overrides,
         TimeoutChecker timeoutChecker
     ) {
@@ -145,15 +141,20 @@ public class LogTextStructureFinder implements TextStructureFinder {
                 + timestampFormatFinder.getJavaTimestampFormats()
         );
 
+        return timestampFormatFinder;
+    }
+
+    private static Tuple<List<String>, Integer> getSampleMessages(
+        String multiLineRegex,
+        String[] sampleLines,
+        int lineMergeSizeLimit,
+        TimeoutChecker timeoutChecker
+    ) {
         List<String> sampleMessages = new ArrayList<>();
-        StringBuilder preamble = new StringBuilder();
         int linesConsumed = 0;
         StringBuilder message = null;
         int linesInMessage = 0;
-        String multiLineRegex = createMultiLineMessageStartRegex(
-            timestampFormatFinder.getPrefaces(),
-            timestampFormatFinder.getSimplePattern().pattern()
-        );
+
         Pattern multiLinePattern = Pattern.compile(multiLineRegex);
         for (String sampleLine : sampleLines) {
             if (multiLinePattern.matcher(sampleLine).find()) {
@@ -195,9 +196,6 @@ public class LogTextStructureFinder implements TextStructureFinder {
                 }
             }
             timeoutChecker.check("multi-line message determination");
-            if (sampleMessages.size() < 2) {
-                preamble.append(sampleLine).append('\n');
-            }
         }
         // Don't add the last message, as it might be partial and mess up subsequent pattern finding
 
@@ -209,8 +207,24 @@ public class LogTextStructureFinder implements TextStructureFinder {
             );
         }
 
-        // null to allow GC before Grok pattern search
-        sampleLines = null;
+        return new Tuple<>(sampleMessages, linesConsumed);
+    }
+
+    private static LogTextStructureFinder makeMultiLineLogTextStructureFinder(
+        List<String> explanation,
+        List<String> sampleMessages,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        TextStructureOverrides overrides,
+        int linesConsumed,
+        TimestampFormatFinder timestampFormatFinder,
+        String multiLineRegex,
+        TimeoutChecker timeoutChecker
+    ) {
+        StringBuilder preamble = new StringBuilder();
+        for (int i = 0; i < sampleMessages.size() && i < 2; i++) {
+            preamble.append(sampleMessages.get(i)).append('\n');
+        }
 
         TextStructure.Builder structureBuilder = new TextStructure.Builder(TextStructure.Format.SEMI_STRUCTURED_TEXT).setCharset(
             charsetName
@@ -300,6 +314,80 @@ public class LogTextStructureFinder implements TextStructureFinder {
         return new LogTextStructureFinder(sampleMessages, structure);
     }
 
+    /**
+     * Builds a multi-line log structure finder from raw sample lines. The timestamp format is
+     * detected first, a regex marking the start of a message is derived from it, the lines are
+     * grouped into messages, and the result is handed to the message-based overload.
+     */
+    private static LogTextStructureFinder makeMultiLineLogTextStructureFinder(
+        List<String> explanation,
+        String[] sampleLines,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        int lineMergeSizeLimit,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) {
+        TimestampFormatFinder formatFinder = getTimestampFormatFinder(explanation, sampleLines, overrides, timeoutChecker);
+        String messageStartRegex = createMultiLineMessageStartRegex(
+            formatFinder.getPrefaces(),
+            formatFinder.getSimplePattern().pattern()
+        );
+
+        // v1 holds the grouped messages, v2 the number of lines consumed while grouping
+        Tuple<List<String>, Integer> grouped = getSampleMessages(messageStartRegex, sampleLines, lineMergeSizeLimit, timeoutChecker);
+        List<String> groupedMessages = grouped.v1();
+        int consumedLineCount = grouped.v2();
+
+        // null to allow GC before Grok pattern search
+        sampleLines = null;
+
+        return makeMultiLineLogTextStructureFinder(
+            explanation,
+            groupedMessages,
+            charsetName,
+            hasByteOrderMarker,
+            overrides,
+            consumedLineCount,
+            formatFinder,
+            messageStartRegex,
+            timeoutChecker
+        );
+    }
+
+    /**
+     * Builds a multi-line log structure finder from messages that are already separated.
+     * The messages are used as-is (no line merging takes place here) and the number of
+     * messages is passed on as the number of lines consumed.
+     */
+    private static LogTextStructureFinder makeMultiLineLogTextStructureFinder(
+        List<String> explanation,
+        List<String> messages,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) {
+        // The timestamp detection works on an array, so hand it a copy of the messages
+        String[] messageArray = messages.toArray(new String[0]);
+        TimestampFormatFinder formatFinder = getTimestampFormatFinder(explanation, messageArray, overrides, timeoutChecker);
+
+        String messageStartRegex = createMultiLineMessageStartRegex(
+            formatFinder.getPrefaces(),
+            formatFinder.getSimplePattern().pattern()
+        );
+
+        int consumedLineCount = messages.size();
+        return makeMultiLineLogTextStructureFinder(
+            explanation,
+            messages,
+            charsetName,
+            hasByteOrderMarker,
+            overrides,
+            consumedLineCount,
+            formatFinder,
+            messageStartRegex,
+            timeoutChecker
+        );
+    }
+
     static LogTextStructureFinder makeLogTextStructureFinder(
         List<String> explanation,
         String sample,
@@ -316,7 +404,6 @@ public class LogTextStructureFinder implements TextStructureFinder {
                 sampleLines,
                 charsetName,
                 hasByteOrderMarker,
-                lineMergeSizeLimit,
                 overrides,
                 timeoutChecker
             );
@@ -333,6 +420,28 @@ public class LogTextStructureFinder implements TextStructureFinder {
         }
     }
 
+    /**
+     * Creates a log structure finder from a list of messages. If the timestamp format
+     * override equals {@link TextStructureUtils#NULL_TIMESTAMP_FORMAT} the single-line
+     * finder is built, otherwise the multi-line finder is built.
+     */
+    static LogTextStructureFinder makeLogTextStructureFinder(
+        List<String> explanation,
+        List<String> messages,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) {
+        boolean nullTimestampFormat = TextStructureUtils.NULL_TIMESTAMP_FORMAT.equals(overrides.getTimestampFormat());
+        if (nullTimestampFormat == false) {
+            return makeMultiLineLogTextStructureFinder(explanation, messages, charsetName, hasByteOrderMarker, overrides, timeoutChecker);
+        }
+
+        String[] messageArray = messages.toArray(new String[0]);
+        return makeSingleLineLogTextStructureFinder(
+            explanation,
+            messageArray,
+            charsetName,
+            hasByteOrderMarker,
+            overrides,
+            timeoutChecker
+        );
+    }
+
     private LogTextStructureFinder(List<String> sampleMessages, TextStructure structure) {
         this.sampleMessages = Collections.unmodifiableList(sampleMessages);
         this.structure = structure;

+ 13 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinderFactory.java

@@ -40,6 +40,10 @@ public class LogTextStructureFinderFactory implements TextStructureFinderFactory
         return true;
     }
 
+    /**
+     * This factory accepts any list of messages: the method always returns
+     * <code>true</code> and reads none of its arguments.
+     */
+    @Override
+    public boolean canCreateFromMessages(List<String> explanation, List<String> messages, double allowedFractionOfBadLines) {
+        return true;
+    }
+
     @Override
     public TextStructureFinder createFromSample(
         List<String> explanation,
@@ -60,4 +64,13 @@ public class LogTextStructureFinderFactory implements TextStructureFinderFactory
             timeoutChecker
         );
     }
+
+    /**
+     * Creates a log structure finder from a list of messages. The charset is reported
+     * as UTF-8 and no byte order marker information is supplied.
+     *
+     * @param explanation List of reasons for making decisions; passed on to the finder.
+     * @param messages The messages to analyse, one list element per message.
+     * @param overrides User supplied overrides; passed on to the finder.
+     * @param timeoutChecker Passed on to the finder.
+     * @return a log structure finder for the messages.
+     */
+    @Override
+    public TextStructureFinder createFromMessages(
+        List<String> explanation,
+        List<String> messages,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) {
+        return LogTextStructureFinder.makeLogTextStructureFinder(explanation, messages, "UTF-8", null, overrides, timeoutChecker);
+    }
 }

+ 23 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderFactory.java

@@ -72,6 +72,16 @@ public class NdJsonTextStructureFinderFactory implements TextStructureFinderFact
         return true;
     }
 
+    /**
+     * Checks whether the supplied messages can be treated as NDJSON. A message that contains
+     * a newline rules the format out; otherwise the messages are joined with newlines and the
+     * combined text is checked by {@code canCreateFromSample}.
+     *
+     * @param explanation List of reasons for making decisions; a reason is appended when a message is rejected.
+     * @param messages The messages to check, one list element per message.
+     * @param allowedFractionOfBadLines Passed through unchanged to the sample-based check.
+     * @return <code>true</code> if no message spans multiple lines and the joined sample passes the sample-based check.
+     */
+    @Override
+    public boolean canCreateFromMessages(List<String> explanation, List<String> messages, double allowedFractionOfBadLines) {
+        for (String message : messages) {
+            if (message.contains("\n")) {
+                explanation.add("Not NDJSON because message contains multiple lines: [" + message + "]");
+                return false;
+            }
+        }
+        return canCreateFromSample(explanation, String.join("\n", messages), allowedFractionOfBadLines);
+    }
+
     @Override
     public TextStructureFinder createFromSample(
         List<String> explanation,
@@ -92,6 +102,19 @@ public class NdJsonTextStructureFinderFactory implements TextStructureFinderFact
         );
     }
 
+    /**
+     * Creates an NDJSON structure finder from a list of messages by joining them with
+     * newlines and running the sample-based creation logic on the result. The charset
+     * is reported as UTF-8 and no byte order marker information is supplied.
+     *
+     * @param explanation List of reasons for making decisions; passed on to the finder.
+     * @param messages The messages to analyse, one list element per message.
+     * @param overrides User supplied overrides; passed on to the finder.
+     * @param timeoutChecker Passed on to the finder.
+     * @return a structure finder built from the joined messages.
+     */
+    @Override
+    public TextStructureFinder createFromMessages(
+        List<String> explanation,
+        List<String> messages,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) throws IOException {
+        // NdJsonTextStructureFinderFactory::canCreateFromMessages already
+        // checked that every line contains a single valid JSON message,
+        // so we can safely concatenate and run the logic for a sample.
+        String sample = String.join("\n", messages);
+        return NdJsonTextStructureFinder.makeNdJsonTextStructureFinder(explanation, sample, "UTF-8", null, overrides, timeoutChecker);
+    }
+
     private static class ContextPrintingStringReader extends StringReader {
 
         private final String str;

+ 9 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureFinderFactory.java

@@ -33,6 +33,8 @@ public interface TextStructureFinderFactory {
      */
     boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines);
 
+    /**
+     * Can this factory create a {@link TextStructureFinder} that can handle the supplied messages?
+     * @param explanation List of reasons for making decisions.  May contain items when passed and new reasons
+     *                    can be appended by this method.
+     * @param messages The messages to be examined, one list element per message.
+     * @param allowedFractionOfBadMessages Tolerance handed to the format check.
+     *                                     NOTE(review): presumably the fraction of messages allowed to be
+     *                                     ill-formed, mirroring the sample-based method - confirm.
+     * @return <code>true</code> if this factory can create a structure finder for the messages.
+     */
+    boolean canCreateFromMessages(List<String> explanation, List<String> messages, double allowedFractionOfBadMessages);
+
     /**
      * Create an object representing the structure of some text.
      * @param explanation List of reasons for making decisions.  May contain items when passed and new reasons
@@ -56,4 +58,11 @@ public interface TextStructureFinderFactory {
         TextStructureOverrides overrides,
         TimeoutChecker timeoutChecker
     ) throws Exception;
+
+    /**
+     * Create an object representing the structure of a list of messages.
+     * @param explanation List of reasons for making decisions.  May contain items when passed and new reasons
+     *                    can be appended by this method.
+     * @param messages The messages whose structure is to be determined, one list element per message.
+     * @param overrides Stores structure decisions that have been made by the end user, and should
+     *                  take precedence over anything the {@link TextStructureFinder} may decide.
+     * @param timeoutChecker Will abort the operation if its timeout is exceeded.
+     * @return A {@link TextStructureFinder} object suitable for determining the structure of the supplied messages.
+     * @throws Exception if something goes wrong during creation.
+     */
+    TextStructureFinder createFromMessages(
+        List<String> explanation,
+        List<String> messages,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) throws Exception;
 }

+ 72 - 19
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureFinderManager.java

@@ -13,7 +13,7 @@ import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchTimeoutException;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.core.Tuple;
-import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.AbstractFindStructureRequest;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
 
 import java.io.BufferedInputStream;
@@ -310,7 +310,7 @@ public final class TextStructureFinderManager {
      * Given a stream of text data, determine its structure.
      * @param idealSampleLineCount Ideally, how many lines from the stream will be read to determine the structure?
      *                             If the stream has fewer lines then an attempt will still be made, providing at
-     *                             least {@link FindStructureAction#MIN_SAMPLE_LINE_COUNT} lines can be read.  If
+     *                             least {@link AbstractFindStructureRequest#MIN_SAMPLE_LINE_COUNT} lines can be read.  If
      *                             <code>null</code> the value of {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT} will be used.
      * @param lineMergeSizeLimit Maximum number of characters permitted when lines are merged to create messages.
      *                           If <code>null</code> the value of {@link #DEFAULT_LINE_MERGE_SIZE_LIMIT} will be used.
@@ -383,11 +383,11 @@ public final class TextStructureFinderManager {
                 sampleReader = charsetMatch.getReader();
             }
 
-            assert idealSampleLineCount >= FindStructureAction.MIN_SAMPLE_LINE_COUNT;
+            assert idealSampleLineCount >= AbstractFindStructureRequest.MIN_SAMPLE_LINE_COUNT;
             Tuple<String, Boolean> sampleInfo = sampleText(
                 sampleReader,
                 charsetName,
-                FindStructureAction.MIN_SAMPLE_LINE_COUNT,
+                AbstractFindStructureRequest.MIN_SAMPLE_LINE_COUNT,
                 idealSampleLineCount,
                 timeoutChecker
             );
@@ -413,6 +413,23 @@ public final class TextStructureFinderManager {
         }
     }
 
+    /**
+     * Given a list of messages, determine their structure.
+     * @param messages The messages to analyse, one list element per message.
+     * @param overrides Aspects of the structure that are known in advance; passed on to the structure finders.
+     * @param timeout Time limit used for the {@link TimeoutChecker} guarding the analysis.
+     * @return A {@link TextStructureFinder} object from which the structure and messages can be queried.
+     * @throws Exception Any failure of the analysis; if an explanation was collected before the failure
+     *                   it is attached to the thrown exception as a suppressed exception.
+     */
+    public TextStructureFinder findTextStructure(List<String> messages, TextStructureOverrides overrides, TimeValue timeout)
+        throws Exception {
+        List<String> reasons = new ArrayList<>();
+        try (TimeoutChecker checker = new TimeoutChecker("structure analysis", timeout, scheduler)) {
+            return makeBestStructureFinder(reasons, messages, overrides, checker);
+        } catch (Exception failure) {
+            if (reasons.isEmpty()) {
+                throw failure;
+            }
+            // Add a dummy exception containing the explanation so far - this can be invaluable for troubleshooting as incorrect
+            // decisions made early on in the structure analysis can result in seemingly crazy decisions or timeouts later on
+            String explanationSoFar = "Explanation so far:\n[" + String.join("]\n[", reasons) + "]\n";
+            failure.addSuppressed(new ElasticsearchException(explanationSoFar));
+            throw failure;
+        }
+    }
+
     CharsetMatch findCharset(List<String> explanation, InputStream inputStream, TimeoutChecker timeoutChecker) throws Exception {
 
         // We need an input stream that supports mark and reset, so wrap the argument
@@ -551,24 +568,12 @@ public final class TextStructureFinderManager {
         );
     }
 
-    TextStructureFinder makeBestStructureFinder(
-        List<String> explanation,
-        String sample,
-        String charsetName,
-        Boolean hasByteOrderMarker,
-        int lineMergeSizeLimit,
-        TextStructureOverrides overrides,
-        TimeoutChecker timeoutChecker
-    ) throws Exception {
-
+    List<TextStructureFinderFactory> getFactories(TextStructureOverrides overrides) {
         Character delimiter = overrides.getDelimiter();
         Character quote = overrides.getQuote();
         Boolean shouldTrimFields = overrides.getShouldTrimFields();
         List<TextStructureFinderFactory> factories;
-        double allowedFractionOfBadLines = 0.0;
         if (delimiter != null) {
-            allowedFractionOfBadLines = DelimitedTextStructureFinderFactory.DELIMITER_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES;
-
             // If a precise delimiter is specified, we only need one structure finder
             // factory, and we'll tolerate as little as one column in the input
             factories = Collections.singletonList(
@@ -581,8 +586,6 @@ public final class TextStructureFinderManager {
             );
 
         } else if (quote != null || shouldTrimFields != null || TextStructure.Format.DELIMITED.equals(overrides.getFormat())) {
-            allowedFractionOfBadLines = DelimitedTextStructureFinderFactory.FORMAT_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES;
-
             // The delimiter is not specified, but some other aspect of delimited text is,
             // so clone our default delimited factories altering the overridden values
             factories = ORDERED_STRUCTURE_FACTORIES.stream()
@@ -599,6 +602,34 @@ public final class TextStructureFinderManager {
 
         }
 
+        return factories;
+    }
+
+    /**
+     * Chooses the tolerance for bad lines from the overrides: one constant when the delimiter
+     * is overridden, another when the quote, the trim setting or the delimited format is
+     * overridden, and zero when none of these apply.
+     */
+    private double getAllowedFractionOfBadLines(TextStructureOverrides overrides) {
+        if (overrides.getDelimiter() != null) {
+            return DelimitedTextStructureFinderFactory.DELIMITER_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES;
+        }
+
+        boolean delimitedAspectOverridden = overrides.getQuote() != null
+            || overrides.getShouldTrimFields() != null
+            || TextStructure.Format.DELIMITED.equals(overrides.getFormat());
+        if (delimitedAspectOverridden) {
+            return DelimitedTextStructureFinderFactory.FORMAT_OVERRIDDEN_ALLOWED_FRACTION_OF_BAD_LINES;
+        }
+
+        return 0.0;
+    }
+
+    TextStructureFinder makeBestStructureFinder(
+        List<String> explanation,
+        String sample,
+        String charsetName,
+        Boolean hasByteOrderMarker,
+        int lineMergeSizeLimit,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) throws Exception {
+        List<TextStructureFinderFactory> factories = getFactories(overrides);
+        double allowedFractionOfBadLines = getAllowedFractionOfBadLines(overrides);
+
         for (TextStructureFinderFactory factory : factories) {
             timeoutChecker.check("high level format detection");
             if (factory.canCreateFromSample(explanation, sample, allowedFractionOfBadLines)) {
@@ -620,6 +651,28 @@ public final class TextStructureFinderManager {
         );
     }
 
+    /**
+     * Tries the candidate factories in order and returns the structure finder created by
+     * the first one that accepts the messages.
+     *
+     * @throws IllegalArgumentException if no candidate factory accepts the messages.
+     */
+    private TextStructureFinder makeBestStructureFinder(
+        List<String> explanation,
+        List<String> messages,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) throws Exception {
+        List<TextStructureFinderFactory> candidates = getFactories(overrides);
+        double badLineTolerance = getAllowedFractionOfBadLines(overrides);
+
+        for (TextStructureFinderFactory candidate : candidates) {
+            timeoutChecker.check("high level format detection");
+            if (candidate.canCreateFromMessages(explanation, messages, badLineTolerance) == false) {
+                continue;
+            }
+            return candidate.createFromMessages(explanation, messages, overrides, timeoutChecker);
+        }
+
+        String expected = (overrides.getFormat() == null)
+            ? "any known formats"
+            : "the specified format [" + overrides.getFormat() + "]";
+        throw new IllegalArgumentException("Input did not match " + expected);
+    }
+
     private Tuple<String, Boolean> sampleText(Reader reader, String charsetName, int minLines, int maxLines, TimeoutChecker timeoutChecker)
         throws IOException {
 

+ 2 - 1
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureOverrides.java

@@ -6,6 +6,7 @@
  */
 package org.elasticsearch.xpack.textstructure.structurefinder;
 
+import org.elasticsearch.xpack.core.textstructure.action.AbstractFindStructureRequest;
 import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
 
@@ -37,7 +38,7 @@ public class TextStructureOverrides {
 
     private final String ecsCompatibility;
 
-    public TextStructureOverrides(FindStructureAction.Request request) {
+    public TextStructureOverrides(AbstractFindStructureRequest request) {
 
         this(
             request.getCharset(),

+ 53 - 11
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/XmlTextStructureFinderFactory.java

@@ -46,7 +46,42 @@ public class XmlTextStructureFinderFactory implements TextStructureFinderFactory
      */
     @Override
     public boolean canCreateFromSample(List<String> explanation, String sample, double allowedFractionOfBadLines) {
+        int completeDocCount = parseXml(explanation, sample);
+        if (completeDocCount == -1) {
+            return false;
+        }
+        if (completeDocCount == 0) {
+            explanation.add("Not XML because sample didn't contain a complete document");
+            return false;
+        }
+        explanation.add("Deciding sample is XML");
+        return true;
+    }
+
+    /**
+     * Decides whether a list of messages can be treated as XML. Each message
+     * is parsed on its own and must hold exactly one complete XML document;
+     * the first message that is invalid, incomplete or holds more than one
+     * document rejects the format.
+     * @param explanation List to which the reasons for the decision are appended.
+     * @param messages The messages to check.
+     * @param allowedFractionOfBadLines Not used by this check.
+     * @return <code>true</code> if no message was rejected, otherwise <code>false</code>.
+     */
+    @Override
+    public boolean canCreateFromMessages(List<String> explanation, List<String> messages, double allowedFractionOfBadLines) {
+        for (String message : messages) {
+            int completeDocCount = parseXml(explanation, message);
+            if (completeDocCount == -1) {
+                // parseXml reports invalid input as -1
+                return false;
+            }
+            if (completeDocCount == 0) {
+                explanation.add("Not XML because a message didn't contain a complete document");
+                return false;
+            }
+            if (completeDocCount > 1) {
+                explanation.add("Not XML because a message contains multiple documents");
+                return false;
+            }
+        }
+        explanation.add("Deciding sample is XML");
+        return true;
+    }
 
+    /**
+     * Tries to parse the sample as XML.
+     * @return -1 if invalid, otherwise the number of complete docs
+     */
+    private int parseXml(List<String> explanation, String sample) {
         int completeDocCount = 0;
         String commonRootElementName = null;
         String remainder = sample.trim();
@@ -80,14 +115,14 @@ public class XmlTextStructureFinderFactory implements TextStructureFinderFactory
                                                 + rootElementName
                                                 + "]"
                                         );
-                                        return false;
+                                        return -1;
                                     }
                                 }
                                 break;
                             case XMLStreamReader.END_ELEMENT:
                                 if (--nestingLevel < 0) {
                                     explanation.add("Not XML because an end element occurs before a start element");
-                                    return false;
+                                    return -1;
                                 }
                                 break;
                         }
@@ -111,7 +146,7 @@ public class XmlTextStructureFinderFactory implements TextStructureFinderFactory
                                             + remainder
                                             + "]"
                                     );
-                                    return false;
+                                    return -1;
                                 }
                             }
                             endPos += location.getColumnNumber() - 1;
@@ -125,17 +160,11 @@ public class XmlTextStructureFinderFactory implements TextStructureFinderFactory
                 }
             } catch (IOException | XMLStreamException e) {
                 explanation.add("Not XML because there was a parsing exception: [" + e.getMessage().replaceAll("\\s?\r?\n\\s?", " ") + "]");
-                return false;
+                return -1;
             }
         }
 
-        if (completeDocCount == 0) {
-            explanation.add("Not XML because sample didn't contain a complete document");
-            return false;
-        }
-
-        explanation.add("Deciding sample is XML");
-        return true;
+        return completeDocCount;
     }
 
     @Override
@@ -157,4 +186,17 @@ public class XmlTextStructureFinderFactory implements TextStructureFinderFactory
             timeoutChecker
         );
     }
+
+    /**
+     * Creates an XML structure finder from a list of messages by joining the
+     * messages with newlines and analyzing the result as one sample.
+     * @param explanation List to which explanations of the analysis are appended.
+     * @param messages The messages to analyze.
+     * @param overrides Overrides passed through to the structure finder.
+     * @param timeoutChecker Timeout checker passed through to the structure finder.
+     * @return The structure finder built from the joined messages.
+     */
+    @Override
+    public TextStructureFinder createFromMessages(
+        List<String> explanation,
+        List<String> messages,
+        TextStructureOverrides overrides,
+        TimeoutChecker timeoutChecker
+    ) throws IOException, ParserConfigurationException, SAXException {
+        // XmlTextStructureFinderFactory::canCreateFromMessages already
+        // checked that every message contains a single valid XML document,
+        // so we can safely concatenate and run the logic for a sample.
+        String sample = String.join("\n", messages);
+        return XmlTextStructureFinder.makeXmlTextStructureFinder(explanation, sample, "UTF-8", null, overrides, timeoutChecker);
+    }
 }

+ 94 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportFindFieldStructureAction.java

@@ -0,0 +1,94 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.textstructure.transport;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.HandledTransportAction;
+import org.elasticsearch.client.internal.Client;
+import org.elasticsearch.client.internal.ParentTaskAssigningClient;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.tasks.Task;
+import org.elasticsearch.tasks.TaskId;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportService;
+import org.elasticsearch.xpack.core.ml.utils.MapHelper;
+import org.elasticsearch.xpack.core.textstructure.action.AbstractFindStructureRequest;
+import org.elasticsearch.xpack.core.textstructure.action.FindFieldStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.FindStructureResponse;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinder;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinderManager;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureOverrides;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * Transport action that finds the structure of the values of a single field.
+ * It samples documents from an index with a search and passes the extracted
+ * field values, as messages, to a {@link TextStructureFinderManager}.
+ */
+public class TransportFindFieldStructureAction extends HandledTransportAction<FindFieldStructureAction.Request, FindStructureResponse> {
+
+    private final Client client;
+    private final TransportService transportService;
+    private final ThreadPool threadPool;
+
+    @Inject
+    public TransportFindFieldStructureAction(
+        TransportService transportService,
+        ActionFilters actionFilters,
+        Client client,
+        ThreadPool threadPool
+    ) {
+        super(FindFieldStructureAction.NAME, transportService, actionFilters, FindFieldStructureAction.Request::new, threadPool.generic());
+        this.client = client;
+        this.transportService = transportService;
+        this.threadPool = threadPool;
+    }
+
+    /**
+     * Searches the requested index for documents in which the field exists and
+     * analyzes the field values found in their source. Every failure, including
+     * a sample that is too small, is reported through the listener.
+     */
+    @Override
+    protected void doExecute(Task task, FindFieldStructureAction.Request request, ActionListener<FindStructureResponse> listener) {
+        TaskId taskId = new TaskId(transportService.getLocalNode().getId(), task.getId());
+        new ParentTaskAssigningClient(client, taskId).prepareSearch(request.getIndex())
+            .setSize(request.getLinesToSample())
+            .setQuery(QueryBuilders.existsQuery(request.getField()))
+            // Only the sampled field is needed from the source
+            .setFetchSource(new String[] { request.getField() }, null)
+            .execute(ActionListener.wrap(searchResponse -> {
+                List<String> messages = getMessages(searchResponse, request.getField());
+                // Count the extracted messages rather than the hits, because hits
+                // without a value for the field in their source are skipped
+                int messageCount = messages.size();
+                if (messageCount < AbstractFindStructureRequest.MIN_SAMPLE_LINE_COUNT) {
+                    listener.onFailure(
+                        new IllegalArgumentException("Input contained too few lines [" + messageCount + "] to obtain a meaningful sample")
+                    );
+                    return;
+                }
+                try {
+                    listener.onResponse(buildTextStructureResponse(messages, request));
+                } catch (Exception e) {
+                    listener.onFailure(e);
+                }
+            }, listener::onFailure));
+    }
+
+    /**
+     * Extracts the string form of the field's value from the source of every hit.
+     * Hits whose source holds no value at the field's path are skipped, so the
+     * returned list may be shorter than the number of hits.
+     */
+    private List<String> getMessages(SearchResponse searchResponse, String field) {
+        return Arrays.stream(searchResponse.getHits().getHits())
+            .map(hit -> MapHelper.dig(field, Objects.requireNonNull(hit.getSourceAsMap())))
+            // the field can match the exists query without being present in the source
+            .filter(Objects::nonNull)
+            .map(Object::toString)
+            .collect(Collectors.toList());
+    }
+
+    /**
+     * Runs the structure finder over the messages, using the overrides and the
+     * timeout taken from the request, and wraps the resulting structure.
+     */
+    private FindStructureResponse buildTextStructureResponse(List<String> messages, FindFieldStructureAction.Request request)
+        throws Exception {
+        TextStructureFinderManager structureFinderManager = new TextStructureFinderManager(threadPool.scheduler());
+        TextStructureFinder textStructureFinder = structureFinderManager.findTextStructure(
+            messages,
+            new TextStructureOverrides(request),
+            request.getTimeout()
+        );
+        return new FindStructureResponse(textStructureFinder.getStructure());
+    }
+}

+ 56 - 0
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportFindMessageStructureAction.java

@@ -0,0 +1,56 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.textstructure.transport;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.HandledTransportAction;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.tasks.Task;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportService;
+import org.elasticsearch.xpack.core.textstructure.action.FindMessageStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.FindStructureResponse;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinder;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinderManager;
+import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureOverrides;
+
+/**
+ * Transport action that finds the text structure of the messages carried by
+ * the request itself.
+ */
+public class TransportFindMessageStructureAction extends HandledTransportAction<FindMessageStructureAction.Request, FindStructureResponse> {
+
+    private final ThreadPool threadPool;
+
+    @Inject
+    public TransportFindMessageStructureAction(TransportService transportService, ActionFilters actionFilters, ThreadPool threadPool) {
+        super(
+            FindMessageStructureAction.NAME,
+            transportService,
+            actionFilters,
+            FindMessageStructureAction.Request::new,
+            threadPool.generic()
+        );
+        this.threadPool = threadPool;
+    }
+
+    /**
+     * Builds the response and hands it to the listener; any exception thrown
+     * while doing so is passed to the listener as a failure.
+     */
+    @Override
+    protected void doExecute(Task task, FindMessageStructureAction.Request request, ActionListener<FindStructureResponse> listener) {
+        try {
+            FindStructureResponse response = buildTextStructureResponse(request);
+            listener.onResponse(response);
+        } catch (Exception failure) {
+            listener.onFailure(failure);
+        }
+    }
+
+    /**
+     * Runs the structure finder over the request's messages, using the
+     * overrides and the timeout taken from the request.
+     */
+    private FindStructureResponse buildTextStructureResponse(FindMessageStructureAction.Request request) throws Exception {
+        TextStructureFinderManager manager = new TextStructureFinderManager(threadPool.scheduler());
+        TextStructureFinder finder = manager.findTextStructure(
+            request.getMessages(),
+            new TextStructureOverrides(request),
+            request.getTimeout()
+        );
+        return new FindStructureResponse(finder.getStructure());
+    }
+}

+ 11 - 27
x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/transport/TransportFindStructureAction.java

@@ -10,53 +10,38 @@ import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.HandledTransportAction;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 import org.elasticsearch.xpack.core.textstructure.action.FindStructureAction;
+import org.elasticsearch.xpack.core.textstructure.action.FindStructureResponse;
 import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinder;
 import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureFinderManager;
 import org.elasticsearch.xpack.textstructure.structurefinder.TextStructureOverrides;
 
 import java.io.InputStream;
 
-import static org.elasticsearch.threadpool.ThreadPool.Names.GENERIC;
-
-public class TransportFindStructureAction extends HandledTransportAction<FindStructureAction.Request, FindStructureAction.Response> {
+public class TransportFindStructureAction extends HandledTransportAction<FindStructureAction.Request, FindStructureResponse> {
 
     private final ThreadPool threadPool;
 
     @Inject
     public TransportFindStructureAction(TransportService transportService, ActionFilters actionFilters, ThreadPool threadPool) {
-        super(
-            FindStructureAction.NAME,
-            transportService,
-            actionFilters,
-            FindStructureAction.Request::new,
-            EsExecutors.DIRECT_EXECUTOR_SERVICE
-        );
+        super(FindStructureAction.NAME, transportService, actionFilters, FindStructureAction.Request::new, threadPool.generic());
         this.threadPool = threadPool;
     }
 
     @Override
-    protected void doExecute(Task task, FindStructureAction.Request request, ActionListener<FindStructureAction.Response> listener) {
-
-        // As determining the text structure might take a while, we run
-        // in a different thread to avoid blocking the network thread.
-        threadPool.executor(GENERIC).execute(() -> {
-            try {
-                listener.onResponse(buildTextStructureResponse(request));
-            } catch (Exception e) {
-                listener.onFailure(e);
-            }
-        });
+    protected void doExecute(Task task, FindStructureAction.Request request, ActionListener<FindStructureResponse> listener) {
+        try {
+            listener.onResponse(buildTextStructureResponse(request));
+        } catch (Exception e) {
+            listener.onFailure(e);
+        }
     }
 
-    private FindStructureAction.Response buildTextStructureResponse(FindStructureAction.Request request) throws Exception {
-
+    private FindStructureResponse buildTextStructureResponse(FindStructureAction.Request request) throws Exception {
         TextStructureFinderManager structureFinderManager = new TextStructureFinderManager(threadPool.scheduler());
-
         try (InputStream sampleStream = request.getSample().streamInput()) {
             TextStructureFinder textStructureFinder = structureFinderManager.findTextStructure(
                 request.getLinesToSample(),
@@ -65,8 +50,7 @@ public class TransportFindStructureAction extends HandledTransportAction<FindStr
                 new TextStructureOverrides(request),
                 request.getTimeout()
             );
-
-            return new FindStructureAction.Response(textStructureFinder.getStructure());
+            return new FindStructureResponse(textStructureFinder.getStructure());
         }
     }
 }

+ 18 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinderFactoryTests.java

@@ -6,6 +6,9 @@
  */
 package org.elasticsearch.xpack.textstructure.structurefinder;
 
+import java.util.Arrays;
+import java.util.List;
+
 public class DelimitedTextStructureFinderFactoryTests extends TextStructureTestCase {
 
     private final TextStructureFinderFactory csvFactory = new DelimitedTextStructureFinderFactory(',', '"', 2, false);
@@ -40,6 +43,21 @@ public class DelimitedTextStructureFinderFactoryTests extends TextStructureTestC
         assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE, 0.0));
     }
 
+    public void testCanCreateCsvFromMessagesCsv() {
+        List<String> messages = Arrays.asList(CSV_SAMPLE.split("\n"));
+        assertTrue(csvFactory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCanCreateCsvFromMessagesCsv_multipleCsvRowsPerMessage() {
+        List<String> messages = List.of(CSV_SAMPLE, CSV_SAMPLE, CSV_SAMPLE);
+        assertFalse(csvFactory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCanCreateCsvFromMessagesCsv_emptyMessages() {
+        List<String> messages = List.of("", "", "");
+        assertFalse(csvFactory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
     // TSV - no need to check NDJSON, XML or CSV because they come earlier in the order we check formats
 
     public void testCanCreateTsvFromSampleGivenTsv() {

+ 24 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/DelimitedTextStructureFinderTests.java

@@ -790,6 +790,30 @@ public class DelimitedTextStructureFinderTests extends TextStructureTestCase {
         assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
     }
 
+    public void testCreateFromMessages() throws Exception {
+        // three messages, one CSV row each
+        List<String> rows = List.of("a,b,c", "d,e,f", "g,h,i");
+        assertTrue(csvFactory.canCreateFromMessages(explanation, rows, 0.0));
+
+        TextStructureFinder finder = csvFactory.createFromMessages(
+            explanation,
+            rows,
+            TextStructureOverrides.EMPTY_OVERRIDES,
+            NOOP_TIMEOUT_CHECKER
+        );
+        TextStructure found = finder.getStructure();
+
+        assertEquals(TextStructure.Format.DELIMITED, found.getFormat());
+        assertEquals(3, found.getNumMessagesAnalyzed());
+    }
+
+    public void testCreateFromMessages_multipleRowPerMessage() {
+        List<String> messages = List.of("a,b,c\nd,e,f", "g,h,i");
+        assertFalse(csvFactory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCreateFromMessages_emptyMessage() {
+        List<String> messages = List.of("a,b,c", "", "d,e,f");
+        assertFalse(csvFactory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
     public void testFindHeaderFromSampleGivenHeaderInSample() throws IOException {
         String withHeader = """
             time,airline,responsetime,sourcetype

+ 16 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/LogTextStructureFinderTests.java

@@ -11,6 +11,7 @@ import org.elasticsearch.xpack.core.textstructure.structurefinder.FieldStats;
 import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure;
 
 import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -106,6 +107,21 @@ public class LogTextStructureFinderTests extends TextStructureTestCase {
         assertTrue(keys.contains("@timestamp"));
     }
 
+    public void testCreateFromMessages() throws Exception {
+        // every line of the text sample becomes its own message
+        List<String> lines = List.of(TEXT_SAMPLE.split("\n"));
+        assertTrue(factory.canCreateFromMessages(explanation, lines, 0.0));
+
+        TextStructureFinder finder = factory.createFromMessages(
+            explanation,
+            lines,
+            TextStructureOverrides.EMPTY_OVERRIDES,
+            NOOP_TIMEOUT_CHECKER
+        );
+
+        String expectedGrokPattern = "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*";
+        assertEquals(expectedGrokPattern, finder.getStructure().getGrokPattern());
+    }
+
     public void testCreateConfigsGivenElasticsearchLogWithNoTimestamps() throws Exception {
         assertTrue(factory.canCreateFromSample(explanation, TEXT_WITH_NO_TIMESTAMPS_SAMPLE, 0.0));
 

+ 18 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderFactoryTests.java

@@ -6,6 +6,9 @@
  */
 package org.elasticsearch.xpack.textstructure.structurefinder;
 
+import java.util.Arrays;
+import java.util.List;
+
 public class NdJsonTextStructureFinderFactoryTests extends TextStructureTestCase {
 
     private final TextStructureFinderFactory factory = new NdJsonTextStructureFinderFactory();
@@ -15,6 +18,21 @@ public class NdJsonTextStructureFinderFactoryTests extends TextStructureTestCase
         assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0));
     }
 
+    public void testCanCreateFromMessages() {
+        List<String> messages = Arrays.asList(NDJSON_SAMPLE.split("\n"));
+        assertTrue(factory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCanCreateFromMessages_multipleJsonLinesPerMessage() {
+        List<String> messages = List.of(NDJSON_SAMPLE, NDJSON_SAMPLE, NDJSON_SAMPLE);
+        assertFalse(factory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCanCreateFromMessages_emptyMessages() {
+        List<String> messages = List.of("", "", "");
+        assertFalse(factory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
     public void testCanCreateFromSampleGivenXml() {
 
         assertFalse(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));

+ 18 - 0
x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/XmlTextStructureFinderFactoryTests.java

@@ -6,6 +6,9 @@
  */
 package org.elasticsearch.xpack.textstructure.structurefinder;
 
+import java.util.Arrays;
+import java.util.List;
+
 public class XmlTextStructureFinderFactoryTests extends TextStructureTestCase {
 
     private final TextStructureFinderFactory factory = new XmlTextStructureFinderFactory();
@@ -17,6 +20,21 @@ public class XmlTextStructureFinderFactoryTests extends TextStructureTestCase {
         assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));
     }
 
+    public void testCanCreateFromMessages() {
+        List<String> messages = Arrays.asList(XML_SAMPLE.split("\n\n"));
+        assertTrue(factory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCanCreateFromMessages_multipleXmlDocsPerMessage() {
+        List<String> messages = List.of(XML_SAMPLE, XML_SAMPLE, XML_SAMPLE);
+        assertFalse(factory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
+    public void testCanCreateFromMessages_emptyMessages() {
+        List<String> messages = List.of("", "", "");
+        assertFalse(factory.canCreateFromMessages(explanation, messages, 0.0));
+    }
+
     public void testCanCreateFromSampleGivenCsv() {
 
         assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE, 0.0));