Browse Source

[HLRC][ML] Add ML find file structure API (#35833)

Relates to #29827
David Roberts 6 years ago
parent
commit
3c059ee057
14 changed files with 1786 additions and 0 deletions
  1. 63 0
      client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
  2. 43 0
      client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
  3. 302 0
      client/rest-high-level/src/main/java/org/elasticsearch/client/ml/FindFileStructureRequest.java
  4. 70 0
      client/rest-high-level/src/main/java/org/elasticsearch/client/ml/FindFileStructureResponse.java
  5. 166 0
      client/rest-high-level/src/main/java/org/elasticsearch/client/ml/filestructurefinder/FieldStats.java
  6. 516 0
      client/rest-high-level/src/main/java/org/elasticsearch/client/ml/filestructurefinder/FileStructure.java
  7. 83 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
  8. 44 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
  9. 68 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
  10. 114 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/ml/FindFileStructureRequestTests.java
  11. 49 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/ml/FindFileStructureResponseTests.java
  12. 88 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/ml/filestructurefinder/FieldStatsTests.java
  13. 127 0
      client/rest-high-level/src/test/java/org/elasticsearch/client/ml/filestructurefinder/FileStructureTests.java
  14. 53 0
      docs/java-rest/high-level/ml/find-file-structure.asciidoc

+ 63 - 0
client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java

@@ -36,6 +36,7 @@ import org.elasticsearch.client.ml.DeleteFilterRequest;
 import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
+import org.elasticsearch.client.ml.FindFileStructureRequest;
 import org.elasticsearch.client.ml.FlushJobRequest;
 import org.elasticsearch.client.ml.ForecastJobRequest;
 import org.elasticsearch.client.ml.GetBucketsRequest;
@@ -70,6 +71,7 @@ import org.elasticsearch.client.ml.UpdateModelSnapshotRequest;
 import org.elasticsearch.client.ml.job.util.PageParams;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.xcontent.XContentType;
 
 import java.io.IOException;
 
@@ -648,4 +650,65 @@ final class MLRequestConverters {
         Request request = new Request(HttpDelete.METHOD_NAME, endpoint);
         return request;
     }
+
+    /**
+     * Builds the low-level request for the find file structure endpoint.
+     * <p>
+     * Each override that is set (non-null) on the supplied request is passed as a URL parameter,
+     * and the sample is sent as the request body.
+     *
+     * @param findFileStructureRequest the high-level request to convert
+     * @return a POST request whose endpoint is built from the path parts
+     *         {@code _xpack}, {@code ml} and {@code find_file_structure}
+     */
+    static Request findFileStructure(FindFileStructureRequest findFileStructureRequest) {
+        Request request = new Request(HttpPost.METHOD_NAME, new EndpointBuilder()
+            .addPathPartAsIs("_xpack")
+            .addPathPartAsIs("ml")
+            .addPathPartAsIs("find_file_structure")
+            .build());
+        RequestConverters.Params params = new RequestConverters.Params(request);
+
+        Integer linesToSample = findFileStructureRequest.getLinesToSample();
+        if (linesToSample != null) {
+            params.putParam(FindFileStructureRequest.LINES_TO_SAMPLE.getPreferredName(), linesToSample.toString());
+        }
+        if (findFileStructureRequest.getTimeout() != null) {
+            params.putParam(FindFileStructureRequest.TIMEOUT.getPreferredName(),
+                findFileStructureRequest.getTimeout().toString());
+        }
+        String charset = findFileStructureRequest.getCharset();
+        if (charset != null) {
+            params.putParam(FindFileStructureRequest.CHARSET.getPreferredName(), charset);
+        }
+        if (findFileStructureRequest.getFormat() != null) {
+            params.putParam(FindFileStructureRequest.FORMAT.getPreferredName(),
+                findFileStructureRequest.getFormat().toString());
+        }
+        if (findFileStructureRequest.getColumnNames() != null) {
+            // Column names travel as a single comma separated parameter value
+            String joinedColumnNames = Strings.collectionToCommaDelimitedString(findFileStructureRequest.getColumnNames());
+            params.putParam(FindFileStructureRequest.COLUMN_NAMES.getPreferredName(), joinedColumnNames);
+        }
+        Boolean hasHeaderRow = findFileStructureRequest.getHasHeaderRow();
+        if (hasHeaderRow != null) {
+            params.putParam(FindFileStructureRequest.HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.toString());
+        }
+        Character delimiter = findFileStructureRequest.getDelimiter();
+        if (delimiter != null) {
+            params.putParam(FindFileStructureRequest.DELIMITER.getPreferredName(), delimiter.toString());
+        }
+        Character quote = findFileStructureRequest.getQuote();
+        if (quote != null) {
+            params.putParam(FindFileStructureRequest.QUOTE.getPreferredName(), quote.toString());
+        }
+        Boolean shouldTrimFields = findFileStructureRequest.getShouldTrimFields();
+        if (shouldTrimFields != null) {
+            params.putParam(FindFileStructureRequest.SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.toString());
+        }
+        String grokPattern = findFileStructureRequest.getGrokPattern();
+        if (grokPattern != null) {
+            params.putParam(FindFileStructureRequest.GROK_PATTERN.getPreferredName(), grokPattern);
+        }
+        String timestampFormat = findFileStructureRequest.getTimestampFormat();
+        if (timestampFormat != null) {
+            params.putParam(FindFileStructureRequest.TIMESTAMP_FORMAT.getPreferredName(), timestampFormat);
+        }
+        String timestampField = findFileStructureRequest.getTimestampField();
+        if (timestampField != null) {
+            params.putParam(FindFileStructureRequest.TIMESTAMP_FIELD.getPreferredName(), timestampField);
+        }
+        Boolean explain = findFileStructureRequest.getExplain();
+        if (explain != null) {
+            params.putParam(FindFileStructureRequest.EXPLAIN.getPreferredName(), explain.toString());
+        }
+
+        // The sample bytes form the request body; the entity is labelled with the JSON content type
+        BytesRef source = findFileStructureRequest.getSample().toBytesRef();
+        HttpEntity byteEntity =
+            new ByteArrayEntity(source.bytes, source.offset, source.length, createContentType(XContentType.JSON));
+        request.setEntity(byteEntity);
+        return request;
+    }
 }

+ 43 - 0
client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java

@@ -31,6 +31,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
+import org.elasticsearch.client.ml.FindFileStructureRequest;
+import org.elasticsearch.client.ml.FindFileStructureResponse;
 import org.elasticsearch.client.ml.FlushJobRequest;
 import org.elasticsearch.client.ml.FlushJobResponse;
 import org.elasticsearch.client.ml.ForecastJobRequest;
@@ -1711,4 +1713,45 @@ public final class MachineLearningClient {
             listener,
             Collections.emptySet());
     }
+
+    /**
+     * Synchronously finds the structure of a file.
+     * <p>
+     * For additional info
+     * see <a href="http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-find-file-structure.html">
+     *     ML Find File Structure documentation</a>
+     *
+     * @param request the find file structure request
+     * @param options additional request options (e.g. headers); use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @return the response containing details of the file structure
+     * @throws IOException when there is a serialization issue sending the request or receiving the response
+     */
+    public FindFileStructureResponse findFileStructure(FindFileStructureRequest request, RequestOptions options) throws IOException {
+        return restHighLevelClient.performRequestAndParseEntity(
+            request,
+            MLRequestConverters::findFileStructure,
+            options,
+            FindFileStructureResponse::fromXContent,
+            Collections.emptySet()
+        );
+    }
+
+    /**
+     * Asynchronously finds the structure of a file, notifying the supplied listener when the request completes.
+     * <p>
+     * For additional info
+     * see <a href="http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-find-file-structure.html">
+     *         ML Find File Structure documentation</a>
+     *
+     * @param request the find file structure request
+     * @param options additional request options (e.g. headers); use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @param listener listener to be notified upon request completion
+     */
+    public void findFileStructureAsync(FindFileStructureRequest request, RequestOptions options,
+                                       ActionListener<FindFileStructureResponse> listener) {
+        restHighLevelClient.performRequestAsyncAndParseEntity(
+            request,
+            MLRequestConverters::findFileStructure,
+            options,
+            FindFileStructureResponse::fromXContent,
+            listener,
+            Collections.emptySet()
+        );
+    }
 }

+ 302 - 0
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/FindFileStructureRequest.java

@@ -0,0 +1,302 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.Validatable;
+import org.elasticsearch.client.ValidationException;
+import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+
+public class FindFileStructureRequest implements Validatable, ToXContent {
+
+    public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
+    public static final ParseField TIMEOUT = new ParseField("timeout");
+    public static final ParseField CHARSET = FileStructure.CHARSET;
+    public static final ParseField FORMAT = FileStructure.FORMAT;
+    public static final ParseField COLUMN_NAMES = FileStructure.COLUMN_NAMES;
+    public static final ParseField HAS_HEADER_ROW = FileStructure.HAS_HEADER_ROW;
+    public static final ParseField DELIMITER = FileStructure.DELIMITER;
+    public static final ParseField QUOTE = FileStructure.QUOTE;
+    public static final ParseField SHOULD_TRIM_FIELDS = FileStructure.SHOULD_TRIM_FIELDS;
+    public static final ParseField GROK_PATTERN = FileStructure.GROK_PATTERN;
+    // This one is plural in FileStructure, but singular in FileStructureOverrides
+    public static final ParseField TIMESTAMP_FORMAT = new ParseField("timestamp_format");
+    public static final ParseField TIMESTAMP_FIELD = FileStructure.TIMESTAMP_FIELD;
+    public static final ParseField EXPLAIN = new ParseField("explain");
+
+    private Integer linesToSample;
+    private TimeValue timeout;
+    private String charset;
+    private FileStructure.Format format;
+    private List<String> columnNames;
+    private Boolean hasHeaderRow;
+    private Character delimiter;
+    private Character quote;
+    private Boolean shouldTrimFields;
+    private String grokPattern;
+    private String timestampFormat;
+    private String timestampField;
+    private Boolean explain;
+    private BytesReference sample;
+
+    public FindFileStructureRequest() {
+    }
+
+    public Integer getLinesToSample() {
+        return linesToSample;
+    }
+
+    public void setLinesToSample(Integer linesToSample) {
+        this.linesToSample = linesToSample;
+    }
+
+    public TimeValue getTimeout() {
+        return timeout;
+    }
+
+    public void setTimeout(TimeValue timeout) {
+        this.timeout = timeout;
+    }
+
+    public String getCharset() {
+        return charset;
+    }
+
+    public void setCharset(String charset) {
+        this.charset = (charset == null || charset.isEmpty()) ? null : charset;
+    }
+
+    public FileStructure.Format getFormat() {
+        return format;
+    }
+
+    public void setFormat(FileStructure.Format format) {
+        this.format = format;
+    }
+
+    public void setFormat(String format) {
+        this.format = (format == null || format.isEmpty()) ? null : FileStructure.Format.fromString(format);
+    }
+
+    public List<String> getColumnNames() {
+        return columnNames;
+    }
+
+    public void setColumnNames(List<String> columnNames) {
+        this.columnNames = (columnNames == null || columnNames.isEmpty()) ? null : columnNames;
+    }
+
+    public void setColumnNames(String[] columnNames) {
+        this.columnNames = (columnNames == null || columnNames.length == 0) ? null : Arrays.asList(columnNames);
+    }
+
+    public Boolean getHasHeaderRow() {
+        return hasHeaderRow;
+    }
+
+    public void setHasHeaderRow(Boolean hasHeaderRow) {
+        this.hasHeaderRow = hasHeaderRow;
+    }
+
+    public Character getDelimiter() {
+        return delimiter;
+    }
+
+    public void setDelimiter(Character delimiter) {
+        this.delimiter = delimiter;
+    }
+
+    public void setDelimiter(String delimiter) {
+        if (delimiter == null || delimiter.isEmpty()) {
+            this.delimiter = null;
+        } else if (delimiter.length() == 1) {
+            this.delimiter = delimiter.charAt(0);
+        } else {
+            throw new IllegalArgumentException(DELIMITER.getPreferredName() + " must be a single character");
+        }
+    }
+
+    public Character getQuote() {
+        return quote;
+    }
+
+    public void setQuote(Character quote) {
+        this.quote = quote;
+    }
+
+    public void setQuote(String quote) {
+        if (quote == null || quote.isEmpty()) {
+            this.quote = null;
+        } else if (quote.length() == 1) {
+            this.quote = quote.charAt(0);
+        } else {
+            throw new IllegalArgumentException(QUOTE.getPreferredName() + " must be a single character");
+        }
+    }
+
+    public Boolean getShouldTrimFields() {
+        return shouldTrimFields;
+    }
+
+    public void setShouldTrimFields(Boolean shouldTrimFields) {
+        this.shouldTrimFields = shouldTrimFields;
+    }
+
+    public String getGrokPattern() {
+        return grokPattern;
+    }
+
+    public void setGrokPattern(String grokPattern) {
+        this.grokPattern = (grokPattern == null || grokPattern.isEmpty()) ? null : grokPattern;
+    }
+
+    public String getTimestampFormat() {
+        return timestampFormat;
+    }
+
+    public void setTimestampFormat(String timestampFormat) {
+        this.timestampFormat = (timestampFormat == null || timestampFormat.isEmpty()) ? null : timestampFormat;
+    }
+
+    public String getTimestampField() {
+        return timestampField;
+    }
+
+    public void setTimestampField(String timestampField) {
+        this.timestampField = (timestampField == null || timestampField.isEmpty()) ? null : timestampField;
+    }
+
+    public Boolean getExplain() {
+        return explain;
+    }
+
+    public void setExplain(Boolean explain) {
+        this.explain = explain;
+    }
+
+    public BytesReference getSample() {
+        return sample;
+    }
+
+    public void setSample(byte[] sample) {
+        this.sample = new BytesArray(sample);
+    }
+
+    public void setSample(BytesReference sample) {
+        this.sample = Objects.requireNonNull(sample);
+    }
+
+    @Override
+    public Optional<ValidationException> validate() {
+        ValidationException validationException = new ValidationException();
+        if (sample == null || sample.length() == 0) {
+            validationException.addValidationError("sample must be specified");
+        }
+        return validationException.validationErrors().isEmpty() ? Optional.empty() : Optional.of(validationException);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+
+        if (linesToSample != null) {
+            builder.field(LINES_TO_SAMPLE.getPreferredName(), linesToSample);
+        }
+        if (timeout != null) {
+            builder.field(TIMEOUT.getPreferredName(), timeout);
+        }
+        if (charset != null) {
+            builder.field(CHARSET.getPreferredName(), charset);
+        }
+        if (format != null) {
+            builder.field(FORMAT.getPreferredName(), format);
+        }
+        if (columnNames != null) {
+            builder.field(COLUMN_NAMES.getPreferredName(), columnNames);
+        }
+        if (hasHeaderRow != null) {
+            builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow);
+        }
+        if (delimiter != null) {
+            builder.field(DELIMITER.getPreferredName(), delimiter.toString());
+        }
+        if (quote != null) {
+            builder.field(QUOTE.getPreferredName(), quote.toString());
+        }
+        if (shouldTrimFields != null) {
+            builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields);
+        }
+        if (grokPattern != null) {
+            builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
+        }
+        if (timestampFormat != null) {
+            builder.field(TIMESTAMP_FORMAT.getPreferredName(), timestampFormat);
+        }
+        if (timestampField != null) {
+            builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
+        }
+        if (explain != null) {
+            builder.field(EXPLAIN.getPreferredName(), explain);
+        }
+        // Sample is not included in the X-Content representation
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(linesToSample, timeout, charset, format, columnNames, hasHeaderRow, delimiter, grokPattern, timestampFormat,
+            timestampField, explain, sample);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+
+        if (this == other) {
+            return true;
+        }
+
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+
+        FindFileStructureRequest that = (FindFileStructureRequest) other;
+        return Objects.equals(this.linesToSample, that.linesToSample) &&
+            Objects.equals(this.timeout, that.timeout) &&
+            Objects.equals(this.charset, that.charset) &&
+            Objects.equals(this.format, that.format) &&
+            Objects.equals(this.columnNames, that.columnNames) &&
+            Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
+            Objects.equals(this.delimiter, that.delimiter) &&
+            Objects.equals(this.grokPattern, that.grokPattern) &&
+            Objects.equals(this.timestampFormat, that.timestampFormat) &&
+            Objects.equals(this.timestampField, that.timestampField) &&
+            Objects.equals(this.explain, that.explain) &&
+            Objects.equals(this.sample, that.sample);
+    }
+}

+ 70 - 0
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/FindFileStructureResponse.java

@@ -0,0 +1,70 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Objects;
+
+/**
+ * Response to a find file structure request; wraps the {@link FileStructure} that was parsed from the reply.
+ */
+public class FindFileStructureResponse implements ToXContentObject {
+
+    private final FileStructure fileStructure;
+
+    /**
+     * @param fileStructure the file structure to wrap; must not be {@code null}
+     */
+    FindFileStructureResponse(FileStructure fileStructure) {
+        this.fileStructure = Objects.requireNonNull(fileStructure);
+    }
+
+    /**
+     * Parses a response using {@link FileStructure#PARSER} and builds the result.
+     */
+    public static FindFileStructureResponse fromXContent(XContentParser parser) throws IOException {
+        FileStructure parsedStructure = FileStructure.PARSER.parse(parser, null).build();
+        return new FindFileStructureResponse(parsedStructure);
+    }
+
+    public FileStructure getFileStructure() {
+        return fileStructure;
+    }
+
+    /**
+     * Delegates serialization to the wrapped file structure and returns the supplied builder.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        fileStructure.toXContent(builder, params);
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(fileStructure);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (other == null || other.getClass() != getClass()) {
+            return false;
+        }
+        return Objects.equals(fileStructure, ((FindFileStructureResponse) other).fileStructure);
+    }
+}

+ 166 - 0
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/filestructurefinder/FieldStats.java

@@ -0,0 +1,166 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.filestructurefinder;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Immutable holder for the statistics of a single field: a count, a cardinality,
+ * optional min/max/mean/median values and an optional list of top hits.
+ */
+public class FieldStats implements ToXContentObject {
+
+    public static final ParseField COUNT = new ParseField("count");
+    public static final ParseField CARDINALITY = new ParseField("cardinality");
+    public static final ParseField MIN_VALUE = new ParseField("min_value");
+    public static final ParseField MAX_VALUE = new ParseField("max_value");
+    public static final ParseField MEAN_VALUE = new ParseField("mean_value");
+    public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
+    public static final ParseField TOP_HITS = new ParseField("top_hits");
+
+    // Constructor arguments arrive in the order they are declared in the static initializer below
+    @SuppressWarnings("unchecked")
+    public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
+        args -> new FieldStats(
+            (long) args[0],
+            (int) args[1],
+            (Double) args[2],
+            (Double) args[3],
+            (Double) args[4],
+            (Double) args[5],
+            (List<Map<String, Object>>) args[6]));
+
+    static {
+        PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
+        PARSER.declareInt(ConstructingObjectParser.constructorArg(), CARDINALITY);
+        PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MIN_VALUE);
+        PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
+        PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
+        PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
+        PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
+    }
+
+    private final long count;
+    private final int cardinality;
+    private final Double minValue;
+    private final Double maxValue;
+    private final Double meanValue;
+    private final Double medianValue;
+    private final List<Map<String, Object>> topHits;
+
+    /**
+     * @param topHits may be {@code null}, in which case an empty list is stored;
+     *                otherwise it is wrapped in an unmodifiable view
+     */
+    FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
+               List<Map<String, Object>> topHits) {
+        this.count = count;
+        this.cardinality = cardinality;
+        this.minValue = minValue;
+        this.maxValue = maxValue;
+        this.meanValue = meanValue;
+        this.medianValue = medianValue;
+        if (topHits == null) {
+            this.topHits = Collections.emptyList();
+        } else {
+            this.topHits = Collections.unmodifiableList(topHits);
+        }
+    }
+
+    public long getCount() {
+        return count;
+    }
+
+    public int getCardinality() {
+        return cardinality;
+    }
+
+    public Double getMinValue() {
+        return minValue;
+    }
+
+    public Double getMaxValue() {
+        return maxValue;
+    }
+
+    public Double getMeanValue() {
+        return meanValue;
+    }
+
+    public Double getMedianValue() {
+        return medianValue;
+    }
+
+    public List<Map<String, Object>> getTopHits() {
+        return topHits;
+    }
+
+    /**
+     * Writes the statistics as an object. Optional values are written only when present,
+     * and the top hits only when the list is not empty.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
+        builder.startObject();
+        builder.field(COUNT.getPreferredName(), count);
+        builder.field(CARDINALITY.getPreferredName(), cardinality);
+        writeIfPresent(builder, MIN_VALUE, minValue);
+        writeIfPresent(builder, MAX_VALUE, maxValue);
+        writeIfPresent(builder, MEAN_VALUE, meanValue);
+        writeIfPresent(builder, MEDIAN_VALUE, medianValue);
+        if (topHits.isEmpty() == false) {
+            builder.field(TOP_HITS.getPreferredName(), topHits);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    /**
+     * Writes a numeric field unless its value is {@code null}; whole numbers are written as integers.
+     */
+    private static void writeIfPresent(XContentBuilder builder, ParseField field, Double value) throws IOException {
+        if (value != null) {
+            builder.field(field.getPreferredName(), toIntegerIfInteger(value));
+        }
+    }
+
+    /**
+     * Returns the value as an {@link Integer} when it lies in the {@code int} range and has no
+     * fractional part, otherwise returns it unchanged as a {@link Double}.
+     */
+    static Number toIntegerIfInteger(double d) {
+        boolean withinIntRange = d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE;
+        if (withinIntRange && Double.compare(d, StrictMath.rint(d)) == 0) {
+            return Integer.valueOf((int) d);
+        }
+        return Double.valueOf(d);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (other == null || other.getClass() != getClass()) {
+            return false;
+        }
+
+        FieldStats rhs = (FieldStats) other;
+        if (count != rhs.count || cardinality != rhs.cardinality) {
+            return false;
+        }
+        return Objects.equals(minValue, rhs.minValue)
+            && Objects.equals(maxValue, rhs.maxValue)
+            && Objects.equals(meanValue, rhs.meanValue)
+            && Objects.equals(medianValue, rhs.medianValue)
+            && Objects.equals(topHits, rhs.topHits);
+    }
+}

+ 516 - 0
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/filestructurefinder/FileStructure.java

@@ -0,0 +1,516 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.filestructurefinder;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Stores the file format determined by Machine Learning.
+ * <p>
+ * Instances are immutable: the constructor takes a defensive copy of every collection it is
+ * given and wraps the copy in an unmodifiable view.  Instances are created through
+ * {@link Builder}, which is also the object populated by {@link #PARSER}.
+ */
+public class FileStructure implements ToXContentObject {
+
+    /**
+     * The high level formats a file can be classified as.  The string form is the lower case
+     * constant name.
+     */
+    public enum Format {
+
+        NDJSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;
+
+        /**
+         * Case-insensitive lookup that ignores leading and trailing whitespace.
+         *
+         * @param name the textual form of the format
+         * @return the matching constant
+         * @throws IllegalArgumentException if no constant matches (propagated from <code>valueOf</code>)
+         */
+        public static Format fromString(String name) {
+            return valueOf(name.trim().toUpperCase(Locale.ROOT));
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    public static final ParseField NUM_LINES_ANALYZED = new ParseField("num_lines_analyzed");
+    public static final ParseField NUM_MESSAGES_ANALYZED = new ParseField("num_messages_analyzed");
+    public static final ParseField SAMPLE_START = new ParseField("sample_start");
+    public static final ParseField CHARSET = new ParseField("charset");
+    public static final ParseField HAS_BYTE_ORDER_MARKER = new ParseField("has_byte_order_marker");
+    public static final ParseField FORMAT = new ParseField("format");
+    public static final ParseField MULTILINE_START_PATTERN = new ParseField("multiline_start_pattern");
+    public static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
+    public static final ParseField COLUMN_NAMES = new ParseField("column_names");
+    public static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
+    public static final ParseField DELIMITER = new ParseField("delimiter");
+    public static final ParseField QUOTE = new ParseField("quote");
+    public static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
+    public static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
+    public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
+    public static final ParseField JODA_TIMESTAMP_FORMATS = new ParseField("joda_timestamp_formats");
+    public static final ParseField JAVA_TIMESTAMP_FORMATS = new ParseField("java_timestamp_formats");
+    public static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone");
+    public static final ParseField MAPPINGS = new ParseField("mappings");
+    public static final ParseField INGEST_PIPELINE = new ParseField("ingest_pipeline");
+    public static final ParseField FIELD_STATS = new ParseField("field_stats");
+    public static final ParseField EXPLANATION = new ParseField("explanation");
+
+    /**
+     * Parser that populates a {@link Builder}; call {@link Builder#build()} on the result to
+     * obtain the {@link FileStructure}.
+     */
+    public static final ObjectParser<Builder, Void> PARSER = new ObjectParser<>("file_structure", true, Builder::new);
+
+    static {
+        PARSER.declareInt(Builder::setNumLinesAnalyzed, NUM_LINES_ANALYZED);
+        PARSER.declareInt(Builder::setNumMessagesAnalyzed, NUM_MESSAGES_ANALYZED);
+        PARSER.declareString(Builder::setSampleStart, SAMPLE_START);
+        PARSER.declareString(Builder::setCharset, CHARSET);
+        PARSER.declareBoolean(Builder::setHasByteOrderMarker, HAS_BYTE_ORDER_MARKER);
+        PARSER.declareString((p, c) -> p.setFormat(Format.fromString(c)), FORMAT);
+        PARSER.declareString(Builder::setMultilineStartPattern, MULTILINE_START_PATTERN);
+        PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
+        PARSER.declareStringArray(Builder::setColumnNames, COLUMN_NAMES);
+        PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
+        // Delimiter and quote arrive as strings; only the first character is kept
+        PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
+        PARSER.declareString((p, c) -> p.setQuote(c.charAt(0)), QUOTE);
+        PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
+        PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
+        PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
+        PARSER.declareStringArray(Builder::setJodaTimestampFormats, JODA_TIMESTAMP_FORMATS);
+        PARSER.declareStringArray(Builder::setJavaTimestampFormats, JAVA_TIMESTAMP_FORMATS);
+        PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE);
+        PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS);
+        PARSER.declareObject(Builder::setIngestPipeline, (p, c) -> p.mapOrdered(), INGEST_PIPELINE);
+        // field_stats is an object keyed by field name; each value is parsed as a FieldStats
+        PARSER.declareObject(Builder::setFieldStats, (p, c) -> {
+            Map<String, FieldStats> fieldStats = new TreeMap<>();
+            while (p.nextToken() == XContentParser.Token.FIELD_NAME) {
+                fieldStats.put(p.currentName(), FieldStats.PARSER.apply(p, c));
+            }
+            return fieldStats;
+        }, FIELD_STATS);
+        PARSER.declareStringArray(Builder::setExplanation, EXPLANATION);
+    }
+
+    private final int numLinesAnalyzed;
+    private final int numMessagesAnalyzed;
+    private final String sampleStart;
+    private final String charset;
+    private final Boolean hasByteOrderMarker;
+    private final Format format;
+    private final String multilineStartPattern;
+    private final String excludeLinesPattern;
+    private final List<String> columnNames;
+    private final Boolean hasHeaderRow;
+    private final Character delimiter;
+    private final Character quote;
+    private final Boolean shouldTrimFields;
+    private final String grokPattern;
+    private final List<String> jodaTimestampFormats;
+    private final List<String> javaTimestampFormats;
+    private final String timestampField;
+    private final boolean needClientTimezone;
+    private final SortedMap<String, Object> mappings;
+    private final Map<String, Object> ingestPipeline;
+    private final SortedMap<String, FieldStats> fieldStats;
+    private final List<String> explanation;
+
+    /**
+     * Only called from {@link Builder#build()}.  <code>sampleStart</code>, <code>charset</code>,
+     * <code>format</code>, <code>mappings</code> and <code>fieldStats</code> must not be <code>null</code>;
+     * every other reference argument may be.
+     */
+    private FileStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
+                          Format format, String multilineStartPattern, String excludeLinesPattern, List<String> columnNames,
+                          Boolean hasHeaderRow, Character delimiter, Character quote, Boolean shouldTrimFields, String grokPattern,
+                          String timestampField, List<String> jodaTimestampFormats, List<String> javaTimestampFormats,
+                          boolean needClientTimezone, Map<String, Object> mappings, Map<String, Object> ingestPipeline,
+                          Map<String, FieldStats> fieldStats, List<String> explanation) {
+
+        this.numLinesAnalyzed = numLinesAnalyzed;
+        this.numMessagesAnalyzed = numMessagesAnalyzed;
+        this.sampleStart = Objects.requireNonNull(sampleStart);
+        this.charset = Objects.requireNonNull(charset);
+        this.hasByteOrderMarker = hasByteOrderMarker;
+        this.format = Objects.requireNonNull(format);
+        this.multilineStartPattern = multilineStartPattern;
+        this.excludeLinesPattern = excludeLinesPattern;
+        this.columnNames = (columnNames == null) ? null : Collections.unmodifiableList(new ArrayList<>(columnNames));
+        this.hasHeaderRow = hasHeaderRow;
+        this.delimiter = delimiter;
+        this.quote = quote;
+        this.shouldTrimFields = shouldTrimFields;
+        this.grokPattern = grokPattern;
+        this.timestampField = timestampField;
+        this.jodaTimestampFormats =
+            (jodaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(jodaTimestampFormats));
+        this.javaTimestampFormats =
+            (javaTimestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(javaTimestampFormats));
+        this.needClientTimezone = needClientTimezone;
+        // Mappings and field stats are kept sorted by key; the ingest pipeline keeps its insertion order
+        this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings));
+        this.ingestPipeline = (ingestPipeline == null) ? null : Collections.unmodifiableMap(new LinkedHashMap<>(ingestPipeline));
+        this.fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(fieldStats));
+        this.explanation = (explanation == null) ? null : Collections.unmodifiableList(new ArrayList<>(explanation));
+    }
+
+    public int getNumLinesAnalyzed() {
+        return numLinesAnalyzed;
+    }
+
+    public int getNumMessagesAnalyzed() {
+        return numMessagesAnalyzed;
+    }
+
+    public String getSampleStart() {
+        return sampleStart;
+    }
+
+    public String getCharset() {
+        return charset;
+    }
+
+    /**
+     * @return may be <code>null</code> if the value was never set
+     */
+    public Boolean getHasByteOrderMarker() {
+        return hasByteOrderMarker;
+    }
+
+    public Format getFormat() {
+        return format;
+    }
+
+    public String getMultilineStartPattern() {
+        return multilineStartPattern;
+    }
+
+    public String getExcludeLinesPattern() {
+        return excludeLinesPattern;
+    }
+
+    /**
+     * @return an unmodifiable list, or <code>null</code> if no column names were set
+     */
+    public List<String> getColumnNames() {
+        return columnNames;
+    }
+
+    public Boolean getHasHeaderRow() {
+        return hasHeaderRow;
+    }
+
+    public Character getDelimiter() {
+        return delimiter;
+    }
+
+    public Character getQuote() {
+        return quote;
+    }
+
+    public Boolean getShouldTrimFields() {
+        return shouldTrimFields;
+    }
+
+    public String getGrokPattern() {
+        return grokPattern;
+    }
+
+    public String getTimestampField() {
+        return timestampField;
+    }
+
+    /**
+     * @return an unmodifiable list, or <code>null</code> if no formats were set
+     */
+    public List<String> getJodaTimestampFormats() {
+        return jodaTimestampFormats;
+    }
+
+    /**
+     * @return an unmodifiable list, or <code>null</code> if no formats were set
+     */
+    public List<String> getJavaTimestampFormats() {
+        return javaTimestampFormats;
+    }
+
+    public boolean needClientTimezone() {
+        return needClientTimezone;
+    }
+
+    /**
+     * @return an unmodifiable map sorted by key; never <code>null</code>
+     */
+    public SortedMap<String, Object> getMappings() {
+        return mappings;
+    }
+
+    /**
+     * @return an unmodifiable map, or <code>null</code> if no ingest pipeline was set
+     */
+    public Map<String, Object> getIngestPipeline() {
+        return ingestPipeline;
+    }
+
+    /**
+     * @return an unmodifiable map sorted by field name; never <code>null</code>
+     */
+    public SortedMap<String, FieldStats> getFieldStats() {
+        return fieldStats;
+    }
+
+    /**
+     * @return an unmodifiable list, or <code>null</code> if no explanation was set
+     */
+    public List<String> getExplanation() {
+        return explanation;
+    }
+
+    /**
+     * Writes this structure as a single object.  Optional values that are <code>null</code>, and
+     * optional strings and collections that are empty, are omitted from the output.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+
+        builder.startObject();
+        builder.field(NUM_LINES_ANALYZED.getPreferredName(), numLinesAnalyzed);
+        builder.field(NUM_MESSAGES_ANALYZED.getPreferredName(), numMessagesAnalyzed);
+        builder.field(SAMPLE_START.getPreferredName(), sampleStart);
+        builder.field(CHARSET.getPreferredName(), charset);
+        if (hasByteOrderMarker != null) {
+            builder.field(HAS_BYTE_ORDER_MARKER.getPreferredName(), hasByteOrderMarker.booleanValue());
+        }
+        builder.field(FORMAT.getPreferredName(), format);
+        if (multilineStartPattern != null && multilineStartPattern.isEmpty() == false) {
+            builder.field(MULTILINE_START_PATTERN.getPreferredName(), multilineStartPattern);
+        }
+        if (excludeLinesPattern != null && excludeLinesPattern.isEmpty() == false) {
+            builder.field(EXCLUDE_LINES_PATTERN.getPreferredName(), excludeLinesPattern);
+        }
+        if (columnNames != null && columnNames.isEmpty() == false) {
+            builder.field(COLUMN_NAMES.getPreferredName(), columnNames);
+        }
+        if (hasHeaderRow != null) {
+            builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
+        }
+        if (delimiter != null) {
+            builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
+        }
+        if (quote != null) {
+            builder.field(QUOTE.getPreferredName(), String.valueOf(quote));
+        }
+        if (shouldTrimFields != null) {
+            builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
+        }
+        if (grokPattern != null && grokPattern.isEmpty() == false) {
+            builder.field(GROK_PATTERN.getPreferredName(), grokPattern);
+        }
+        if (timestampField != null && timestampField.isEmpty() == false) {
+            builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField);
+        }
+        if (jodaTimestampFormats != null && jodaTimestampFormats.isEmpty() == false) {
+            builder.field(JODA_TIMESTAMP_FORMATS.getPreferredName(), jodaTimestampFormats);
+        }
+        if (javaTimestampFormats != null && javaTimestampFormats.isEmpty() == false) {
+            builder.field(JAVA_TIMESTAMP_FORMATS.getPreferredName(), javaTimestampFormats);
+        }
+        builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone);
+        builder.field(MAPPINGS.getPreferredName(), mappings);
+        if (ingestPipeline != null) {
+            builder.field(INGEST_PIPELINE.getPreferredName(), ingestPipeline);
+        }
+        if (fieldStats.isEmpty() == false) {
+            builder.startObject(FIELD_STATS.getPreferredName());
+            for (Map.Entry<String, FieldStats> entry : fieldStats.entrySet()) {
+                builder.field(entry.getKey(), entry.getValue());
+            }
+            builder.endObject();
+        }
+        if (explanation != null && explanation.isEmpty() == false) {
+            builder.field(EXPLANATION.getPreferredName(), explanation);
+        }
+        builder.endObject();
+
+        return builder;
+    }
+
+    /**
+     * Hash over every field of the structure, including the ingest pipeline, so that it stays
+     * consistent with {@link #equals(Object)}.
+     */
+    @Override
+    public int hashCode() {
+
+        return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
+            multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
+            timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, ingestPipeline, fieldStats,
+            explanation);
+    }
+
+    /**
+     * Two structures are equal when they are of exactly the same class and every field matches,
+     * including the ingest pipeline.
+     */
+    @Override
+    public boolean equals(Object other) {
+
+        if (this == other) {
+            return true;
+        }
+
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+
+        FileStructure that = (FileStructure) other;
+        return this.numLinesAnalyzed == that.numLinesAnalyzed &&
+            this.numMessagesAnalyzed == that.numMessagesAnalyzed &&
+            Objects.equals(this.sampleStart, that.sampleStart) &&
+            Objects.equals(this.charset, that.charset) &&
+            Objects.equals(this.hasByteOrderMarker, that.hasByteOrderMarker) &&
+            Objects.equals(this.format, that.format) &&
+            Objects.equals(this.multilineStartPattern, that.multilineStartPattern) &&
+            Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
+            Objects.equals(this.columnNames, that.columnNames) &&
+            Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
+            Objects.equals(this.delimiter, that.delimiter) &&
+            Objects.equals(this.quote, that.quote) &&
+            Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
+            Objects.equals(this.grokPattern, that.grokPattern) &&
+            Objects.equals(this.timestampField, that.timestampField) &&
+            Objects.equals(this.jodaTimestampFormats, that.jodaTimestampFormats) &&
+            Objects.equals(this.javaTimestampFormats, that.javaTimestampFormats) &&
+            this.needClientTimezone == that.needClientTimezone &&
+            Objects.equals(this.mappings, that.mappings) &&
+            Objects.equals(this.ingestPipeline, that.ingestPipeline) &&
+            Objects.equals(this.fieldStats, that.fieldStats) &&
+            Objects.equals(this.explanation, that.explanation);
+    }
+
+    /**
+     * Mutable holder used to assemble a {@link FileStructure}.  The constructors and setters are
+     * package-private; only {@link #build()} is public.  Mappings and field stats default to
+     * empty maps, and the format defaults to {@link Format#SEMI_STRUCTURED_TEXT}.
+     */
+    public static class Builder {
+
+        private int numLinesAnalyzed;
+        private int numMessagesAnalyzed;
+        private String sampleStart;
+        private String charset;
+        private Boolean hasByteOrderMarker;
+        private Format format;
+        private String multilineStartPattern;
+        private String excludeLinesPattern;
+        private List<String> columnNames;
+        private Boolean hasHeaderRow;
+        private Character delimiter;
+        private Character quote;
+        private Boolean shouldTrimFields;
+        private String grokPattern;
+        private String timestampField;
+        private List<String> jodaTimestampFormats;
+        private List<String> javaTimestampFormats;
+        private boolean needClientTimezone;
+        private Map<String, Object> mappings = Collections.emptyMap();
+        private Map<String, Object> ingestPipeline;
+        private Map<String, FieldStats> fieldStats = Collections.emptyMap();
+        private List<String> explanation;
+
+        Builder() {
+            this(Format.SEMI_STRUCTURED_TEXT);
+        }
+
+        Builder(Format format) {
+            setFormat(format);
+        }
+
+        Builder setNumLinesAnalyzed(int numLinesAnalyzed) {
+            this.numLinesAnalyzed = numLinesAnalyzed;
+            return this;
+        }
+
+        Builder setNumMessagesAnalyzed(int numMessagesAnalyzed) {
+            this.numMessagesAnalyzed = numMessagesAnalyzed;
+            return this;
+        }
+
+        Builder setSampleStart(String sampleStart) {
+            this.sampleStart = Objects.requireNonNull(sampleStart);
+            return this;
+        }
+
+        Builder setCharset(String charset) {
+            this.charset = Objects.requireNonNull(charset);
+            return this;
+        }
+
+        Builder setHasByteOrderMarker(Boolean hasByteOrderMarker) {
+            this.hasByteOrderMarker = hasByteOrderMarker;
+            return this;
+        }
+
+        Builder setFormat(Format format) {
+            this.format = Objects.requireNonNull(format);
+            return this;
+        }
+
+        Builder setMultilineStartPattern(String multilineStartPattern) {
+            this.multilineStartPattern = multilineStartPattern;
+            return this;
+        }
+
+        Builder setExcludeLinesPattern(String excludeLinesPattern) {
+            this.excludeLinesPattern = excludeLinesPattern;
+            return this;
+        }
+
+        Builder setColumnNames(List<String> columnNames) {
+            this.columnNames = columnNames;
+            return this;
+        }
+
+        Builder setHasHeaderRow(Boolean hasHeaderRow) {
+            this.hasHeaderRow = hasHeaderRow;
+            return this;
+        }
+
+        Builder setDelimiter(Character delimiter) {
+            this.delimiter = delimiter;
+            return this;
+        }
+
+        Builder setQuote(Character quote) {
+            this.quote = quote;
+            return this;
+        }
+
+        Builder setShouldTrimFields(Boolean shouldTrimFields) {
+            this.shouldTrimFields = shouldTrimFields;
+            return this;
+        }
+
+        Builder setGrokPattern(String grokPattern) {
+            this.grokPattern = grokPattern;
+            return this;
+        }
+
+        Builder setTimestampField(String timestampField) {
+            this.timestampField = timestampField;
+            return this;
+        }
+
+        Builder setJodaTimestampFormats(List<String> jodaTimestampFormats) {
+            this.jodaTimestampFormats = jodaTimestampFormats;
+            return this;
+        }
+
+        Builder setJavaTimestampFormats(List<String> javaTimestampFormats) {
+            this.javaTimestampFormats = javaTimestampFormats;
+            return this;
+        }
+
+        Builder setNeedClientTimezone(boolean needClientTimezone) {
+            this.needClientTimezone = needClientTimezone;
+            return this;
+        }
+
+        Builder setMappings(Map<String, Object> mappings) {
+            this.mappings = Objects.requireNonNull(mappings);
+            return this;
+        }
+
+        Builder setIngestPipeline(Map<String, Object> ingestPipeline) {
+            this.ingestPipeline = ingestPipeline;
+            return this;
+        }
+
+        Builder setFieldStats(Map<String, FieldStats> fieldStats) {
+            this.fieldStats = Objects.requireNonNull(fieldStats);
+            return this;
+        }
+
+        Builder setExplanation(List<String> explanation) {
+            this.explanation = explanation;
+            return this;
+        }
+
+        /**
+         * Creates the immutable {@link FileStructure}.
+         *
+         * @throws NullPointerException if the sample start or charset has not been set
+         */
+        public FileStructure build() {
+
+            return new FileStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
+                multilineStartPattern, excludeLinesPattern, columnNames, hasHeaderRow, delimiter, quote, shouldTrimFields, grokPattern,
+                timestampField, jodaTimestampFormats, javaTimestampFormats, needClientTimezone, mappings, ingestPipeline, fieldStats,
+                explanation);
+        }
+    }
+}

+ 83 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java

@@ -32,6 +32,8 @@ import org.elasticsearch.client.ml.DeleteFilterRequest;
 import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
+import org.elasticsearch.client.ml.FindFileStructureRequest;
+import org.elasticsearch.client.ml.FindFileStructureRequestTests;
 import org.elasticsearch.client.ml.FlushJobRequest;
 import org.elasticsearch.client.ml.ForecastJobRequest;
 import org.elasticsearch.client.ml.GetBucketsRequest;
@@ -69,6 +71,7 @@ import org.elasticsearch.client.ml.calendars.ScheduledEvent;
 import org.elasticsearch.client.ml.calendars.ScheduledEventTests;
 import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.client.ml.datafeed.DatafeedConfigTests;
+import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
 import org.elasticsearch.client.ml.job.config.AnalysisConfig;
 import org.elasticsearch.client.ml.job.config.Detector;
 import org.elasticsearch.client.ml.job.config.Job;
@@ -87,6 +90,7 @@ import org.elasticsearch.test.ESTestCase;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -715,6 +719,85 @@ public class MLRequestConvertersTests extends ESTestCase {
         assertNull(request.getEntity());
     }
 
+    /**
+     * Checks that a find file structure request becomes a POST to the find_file_structure endpoint,
+     * that each optional setting appears as a URL parameter only when it is set on the request,
+     * and that the sample is sent as the request body.
+     */
+    public void testFindFileStructure() throws Exception {
+
+        String sample = randomAlphaOfLength(randomIntBetween(1000, 2000));
+        FindFileStructureRequest structureRequest = FindFileStructureRequestTests.createTestRequestWithoutSample();
+        structureRequest.setSample(sample.getBytes(StandardCharsets.UTF_8));
+        Request httpRequest = MLRequestConverters.findFileStructure(structureRequest);
+
+        assertEquals(HttpPost.METHOD_NAME, httpRequest.getMethod());
+        assertEquals("/_xpack/ml/find_file_structure", httpRequest.getEndpoint());
+
+        // For every optional setting: absent on the request means absent from the parameters
+        if (structureRequest.getLinesToSample() == null) {
+            assertNull(httpRequest.getParameters().get("lines_to_sample"));
+        } else {
+            assertEquals(structureRequest.getLinesToSample(), Integer.valueOf(httpRequest.getParameters().get("lines_to_sample")));
+        }
+        if (structureRequest.getTimeout() == null) {
+            assertNull(httpRequest.getParameters().get("timeout"));
+        } else {
+            assertEquals(structureRequest.getTimeout().toString(), httpRequest.getParameters().get("timeout"));
+        }
+        if (structureRequest.getCharset() == null) {
+            assertNull(httpRequest.getParameters().get("charset"));
+        } else {
+            assertEquals(structureRequest.getCharset(), httpRequest.getParameters().get("charset"));
+        }
+        if (structureRequest.getFormat() == null) {
+            assertNull(httpRequest.getParameters().get("format"));
+        } else {
+            assertEquals(structureRequest.getFormat(), FileStructure.Format.fromString(httpRequest.getParameters().get("format")));
+        }
+        if (structureRequest.getColumnNames() == null) {
+            assertNull(httpRequest.getParameters().get("column_names"));
+        } else {
+            assertEquals(structureRequest.getColumnNames(),
+                Arrays.asList(Strings.splitStringByCommaToArray(httpRequest.getParameters().get("column_names"))));
+        }
+        if (structureRequest.getHasHeaderRow() == null) {
+            assertNull(httpRequest.getParameters().get("has_header_row"));
+        } else {
+            assertEquals(structureRequest.getHasHeaderRow(), Boolean.valueOf(httpRequest.getParameters().get("has_header_row")));
+        }
+        if (structureRequest.getDelimiter() == null) {
+            assertNull(httpRequest.getParameters().get("delimiter"));
+        } else {
+            assertEquals(structureRequest.getDelimiter().toString(), httpRequest.getParameters().get("delimiter"));
+        }
+        if (structureRequest.getQuote() == null) {
+            assertNull(httpRequest.getParameters().get("quote"));
+        } else {
+            assertEquals(structureRequest.getQuote().toString(), httpRequest.getParameters().get("quote"));
+        }
+        if (structureRequest.getShouldTrimFields() == null) {
+            assertNull(httpRequest.getParameters().get("should_trim_fields"));
+        } else {
+            assertEquals(structureRequest.getShouldTrimFields(),
+                Boolean.valueOf(httpRequest.getParameters().get("should_trim_fields")));
+        }
+        if (structureRequest.getGrokPattern() == null) {
+            assertNull(httpRequest.getParameters().get("grok_pattern"));
+        } else {
+            assertEquals(structureRequest.getGrokPattern(), httpRequest.getParameters().get("grok_pattern"));
+        }
+        if (structureRequest.getTimestampFormat() == null) {
+            assertNull(httpRequest.getParameters().get("timestamp_format"));
+        } else {
+            assertEquals(structureRequest.getTimestampFormat(), httpRequest.getParameters().get("timestamp_format"));
+        }
+        if (structureRequest.getTimestampField() == null) {
+            assertNull(httpRequest.getParameters().get("timestamp_field"));
+        } else {
+            assertEquals(structureRequest.getTimestampField(), httpRequest.getParameters().get("timestamp_field"));
+        }
+        if (structureRequest.getExplain() == null) {
+            assertNull(httpRequest.getParameters().get("explain"));
+        } else {
+            assertEquals(structureRequest.getExplain(), Boolean.valueOf(httpRequest.getParameters().get("explain")));
+        }
+
+        // The sample itself travels as the request body
+        assertEquals(sample, requestEntityToString(httpRequest));
+    }
+
     private static Job createValidJob(String jobId) {
         AnalysisConfig.Builder analysisConfig = AnalysisConfig.builder(Collections.singletonList(
                 Detector.builder().setFunction("count").build()));

+ 44 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java

@@ -38,6 +38,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
+import org.elasticsearch.client.ml.FindFileStructureRequest;
+import org.elasticsearch.client.ml.FindFileStructureResponse;
 import org.elasticsearch.client.ml.FlushJobRequest;
 import org.elasticsearch.client.ml.FlushJobResponse;
 import org.elasticsearch.client.ml.ForecastJobRequest;
@@ -94,6 +96,7 @@ import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.client.ml.datafeed.DatafeedState;
 import org.elasticsearch.client.ml.datafeed.DatafeedStats;
 import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
+import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
 import org.elasticsearch.client.ml.job.config.AnalysisConfig;
 import org.elasticsearch.client.ml.job.config.DataDescription;
 import org.elasticsearch.client.ml.job.config.Detector;
@@ -110,11 +113,13 @@ import org.elasticsearch.rest.RestStatus;
 import org.junit.After;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
@@ -1306,4 +1311,43 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
             assertEquals(snapshotId, model.getSnapshotId());
         }
     }
+
+    /**
+     * Sends a two line NDJSON sample to the find file structure endpoint and checks the structure
+     * that comes back.
+     */
+    public void testFindFileStructure() throws IOException {
+
+        String firstMessage = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
+                "\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
+                "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
+        String secondMessage = "{\"logger\":\"controller\",\"timestamp\":1478261151445," +
+                "\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
+                "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
+        String sample = firstMessage + secondMessage;
+
+        MachineLearningClient mlClient = highLevelClient().machineLearning();
+
+        FindFileStructureRequest findRequest = new FindFileStructureRequest();
+        findRequest.setSample(sample.getBytes(StandardCharsets.UTF_8));
+
+        FindFileStructureResponse findResponse =
+            execute(findRequest, mlClient::findFileStructure, mlClient::findFileStructureAsync);
+
+        FileStructure fileStructure = findResponse.getFileStructure();
+
+        // What was analysed
+        assertEquals(2, fileStructure.getNumLinesAnalyzed());
+        assertEquals(2, fileStructure.getNumMessagesAnalyzed());
+        assertEquals(sample, fileStructure.getSampleStart());
+        assertEquals(FileStructure.Format.NDJSON, fileStructure.getFormat());
+        assertEquals(StandardCharsets.UTF_8.displayName(Locale.ROOT), fileStructure.getCharset());
+        assertFalse(fileStructure.getHasByteOrderMarker());
+
+        // None of these optional properties are expected for this sample
+        assertNull(fileStructure.getMultilineStartPattern());
+        assertNull(fileStructure.getExcludeLinesPattern());
+        assertNull(fileStructure.getColumnNames());
+        assertNull(fileStructure.getHasHeaderRow());
+        assertNull(fileStructure.getDelimiter());
+        assertNull(fileStructure.getQuote());
+        assertNull(fileStructure.getShouldTrimFields());
+        assertNull(fileStructure.getGrokPattern());
+
+        // Timestamp detection
+        assertEquals(Collections.singletonList("UNIX_MS"), fileStructure.getJavaTimestampFormats());
+        assertEquals(Collections.singletonList("UNIX_MS"), fileStructure.getJodaTimestampFormats());
+        assertEquals("timestamp", fileStructure.getTimestampField());
+        assertFalse(fileStructure.needClientTimezone());
+    }
 }

+ 68 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java

@@ -44,6 +44,8 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
+import org.elasticsearch.client.ml.FindFileStructureRequest;
+import org.elasticsearch.client.ml.FindFileStructureResponse;
 import org.elasticsearch.client.ml.FlushJobRequest;
 import org.elasticsearch.client.ml.FlushJobResponse;
 import org.elasticsearch.client.ml.ForecastJobRequest;
@@ -110,6 +112,7 @@ import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.client.ml.datafeed.DatafeedStats;
 import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
 import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
+import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
 import org.elasticsearch.client.ml.job.config.AnalysisConfig;
 import org.elasticsearch.client.ml.job.config.AnalysisLimits;
 import org.elasticsearch.client.ml.job.config.DataDescription;
@@ -140,6 +143,9 @@ import org.elasticsearch.tasks.TaskId;
 import org.junit.After;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Date;
@@ -1730,6 +1736,68 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
         }
     }
 
+    /**
+     * Documentation test for the find file structure API.
+     *
+     * The regions between the {@code tag::} and {@code end::} markers are included verbatim in the
+     * high level REST client documentation, so the statements inside them and their callouts must
+     * stay in step with that page.
+     */
+    public void testFindFileStructure() throws Exception {
+        RestHighLevelClient client = highLevelClient();
+
+        Path anInterestingFile = createTempFile();
+        String contents = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
+                "\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
+                "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" +
+            "{\"logger\":\"controller\",\"timestamp\":1478261151445," +
+                "\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
+                "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
+        // Write the exact bytes of the sample. The Iterable<CharSequence> overload of Files.write would
+        // append a platform-dependent line separator after the contents, which already end with '\n'.
+        Files.write(anInterestingFile, contents.getBytes(StandardCharsets.UTF_8));
+
+        {
+            // tag::find-file-structure-request
+            FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest(); // <1>
+            findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile)); // <2>
+            // end::find-file-structure-request
+
+            // tag::find-file-structure-request-options
+            findFileStructureRequest.setLinesToSample(500); // <1>
+            findFileStructureRequest.setExplain(true); // <2>
+            // end::find-file-structure-request-options
+
+            // tag::find-file-structure-execute
+            FindFileStructureResponse findFileStructureResponse =
+                client.machineLearning().findFileStructure(findFileStructureRequest, RequestOptions.DEFAULT);
+            // end::find-file-structure-execute
+
+            // tag::find-file-structure-response
+            FileStructure structure = findFileStructureResponse.getFileStructure(); // <1>
+            // end::find-file-structure-response
+            assertEquals(2, structure.getNumLinesAnalyzed());
+        }
+        {
+            // tag::find-file-structure-execute-listener
+            ActionListener<FindFileStructureResponse> listener = new ActionListener<FindFileStructureResponse>() {
+                @Override
+                public void onResponse(FindFileStructureResponse findFileStructureResponse) {
+                    // <1>
+                }
+
+                @Override
+                public void onFailure(Exception e) {
+                    // <2>
+                }
+            };
+            // end::find-file-structure-execute-listener
+            FindFileStructureRequest findFileStructureRequest = new FindFileStructureRequest();
+            findFileStructureRequest.setSample(Files.readAllBytes(anInterestingFile));
+
+            // Replace the empty listener by a blocking listener in test
+            final CountDownLatch latch = new CountDownLatch(1);
+            listener = new LatchedActionListener<>(listener, latch);
+
+            // tag::find-file-structure-execute-async
+            client.machineLearning().findFileStructureAsync(findFileStructureRequest, RequestOptions.DEFAULT, listener); // <1>
+            // end::find-file-structure-execute-async
+
+            // Fail the test if the asynchronous call has not completed within 30 seconds
+            assertTrue(latch.await(30L, TimeUnit.SECONDS));
+        }
+    }
+
     public void testGetInfluencers() throws IOException, InterruptedException {
         RestHighLevelClient client = highLevelClient();
 

+ 114 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/FindFileStructureRequestTests.java

@@ -0,0 +1,114 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+/**
+ * X-Content round-trip tests for {@link FindFileStructureRequest}.
+ * <p>
+ * The sample is not included in the X-Content representation, so test instances are always
+ * created without one.
+ */
+public class FindFileStructureRequestTests extends AbstractXContentTestCase<FindFileStructureRequest> {
+
+    // Test-only parser that rebuilds a request from its X-Content form through the request's setters
+    private static final ObjectParser<FindFileStructureRequest, Void> PARSER =
+        new ObjectParser<>("find_file_structure_request", FindFileStructureRequest::new);
+
+    static {
+        PARSER.declareInt(FindFileStructureRequest::setLinesToSample, FindFileStructureRequest.LINES_TO_SAMPLE);
+        // The timeout is read as a string and converted back into a TimeValue
+        PARSER.declareString(
+            (request, timeout) ->
+                request.setTimeout(TimeValue.parseTimeValue(timeout, FindFileStructureRequest.TIMEOUT.getPreferredName())),
+            FindFileStructureRequest.TIMEOUT);
+        PARSER.declareString(FindFileStructureRequest::setCharset, FindFileStructureRequest.CHARSET);
+        PARSER.declareString(FindFileStructureRequest::setFormat, FindFileStructureRequest.FORMAT);
+        PARSER.declareStringArray(FindFileStructureRequest::setColumnNames, FindFileStructureRequest.COLUMN_NAMES);
+        PARSER.declareBoolean(FindFileStructureRequest::setHasHeaderRow, FindFileStructureRequest.HAS_HEADER_ROW);
+        PARSER.declareString(FindFileStructureRequest::setDelimiter, FindFileStructureRequest.DELIMITER);
+        PARSER.declareString(FindFileStructureRequest::setQuote, FindFileStructureRequest.QUOTE);
+        PARSER.declareBoolean(FindFileStructureRequest::setShouldTrimFields, FindFileStructureRequest.SHOULD_TRIM_FIELDS);
+        PARSER.declareString(FindFileStructureRequest::setGrokPattern, FindFileStructureRequest.GROK_PATTERN);
+        PARSER.declareString(FindFileStructureRequest::setTimestampFormat, FindFileStructureRequest.TIMESTAMP_FORMAT);
+        PARSER.declareString(FindFileStructureRequest::setTimestampField, FindFileStructureRequest.TIMESTAMP_FIELD);
+        PARSER.declareBoolean(FindFileStructureRequest::setExplain, FindFileStructureRequest.EXPLAIN);
+        // Sample is not included in the X-Content representation
+    }
+
+    @Override
+    protected FindFileStructureRequest createTestInstance() {
+        return createTestRequestWithoutSample();
+    }
+
+    /**
+     * Creates a request in which each optional setting is independently either left unset or
+     * given a random value. The sample is never set.
+     */
+    public static FindFileStructureRequest createTestRequestWithoutSample() {
+
+        final FindFileStructureRequest request = new FindFileStructureRequest();
+        if (randomBoolean()) {
+            request.setLinesToSample(randomIntBetween(1000, 2000));
+        }
+        if (randomBoolean()) {
+            request.setTimeout(TimeValue.timeValueSeconds(randomIntBetween(10, 20)));
+        }
+        if (randomBoolean()) {
+            request.setCharset(Charset.defaultCharset().toString());
+        }
+        if (randomBoolean()) {
+            request.setFormat(randomFrom(FileStructure.Format.values()));
+        }
+        if (randomBoolean()) {
+            request.setColumnNames(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
+        }
+        if (randomBoolean()) {
+            request.setHasHeaderRow(randomBoolean());
+        }
+        if (randomBoolean()) {
+            request.setDelimiter(randomAlphaOfLength(1));
+        }
+        if (randomBoolean()) {
+            request.setQuote(randomAlphaOfLength(1));
+        }
+        if (randomBoolean()) {
+            request.setShouldTrimFields(randomBoolean());
+        }
+        if (randomBoolean()) {
+            request.setGrokPattern(randomAlphaOfLength(100));
+        }
+        if (randomBoolean()) {
+            request.setTimestampFormat(randomAlphaOfLength(10));
+        }
+        if (randomBoolean()) {
+            request.setTimestampField(randomAlphaOfLength(10));
+        }
+        if (randomBoolean()) {
+            request.setExplain(randomBoolean());
+        }
+
+        return request;
+    }
+
+    @Override
+    protected FindFileStructureRequest doParseInstance(XContentParser parser) throws IOException {
+        return PARSER.apply(parser, null);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return false;
+    }
+}

+ 49 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/FindFileStructureResponseTests.java

@@ -0,0 +1,49 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.filestructurefinder.FileStructureTests;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.function.Predicate;
+
+/**
+ * X-Content round-trip tests for {@link FindFileStructureResponse}, using randomly generated
+ * file structures from {@link FileStructureTests}.
+ */
+public class FindFileStructureResponseTests extends AbstractXContentTestCase<FindFileStructureResponse> {
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+
+    @Override
+    protected Predicate<String> getRandomFieldsExcludeFilter() {
+        // Every non-empty path is excluded, so random fields are only inserted at the top level
+        return path -> path.isEmpty() == false;
+    }
+
+    @Override
+    protected FindFileStructureResponse createTestInstance() {
+        final FileStructure randomStructure = FileStructureTests.createTestFileStructure();
+        return new FindFileStructureResponse(randomStructure);
+    }
+
+    @Override
+    protected FindFileStructureResponse doParseInstance(XContentParser parser) throws IOException {
+        return FindFileStructureResponse.fromXContent(parser);
+    }
+}

+ 88 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/filestructurefinder/FieldStatsTests.java

@@ -0,0 +1,88 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.filestructurefinder;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Predicate;
+
+/**
+ * X-Content round-trip tests for {@link FieldStats}.
+ */
+public class FieldStatsTests extends AbstractXContentTestCase<FieldStats> {
+
+    @Override
+    protected FieldStats createTestInstance() {
+        return createTestFieldStats();
+    }
+
+    /**
+     * Creates random field stats. Metric fields get min/max/mean/median values and numeric top
+     * hit values; other fields leave those statistics null and use string top hit values.
+     */
+    static FieldStats createTestFieldStats() {
+
+        final long count = randomIntBetween(1, 100000);
+        final int cardinality = randomIntBetween(1, (int) count);
+
+        final boolean isMetric = randomBoolean();
+        Double min = null;
+        Double max = null;
+        Double mean = null;
+        Double median = null;
+        if (isMetric) {
+            // Min and max are either both fractional or both whole numbers
+            final boolean fractional = randomBoolean();
+            if (fractional) {
+                min = randomDouble();
+                max = randomDouble();
+            } else {
+                min = (double) randomInt();
+                max = (double) randomInt();
+            }
+            mean = randomDouble();
+            median = randomDouble();
+        }
+
+        // At most 10 top hits, and never more than the number of distinct values
+        final int numTopHits = Math.min(10, cardinality);
+        final List<Map<String, Object>> topHits = new ArrayList<>();
+        for (int hit = 0; hit < numTopHits; ++hit) {
+            final Object value;
+            if (isMetric) {
+                if (randomBoolean()) {
+                    value = randomDouble();
+                } else {
+                    value = (double) randomInt();
+                }
+            } else {
+                value = randomAlphaOfLength(20);
+            }
+            final Map<String, Object> topHit = new LinkedHashMap<>();
+            topHit.put("value", value);
+            topHit.put("count", randomIntBetween(1, cardinality));
+            topHits.add(topHit);
+        }
+
+        return new FieldStats(count, cardinality, min, max, mean, median, topHits);
+    }
+
+    @Override
+    protected FieldStats doParseInstance(XContentParser parser) {
+        return FieldStats.PARSER.apply(parser, null);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+
+    @Override
+    protected Predicate<String> getRandomFieldsExcludeFilter() {
+        // Do not insert random fields anywhere under the top hits
+        final String topHitsName = FieldStats.TOP_HITS.getPreferredName();
+        return path -> path.contains(topHitsName);
+    }
+}

+ 127 - 0
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/filestructurefinder/FileStructureTests.java

@@ -0,0 +1,127 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.filestructurefinder;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.LinkedHashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.function.Predicate;
+
+/**
+ * X-Content round-trip tests for {@link FileStructure}.
+ */
+public class FileStructureTests extends AbstractXContentTestCase<FileStructure> {
+
+    @Override
+    protected FileStructure createTestInstance() {
+        return createTestFileStructure();
+    }
+
+    /**
+     * Creates a random file structure. Format-specific settings are only populated for the
+     * randomly chosen format; the ingest pipeline and field stats are each optional.
+     *
+     * @return a randomly populated {@link FileStructure}
+     */
+    public static FileStructure createTestFileStructure() {
+
+        FileStructure.Format format = randomFrom(EnumSet.allOf(FileStructure.Format.class));
+
+        FileStructure.Builder builder = new FileStructure.Builder(format);
+
+        int numLinesAnalyzed = randomIntBetween(2, 10000);
+        builder.setNumLinesAnalyzed(numLinesAnalyzed);
+        int numMessagesAnalyzed = randomIntBetween(1, numLinesAnalyzed);
+        builder.setNumMessagesAnalyzed(numMessagesAnalyzed);
+        builder.setSampleStart(randomAlphaOfLength(1000));
+
+        String charset = randomFrom(Charset.availableCharsets().keySet());
+        builder.setCharset(charset);
+        // A byte order marker flag is only set for charsets whose name starts with "UTF"
+        if (charset.toUpperCase(Locale.ROOT).startsWith("UTF")) {
+            builder.setHasByteOrderMarker(randomBoolean());
+        }
+
+        // Fewer messages than lines means at least one message spanned multiple lines
+        if (numMessagesAnalyzed < numLinesAnalyzed) {
+            builder.setMultilineStartPattern(randomAlphaOfLength(100));
+        }
+        if (randomBoolean()) {
+            builder.setExcludeLinesPattern(randomAlphaOfLength(100));
+        }
+
+        if (format == FileStructure.Format.DELIMITED) {
+            builder.setColumnNames(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
+            builder.setHasHeaderRow(randomBoolean());
+            builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
+            builder.setQuote(randomFrom('"', '\''));
+        }
+
+        if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT) {
+            builder.setGrokPattern(randomAlphaOfLength(100));
+        }
+
+        // Semi-structured text always gets timestamp details; other formats get them at random
+        if (format == FileStructure.Format.SEMI_STRUCTURED_TEXT || randomBoolean()) {
+            builder.setTimestampField(randomAlphaOfLength(10));
+            builder.setJodaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
+            builder.setJavaTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false)));
+            builder.setNeedClientTimezone(randomBoolean());
+        }
+
+        Map<String, Object> mappings = new TreeMap<>();
+        for (String field : generateRandomStringArray(5, 20, false, false)) {
+            mappings.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10)));
+        }
+        builder.setMappings(mappings);
+
+        if (randomBoolean()) {
+            Map<String, Object> ingestPipeline = new LinkedHashMap<>();
+            for (String field : generateRandomStringArray(5, 20, false, false)) {
+                ingestPipeline.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10)));
+            }
+            // Set the ingest pipeline itself; passing it to setMappings would overwrite the mappings
+            // built above and leave the ingest pipeline untested
+            builder.setIngestPipeline(ingestPipeline);
+        }
+
+        if (randomBoolean()) {
+            Map<String, FieldStats> fieldStats = new TreeMap<>();
+            for (String field : generateRandomStringArray(5, 20, false, false)) {
+                fieldStats.put(field, FieldStatsTests.createTestFieldStats());
+            }
+            builder.setFieldStats(fieldStats);
+        }
+
+        builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false)));
+
+        return builder.build();
+    }
+
+    @Override
+    protected FileStructure doParseInstance(XContentParser parser) {
+        return FileStructure.PARSER.apply(parser, null).build();
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+
+    @Override
+    protected Predicate<String> getRandomFieldsExcludeFilter() {
+        // unknown fields are only guaranteed to be ignored at the top level - below this several data
+        // structures (e.g. mappings, ingest pipeline, field stats) will preserve arbitrary fields
+        return field -> !field.isEmpty();
+    }
+}

+ 53 - 0
docs/java-rest/high-level/ml/find-file-structure.asciidoc

@@ -0,0 +1,53 @@
+--
+:api: find-file-structure
+:request: FindFileStructureRequest
+:response: FindFileStructureResponse
+--
+[id="{upid}-{api}"]
+=== Find File Structure API
+
+The Find File Structure API can be used to find the structure of a text file
+and other information that will be useful when importing its contents into an
+{es} index. It accepts a +{request}+ object and responds
+with a +{response}+ object.
+
+[id="{upid}-{api}-request"]
+==== Find File Structure Request
+
+A sample from the beginning of the file (or the entire file contents if
+it's small) must be added to the +{request}+ object using the
+`FindFileStructureRequest#setSample` method.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-request]
+--------------------------------------------------
+<1> Create a new `FindFileStructureRequest` object
+<2> Add the contents of `anInterestingFile` to the request
+
+==== Optional Arguments
+
+The following arguments are optional.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-request-options]
+--------------------------------------------------
+<1> Set the maximum number of lines to sample (the entire sample will be
+    used if it contains fewer lines)
+<2> Request that an explanation of the analysis be returned in the response
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Find File Structure Response
+
+A +{response}+ contains information about the file structure,
+as well as mappings and an ingest pipeline that could be used
+to index the contents into {es}.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-response]
+--------------------------------------------------
+<1> The `FileStructure` object contains the structure information