
A random-random test for time-series data (#132556)

* First test case in prototype messy test file

* [CI] Auto commit changes from spotless

* First two randomized test cases

* smol cleanup

* [CI] Auto commit changes from spotless

* cleanup and ready for first check

* Address comments

* addressing comments

* more addressing comments

* include values check that guarantees avg, count are off

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Pablo 2 months ago
commit 84ba58332c

+ 4 - 1
test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java

@@ -51,6 +51,7 @@ public enum FieldType {
     TEXT("text"),
     IP("ip"),
     CONSTANT_KEYWORD("constant_keyword"),
+    PASSTHROUGH("passthrough"), // For now, this field type does not have a default generator.
     WILDCARD("wildcard"),
     MATCH_ONLY_TEXT("match_only_text");
 
@@ -81,6 +82,7 @@ public enum FieldType {
             case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator();
             case WILDCARD -> new WildcardFieldDataGenerator(dataSource);
             case MATCH_ONLY_TEXT -> new MatchOnlyTextFieldDataGenerator(dataSource);
+            case PASSTHROUGH -> throw new IllegalArgumentException("Passthrough field type does not have a default generator");
         };
     }
 
@@ -104,8 +106,9 @@ public enum FieldType {
             case "ip" -> FieldType.IP;
             case "constant_keyword" -> FieldType.CONSTANT_KEYWORD;
             case "wildcard" -> FieldType.WILDCARD;
+            case "passthrough" -> FieldType.PASSTHROUGH;
             case "match_only_text" -> FieldType.MATCH_ONLY_TEXT;
-            default -> null;
+            default -> throw new IllegalArgumentException("Unknown field type: " + name);
         };
     }
 

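With this change, the name-based lookup fails fast on an unknown field type name instead of returning null. A minimal caller-side sketch of the new behavior; note that fromName is an assumed name for the enclosing lookup method (the hunk does not show its signature), and "no_such_type" is purely illustrative:

    // Sketch under assumptions: the switch above lives in a static lookup method,
    // here assumed to be FieldType.fromName(String name).
    FieldType ip = FieldType.fromName("ip");                  // resolves to FieldType.IP, as before
    try {
        FieldType bogus = FieldType.fromName("no_such_type"); // illustrative unknown name
    } catch (IllegalArgumentException e) {
        // New behavior: "Unknown field type: no_such_type" is reported here;
        // previously the lookup silently returned null.
    }
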
+ 0 - 1
test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java

@@ -67,7 +67,6 @@ public class MappingGenerator {
         if (specification.fullyDynamicMapping()) {
             // Has to be "true" for fully dynamic mapping
             topLevelMappingParameters.remove("dynamic");
-
             return new Mapping(rawMapping, lookup);
         }
 

+ 7 - 2
test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSource.java

@@ -46,7 +46,12 @@ public class DataSource {
                 return response;
             }
         }
-
-        throw new IllegalStateException("Request is not supported by data source");
+        throw new IllegalStateException(
+            "Request is not supported by data source. Request: "
+                + request
+                + "\nAvailable handlers: "
+                + handlers.stream().map(handler -> handler.getClass().getName()).toList()
+        );
     }
 }

+ 1 - 0
test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java

@@ -49,6 +49,7 @@ public class DefaultMappingParametersHandler implements DataSourceHandler {
             case CONSTANT_KEYWORD -> constantKeywordMapping();
             case WILDCARD -> wildcardMapping();
             case MATCH_ONLY_TEXT -> matchOnlyTextMapping();
+            case PASSTHROUGH -> throw new IllegalArgumentException("Unsupported field type: " + fieldType);
         });
     }
 

+ 10 - 4
test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java

@@ -12,8 +12,10 @@ package org.elasticsearch.datageneration.datasource;
 import org.elasticsearch.datageneration.FieldType;
 import org.elasticsearch.test.ESTestCase;
 
+import java.util.Arrays;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import static org.elasticsearch.test.ESTestCase.randomDouble;
 import static org.elasticsearch.test.ESTestCase.randomIntBetween;
@@ -77,13 +79,17 @@ public class DefaultObjectGenerationHandler implements DataSourceHandler {
 
     // UNSIGNED_LONG is excluded because it is mapped as long
     // and values larger than long fail to parse.
-    private static final Set<FieldType> EXCLUDED_FROM_DYNAMIC_MAPPING = Set.of(FieldType.UNSIGNED_LONG);
+    private static final Set<FieldType> EXCLUDED_FROM_DYNAMIC_MAPPING = Set.of(FieldType.UNSIGNED_LONG, FieldType.PASSTHROUGH);
+    private static final Set<FieldType> ALLOWED_FIELD_TYPES = Arrays.stream(FieldType.values())
+        .filter(fieldType -> EXCLUDED_FROM_DYNAMIC_MAPPING.contains(fieldType) == false)
+        .collect(Collectors.toSet());
 
     @Override
     public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
-        return new DataSourceResponse.FieldTypeGenerator(
-            () -> new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(ESTestCase.randomFrom(FieldType.values()).toString())
-        );
+        return new DataSourceResponse.FieldTypeGenerator(() -> {
+            var fieldType = ESTestCase.randomFrom(ALLOWED_FIELD_TYPES);
+            return new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(fieldType.toString());
+        });
     }
 
     @Override

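The allowed set above is derived by filtering the exclusion set out of FieldType.values(). Since FieldType is an enum, an equivalent formulation (a sketch, not what the patch uses; requires java.util.EnumSet) is:

    // Sketch of an equivalent derivation; behavior is identical to the stream filter above.
    Set<FieldType> allowed = EnumSet.complementOf(EnumSet.of(FieldType.UNSIGNED_LONG, FieldType.PASSTHROUGH));
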
+ 7 - 1
test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGenerationTests.java

@@ -31,6 +31,7 @@ import org.elasticsearch.xpack.unsignedlong.UnsignedLongMapperPlugin;
 import org.elasticsearch.xpack.wildcard.Wildcard;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.Optional;
@@ -94,11 +95,16 @@ public class DataGenerationTests extends ESTestCase {
                 return testChildFieldGenerator;
             }
 
+            private static final FieldType[] SUPPORTED_FIELD_TYPES = Arrays.stream(FieldType.values())
+                .filter(fieldType -> fieldType != FieldType.PASSTHROUGH)
+                .toArray(FieldType[]::new);
+
             @Override
             public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
                 return new DataSourceResponse.FieldTypeGenerator(
                     () -> new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(
-                        FieldType.values()[generatedFields++ % FieldType.values().length].toString()
+                        SUPPORTED_FIELD_TYPES[generatedFields++ % SUPPORTED_FIELD_TYPES.length].toString()
                     )
                 );
 

+ 2 - 0
x-pack/plugin/esql/build.gradle

@@ -35,6 +35,8 @@ dependencies {
   compileOnly project(':modules:lang-painless:spi')
   compileOnly project(xpackModule('esql-core'))
   compileOnly project(xpackModule('ml'))
+  compileOnly project(path: xpackModule('mapper-aggregate-metric'))
+  compileOnly project(path: xpackModule('downsample'))
   implementation project(xpackModule('kql'))
   implementation project('compute')
   implementation project('compute:ann')

+ 278 - 0
x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/RandomizedTimeSeriesIT.java

@@ -0,0 +1,278 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.action;
+
+import org.elasticsearch.Build;
+import org.elasticsearch.action.DocWriteRequest;
+import org.elasticsearch.action.admin.indices.template.put.TransportPutComposableIndexTemplateAction;
+import org.elasticsearch.cluster.metadata.ComposableIndexTemplate;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.datastreams.DataStreamsPlugin;
+import org.elasticsearch.index.IndexMode;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.aggregatemetric.AggregateMetricMapperPlugin;
+import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
+import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.equalTo;
+
+@SuppressWarnings("unchecked")
+public class RandomizedTimeSeriesIT extends AbstractEsqlIntegTestCase {
+
+    private static final Long NUM_DOCS = 2000L;
+    private static final String DATASTREAM_NAME = "tsit_ds";
+    private List<XContentBuilder> documents = null;
+    private TSDataGenerationHelper dataGenerationHelper;
+
+    List<List<Object>> consumeRows(EsqlQueryResponse resp) {
+        List<List<Object>> rows = new ArrayList<>();
+        resp.rows().forEach(rowIter -> {
+            List<Object> row = new ArrayList<>();
+            rowIter.forEach(row::add);
+            rows.add(row);
+        });
+        return rows;
+    }
+
+    Map<List<String>, List<Map<String, Object>>> groupedRows(
+        List<XContentBuilder> docs,
+        List<String> groupingAttributes,
+        int secondsInWindow
+    ) {
+        Map<List<String>, List<Map<String, Object>>> groupedMap = new HashMap<>();
+        for (XContentBuilder doc : docs) {
+            Map<String, Object> docMap = XContentHelper.convertToMap(BytesReference.bytes(doc), false, XContentType.JSON).v2();
+            @SuppressWarnings("unchecked")
+            List<String> groupingPairs = groupingAttributes.stream()
+                .map(
+                    attr -> Tuple.tuple(
+                        attr,
+                        ((Map<String, Object>) docMap.getOrDefault("attributes", Map.of())).getOrDefault(attr, "").toString()
+                    )
+                )
+                .filter(val -> val.v2().isEmpty() == false) // Filter out empty values
+                .map(tup -> tup.v1() + ":" + tup.v2())
+                .toList();
+            long timeBucketStart = windowStart(docMap.get("@timestamp"), secondsInWindow);
+            var keyList = new ArrayList<>(groupingPairs);
+            keyList.add(Long.toString(timeBucketStart));
+            groupedMap.computeIfAbsent(keyList, k -> new ArrayList<>()).add(docMap);
+        }
+        return groupedMap;
+    }
+
+    static Long windowStart(Object timestampCell, int secondsInWindow) {
+        // The integer division below throws away the remainder of dividing by secondsInWindow,
+        // rounding the timestamp down to the nearest multiple of secondsInWindow (the window start).
+        var timestampSeconds = Instant.parse((String) timestampCell).toEpochMilli() / 1000;
+        return (timestampSeconds / secondsInWindow) * secondsInWindow;
+    }
+
+    enum Agg {
+        MAX,
+        MIN,
+        AVG,
+        SUM
+    }
+
+    static List<Integer> valuesInWindow(List<Map<String, Object>> pointsInGroup, String metricName) {
+        @SuppressWarnings("unchecked")
+        var values = pointsInGroup.stream()
+            .map(doc -> ((Map<String, Integer>) doc.get("metrics")).get(metricName))
+            .filter(Objects::nonNull)
+            .collect(Collectors.toList());
+        return values;
+    }
+
+    static Double aggregateValuesInWindow(List<Integer> values, Agg agg) {
+        if (values.isEmpty()) {
+            throw new IllegalArgumentException("No values to aggregate for " + agg + " operation");
+        }
+        return switch (agg) {
+            case MAX -> Double.valueOf(values.stream().max(Integer::compareTo).orElseThrow());
+            case MIN -> Double.valueOf(values.stream().min(Integer::compareTo).orElseThrow());
+            case AVG -> values.stream().mapToDouble(Integer::doubleValue).average().orElseThrow();
+            case SUM -> values.stream().mapToDouble(Integer::doubleValue).sum();
+        };
+    }
+
+    static List<String> getRowKey(List<Object> row, List<String> groupingAttributes, int timestampIndex) {
+        List<String> rowKey = new ArrayList<>();
+        for (int i = 0; i < groupingAttributes.size(); i++) {
+            Object value = row.get(i + timestampIndex + 1);
+            if (value != null) {
+                rowKey.add(groupingAttributes.get(i) + ":" + value);
+            }
+        }
+        rowKey.add(Long.toString(Instant.parse((String) row.get(timestampIndex)).toEpochMilli() / 1000));
+        return rowKey;
+    }
+
+    @Override
+    public EsqlQueryResponse run(EsqlQueryRequest request) {
+        assumeTrue("time series available in snapshot builds only", Build.current().isSnapshot());
+        return super.run(request);
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return List.of(DataStreamsPlugin.class, LocalStateCompositeXPackPlugin.class, AggregateMetricMapperPlugin.class, EsqlPlugin.class);
+    }
+
+    void putTSDBIndexTemplate(List<String> patterns, @Nullable String mappingString) throws IOException {
+        Settings.Builder settingsBuilder = Settings.builder();
+        // Ensure it will be a TSDB data stream
+        settingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES);
+        CompressedXContent mappings = mappingString == null ? null : CompressedXContent.fromJSON(mappingString);
+        TransportPutComposableIndexTemplateAction.Request request = new TransportPutComposableIndexTemplateAction.Request(
+            RandomizedTimeSeriesIT.DATASTREAM_NAME
+        );
+        request.indexTemplate(
+            ComposableIndexTemplate.builder()
+                .indexPatterns(patterns)
+                .template(org.elasticsearch.cluster.metadata.Template.builder().settings(settingsBuilder).mappings(mappings))
+                .metadata(null)
+                .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate())
+                .build()
+        );
+        assertAcked(client().execute(TransportPutComposableIndexTemplateAction.TYPE, request));
+    }
+
+    @Before
+    public void populateIndex() throws IOException {
+        dataGenerationHelper = new TSDataGenerationHelper(NUM_DOCS);
+        final XContentBuilder builder = XContentFactory.jsonBuilder();
+        builder.map(dataGenerationHelper.mapping.raw());
+        final String jsonMappings = Strings.toString(builder);
+
+        putTSDBIndexTemplate(List.of(DATASTREAM_NAME + "*"), jsonMappings);
+        // Now we can push data into the data stream.
+        for (int i = 0; i < NUM_DOCS; i++) {
+            var document = dataGenerationHelper.generateDocument(Map.of());
+            if (documents == null) {
+                documents = new ArrayList<>();
+            }
+            documents.add(document);
+            var indexRequest = client().prepareIndex(DATASTREAM_NAME).setOpType(DocWriteRequest.OpType.CREATE).setSource(document);
+            indexRequest.setRefreshPolicy(org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE);
+            indexRequest.get();
+        }
+    }
+
+    /**
+     * This test validates gauge-metric aggregation with grouping by time bucket and a subset of dimensions.
+     * The subset of dimensions is chosen at random from the dimensions present in the data.
+     * The test reads the values, max, min, count, and sum of the gauge metric from the query results and
+     * checks them against the same values calculated directly from the documents in each group.
+     * (The avg check is disabled pending a fix; see the TODO below.)
+     */
+    public void testGroupBySubset() {
+        var dimensions = ESTestCase.randomNonEmptySubsetOf(dataGenerationHelper.attributesForMetrics);
+        var dimensionsStr = dimensions.stream().map(d -> "attributes." + d).collect(Collectors.joining(", "));
+        try (EsqlQueryResponse resp = run(String.format(Locale.ROOT, """
+            TS %s
+            | STATS
+                values(metrics.gauge_hdd.bytes.used),
+                max(max_over_time(metrics.gauge_hdd.bytes.used)),
+                min(min_over_time(metrics.gauge_hdd.bytes.used)),
+                sum(count_over_time(metrics.gauge_hdd.bytes.used)),
+                sum(sum_over_time(metrics.gauge_hdd.bytes.used)),
+                avg(avg_over_time(metrics.gauge_hdd.bytes.used))
+                BY tbucket=bucket(@timestamp, 1 minute), %s
+            | SORT tbucket
+            | LIMIT 1000""", DATASTREAM_NAME, dimensionsStr))) {
+            var groups = groupedRows(documents, dimensions, 60);
+            List<List<Object>> rows = consumeRows(resp);
+            for (List<Object> row : rows) {
+                var rowKey = getRowKey(row, dimensions, 6);
+                var docValues = valuesInWindow(groups.get(rowKey), "gauge_hdd.bytes.used");
+                if (row.get(0) instanceof List) {
+                    assertThat(
+                        (Collection<Long>) row.get(0),
+                        containsInAnyOrder(docValues.stream().mapToLong(Integer::longValue).boxed().toArray(Long[]::new))
+                    );
+                } else {
+                    assertThat(row.get(0), equalTo(docValues.getFirst().longValue()));
+                }
+                assertThat(row.get(1), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MAX))));
+                assertThat(row.get(2), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MIN))));
+                assertThat(row.get(3), equalTo((long) docValues.size()));
+                assertThat(row.get(4), equalTo(aggregateValuesInWindow(docValues, Agg.SUM).longValue()));
+                // TODO: fix then enable
+                // assertThat(row.get(5), equalTo(aggregateValuesInWindow(docValues, Agg.SUM) / (double) docValues.size()));
+            }
+        }
+    }
+
+    /**
+     * This test validates gauge-metric aggregation with grouping by time bucket only.
+     * The test reads the values, max, min, count, and sum of the gauge metric from the query results and
+     * checks them against the same values calculated directly from the documents in each group.
+     * Because there is no grouping by dimensions, there is only one group per time bucket.
+     * (The avg check is disabled pending a fix; see the TODO below.)
+     */
+    public void testGroupByNothing() {
+        try (EsqlQueryResponse resp = run(String.format(Locale.ROOT, """
+            TS %s
+            | STATS
+                values(metrics.gauge_hdd.bytes.used),
+                max(max_over_time(metrics.gauge_hdd.bytes.used)),
+                min(min_over_time(metrics.gauge_hdd.bytes.used)),
+                sum(count_over_time(metrics.gauge_hdd.bytes.used)),
+                sum(sum_over_time(metrics.gauge_hdd.bytes.used)),
+                avg(avg_over_time(metrics.gauge_hdd.bytes.used))
+                BY tbucket=bucket(@timestamp, 1 minute)
+            | SORT tbucket
+            | LIMIT 1000""", DATASTREAM_NAME))) {
+            List<List<Object>> rows = consumeRows(resp);
+            var groups = groupedRows(documents, List.of(), 60);
+            for (List<Object> row : rows) {
+                var windowStart = windowStart(row.get(6), 60);
+                List<Integer> docValues = valuesInWindow(groups.get(List.of(Long.toString(windowStart))), "gauge_hdd.bytes.used");
+                if (row.get(0) instanceof List) {
+                    assertThat(
+                        (Collection<Long>) row.get(0),
+                        containsInAnyOrder(docValues.stream().mapToLong(Integer::longValue).boxed().toArray(Long[]::new))
+                    );
+                } else {
+                    assertThat(row.get(0), equalTo(docValues.getFirst().longValue()));
+                }
+                assertThat(row.get(1), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MAX))));
+                assertThat(row.get(2), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MIN))));
+                assertThat(row.get(3), equalTo((long) docValues.size()));
+                assertThat(row.get(4), equalTo(aggregateValuesInWindow(docValues, Agg.SUM).longValue()));
+                // TODO: fix then enable
+                // assertThat(row.get(5), equalTo(aggregateValuesInWindow(docValues, Agg.SUM) / (double) docValues.size()));
+            }
+        }
+    }
+}

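A worked example of the bucketing arithmetic in windowStart above, as a self-contained sketch (the timestamp is illustrative):

    import java.time.Instant;

    class WindowStartExample {
        public static void main(String[] args) {
            // 2025-01-01T00:01:30Z is 1735689690 seconds since the epoch.
            long seconds = Instant.parse("2025-01-01T00:01:30Z").toEpochMilli() / 1000;
            // Integer division by 60 drops the 30-second remainder; multiplying back
            // rounds down to the start of the one-minute window.
            long bucketStart = (seconds / 60) * 60;
            System.out.println(bucketStart);                        // 1735689660
            System.out.println(Instant.ofEpochSecond(bucketStart)); // 2025-01-01T00:01:00Z
        }
    }

groupedRows and getRowKey both append this bucket start (as a string) to the list of "attribute:value" pairs, which is what lets a locally computed document group be matched to the corresponding ES|QL result row.
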
+ 136 - 0
x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/TSDataGenerationHelper.java

@@ -0,0 +1,136 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.action;
+
+import org.elasticsearch.common.Randomness;
+import org.elasticsearch.common.network.NetworkAddress;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.datageneration.DataGeneratorSpecification;
+import org.elasticsearch.datageneration.DocumentGenerator;
+import org.elasticsearch.datageneration.FieldType;
+import org.elasticsearch.datageneration.Mapping;
+import org.elasticsearch.datageneration.MappingGenerator;
+import org.elasticsearch.datageneration.Template;
+import org.elasticsearch.datageneration.TemplateGenerator;
+import org.elasticsearch.datageneration.fields.PredefinedField;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+
+import java.io.IOException;
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+class TSDataGenerationHelper {
+
+    private static Object randomDimensionValue(String dimensionName) {
+        // We use dimensionName to determine the type of the value.
+        var isNumeric = dimensionName.hashCode() % 5 == 0;
+        var isIP = dimensionName.hashCode() % 5 == 1;
+        if (isNumeric) {
+            // Numeric values are sometimes passed as integers and sometimes as strings.
+            return ESTestCase.randomIntBetween(1, 1000);
+        } else if (isIP) {
+            // TODO: Make sure the schema ingests this as an IP address.
+            return NetworkAddress.format(ESTestCase.randomIp(ESTestCase.randomBoolean()));
+        } else {
+            return ESTestCase.randomAlphaOfLengthBetween(1, 20);
+        }
+    }
+
+    TSDataGenerationHelper(long numDocs) {
+        // Metrics coming into our system have a pre-set group of attributes.
+        // Round-tripping list -> set -> list ensures the attribute names are unique.
+        this.numDocs = numDocs;
+        var maxAttributes = (int) Math.sqrt(numDocs);
+        attributesForMetrics = List.copyOf(
+            Set.copyOf(ESTestCase.randomList(1, maxAttributes, () -> ESTestCase.randomAlphaOfLengthBetween(2, 30)))
+        );
+        var maxTimeSeries = (int) Math.sqrt(numDocs);
+        var minTimeSeries = Math.max(1, maxTimeSeries / 4);
+        numTimeSeries = ESTestCase.randomIntBetween(minTimeSeries, maxTimeSeries);
+        // allTimeSeries contains the list of dimension-values for each time series.
+        List<List<Tuple<String, Object>>> allTimeSeries = IntStream.range(0, numTimeSeries).mapToObj(tsIdx -> {
+            List<String> dimensionsInMetric = ESTestCase.randomNonEmptySubsetOf(attributesForMetrics);
+            // TODO: How do we handle the case where there are no dimensions? (i.e. use a regular randomSubsetOf(...))
+            return dimensionsInMetric.stream().map(attr -> new Tuple<>(attr, randomDimensionValue(attr))).collect(Collectors.toList());
+        }).toList();
+
+        spec = DataGeneratorSpecification.builder()
+            .withMaxFieldCountPerLevel(0)
+            .withPredefinedFields(
+                List.of(
+                    new PredefinedField.WithGenerator(
+                        "@timestamp",
+                        FieldType.DATE,
+                        Map.of("type", "date"),
+                        fieldMapping -> ESTestCase.randomInstantBetween(Instant.now().minusSeconds(2 * 60 * 60), Instant.now())
+                    ),
+                    new PredefinedField.WithGenerator(
+                        "attributes",
+                        FieldType.PASSTHROUGH,
+                        Map.of("type", "passthrough", "time_series_dimension", true, "dynamic", true, "priority", 1),
+                        (ignored) -> {
+                            var tsDimensions = ESTestCase.randomFrom(allTimeSeries);
+                            return tsDimensions.stream().collect(Collectors.toMap(Tuple::v1, Tuple::v2));
+                        }
+                    ),
+                    new PredefinedField.WithGenerator(
+                        "metrics",
+                        FieldType.PASSTHROUGH,
+                        Map.of("type", "passthrough", "dynamic", true, "priority", 10),
+                        (ignored) -> Map.of("gauge_hdd.bytes.used", Randomness.get().nextLong(0, 1000000000L))
+                    )
+                )
+            )
+            .build();
+
+        documentGenerator = new DocumentGenerator(spec);
+        template = new TemplateGenerator(spec).generate();
+        mapping = new MappingGenerator(spec).generate(template);
+        var doc = mapping.raw().get("_doc");
+        @SuppressWarnings("unchecked")
+        Map<String, Object> docMap = ((Map<String, Object>) doc);
+        // Add dynamic templates to the mapping
+        docMap.put(
+            "dynamic_templates",
+            List.of(
+                Map.of(
+                    "counter_long",
+                    Map.of("path_match", "metrics.counter_*", "mapping", Map.of("type", "long", "time_series_metric", "counter"))
+                ),
+                Map.of(
+                    "gauge_long",
+                    Map.of("path_match", "metrics.gauge_*", "mapping", Map.of("type", "long", "time_series_metric", "gauge"))
+                )
+                // TODO: Add double and other metric types
+            )
+        );
+    }
+
+    final DataGeneratorSpecification spec;
+    final DocumentGenerator documentGenerator;
+    final Template template;
+    final Mapping mapping;
+    final int numTimeSeries;
+    final long numDocs;
+    final List<String> attributesForMetrics;
+
+    XContentBuilder generateDocument(Map<String, Object> additionalFields) throws IOException {
+        var doc = XContentFactory.jsonBuilder();
+        var generated = documentGenerator.generate(template, mapping);
+        generated.putAll(additionalFields);
+
+        doc.map(generated);
+        return doc;
+    }
+}

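The generated mapping depends on the injected dynamic_templates: a generated field such as metrics.gauge_hdd.bytes.used matches path_match "metrics.gauge_*" and is therefore dynamically mapped as a long gauge, while fields under attributes become time-series dimensions via the passthrough object. A minimal usage sketch, mirroring populateIndex in RandomizedTimeSeriesIT above (exception handling omitted):

    // Sketch: derive the index mapping and one random document from the helper.
    TSDataGenerationHelper helper = new TSDataGenerationHelper(100L);
    XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
    mappingBuilder.map(helper.mapping.raw());   // raw mapping, including the injected dynamic_templates
    String mappingJson = Strings.toString(mappingBuilder);
    XContentBuilder doc = helper.generateDocument(Map.of()); // contains @timestamp, attributes.*, metrics.*
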
+ 4 - 1
x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java

@@ -89,7 +89,10 @@ public class DataGenerationHelper {
                         @Override
                         public DataSourceResponse.FieldTypeGenerator.FieldTypeInfo get() {
                             // Base set of field types
-                            var options = Arrays.stream(FieldType.values()).map(FieldType::toString).collect(Collectors.toSet());
+                            var options = Arrays.stream(FieldType.values())
+                                .filter(ft -> ft != FieldType.PASSTHROUGH)
+                                .map(FieldType::toString)
+                                .collect(Collectors.toSet());
                             // Custom types coming from specific functionality modules
 
                             if (shapesGenerated < 5) {