Browse Source

Inject the `host.name` field mapping only if required for `logsdb` index mode (#114573) (#114916)

When the index mode is `logsdb`, we check whether the index sort settings
include a `host.name` field and decide whether to inject that field into the
mapping depending on whether it is present. By default, `host.name` is required
for sorting in LogsDB. Injecting the `host.name` field only when strictly
required reduces the chance of errors at mapping or template composition time.
A user who wants to override the index sort settings without including a
`host.name` field can do so without finding an additional, automatically
injected `host.name` field in the mappings. If users override the sort settings
and a `host.name` field is not included, we do not need to inject such a field,
since sorting no longer requires it.

As a result of this change we have the following:
* the user does not provide any index sorting configuration: we are responsible for injecting the default sort fields and their mapping (for `logsdb`)
* the user explicitly provides a non-empty index sorting configuration: the user is also responsible for providing correct mappings and we do not modify index sorting or mappings

Note also that all sort settings (`index.sort.*`) are `final`, which means that doing
this check once, when mappings are merged at template composition time, is enough.

(cherry picked from commit 9bf6e3b0baf4296125f2b8d8ab2726f3a4614e3f)

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Salvatore Campagna 1 year ago
parent
commit
0cab608638

+ 1 - 1
server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java

@@ -1373,7 +1373,7 @@ public class MetadataCreateIndexService {
         MapperService mapperService = indexService.mapperService();
         IndexMode indexMode = indexService.getIndexSettings() != null ? indexService.getIndexSettings().getMode() : IndexMode.STANDARD;
         List<CompressedXContent> allMappings = new ArrayList<>();
-        final CompressedXContent defaultMapping = indexMode.getDefaultMapping();
+        final CompressedXContent defaultMapping = indexMode.getDefaultMapping(indexService.getIndexSettings());
         if (defaultMapping != null) {
             allMappings.add(defaultMapping);
         }

+ 30 - 41
server/src/main/java/org/elasticsearch/index/IndexMode.java

@@ -75,7 +75,7 @@ public enum IndexMode {
         }
 
         @Override
-        public CompressedXContent getDefaultMapping() {
+        public CompressedXContent getDefaultMapping(final IndexSettings indexSettings) {
             return null;
         }
 
@@ -171,7 +171,7 @@ public enum IndexMode {
         }
 
         @Override
-        public CompressedXContent getDefaultMapping() {
+        public CompressedXContent getDefaultMapping(final IndexSettings indexSettings) {
             return DEFAULT_TIME_SERIES_TIMESTAMP_MAPPING;
         }
 
@@ -249,8 +249,10 @@ public enum IndexMode {
         }
 
         @Override
-        public CompressedXContent getDefaultMapping() {
-            return DEFAULT_LOGS_TIMESTAMP_MAPPING;
+        public CompressedXContent getDefaultMapping(final IndexSettings indexSettings) {
+            return indexSettings != null && indexSettings.getIndexSortConfig().hasPrimarySortOnField(HOST_NAME)
+                ? DEFAULT_LOGS_TIMESTAMP_MAPPING_WITH_HOSTNAME
+                : DEFAULT_TIME_SERIES_TIMESTAMP_MAPPING;
         }
 
         @Override
@@ -308,6 +310,8 @@ public enum IndexMode {
         }
     };
 
+    private static final String HOST_NAME = "host.name";
+
     private static void validateTimeSeriesSettings(Map<Setting<?>, Object> settings) {
         settingRequiresTimeSeries(settings, IndexMetadata.INDEX_ROUTING_PATH);
         settingRequiresTimeSeries(settings, IndexSettings.TIME_SERIES_START_TIME);
@@ -324,48 +328,33 @@ public enum IndexMode {
         return "[" + IndexSettings.MODE.getKey() + "=time_series]";
     }
 
-    public static final CompressedXContent DEFAULT_TIME_SERIES_TIMESTAMP_MAPPING;
+    private static CompressedXContent createDefaultMapping(boolean includeHostName) throws IOException {
+        return new CompressedXContent((builder, params) -> {
+            builder.startObject(MapperService.SINGLE_MAPPING_NAME)
+                .startObject(DataStreamTimestampFieldMapper.NAME)
+                .field("enabled", true)
+                .endObject()
+                .startObject("properties")
+                .startObject(DataStreamTimestampFieldMapper.DEFAULT_PATH)
+                .field("type", DateFieldMapper.CONTENT_TYPE)
+                .endObject();
+
+            if (includeHostName) {
+                builder.startObject(HOST_NAME).field("type", KeywordFieldMapper.CONTENT_TYPE).field("ignore_above", 1024).endObject();
+            }
 
-    static {
-        try {
-            DEFAULT_TIME_SERIES_TIMESTAMP_MAPPING = new CompressedXContent(
-                ((builder, params) -> builder.startObject(MapperService.SINGLE_MAPPING_NAME)
-                    .startObject(DataStreamTimestampFieldMapper.NAME)
-                    .field("enabled", true)
-                    .endObject()
-                    .startObject("properties")
-                    .startObject(DataStreamTimestampFieldMapper.DEFAULT_PATH)
-                    .field("type", DateFieldMapper.CONTENT_TYPE)
-                    .field("ignore_malformed", "false")
-                    .endObject()
-                    .endObject()
-                    .endObject())
-            );
-        } catch (IOException e) {
-            throw new AssertionError(e);
-        }
+            return builder.endObject().endObject();
+        });
     }
 
-    public static final CompressedXContent DEFAULT_LOGS_TIMESTAMP_MAPPING;
+    private static final CompressedXContent DEFAULT_TIME_SERIES_TIMESTAMP_MAPPING;
+
+    private static final CompressedXContent DEFAULT_LOGS_TIMESTAMP_MAPPING_WITH_HOSTNAME;
 
     static {
         try {
-            DEFAULT_LOGS_TIMESTAMP_MAPPING = new CompressedXContent(
-                ((builder, params) -> builder.startObject(MapperService.SINGLE_MAPPING_NAME)
-                    .startObject(DataStreamTimestampFieldMapper.NAME)
-                    .field("enabled", true)
-                    .endObject()
-                    .startObject("properties")
-                    .startObject(DataStreamTimestampFieldMapper.DEFAULT_PATH)
-                    .field("type", DateFieldMapper.CONTENT_TYPE)
-                    .endObject()
-                    .startObject("host.name")
-                    .field("type", KeywordFieldMapper.CONTENT_TYPE)
-                    .field("ignore_above", 1024)
-                    .endObject()
-                    .endObject()
-                    .endObject())
-            );
+            DEFAULT_TIME_SERIES_TIMESTAMP_MAPPING = createDefaultMapping(false);
+            DEFAULT_LOGS_TIMESTAMP_MAPPING_WITH_HOSTNAME = createDefaultMapping(true);
         } catch (IOException e) {
             throw new AssertionError(e);
         }
@@ -421,7 +410,7 @@ public enum IndexMode {
      * Get default mapping for this index or {@code null} if there is none.
      */
     @Nullable
-    public abstract CompressedXContent getDefaultMapping();
+    public abstract CompressedXContent getDefaultMapping(IndexSettings indexSettings);
 
     /**
      * Build the {@link FieldMapper} for {@code _id}.

+ 14 - 2
server/src/test/java/org/elasticsearch/index/LogsIndexModeTests.java

@@ -13,14 +13,24 @@ import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.ESTestCase;
 
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.not;
 
 public class LogsIndexModeTests extends ESTestCase {
     public void testLogsIndexModeSetting() {
         assertThat(IndexSettings.MODE.get(buildSettings()), equalTo(IndexMode.LOGSDB));
     }
 
-    public void testSortField() {
+    public void testDefaultHostNameSortField() {
+        final IndexMetadata metadata = IndexSettingsTests.newIndexMeta("test", buildSettings());
+        assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGSDB));
+        final IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY);
+        assertThat(settings.getIndexSortConfig().hasPrimarySortOnField("host.name"), equalTo(true));
+        assertThat(IndexMode.LOGSDB.getDefaultMapping(settings).string(), containsString("host.name"));
+    }
+
+    public void testCustomSortField() {
         final Settings sortSettings = Settings.builder()
             .put(buildSettings())
             .put(IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey(), "agent_id")
@@ -29,7 +39,9 @@ public class LogsIndexModeTests extends ESTestCase {
         assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGSDB));
         final IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY);
         assertThat(settings.getMode(), equalTo(IndexMode.LOGSDB));
-        assertThat("agent_id", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey())));
+        assertThat(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey()), equalTo("agent_id"));
+        assertThat(settings.getIndexSortConfig().hasPrimarySortOnField("host.name"), equalTo(false));
+        assertThat(IndexMode.LOGSDB.getDefaultMapping(settings).string(), not(containsString("host")));
     }
 
     public void testSortMode() {

+ 6 - 2
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java

@@ -302,8 +302,12 @@ public abstract class MapperServiceTestCase extends FieldTypeTestCase {
                 mapperMetrics
             );
 
-            if (applyDefaultMapping && indexSettings.getMode().getDefaultMapping() != null) {
-                mapperService.merge(null, indexSettings.getMode().getDefaultMapping(), MapperService.MergeReason.MAPPING_UPDATE);
+            if (applyDefaultMapping && indexSettings.getMode().getDefaultMapping(indexSettings) != null) {
+                mapperService.merge(
+                    null,
+                    indexSettings.getMode().getDefaultMapping(indexSettings),
+                    MapperService.MergeReason.MAPPING_UPDATE
+                );
             }
 
             return mapperService;

+ 781 - 0
x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/30_logsdb_default_mapping.yml

@@ -280,3 +280,784 @@ create logsdb data stream with timestamp object mapping:
 
   - match: { error.type: "illegal_argument_exception" }
   - match: { error.reason: "composable template [logsdb-index-template] template after composition with component templates [logsdb-mappings] is invalid" }
+
+---
+create logsdb data stream with custom sorting without host.name:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-http-prod ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ agent.id ]
+                sort.order: [ desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                agent.id:
+                  type: keyword
+                host.hostname:
+                  type: keyword
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-http-prod
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-http-prod
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.agent.properties.id.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.hostname.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: null }
+
+---
+create logsdb data stream with custom sorting and host object:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-nginx-prod ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ host.hostname, host.region ]
+                sort.order: [ desc, desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host:
+                  type: object
+                  properties:
+                    ip:
+                      type: ip
+                    hostname:
+                      type: keyword
+                    region:
+                      type: keyword
+                    name:
+                      type: integer
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-nginx-prod
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-nginx-prod
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.ip.type: ip }
+  - match: { .$backing_index.mappings.properties.host.properties.hostname.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.region.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: integer } # Overrides LogsDB injected
+
+---
+create logsdb data stream with custom sorting and dynamically mapped host.name:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-kafka-qa ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ "agent.id", "@timestamp" ]
+                sort.order: [ desc, asc ]
+                mode: logsdb
+            mappings:
+              properties:
+                agent:
+                  type: object
+                  properties:
+                    name:
+                      type: keyword
+                    id:
+                      type: keyword
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-kafka-qa
+  - is_true: acknowledged
+
+  - do:
+      bulk:
+        index: logs-kafka-qa
+        refresh: true
+        body:
+          - { "create": { } }
+          - { "@timestamp": "2022-01-01T00:00:00", agent.name: "foo", agent.id: "foo-568", host: { id: "db8fdcf1-b1e2-444b-8c6a-0466c61dcce4" } }
+          - { "create": { } }
+          - { "@timestamp": "2022-01-01T00:01:00", agent.name: "bar", agent.id: "foo-309", host: { id: "35e1ed10-961e-46c7-83ea-4109c913a1d6" } }
+
+  - do:
+      indices.get_data_stream:
+        name: logs-kafka-qa
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.agent.properties.name.type: keyword }
+  - match: { .$backing_index.mappings.properties.agent.properties.id.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name: null }
+  - match: { .$backing_index.mappings.properties.host.properties.id.type: text }
+
+---
+create logsdb data stream with custom sorting and host.name object:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-nginx-qa ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ "host.name.value", "@timestamp" ]
+                sort.order: [ desc, desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host:
+                  type: object
+                  properties:
+                    name:
+                      type: object
+                      properties:
+                        value:
+                          type: keyword
+                        alias:
+                          type: keyword
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-nginx-qa
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-nginx-qa
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.name.properties.value.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.properties.alias.type: keyword }
+
+---
+create logsdb data stream with default sorting on malformed host.name:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-win-prod ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                mode: logsdb
+            mappings:
+              properties:
+                agent:
+                  type: object
+                  properties:
+                    name:
+                      type: keyword
+                    id:
+                      type: keyword
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-win-prod
+  - is_true: acknowledged
+
+  - do:
+      bulk:
+        index: logs-win-prod
+        refresh: true
+        body:
+          - { "create": { } }
+          - { "@timestamp": "2022-01-01T00:00:00", agent.name: "foo", agent.id: "foo-568", host: { name: 192.168.10.12, id: "e70e91cd-bb3f-43f0-909c-2748e7fdfd54" } }
+          - { "create": { } }
+          - { "@timestamp": "2022-01-01T00:01:00", agent.name: "bar", agent.id: "foo-309", host: { name: 192.168.15.17, id: "ad2e3edb-2c4b-4f12-83dd-255691ed614c" } }
+
+  - do:
+      indices.get_data_stream:
+        name: logs-win-prod
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.agent.properties.name.type: keyword }
+  - match: { .$backing_index.mappings.properties.agent.properties.id.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: keyword } # LogsDB injected
+  - match: { .$backing_index.mappings.properties.host.properties.name.ignore_above: 1024 } # LogsDB injected
+  - match: { .$backing_index.mappings.properties.host.properties.id.type: text }
+
+---
+create logsdb data stream with custom sorting and host.name date field:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-http-prod ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ host.name, host.hostname ]
+                sort.order: [ desc, desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host:
+                  type: object
+                  properties:
+                    hostname:
+                      type: keyword
+                    name:
+                      type: date
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-http-prod
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-http-prod
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.hostname.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: date }
+
+---
+create logsdb data stream with custom sorting and missing host.name field mapping:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-http-qa ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ host.name, host.hostname ]
+                sort.order: [ desc, desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host:
+                  type: object
+                  properties:
+                    hostname:
+                      type: keyword
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-http-qa
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-http-qa
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.hostname.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.ignore_above: 1024 }
+
+---
+create logsdb data stream with custom sorting and host.name field without doc values:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logs-template] has index patterns [logs-*-*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-http-dev ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ "host.name", "@timestamp" ]
+                sort.order: [ desc, desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host:
+                  type: object
+                  properties:
+                    name:
+                      type: keyword
+                      doc_values: false
+
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      catch: bad_request
+      indices.create_data_stream:
+        name: logs-http-dev
+
+  - match: { error.type: "illegal_argument_exception" }
+  - match: { error.reason: "docvalues not found for index sort field:[host.name]" }
+
+---
+create logsdb data stream with incompatible ignore_above on host.name:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logsdb-index-template-ignore-above] has index patterns [logsdb-ignore-above] matching patterns from existing older templates [global]"
+      indices.put_index_template:
+        name: logsdb-index-template-ignore-above
+        body:
+          index_patterns: [ logsdb-ignore-above ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ host.name ]
+                sort.order: [ desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host.name:
+                  type: keyword
+                  ignore_above: 128
+          data_stream: {}
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logsdb-ignore-above
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logsdb-ignore-above
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.ignore_above: 128 }
+
+---
+create logsdb data stream with no sorting and host.name as text:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logsdb-index-template-non-keyword] has index patterns [logsdb-non-keyword] matching patterns from existing older templates [global]"
+      indices.put_index_template:
+        name: logsdb-index-template-non-keyword
+        body:
+          index_patterns: [ logsdb-non-keyword ]
+          priority: 10000
+          template:
+            settings:
+              mode: logsdb
+            mappings:
+              properties:
+                host.name:
+                  type: text
+          data_stream: {}
+  - is_true: acknowledged
+
+  - do:
+      catch: bad_request
+      indices.create_data_stream:
+        name: logsdb-non-keyword
+
+  - match: { error.type: "illegal_argument_exception" }
+  - match: { error.reason: "docvalues not found for index sort field:[host.name]" }
+
+---
+create logsdb data stream without index sorting and ignore_above on host.name:
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        - "index template [logsdb-index-template-ignore-above-override] has index patterns [logsdb-ignore-above-override] matching patterns from existing older templates [global]"
+      indices.put_index_template:
+        name: logsdb-index-template-ignore-above-override
+        body:
+          index_patterns: [ logsdb-ignore-above-override ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                mode: logsdb
+            mappings:
+              properties:
+                host.name:
+                  type: keyword
+                  ignore_above: 128
+          data_stream: {}
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logsdb-ignore-above-override
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logsdb-ignore-above-override
+
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.ignore_above: 128 }
+
+---
+create logsdb data stream with host.name as alias and sorting on it:
+  # `host.name` is mapped as an alias of `host.hostname` and is also configured
+  # as the index sort field; creating the data stream is expected to be rejected.
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        # Allowed warnings are matched against the complete warning text, so the full message is spelled out.
+        - "index template [logsdb-index-template-alias] has index patterns [logsdb-alias] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logsdb-index-template-alias] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logsdb-index-template-alias
+        body:
+          index_patterns: [ logsdb-alias ]
+          template:
+            settings:
+              index:
+                sort.field: [ host.name ]
+                sort.order: [ desc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host.name:
+                  type: alias
+                  path: host.hostname
+                host.hostname:
+                  type: keyword
+          data_stream: {}
+  - do:
+      catch: bad_request
+      indices.create_data_stream:
+        name: logsdb-alias
+
+  # The failure must name the alias as an invalid sort field.
+  - match: { error.type: "illegal_argument_exception" }
+  - match: { error.reason: "Cannot use alias [host.name] as an index sort field" }
+
+---
+create logsdb data stream with multi-fields on host.name:
+  # `host.name` is a text field with a `keyword` sub-field, and the index is
+  # sorted on that sub-field (`host.name.keyword`). Creation is expected to
+  # succeed and the sub-field must be present in the backing index mapping.
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        # Allowed warnings are matched against the complete warning text, so the full message is spelled out.
+        - "index template [logsdb-index-template-multi-fields] has index patterns [logsdb-multi-fields] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logsdb-index-template-multi-fields] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logsdb-index-template-multi-fields
+        body:
+          index_patterns: [ logsdb-multi-fields ]
+          template:
+            settings:
+              index:
+                sort.field: [ host.name.keyword ]
+                sort.order: [ asc ]
+                mode: logsdb
+            mappings:
+              properties:
+                host.name:
+                  type: "text"
+                  fields:
+                    keyword:
+                      type: "keyword"
+          data_stream: {}
+
+  - do:
+      indices.create_data_stream:
+        name: logsdb-multi-fields
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logsdb-multi-fields
+
+  # Remember the first backing index so its mapping can be inspected.
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.name.fields.keyword.type: keyword }
+
+---
+create logsdb data stream with multi-fields on host.name and no sorting:
+  # `host.name` is mapped as text (with a `keyword` sub-field) and the template
+  # provides no sort settings. Creation is expected to fail because the sort
+  # field `host.name` has no doc values.
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        # Template name is written without padding inside the brackets and the message is complete,
+        # because allowed warnings are matched against the exact warning text.
+        - "index template [logsdb-no-sort-multi-fields-template] has index patterns [logsdb-no-sort-multi-fields] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logsdb-no-sort-multi-fields-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logsdb-no-sort-multi-fields-template
+        body:
+          index_patterns: [ logsdb-no-sort-multi-fields ]
+          template:
+            settings:
+              mode: logsdb
+            mappings:
+              properties:
+                host.name:
+                  type: text
+                  fields:
+                    keyword:
+                      type: keyword
+          data_stream: {}
+
+  - do:
+      catch: bad_request
+      indices.create_data_stream:
+        name: logsdb-no-sort-multi-fields
+
+  - match: { error.type: "illegal_argument_exception" }
+  - match: { error.reason: "docvalues not found for index sort field:[host.name]" }
+
+---
+create logsdb data stream with custom empty sorting:
+  # The template passes empty `sort.field` / `sort.order` lists and only maps
+  # `hostname`. The backing index is still expected to contain a `host.name`
+  # keyword mapping with `ignore_above: 1024`.
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        # The index pattern in the message must be the one this template declares (`logs-http-empty`).
+        - "index template [logs-template] has index patterns [logs-http-empty] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-http-empty ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ ]
+                sort.order: [ ]
+                mode: logsdb
+            mappings:
+              properties:
+                hostname:
+                  type: keyword
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-http-empty
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-http-empty
+
+  # Remember the first backing index so its mapping can be inspected.
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  - match: { .$backing_index.mappings.properties.host.properties.name.type: keyword }
+  - match: { .$backing_index.mappings.properties.host.properties.name.ignore_above: 1024 }
+
+---
+create logsdb data stream with custom sorting on timestamp:
+  # The template sorts on `@timestamp` only and maps `hostname`, not `host.name`.
+  # Creation is expected to succeed, and no `host.name` mapping should be added
+  # because the sort configuration does not reference that field.
+  - skip:
+      features: [ "allowed_warnings" ]
+  - requires:
+      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
+      reason: support for normalizer on keyword fields
+
+  - do:
+      allowed_warnings:
+        # The index pattern in the message must be the one this template declares (`logs-http-dev`).
+        - "index template [logs-template] has index patterns [logs-http-dev] matching patterns from existing older templates [global] with patterns (global => [*]); this template [logs-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: logs-template
+        body:
+          index_patterns: [ logs-http-dev ]
+          priority: 10000
+          template:
+            settings:
+              index:
+                sort.field: [ "@timestamp" ]
+                sort.order: [ "asc" ]
+                mode: logsdb
+            mappings:
+              properties:
+                hostname:
+                  type: keyword
+          data_stream: { }
+  - is_true: acknowledged
+
+  - do:
+      indices.create_data_stream:
+        name: logs-http-dev
+  - is_true: acknowledged
+
+  - do:
+      indices.get_data_stream:
+        name: logs-http-dev
+
+  # Remember the first backing index so its mapping can be inspected.
+  - set: { data_streams.0.indices.0.index_name: backing_index }
+  - do:
+      indices.get_mapping:
+        index: $backing_index
+
+  - match: { .$backing_index.mappings.properties.@timestamp.type: date }
+  # Guard against `host.name` being injected when the user-supplied sort does not use it.
+  - is_false: .$backing_index.mappings.properties.host.properties.name

+ 24 - 0
x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml

@@ -163,3 +163,27 @@ Structured log body:
           fields: ["event.dataset"]
   - length: { hits.hits: 1 }
   - match: { hits.hits.0.fields.event\.dataset: ["generic.otel"] }
+---
+host.name pass-through:
+  # Index one document that carries the host name only under
+  # `resource.attributes`, then look it up through the top-level `host.name`
+  # field and check that both field paths come back in the hit.
+  - do:
+      bulk:
+        refresh: true
+        index: logs-generic.otel-default
+        body:
+          - create: {}
+          - "@timestamp": 2024-07-18T14:48:33.467654000Z
+            resource:
+              attributes:
+                host.name: localhost
+  - is_false: errors
+
+  - do:
+      search:
+        index: logs-generic.otel-default
+        body:
+          query:
+            term:
+              host.name: localhost
+          fields:
+            - "*"
+  - length:
+      hits.hits: 1
+  - match:
+      hits.hits.0.fields.resource\.attributes\.host\.name:
+        - "localhost"
+  - match:
+      hits.hits.0.fields.host\.name:
+        - "localhost"