Browse Source

Better out-of-the-box mappings for logs, metrics and synthetics (#64978)

One of the problems we have today with the default templates is that IP addresses and message fields are not mapped correctly. Auto-detection of IP addresses would be great (see https://github.com/elastic/elasticsearch/issues/64400), but in the meantime we can match on the naming convention that all `*.ip` fields are of type `ip`.
Nicolas Ruflin 4 years ago
parent
commit
2bd1b33d6a

+ 67 - 0
x-pack/plugin/core/src/main/resources/data-streams-mappings.json

@@ -0,0 +1,67 @@
+{
+  "template": {
+    "mappings": {
+      "dynamic_templates": [
+        {
+          "match_ip": {
+            "match_mapping_type": "string",
+            "match": "ip",
+            "mapping": {
+              "type": "ip"
+            }
+          }
+        },
+        {
+          "match_message": {
+            "match_mapping_type": "string",
+            "match": "message",
+            "mapping": {
+              "type": "match_only_text"
+            }
+          }
+        },
+        {
+          "strings_as_keyword": {
+            "mapping": {
+              "ignore_above": 1024,
+              "type": "keyword"
+            },
+            "match_mapping_type": "string"
+          }
+        }
+      ],
+      "date_detection": false,
+      "properties": {
+        "@timestamp": {
+          "type": "date"
+        },
+        "data_stream": {
+          "properties": {
+            "dataset": {
+              "type": "constant_keyword"
+            },
+            "namespace": {
+              "type": "constant_keyword"
+            }
+          }
+        },
+        "ecs": {
+          "properties": {
+            "version": {
+              "ignore_above": 1024,
+              "type": "keyword"
+            }
+          }
+        },
+        "host": {
+          "type": "object"
+        }
+      }
+    }
+  },
+  "_meta": {
+    "description": "general mapping conventions for data streams",
+    "managed": true
+  },
+  "version": ${xpack.stack.template.version}
+}

+ 0 - 39
x-pack/plugin/core/src/main/resources/logs-mappings.json

@@ -1,53 +1,14 @@
 {
   "template": {
     "mappings": {
-      "dynamic_templates": [
-        {
-          "strings_as_keyword": {
-            "mapping": {
-              "ignore_above": 1024,
-              "type": "keyword"
-            },
-            "match_mapping_type": "string"
-          }
-        }
-      ],
-      "date_detection": false,
       "properties": {
-        "@timestamp": {
-          "type": "date"
-        },
         "data_stream": {
           "properties": {
             "type": {
               "type": "constant_keyword",
               "value": "logs"
-            },
-            "dataset": {
-              "type": "constant_keyword"
-            },
-            "namespace": {
-              "type": "constant_keyword"
-            }
-          }
-        },
-        "ecs": {
-          "properties": {
-            "version": {
-              "ignore_above": 1024,
-              "type": "keyword"
-            }
-          }
-        },
-        "host": {
-          "properties": {
-            "ip": {
-              "type": "ip"
             }
           }
-        },
-        "message": {
-          "type": "text"
         }
       }
     }

+ 1 - 0
x-pack/plugin/core/src/main/resources/logs-template.json

@@ -4,6 +4,7 @@
   "data_stream": {},
   "composed_of": [
     "logs-mappings",
+    "data-streams-mappings",
     "logs-settings"
   ],
   "allow_auto_create": true,

+ 0 - 36
x-pack/plugin/core/src/main/resources/metrics-mappings.json

@@ -1,48 +1,12 @@
 {
   "template": {
     "mappings": {
-      "dynamic_templates": [
-        {
-          "strings_as_keyword": {
-            "mapping": {
-              "ignore_above": 1024,
-              "type": "keyword"
-            },
-            "match_mapping_type": "string"
-          }
-        }
-      ],
-      "date_detection": false,
       "properties": {
-        "@timestamp": {
-          "type": "date"
-        },
         "data_stream": {
           "properties": {
             "type": {
               "type": "constant_keyword",
               "value": "metrics"
-            },
-            "dataset": {
-              "type": "constant_keyword"
-            },
-            "namespace": {
-              "type": "constant_keyword"
-            }
-          }
-        },
-        "ecs": {
-          "properties": {
-            "version": {
-              "ignore_above": 1024,
-              "type": "keyword"
-            }
-          }
-        },
-        "host": {
-          "properties": {
-            "ip": {
-              "type": "ip"
             }
           }
         }

+ 1 - 0
x-pack/plugin/core/src/main/resources/metrics-template.json

@@ -4,6 +4,7 @@
   "data_stream": {},
   "composed_of": [
     "metrics-mappings",
+    "data-streams-mappings",
     "metrics-settings"
   ],
   "allow_auto_create": true,

+ 0 - 43
x-pack/plugin/core/src/main/resources/synthetics-mappings.json

@@ -1,55 +1,12 @@
 {
   "template": {
     "mappings": {
-      "dynamic_templates": [
-        {
-          "strings_as_keyword": {
-            "mapping": {
-              "ignore_above": 1024,
-              "type": "keyword"
-            },
-            "match_mapping_type": "string"
-          }
-        }
-      ],
-      "date_detection": false,
       "properties": {
-        "@timestamp": {
-          "type": "date"
-        },
         "data_stream": {
           "properties": {
             "type": {
               "type": "constant_keyword",
               "value": "synthetics"
-            },
-            "dataset": {
-              "type": "constant_keyword"
-            },
-            "namespace": {
-              "type": "constant_keyword"
-            }
-          }
-        },
-        "ecs": {
-          "properties": {
-            "version": {
-              "ignore_above": 1024,
-              "type": "keyword"
-            }
-          }
-        },
-        "host": {
-          "properties": {
-            "ip": {
-              "type": "ip"
-            }
-          }
-        },
-        "observer": {
-          "properties": {
-            "ip": {
-              "type": "ip"
             }
           }
         }

+ 1 - 0
x-pack/plugin/core/src/main/resources/synthetics-template.json

@@ -4,6 +4,7 @@
   "data_stream": {},
   "composed_of": [
     "synthetics-mappings",
+    "data-streams-mappings",
     "synthetics-settings"
   ],
   "allow_auto_create": true,

+ 106 - 2
x-pack/plugin/stack/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/stack/10_basic.yml

@@ -14,6 +14,10 @@ setup:
       ilm.get_lifecycle:
         policy: "metrics"
 
+  - do:
+      cluster.get_component_template:
+        name: data-streams-mappings
+
   - do:
       cluster.get_component_template:
         name: logs-mappings
@@ -46,6 +50,11 @@ setup:
         body:
           "@timestamp": "2020-01-01"
           message: "test-log-message"
+          source.ip: "10.1.2.3"
+          log.file.path: "/var/log/web/access.log"
+          data_stream.type: "logs"
+          data_stream.dataset: "foo"
+          data_stream.namespace: "bar"
 
   - do:
       indices.get_data_stream:
@@ -66,7 +75,16 @@ setup:
   - is_true: .$idx0name.settings
   - is_true: .$idx0name.mappings
   - match: { .$idx0name.settings.index.lifecycle.name: "logs" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.type.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.type.value: "logs" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.dataset.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.dataset.value: "foo" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.namespace.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.namespace.value: "bar" }
   - is_true: .$idx0name.mappings.properties.message
+  - match: { .$idx0name.mappings.properties.message.type: "match_only_text" }
+  - match: { .$idx0name.mappings.properties.source.properties.ip.type: "ip" }
+  - match: { .$idx0name.mappings.properties.log.properties.file.properties.path.type: "keyword" }
   - match: { .$idx0name.data_stream: "logs-foo-bar" }
 
   - do:
@@ -80,7 +98,10 @@ setup:
         index: metrics-foo-bar
         body:
           "@timestamp": "2020-01-01"
-          message: "test-log-message"
+          source.ip: "10.1.2.3"
+          data_stream.type: "metrics"
+          data_stream.dataset: "foo"
+          data_stream.namespace: "bar"
 
   - do:
       indices.get_data_stream:
@@ -101,9 +122,92 @@ setup:
   - is_true: .$idx0name.settings
   - is_true: .$idx0name.mappings
   - match: { .$idx0name.settings.index.lifecycle.name: "metrics" }
-  - is_true: .$idx0name.mappings.properties.message
+  - match: { .$idx0name.mappings.properties.data_stream.properties.type.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.type.value: "metrics" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.dataset.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.dataset.value: "foo" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.namespace.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.namespace.value: "bar" }
+  - match: { .$idx0name.mappings.properties.source.properties.ip.type: "ip" }
   - match: { .$idx0name.data_stream: "metrics-foo-bar" }
 
   - do:
       indices.delete_data_stream:
         name: metrics-foo-bar
+
+---
+"Test synthetics index auto creation":
+  - do:
+      index:
+        index: synthetics-foo-bar
+        body:
+          "@timestamp": "2020-01-01"
+          source.ip: "10.1.2.3"
+          data_stream.type: "synthetics"
+          data_stream.dataset: "foo"
+          data_stream.namespace: "bar"
+
+  - do:
+      indices.get_data_stream:
+        name: synthetics-foo-bar
+
+  - match: { data_streams.0.name: synthetics-foo-bar }
+  - match: { data_streams.0.timestamp_field.name: '@timestamp' }
+  - match: { data_streams.0.generation: 1 }
+  - length: { data_streams.0.indices: 1 }
+  - match: { data_streams.0.indices.0.index_name: '/\.ds-synthetics-foo-bar-(\d{4}\.\d{2}\.\d{2}-)?000001/' }
+
+  - set: { data_streams.0.indices.0.index_name: idx0name }
+
+  - do:
+      indices.get:
+        index: $idx0name
+
+  - is_true: .$idx0name.settings
+  - is_true: .$idx0name.mappings
+  - match: { .$idx0name.settings.index.lifecycle.name: "synthetics" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.type.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.type.value: "synthetics" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.dataset.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.dataset.value: "foo" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.namespace.type: "constant_keyword" }
+  - match: { .$idx0name.mappings.properties.data_stream.properties.namespace.value: "bar" }
+  - match: { .$idx0name.mappings.properties.source.properties.ip.type: "ip" }
+  - match: { .$idx0name.data_stream: "synthetics-foo-bar" }
+
+  - do:
+      indices.delete_data_stream:
+        name: synthetics-foo-bar
+
+---
+"Test wrong data_stream type":
+
+ - do:
+      catch: bad_request
+      index:
+        index: synthetics-dataset0-namespace1
+        body:
+          "@timestamp": "2020-01-01"
+          data_stream.type: "logs"
+          data_stream.dataset: "dataset0"
+          data_stream.namespace: "namespace1"
+
+ - do:
+      catch: bad_request
+      index:
+        index: logs-dataset0-namespace1
+        body:
+          "@timestamp": "2020-01-01"
+          data_stream.type: "metrics"
+          data_stream.dataset: "dataset0"
+          data_stream.namespace: "namespace1"
+
+ - do:
+      catch: bad_request
+      index:
+        index: metrics-dataset0-namespace1
+        body:
+          "@timestamp": "2020-01-01"
+          data_stream.type: "synthetics"
+          data_stream.dataset: "dataset0"
+          data_stream.namespace: "namespace1"

+ 14 - 5
x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java

@@ -28,11 +28,9 @@ import java.util.stream.Collectors;
 public class StackTemplateRegistry extends IndexTemplateRegistry {
     private static final Logger logger = LogManager.getLogger(StackTemplateRegistry.class);
 
-    // The stack template registry should remain at version 0. This is because templates and
-    // policies will be changed by the ingest manager once they exist, and ES should only ever put
-    // the template in place if it does not exist. If this were incremented we could accidentally
-    // overwrite a template or policy changed by the ingest manager.
-    public static final int REGISTRY_VERSION = 0;
+    // The stack template registry version. This number must be incremented when we make changes
+    // to built-in templates.
+    public static final int REGISTRY_VERSION = 1;
 
     public static final String TEMPLATE_VERSION_VARIABLE = "xpack.stack.template.version";
     public static final Setting<Boolean> STACK_TEMPLATES_ENABLED = Setting.boolSetting(
@@ -45,6 +43,16 @@ public class StackTemplateRegistry extends IndexTemplateRegistry {
     private final ClusterService clusterService;
     private volatile boolean stackTemplateEnabled;
 
+    // General mappings conventions for any data that ends up in a data stream
+    public static final String DATA_STREAMS_MAPPINGS_COMPONENT_TEMPLATE_NAME = "data-streams-mappings";
+
+    public static final IndexTemplateConfig DATA_STREAMS_MAPPINGS_COMPONENT_TEMPLATE = new IndexTemplateConfig(
+        DATA_STREAMS_MAPPINGS_COMPONENT_TEMPLATE_NAME,
+        "/data-streams-mappings.json",
+        REGISTRY_VERSION,
+        TEMPLATE_VERSION_VARIABLE
+    );
+
     //////////////////////////////////////////////////////////
     // Logs components (for matching logs-*-* indices)
     //////////////////////////////////////////////////////////
@@ -179,6 +187,7 @@ public class StackTemplateRegistry extends IndexTemplateRegistry {
     protected List<IndexTemplateConfig> getComponentTemplateConfigs() {
         if (stackTemplateEnabled) {
             return Arrays.asList(
+                DATA_STREAMS_MAPPINGS_COMPONENT_TEMPLATE,
                 LOGS_MAPPINGS_COMPONENT_TEMPLATE,
                 LOGS_SETTINGS_COMPONENT_TEMPLATE,
                 METRICS_MAPPINGS_COMPONENT_TEMPLATE,

+ 5 - 0
x-pack/plugin/stack/src/test/java/org/elasticsearch/xpack/stack/StackTemplateRegistryTests.java

@@ -277,6 +277,7 @@ public class StackTemplateRegistryTests extends ESTestCase {
         DiscoveryNodes nodes = DiscoveryNodes.builder().localNodeId("node").masterNodeId("node").add(node).build();
 
         Map<String, Integer> versions = new HashMap<>();
+        versions.put(StackTemplateRegistry.DATA_STREAMS_MAPPINGS_COMPONENT_TEMPLATE_NAME, StackTemplateRegistry.REGISTRY_VERSION);
         versions.put(StackTemplateRegistry.LOGS_SETTINGS_COMPONENT_TEMPLATE_NAME, StackTemplateRegistry.REGISTRY_VERSION);
         versions.put(StackTemplateRegistry.LOGS_MAPPINGS_COMPONENT_TEMPLATE_NAME, StackTemplateRegistry.REGISTRY_VERSION);
         versions.put(StackTemplateRegistry.METRICS_SETTINGS_COMPONENT_TEMPLATE_NAME, StackTemplateRegistry.REGISTRY_VERSION);
@@ -302,6 +303,10 @@ public class StackTemplateRegistryTests extends ESTestCase {
         registry.clusterChanged(sameVersionEvent);
 
         versions.clear();
+        versions.put(
+            StackTemplateRegistry.DATA_STREAMS_MAPPINGS_COMPONENT_TEMPLATE_NAME,
+            StackTemplateRegistry.REGISTRY_VERSION + randomIntBetween(1, 1000)
+        );
         versions.put(
             StackTemplateRegistry.LOGS_SETTINGS_COMPONENT_TEMPLATE_NAME,
             StackTemplateRegistry.REGISTRY_VERSION + randomIntBetween(1, 1000)