Browse Source

[DOCS] Updates datafeed related runtime field examples (#73725)

István Zoltán Szabó 4 years ago
parent
commit
20d0dc300f
1 changed files with 38 additions and 94 deletions
  1. 38 94
      docs/reference/ml/anomaly-detection/ml-configuring-transform.asciidoc

+ 38 - 94
docs/reference/ml/anomaly-detection/ml-configuring-transform.asciidoc

@@ -12,6 +12,18 @@ If your {dfeed} defines runtime fields, you can use those fields in your
 functions in one or more detectors. Runtime fields can impact search performance 
 based on the computation defined in the runtime script.
 
+[NOTE]
+===============================
+Some of these examples use regular expressions. By default, regular
+expressions are disabled because they circumvent the protection that Painless
+provides against long-running and memory-hungry scripts. For more information,
+see {ref}/modules-scripting-painless.html[Painless scripting language].
+
+{ml-cap} analysis is case sensitive. For example, "John" is considered to be
+different from "john". This is one reason you might consider using scripts that
+convert your strings to uppercase or lowercase letters.
+===============================
+
 * <<ml-configuring-transform1>>
 * <<ml-configuring-transform2>>
 * <<ml-configuring-transform3>>
@@ -31,46 +43,22 @@ PUT /my-index-000001
 {
   "mappings":{
     "properties": {
-      "@timestamp": {
-        "type": "date"
-      },
-      "aborted_count": {
-        "type": "long"
-      },
-      "another_field": {
-        "type": "keyword" <1>
-      },
-      "clientip": {
-        "type": "keyword"
-      },
+      "@timestamp": { "type": "date" },
+      "aborted_count": { "type": "long" },
+      "another_field": { "type": "keyword" }, <1>
+      "clientip": { "type": "keyword" },
       "coords": {
         "properties": {
-          "lat": {
-            "type": "keyword"
-          },
-          "lon": {
-            "type": "keyword"
-          }
+          "lat": { "type": "keyword" },
+          "lon": { "type": "keyword" }
         }
       },
-      "error_count": {
-        "type": "long"
-      },
-      "query": {
-        "type": "keyword"
-      },
-      "some_field": {
-        "type": "keyword"
-      },
-      "tokenstring1":{
-        "type":"keyword"
-      },
-      "tokenstring2":{
-        "type":"keyword"
-      },
-      "tokenstring3":{
-        "type":"keyword"
-      }
+      "error_count": { "type": "long" },
+      "query": { "type": "keyword" },
+      "some_field": { "type": "keyword" },
+      "tokenstring1":{ "type":"keyword" },
+      "tokenstring2":{ "type":"keyword" },
+      "tokenstring3":{ "type":"keyword" }
     }
   }
 }
@@ -113,14 +101,12 @@ PUT _ml/anomaly_detectors/test1
     "detectors":[
       {
         "function":"mean",
-        "field_name": "total_error_count", <1>
-        "detector_description": "Custom script field transformation"
+        "field_name": "total_error_count" <1>
       }
     ]
   },
   "data_description": {
-  "time_field":"@timestamp",
-  "time_format":"epoch_ms"
+    "time_field":"@timestamp"
   }
 }
 
@@ -130,11 +116,6 @@ PUT _ml/datafeeds/datafeed-test1
   "indices": [
     "my-index-000001"
   ],
-  "query": {
-    "match_all": {
-      "boost": 1
-    }
-  },
   "runtime_mappings": {
     "total_error_count": { <2>
       "type": "long",
@@ -194,24 +175,6 @@ use the **Edit JSON** tab. For example:
 image::images/ml-runtimefields.jpg[Using runtime_mappings in {dfeed} config via {kib}]
 
 
-[[ml-configuring-transform-examples]]
-== Common runtime field examples
-
-While the possibilities are limitless, there are a number of common scenarios
-where you might use runtime fields in your {dfeeds}.
-
-[NOTE]
-===============================
-Some of these examples use regular expressions. By default, regular
-expressions are disabled because they circumvent the protection that Painless
-provides against long running and memory hungry scripts. For more information,
-see {ref}/modules-scripting-painless.html[Painless scripting language].
-
-{ml-cap} analysis is case sensitive. For example, "John" is considered to be 
-different than "john". This is one reason you might consider using scripts that 
-convert your strings to upper or lowercase letters.
-===============================
-
 [[ml-configuring-transform2]]
 .Example 2: Concatenating strings
 
@@ -224,14 +187,12 @@ PUT _ml/anomaly_detectors/test2
     "detectors":[
       {
         "function":"low_info_content",
-        "field_name":"my_runtime_field", <1>
-        "detector_description": "Custom script field transformation"
+        "field_name":"my_runtime_field" <1>
       }
     ]
   },
   "data_description": {
-  "time_field":"@timestamp",
-  "time_format":"epoch_ms"
+    "time_field":"@timestamp"
   }
 }
 
@@ -239,11 +200,6 @@ PUT _ml/datafeeds/datafeed-test2
 {
   "job_id": "test2",
   "indices": ["my-index-000001"],
-  "query": {
-    "match_all": {
-          "boost": 1
-    }
-  },
   "runtime_mappings": {
     "my_runtime_field": {
       "type": "keyword",
@@ -469,7 +425,7 @@ The preview {dfeed} API returns the following results, which show that
 
 [source,console]
 --------------------------------------------------
-PUT _ml/anomaly_detectors/test4
+PUT _ml/anomaly_detectors/test3
 {
   "analysis_config":{
     "bucket_span": "10m",
@@ -481,20 +437,14 @@ PUT _ml/anomaly_detectors/test4
     ]
   },
   "data_description": {
-  "time_field":"@timestamp",
-  "time_format":"epoch_ms"
+    "time_field":"@timestamp"
   }
 }
 
-PUT _ml/datafeeds/datafeed-test4
+PUT _ml/datafeeds/datafeed-test3
 {
-  "job_id": "test4",
+  "job_id": "test3",
   "indices": ["my-index-000001"],
-  "query": {
-    "match_all": {
-          "boost": 1
-    }
-  },
   "runtime_mappings": {
     "my_coordinates": {
       "type": "keyword",
@@ -505,7 +455,7 @@ PUT _ml/datafeeds/datafeed-test4
   }
 }
 
-GET _ml/datafeeds/datafeed-test4/_preview
+GET _ml/datafeeds/datafeed-test3/_preview
 --------------------------------------------------
 // TEST[skip:needs-licence]
 
@@ -534,7 +484,7 @@ The preview {dfeed} API returns the following results, which show that
 
 [source,console]
 --------------------------------------------------
-PUT _ml/anomaly_detectors/test3
+PUT _ml/anomaly_detectors/test4
 {
   "description":"DNS tunneling",
   "analysis_config":{
@@ -550,20 +500,14 @@ PUT _ml/anomaly_detectors/test3
     ]
   },
   "data_description": {
-  "time_field":"@timestamp",
-  "time_format":"epoch_ms"
+    "time_field":"@timestamp"
   }
 }
 
-PUT _ml/datafeeds/datafeed-test3
+PUT _ml/datafeeds/datafeed-test4
 {
-  "job_id": "test3",
+  "job_id": "test4",
   "indices": ["my-index-000001"],
-  "query": {
-    "match_all": {
-          "boost": 1
-    }
-  },
   "script_fields":{
     "sub":{
       "script":"return domainSplit(doc['query'].value).get(0);"
@@ -574,7 +518,7 @@ PUT _ml/datafeeds/datafeed-test3
   }
 }
 
-GET _ml/datafeeds/datafeed-test3/_preview
+GET _ml/datafeeds/datafeed-test4/_preview
 --------------------------------------------------
 // TEST[skip:needs-licence]