|
@@ -12,6 +12,18 @@ If your {dfeed} defines runtime fields, you can use those fields in your
|
|
|
functions in one or more detectors. Runtime fields can impact search performance
|
|
|
based on the computation defined in the runtime script.
|
|
|
|
|
|
+[NOTE]
|
|
|
+===============================
|
|
|
+Some of these examples use regular expressions. By default, regular
|
|
|
+expressions are disabled because they circumvent the protection that Painless
|
|
|
+provides against long-running and memory-hungry scripts. For more information,
|
|
|
+see {ref}/modules-scripting-painless.html[Painless scripting language].
|
|
|
+
|
|
|
+{ml-cap} analysis is case sensitive. For example, "John" is considered to be
|
|
|
+different from "john". This is one reason you might consider using scripts that
|
|
|
+convert your strings to uppercase or lowercase letters.
|
|
|
+===============================
|
|
|
+
|
|
|
* <<ml-configuring-transform1>>
|
|
|
* <<ml-configuring-transform2>>
|
|
|
* <<ml-configuring-transform3>>
|
|
@@ -31,46 +43,22 @@ PUT /my-index-000001
|
|
|
{
|
|
|
"mappings":{
|
|
|
"properties": {
|
|
|
- "@timestamp": {
|
|
|
- "type": "date"
|
|
|
- },
|
|
|
- "aborted_count": {
|
|
|
- "type": "long"
|
|
|
- },
|
|
|
- "another_field": {
|
|
|
- "type": "keyword" <1>
|
|
|
- },
|
|
|
- "clientip": {
|
|
|
- "type": "keyword"
|
|
|
- },
|
|
|
+ "@timestamp": { "type": "date" },
|
|
|
+ "aborted_count": { "type": "long" },
|
|
|
+ "another_field": { "type": "keyword" }, <1>
|
|
|
+ "clientip": { "type": "keyword" },
|
|
|
"coords": {
|
|
|
"properties": {
|
|
|
- "lat": {
|
|
|
- "type": "keyword"
|
|
|
- },
|
|
|
- "lon": {
|
|
|
- "type": "keyword"
|
|
|
- }
|
|
|
+ "lat": { "type": "keyword" },
|
|
|
+ "lon": { "type": "keyword" }
|
|
|
}
|
|
|
},
|
|
|
- "error_count": {
|
|
|
- "type": "long"
|
|
|
- },
|
|
|
- "query": {
|
|
|
- "type": "keyword"
|
|
|
- },
|
|
|
- "some_field": {
|
|
|
- "type": "keyword"
|
|
|
- },
|
|
|
- "tokenstring1":{
|
|
|
- "type":"keyword"
|
|
|
- },
|
|
|
- "tokenstring2":{
|
|
|
- "type":"keyword"
|
|
|
- },
|
|
|
- "tokenstring3":{
|
|
|
- "type":"keyword"
|
|
|
- }
|
|
|
+ "error_count": { "type": "long" },
|
|
|
+ "query": { "type": "keyword" },
|
|
|
+ "some_field": { "type": "keyword" },
|
|
|
+ "tokenstring1":{ "type":"keyword" },
|
|
|
+ "tokenstring2":{ "type":"keyword" },
|
|
|
+ "tokenstring3":{ "type":"keyword" }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -113,14 +101,12 @@ PUT _ml/anomaly_detectors/test1
|
|
|
"detectors":[
|
|
|
{
|
|
|
"function":"mean",
|
|
|
- "field_name": "total_error_count", <1>
|
|
|
- "detector_description": "Custom script field transformation"
|
|
|
+ "field_name": "total_error_count" <1>
|
|
|
}
|
|
|
]
|
|
|
},
|
|
|
"data_description": {
|
|
|
- "time_field":"@timestamp",
|
|
|
- "time_format":"epoch_ms"
|
|
|
+ "time_field":"@timestamp"
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -130,11 +116,6 @@ PUT _ml/datafeeds/datafeed-test1
|
|
|
"indices": [
|
|
|
"my-index-000001"
|
|
|
],
|
|
|
- "query": {
|
|
|
- "match_all": {
|
|
|
- "boost": 1
|
|
|
- }
|
|
|
- },
|
|
|
"runtime_mappings": {
|
|
|
"total_error_count": { <2>
|
|
|
"type": "long",
|
|
@@ -194,24 +175,6 @@ use the **Edit JSON** tab. For example:
|
|
|
image::images/ml-runtimefields.jpg[Using runtime_mappings in {dfeed} config via {kib}]
|
|
|
|
|
|
|
|
|
-[[ml-configuring-transform-examples]]
|
|
|
-== Common runtime field examples
|
|
|
-
|
|
|
-While the possibilities are limitless, there are a number of common scenarios
|
|
|
-where you might use runtime fields in your {dfeeds}.
|
|
|
-
|
|
|
-[NOTE]
|
|
|
-===============================
|
|
|
-Some of these examples use regular expressions. By default, regular
|
|
|
-expressions are disabled because they circumvent the protection that Painless
|
|
|
-provides against long running and memory hungry scripts. For more information,
|
|
|
-see {ref}/modules-scripting-painless.html[Painless scripting language].
|
|
|
-
|
|
|
-{ml-cap} analysis is case sensitive. For example, "John" is considered to be
|
|
|
-different than "john". This is one reason you might consider using scripts that
|
|
|
-convert your strings to upper or lowercase letters.
|
|
|
-===============================
|
|
|
-
|
|
|
[[ml-configuring-transform2]]
|
|
|
.Example 2: Concatenating strings
|
|
|
|
|
@@ -224,14 +187,12 @@ PUT _ml/anomaly_detectors/test2
|
|
|
"detectors":[
|
|
|
{
|
|
|
"function":"low_info_content",
|
|
|
- "field_name":"my_runtime_field", <1>
|
|
|
- "detector_description": "Custom script field transformation"
|
|
|
+ "field_name":"my_runtime_field" <1>
|
|
|
}
|
|
|
]
|
|
|
},
|
|
|
"data_description": {
|
|
|
- "time_field":"@timestamp",
|
|
|
- "time_format":"epoch_ms"
|
|
|
+ "time_field":"@timestamp"
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -239,11 +200,6 @@ PUT _ml/datafeeds/datafeed-test2
|
|
|
{
|
|
|
"job_id": "test2",
|
|
|
"indices": ["my-index-000001"],
|
|
|
- "query": {
|
|
|
- "match_all": {
|
|
|
- "boost": 1
|
|
|
- }
|
|
|
- },
|
|
|
"runtime_mappings": {
|
|
|
"my_runtime_field": {
|
|
|
"type": "keyword",
|
|
@@ -469,7 +425,7 @@ The preview {dfeed} API returns the following results, which show that
|
|
|
|
|
|
[source,console]
|
|
|
--------------------------------------------------
|
|
|
-PUT _ml/anomaly_detectors/test4
|
|
|
+PUT _ml/anomaly_detectors/test3
|
|
|
{
|
|
|
"analysis_config":{
|
|
|
"bucket_span": "10m",
|
|
@@ -481,20 +437,14 @@ PUT _ml/anomaly_detectors/test4
|
|
|
]
|
|
|
},
|
|
|
"data_description": {
|
|
|
- "time_field":"@timestamp",
|
|
|
- "time_format":"epoch_ms"
|
|
|
+ "time_field":"@timestamp"
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-PUT _ml/datafeeds/datafeed-test4
|
|
|
+PUT _ml/datafeeds/datafeed-test3
|
|
|
{
|
|
|
- "job_id": "test4",
|
|
|
+ "job_id": "test3",
|
|
|
"indices": ["my-index-000001"],
|
|
|
- "query": {
|
|
|
- "match_all": {
|
|
|
- "boost": 1
|
|
|
- }
|
|
|
- },
|
|
|
"runtime_mappings": {
|
|
|
"my_coordinates": {
|
|
|
"type": "keyword",
|
|
@@ -505,7 +455,7 @@ PUT _ml/datafeeds/datafeed-test4
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-GET _ml/datafeeds/datafeed-test4/_preview
|
|
|
+GET _ml/datafeeds/datafeed-test3/_preview
|
|
|
--------------------------------------------------
|
|
|
// TEST[skip:needs-licence]
|
|
|
|
|
@@ -534,7 +484,7 @@ The preview {dfeed} API returns the following results, which show that
|
|
|
|
|
|
[source,console]
|
|
|
--------------------------------------------------
|
|
|
-PUT _ml/anomaly_detectors/test3
|
|
|
+PUT _ml/anomaly_detectors/test4
|
|
|
{
|
|
|
"description":"DNS tunneling",
|
|
|
"analysis_config":{
|
|
@@ -550,20 +500,14 @@ PUT _ml/anomaly_detectors/test3
|
|
|
]
|
|
|
},
|
|
|
"data_description": {
|
|
|
- "time_field":"@timestamp",
|
|
|
- "time_format":"epoch_ms"
|
|
|
+ "time_field":"@timestamp"
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-PUT _ml/datafeeds/datafeed-test3
|
|
|
+PUT _ml/datafeeds/datafeed-test4
|
|
|
{
|
|
|
- "job_id": "test3",
|
|
|
+ "job_id": "test4",
|
|
|
"indices": ["my-index-000001"],
|
|
|
- "query": {
|
|
|
- "match_all": {
|
|
|
- "boost": 1
|
|
|
- }
|
|
|
- },
|
|
|
"script_fields":{
|
|
|
"sub":{
|
|
|
"script":"return domainSplit(doc['query'].value).get(0);"
|
|
@@ -574,7 +518,7 @@ PUT _ml/datafeeds/datafeed-test3
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-GET _ml/datafeeds/datafeed-test3/_preview
|
|
|
+GET _ml/datafeeds/datafeed-test4/_preview
|
|
|
--------------------------------------------------
|
|
|
// TEST[skip:needs-licence]
|
|
|
|