|
@@ -15,35 +15,41 @@ more about the Painless scripting language in the
|
|
|
* <<painless-group-by>>
|
|
|
* <<painless-bucket-script>>
|
|
|
|
|
|
+NOTE: While the context of the following examples is the {transform} use case,
|
|
|
+the Painless scripts in the snippets below can be used in other {es} search
|
|
|
+aggregations, too.
|
|
|
+
|
|
|
|
|
|
[discrete]
|
|
|
[[painless-top-hits]]
|
|
|
-==== Getting top hits by using scripted metric
|
|
|
+==== Getting top hits by using scripted metric aggregation
|
|
|
|
|
|
This snippet shows how to find the latest document, in other words the document
|
|
|
with the latest timestamp. From a technical perspective, it helps to achieve
|
|
|
the function of a <<search-aggregations-metrics-top-hits-aggregation>> by using
|
|
|
-scripted metric aggregation which provides a metric output.
|
|
|
+scripted metric aggregation in a {transform}, which provides a metric output.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-"latest_doc": {
|
|
|
- "scripted_metric": {
|
|
|
- "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1>
|
|
|
- "map_script": """ <2>
|
|
|
- def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli();
|
|
|
- if (current_date > state.timestamp_latest)
|
|
|
- {state.timestamp_latest = current_date;
|
|
|
- state.last_doc = new HashMap(params['_source']);}
|
|
|
- """,
|
|
|
- "combine_script": "return state", <3>
|
|
|
- "reduce_script": """ <4>
|
|
|
- def last_doc = '';
|
|
|
- def timestamp_latest = 0L;
|
|
|
- for (s in states) {if (s.timestamp_latest > (timestamp_latest))
|
|
|
- {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}}
|
|
|
- return last_doc
|
|
|
- """
|
|
|
+"aggregations": {
|
|
|
+ "latest_doc": {
|
|
|
+ "scripted_metric": {
|
|
|
+ "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1>
|
|
|
+ "map_script": """ <2>
|
|
|
+ def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli();
|
|
|
+ if (current_date > state.timestamp_latest)
|
|
|
+ {state.timestamp_latest = current_date;
|
|
|
+ state.last_doc = new HashMap(params['_source']);}
|
|
|
+ """,
|
|
|
+ "combine_script": "return state", <3>
|
|
|
+ "reduce_script": """ <4>
|
|
|
+ def last_doc = '';
|
|
|
+ def timestamp_latest = 0L;
|
|
|
+ for (s in states) {if (s.timestamp_latest > (timestamp_latest))
|
|
|
+ {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}}
|
|
|
+ return last_doc
|
|
|
+ """
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
--------------------------------------------------
|
|
@@ -70,23 +76,25 @@ You can retrieve the last value in a similar way:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-"latest_value": {
|
|
|
- "scripted_metric": {
|
|
|
- "init_script": "state.timestamp_latest = 0L; state.last_value = ''",
|
|
|
- "map_script": """
|
|
|
- def current_date = doc['date'].getValue().toInstant().toEpochMilli();
|
|
|
- if (current_date > state.timestamp_latest)
|
|
|
- {state.timestamp_latest = current_date;
|
|
|
- state.last_value = params['_source']['value'];}
|
|
|
- """,
|
|
|
- "combine_script": "return state",
|
|
|
- "reduce_script": """
|
|
|
- def last_value = '';
|
|
|
- def timestamp_latest = 0L;
|
|
|
- for (s in states) {if (s.timestamp_latest > (timestamp_latest))
|
|
|
- {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}}
|
|
|
- return last_value
|
|
|
- """
|
|
|
+"aggregations": {
|
|
|
+ "latest_value": {
|
|
|
+ "scripted_metric": {
|
|
|
+ "init_script": "state.timestamp_latest = 0L; state.last_value = ''",
|
|
|
+ "map_script": """
|
|
|
+ def current_date = doc['date'].getValue().toInstant().toEpochMilli();
|
|
|
+ if (current_date > state.timestamp_latest)
|
|
|
+ {state.timestamp_latest = current_date;
|
|
|
+ state.last_value = params['_source']['value'];}
|
|
|
+ """,
|
|
|
+ "combine_script": "return state",
|
|
|
+ "reduce_script": """
|
|
|
+ def last_value = '';
|
|
|
+ def timestamp_latest = 0L;
|
|
|
+ for (s in states) {if (s.timestamp_latest > (timestamp_latest))
|
|
|
+ {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}}
|
|
|
+ return last_value
|
|
|
+ """
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
--------------------------------------------------
|
|
@@ -97,31 +105,35 @@ You can retrieve the last value in a similar way:
|
|
|
[[painless-time-features]]
|
|
|
==== Getting time features as scripted fields
|
|
|
|
|
|
-This snippet shows how to extract time based features by using Painless. The
|
|
|
-snippet uses an index where `@timestamp` is defined as a `date` type field.
|
|
|
+This snippet shows how to extract time-based features by using Painless in a
|
|
|
+{transform}. The snippet uses an index where `@timestamp` is defined as a `date`
|
|
|
+type field.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-"script_fields": {
|
|
|
- "hour_of_day": { <1>
|
|
|
- "script": {
|
|
|
- "lang": "painless",
|
|
|
- "source": """
|
|
|
- ZonedDateTime date = doc['@timestamp'].value; <2>
|
|
|
- return date.getHour(); <3>
|
|
|
- """
|
|
|
+"aggregations": {
|
|
|
+ "script_fields": {
|
|
|
+ "hour_of_day": { <1>
|
|
|
+ "script": {
|
|
|
+ "lang": "painless",
|
|
|
+ "source": """
|
|
|
+ ZonedDateTime date = doc['@timestamp'].value; <2>
|
|
|
+ return date.getHour(); <3>
|
|
|
+ """
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "month_of_year": { <4>
|
|
|
+ "script": {
|
|
|
+ "lang": "painless",
|
|
|
+ "source": """
|
|
|
+ ZonedDateTime date = doc['@timestamp'].value; <5>
|
|
|
+ return date.getMonthValue(); <6>
|
|
|
+ """
|
|
|
+ }
|
|
|
}
|
|
|
},
|
|
|
- "month_of_year": { <4>
|
|
|
- "script": {
|
|
|
- "lang": "painless",
|
|
|
- "source": """
|
|
|
- ZonedDateTime date = doc['@timestamp'].value; <5>
|
|
|
- return date.getMonthValue(); <6>
|
|
|
- """
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ ...
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
// NOTCONSOLE
|
|
|
|
|
@@ -327,3 +339,63 @@ the buckets you want to use for the variable. In this particular case, `min` and
|
|
|
`max` are variables mapped to `time_frame.gte.value` and `time_frame.lte.value`.
|
|
|
<3> Finally, the script subtracts the start date of the session from the end
|
|
|
date which results in the duration of the session.
|
|
|
+
|
|
|
+
|
|
|
+[discrete]
|
|
|
+[[painless-count-http]]
|
|
|
+==== Counting HTTP responses by using scripted metric aggregation
|
|
|
+
|
|
|
+You can count the different HTTP response types in a web log data set by using
|
|
|
+scripted metric aggregation as part of the {transform}. The example below
|
|
|
+assumes that the HTTP response codes are stored as keywords in the `response`
|
|
|
+field of the documents.
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+"aggregations": { <1>
|
|
|
+ "responses.counts": { <2>
|
|
|
+ "scripted_metric": { <3>
|
|
|
+ "init_script": "state.responses = ['error':0L,'success':0L,'other':0L]", <4>
|
|
|
+ "map_script": """ <5>
|
|
|
+ def code = doc['response.keyword'].value;
|
|
|
+ if (code.startsWith('5') || code.startsWith('4')) {
|
|
|
+ state.responses.error += 1 ;
|
|
|
+ } else if(code.startsWith('2')) {
|
|
|
+ state.responses.success += 1;
|
|
|
+ } else {
|
|
|
+ state.responses.other += 1;
|
|
|
+ }
|
|
|
+ """,
|
|
|
+ "combine_script": "state.responses", <6>
|
|
|
+ "reduce_script": """ <7>
|
|
|
+ def counts = ['error': 0L, 'success': 0L, 'other': 0L];
|
|
|
+ for (responses in states) {
|
|
|
+ counts.error += responses['error'];
|
|
|
+ counts.success += responses['success'];
|
|
|
+ counts.other += responses['other'];
|
|
|
+ }
|
|
|
+ return counts;
|
|
|
+ """
|
|
|
+ }
|
|
|
+ },
|
|
|
+ ...
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+// NOTCONSOLE
|
|
|
+
|
|
|
+<1> The `aggregations` object of the {transform} that contains all aggregations.
|
|
|
+<2> Object of the `scripted_metric` aggregation.
|
|
|
+<3> This `scripted_metric` performs a distributed operation on the web log data
|
|
|
+to count specific types of HTTP responses (error, success, and other).
|
|
|
+<4> The `init_script` creates a `responses` map in the `state` object with
|
|
|
+three properties (`error`, `success`, `other`) with long data type.
|
|
|
+<5> The `map_script` defines `code` based on the `response.keyword` value of the
|
|
|
+document, then it counts the errors, successes, and other responses based on the
|
|
|
+first digit of the responses.
|
|
|
+<6> The `combine_script` returns `state.responses` from each shard.
|
|
|
+<7> The `reduce_script` creates a `counts` map with the `error`, `success`,
|
|
|
+and `other` properties, then iterates through the value of `responses` returned
|
|
|
+by each shard and assigns the different response types to the appropriate
|
|
|
+properties of the `counts` object; error responses to the error counts, success
|
|
|
+responses to the success counts, and other responses to the other counts.
|
|
|
+Finally, it returns the `counts` map with the response counts.
|