5 years ago · b7d6ebc21b
--- a/docs/reference/transform/painless-examples.asciidoc
+++ b/docs/reference/transform/painless-examples.asciidoc
@@ -15,35 +15,41 @@ more about the Painless scripting language in the
 
				 * <<painless-group-by>>
			
 
				 * <<painless-bucket-script>>
			
 
				 
			
 
				+NOTE: While the context of the following examples is the {transform} use case, 
			
 
				+the Painless scripts in the snippets below can be used in other {es} search 
			
 
				+aggregations, too.
			
 
				+
			
 
				 
			
 
				 [discrete]
			
 
				 [[painless-top-hits]]
			
 
				-==== Getting top hits by using scripted metric
			
 
				+==== Getting top hits by using scripted metric aggregation
			
 
				 
			
 
				 This snippet shows how to find the latest document, in other words the document 
			
 
				 with the latest timestamp. From a technical perspective, it helps to achieve 
			
 
				 the function of a <<search-aggregations-metrics-top-hits-aggregation>> by using 
			
 
				-scripted metric aggregation which provides a metric output.
			
 
				+scripted metric aggregation in a {transform}, which provides a metric output.
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-"latest_doc": { 
			
 
				-  "scripted_metric": {
			
 
				-    "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1>
			
 
				-    "map_script": """ <2>
			
 
				-      def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); 
			
 
				-      if (current_date > state.timestamp_latest) 
			
 
				-      {state.timestamp_latest = current_date;
			
 
				-      state.last_doc = new HashMap(params['_source']);}
			
 
				-    """,
			
 
				-    "combine_script": "return state", <3>
			
 
				-    "reduce_script": """ <4>
			
 
				-      def last_doc = '';
			
 
				-      def timestamp_latest = 0L;
			
 
				-      for (s in states) {if (s.timestamp_latest > (timestamp_latest))
			
 
				-      {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} 
			
 
				-      return last_doc
			
 
				-    """
			
 
				+"aggregations": {
			
 
				+  "latest_doc": { 
			
 
				+    "scripted_metric": {
			
 
				+      "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1>
			
 
				+      "map_script": """ <2>
			
 
				+        def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); 
			
 
				+        if (current_date > state.timestamp_latest) 
			
 
				+        {state.timestamp_latest = current_date;
			
 
				+        state.last_doc = new HashMap(params['_source']);}
			
 
				+      """,
			
 
				+      "combine_script": "return state", <3>
			
 
				+      "reduce_script": """ <4>
			
 
				+        def last_doc = '';
			
 
				+        def timestamp_latest = 0L;
			
 
				+        for (s in states) {if (s.timestamp_latest > (timestamp_latest))
			
 
				+        {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} 
			
 
				+        return last_doc
			
 
				+      """
			
 
				+    }
			
 
				   }
			
 
				 }
			
 
				 --------------------------------------------------
			
@@ -70,23 +76,25 @@ You can retrieve the last value in a similar way:
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-"latest_value": {
			
 
				-  "scripted_metric": {
			
 
				-    "init_script": "state.timestamp_latest = 0L; state.last_value = ''",
			
 
				-    "map_script": """
			
 
				-      def current_date = doc['date'].getValue().toInstant().toEpochMilli(); 
			
 
				-      if (current_date > state.timestamp_latest) 
			
 
				-      {state.timestamp_latest = current_date;
			
 
				-      state.last_value = params['_source']['value'];}
			
 
				-    """,
			
 
				-    "combine_script": "return state",
			
 
				-    "reduce_script": """
			
 
				-      def last_value = '';
			
 
				-      def timestamp_latest = 0L; 
			
 
				-      for (s in states) {if (s.timestamp_latest > (timestamp_latest)) 
			
 
				-      {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} 
			
 
				-      return last_value
			
 
				-    """
			
 
				+"aggregations": {
			
 
				+  "latest_value": {
			
 
				+    "scripted_metric": {
			
 
				+      "init_script": "state.timestamp_latest = 0L; state.last_value = ''",
			
 
				+      "map_script": """
			
 
				+        def current_date = doc['date'].getValue().toInstant().toEpochMilli(); 
			
 
				+        if (current_date > state.timestamp_latest) 
			
 
				+        {state.timestamp_latest = current_date;
			
 
				+        state.last_value = params['_source']['value'];}
			
 
				+      """,
			
 
				+      "combine_script": "return state",
			
 
				+      "reduce_script": """
			
 
				+        def last_value = '';
			
 
				+        def timestamp_latest = 0L; 
			
 
				+        for (s in states) {if (s.timestamp_latest > (timestamp_latest)) 
			
 
				+        {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} 
			
 
				+        return last_value
			
 
				+      """
			
 
				+    }
			
 
				   }
			
 
				 }
			
 
				 --------------------------------------------------
			
@@ -97,31 +105,35 @@ You can retrieve the last value in a similar way:
 
				 [[painless-time-features]]
			
 
				 ==== Getting time features as scripted fields
			
 
				 
			
 
				-This snippet shows how to extract time based features by using Painless. The 
			
 
				-snippet uses an index where `@timestamp` is defined as a `date` type field.
			
 
				+This snippet shows how to extract time-based features by using Painless in a 
			
 
				+{transform}. The snippet uses an index where `@timestamp` is defined as a `date` 
			
 
				+type field.
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
 
				-"script_fields": {
			
 
				-    "hour_of_day": { <1>
			
 
				-      "script": {
			
 
				-        "lang": "painless",
			
 
				-        "source": """
			
 
				-          ZonedDateTime date =  doc['@timestamp'].value; <2>
			
 
				-          return date.getHour(); <3>
			
 
				-        """
			
 
				+"aggregations": {
			
 
				+  "script_fields": {
			
 
				+      "hour_of_day": { <1>
			
 
				+        "script": {
			
 
				+          "lang": "painless",
			
 
				+          "source": """
			
 
				+            ZonedDateTime date =  doc['@timestamp'].value; <2>
			
 
				+            return date.getHour(); <3>
			
 
				+          """
			
 
				+        }
			
 
				+      },
			
 
				+      "month_of_year": { <4>
			
 
				+        "script": {
			
 
				+          "lang": "painless",
			
 
				+          "source": """
			
 
				+            ZonedDateTime date =  doc['@timestamp'].value; <5>
			
 
				+            return date.getMonthValue(); <6>
			
 
				+          """
			
 
				+        }
			
 
				       }
			
 
				     },
			
 
				-    "month_of_year": { <4>
			
 
				-      "script": {
			
 
				-        "lang": "painless",
			
 
				-        "source": """
			
 
				-          ZonedDateTime date =  doc['@timestamp'].value; <5>
			
 
				-          return date.getMonthValue(); <6>
			
 
				-        """
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				+  ...
			
 
				+}
			
 
				 --------------------------------------------------
			
 
				 // NOTCONSOLE
			
 
				 
			
@@ -327,3 +339,63 @@ the buckets you want to use for the variable. In this particular case, `min` and
 
				 `max` are variables mapped to `time_frame.gte.value` and `time_frame.lte.value`.
			
 
				 <3> Finally, the script subtracts the start date of the session from the end 
			
 
				 date which results in the duration of the session.
			
 
				+
			
 
				+
			
 
				+[discrete]
			
 
				+[[painless-count-http]]
			
 
				+==== Counting HTTP responses by using scripted metric aggregation
			
 
				+
			
 
				+You can count the different HTTP response types in a web log data set by using 
			
 
				+scripted metric aggregation as part of the {transform}. The example below 
			
 
				+assumes that the HTTP response codes are stored as keywords in the `response` 
			
 
				+field of the documents.
			
 
				+
			
 
				+[source,js]
			
 
				+--------------------------------------------------
			
 
				+"aggregations": { <1>
			
 
				+  "responses.counts": { <2>
			
 
				+    "scripted_metric": { <3>
			
 
				+      "init_script": "state.responses = ['error':0L,'success':0L,'other':0L]", <4>
			
 
				+      "map_script": """ <5>
			
 
				+        def code = doc['response.keyword'].value;
			
 
				+        if (code.startsWith('5') || code.startsWith('4')) {
			
 
				+          state.responses.error += 1 ;
			
 
				+        } else if(code.startsWith('2')) {
			
 
				+          state.responses.success += 1;
			
 
				+        } else {
			
 
				+          state.responses.other += 1;
			
 
				+        }
			
 
				+        """,
			
 
				+      "combine_script": "state.responses", <6>
			
 
				+      "reduce_script": """ <7>
			
 
				+        def counts = ['error': 0L, 'success': 0L, 'other': 0L];
			
 
				+        for (responses in states) {
			
 
				+          counts.error += responses['error'];
			
 
				+          counts.success += responses['success'];
			
 
				+          counts.other += responses['other'];
			
 
				+        }
			
 
				+        return counts;
			
 
				+        """
			
 
				+      }
			
 
				+    },
			
 
				+  ...  
			
 
				+}
			
 
				+--------------------------------------------------
			
 
				+// NOTCONSOLE
			
 
				+
			
 
				+<1> The `aggregations` object of the {transform} that contains all aggregations.
			
 
				+<2> Object of the `scripted_metric` aggregation.
			
 
				+<3> This `scripted_metric` performs a distributed operation on the web log data 
			
 
				+to count specific types of HTTP responses (error, success, and other).
			
 
				+<4> The `init_script` creates a `responses` map in the `state` object with 
			
 
				+three properties (`error`, `success`, `other`) with long data type.
			
 
				+<5> The `map_script` defines `code` based on the `response.keyword` value of the 
			
 
				+document, then it counts the errors, successes, and other responses based on the 
			
 
				+first digit of the responses.
			
 
				+<6> The `combine_script` returns `state.responses` from each shard.
			
 
				+<7> The `reduce_script` creates a `counts` map with the `error`, `success`, 
			
 
				+and `other` properties, then iterates through the value of `responses` returned 
			
 
				+by each shard and assigns the different response types to the appropriate 
			
 
				+properties of the `counts` object; error responses to the error counts, success 
			
 
				+responses to the success counts, and other responses to the other counts. 
			
 
				+Finally, it returns the `counts` map with the response counts.