Browse Source

[DOCS] Adds HTTP response count example to Painless examples (#54412)

István Zoltán Szabó 5 years ago
parent
commit
b7d6ebc21b
1 changed files with 128 additions and 56 deletions
  1. 128 56
      docs/reference/transform/painless-examples.asciidoc

+ 128 - 56
docs/reference/transform/painless-examples.asciidoc

@@ -15,35 +15,41 @@ more about the Painless scripting language in the
 * <<painless-group-by>>
 * <<painless-bucket-script>>
 
+NOTE: While the context of the following examples is the {transform} use case, 
+the Painless scripts in the snippets below can be used in other {es} search 
+aggregations, too.
+
 
 [discrete]
 [[painless-top-hits]]
-==== Getting top hits by using scripted metric
+==== Getting top hits by using scripted metric aggregation
 
 This snippet shows how to find the latest document, in other words the document 
 with the most recent timestamp. From a technical perspective, it helps to achieve 
 the function of a <<search-aggregations-metrics-top-hits-aggregation>> by using 
-scripted metric aggregation which provides a metric output.
+scripted metric aggregation in a {transform}, which provides a metric output.
 
 [source,js]
 --------------------------------------------------
-"latest_doc": { 
-  "scripted_metric": {
-    "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1>
-    "map_script": """ <2>
-      def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); 
-      if (current_date > state.timestamp_latest) 
-      {state.timestamp_latest = current_date;
-      state.last_doc = new HashMap(params['_source']);}
-    """,
-    "combine_script": "return state", <3>
-    "reduce_script": """ <4>
-      def last_doc = '';
-      def timestamp_latest = 0L;
-      for (s in states) {if (s.timestamp_latest > (timestamp_latest))
-      {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} 
-      return last_doc
-    """
+"aggregations": {
+  "latest_doc": { 
+    "scripted_metric": {
+      "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1>
+      "map_script": """ <2>
+        def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); 
+        if (current_date > state.timestamp_latest) 
+        {state.timestamp_latest = current_date;
+        state.last_doc = new HashMap(params['_source']);}
+      """,
+      "combine_script": "return state", <3>
+      "reduce_script": """ <4>
+        def last_doc = '';
+        def timestamp_latest = 0L;
+        for (s in states) {if (s.timestamp_latest > (timestamp_latest))
+        {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} 
+        return last_doc
+      """
+    }
   }
 }
 --------------------------------------------------
@@ -70,23 +76,25 @@ You can retrieve the last value in a similar way:
 
 [source,js]
 --------------------------------------------------
-"latest_value": {
-  "scripted_metric": {
-    "init_script": "state.timestamp_latest = 0L; state.last_value = ''",
-    "map_script": """
-      def current_date = doc['date'].getValue().toInstant().toEpochMilli(); 
-      if (current_date > state.timestamp_latest) 
-      {state.timestamp_latest = current_date;
-      state.last_value = params['_source']['value'];}
-    """,
-    "combine_script": "return state",
-    "reduce_script": """
-      def last_value = '';
-      def timestamp_latest = 0L; 
-      for (s in states) {if (s.timestamp_latest > (timestamp_latest)) 
-      {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} 
-      return last_value
-    """
+"aggregations": {
+  "latest_value": {
+    "scripted_metric": {
+      "init_script": "state.timestamp_latest = 0L; state.last_value = ''",
+      "map_script": """
+        def current_date = doc['date'].getValue().toInstant().toEpochMilli(); 
+        if (current_date > state.timestamp_latest) 
+        {state.timestamp_latest = current_date;
+        state.last_value = params['_source']['value'];}
+      """,
+      "combine_script": "return state",
+      "reduce_script": """
+        def last_value = '';
+        def timestamp_latest = 0L; 
+        for (s in states) {if (s.timestamp_latest > (timestamp_latest)) 
+        {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} 
+        return last_value
+      """
+    }
   }
 }
 --------------------------------------------------
@@ -97,31 +105,35 @@ You can retrieve the last value in a similar way:
 [[painless-time-features]]
 ==== Getting time features as scripted fields
 
-This snippet shows how to extract time based features by using Painless. The 
-snippet uses an index where `@timestamp` is defined as a `date` type field.
+This snippet shows how to extract time based features by using Painless in a 
+{transform}. The snippet uses an index where `@timestamp` is defined as a `date` 
+type field.
 
 [source,js]
 --------------------------------------------------
-"script_fields": {
-    "hour_of_day": { <1>
-      "script": {
-        "lang": "painless",
-        "source": """
-          ZonedDateTime date =  doc['@timestamp'].value; <2>
-          return date.getHour(); <3>
-        """
+"aggregations": {
+  "script_fields": {
+      "hour_of_day": { <1>
+        "script": {
+          "lang": "painless",
+          "source": """
+            ZonedDateTime date =  doc['@timestamp'].value; <2>
+            return date.getHour(); <3>
+          """
+        }
+      },
+      "month_of_year": { <4>
+        "script": {
+          "lang": "painless",
+          "source": """
+            ZonedDateTime date =  doc['@timestamp'].value; <5>
+            return date.getMonthValue(); <6>
+          """
+        }
       }
     },
-    "month_of_year": { <4>
-      "script": {
-        "lang": "painless",
-        "source": """
-          ZonedDateTime date =  doc['@timestamp'].value; <5>
-          return date.getMonthValue(); <6>
-        """
-      }
-    }
-  }
+  ...
+}
 --------------------------------------------------
 // NOTCONSOLE
 
@@ -327,3 +339,63 @@ the buckets you want to use for the variable. In this particular case, `min` and
 `max` are variables mapped to `time_frame.gte.value` and `time_frame.lte.value`.
 <3> Finally, the script subtracts the start date of the session from the end 
 date which results in the duration of the session.
+
+
+[discrete]
+[[painless-count-http]]
+==== Counting HTTP responses by using scripted metric aggregation
+
+You can count the different HTTP response types in a web log data set by using 
+scripted metric aggregation as part of the {transform}. The example below 
+assumes that the HTTP response codes are stored as keywords in the `response` 
+field of the documents.
+
+[source,js]
+--------------------------------------------------
+"aggregations": { <1>
+  "responses.counts": { <2>
+    "scripted_metric": { <3>
+      "init_script": "state.responses = ['error':0L,'success':0L,'other':0L]", <4>
+      "map_script": """ <5>
+        def code = doc['response.keyword'].value;
+        if (code.startsWith('5') || code.startsWith('4')) {
+          state.responses.error += 1 ;
+        } else if(code.startsWith('2')) {
+          state.responses.success += 1;
+        } else {
+          state.responses.other += 1;
+        }
+        """,
+      "combine_script": "state.responses", <6>
+      "reduce_script": """ <7>
+        def counts = ['error': 0L, 'success': 0L, 'other': 0L];
+        for (responses in states) {
+          counts.error += responses['error'];
+          counts.success += responses['success'];
+          counts.other += responses['other'];
+        }
+        return counts;
+        """
+      }
+    },
+  ...  
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+<1> The `aggregations` object of the {transform} that contains all aggregations.
+<2> The `responses.counts` object, which contains the `scripted_metric` aggregation.
+<3> This `scripted_metric` performs a distributed operation on the web log data 
+to count specific types of HTTP responses (error, success, and other).
+<4> The `init_script` creates a `responses` map in the `state` object with 
+three keys (`error`, `success`, `other`), each initialized to `0` as a long value.
+<5> The `map_script` defines `code` based on the `response.keyword` value of the 
+document, then it counts the errors, successes, and other responses based on the 
+first digit of the responses.
+<6> The `combine_script` returns `state.responses` from each shard.
+<7> The `reduce_script` creates a `counts` map with the `error`, `success`, 
+and `other` keys, then iterates through the `responses` maps returned 
+by each shard and adds the different response counts to the appropriate 
+keys of the `counts` map; error responses to the error counts, success 
+responses to the success counts, and other responses to the other counts. 
+Finally, it returns the `counts` map with the response counts.