|
@@ -79,11 +79,17 @@ entire context of a document, including the original `_source` and any mapped
|
|
|
fields plus their values. At query time, the script runs and generates values
|
|
|
for each scripted field that is required for the query.
|
|
|
|
|
|
+.Emitting runtime field values
|
|
|
+****
|
|
|
When defining a Painless script to use with runtime fields, you must include
|
|
|
-`emit` to emit calculated values. For example, the script in the following
|
|
|
-request extracts the day of the week from the `@timestamp` field, which is
|
|
|
-defined as a `date` type. The script calculates the day of the week based on
|
|
|
-the value of `@timestamp`, and uses `emit` to return the calculated value.
|
|
|
+the {painless}/painless-runtime-fields-context.html[`emit` method] to emit
|
|
|
+calculated values.
|
|
|
+****
|
|
|
+
|
|
|
+For example, the script in the following request calculates the day of the week
|
|
|
+from the `@timestamp` field, which is defined as a `date` type. The script
|
|
|
+calculates the day of the week based on the value of `@timestamp`, and uses
|
|
|
+`emit` to return the calculated value.
|
|
|
|
|
|
[source,console]
|
|
|
----
|
|
@@ -648,31 +654,26 @@ your log data into {es}. The following request uses the <<docs-bulk,bulk API>>
|
|
|
to index raw log data into `my-index`. Instead of indexing all of your log
|
|
|
data, you can use a small sample to experiment with runtime fields.
|
|
|
|
|
|
+The final document is not in a valid Apache log format, but we can account for
|
|
|
+that scenario in our script.
|
|
|
+
|
|
|
[source,console]
|
|
|
----
|
|
|
POST /my-index/_bulk?refresh
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-06-21T15:00:01-05:00", "message" : "211.11.9.0 - - [2020-06-21T15:00:01-05:00] \"GET /english/index.html HTTP/1.0\" 304 0"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-06-21T15:00:01-05:00", "message" : "211.11.9.0 - - [2020-06-21T15:00:01-05:00] \"GET /english/index.html HTTP/1.0\" 304 0"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:30:17-05:00", "message" : "40.135.0.0 - - [2020-04-30T14:30:17-05:00] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:30:53-05:00", "message" : "232.0.0.0 - - [2020-04-30T14:30:53-05:00] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:12-05:00", "message" : "26.1.0.0 - - [2020-04-30T14:31:12-05:00] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:19-05:00", "message" : "247.37.0.0 - - [2020-04-30T14:31:19-05:00] \"GET /french/splash_inet.html HTTP/1.0\" 200 3781"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:27-05:00", "message" : "252.0.0.0 - - [2020-04-30T14:31:27-05:00] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:29-05:00", "message" : "247.37.0.0 - - [2020-04-30T14:31:29-05:00] \"GET /images/hm_brdl.gif HTTP/1.0\" 304 0"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:29-05:00", "message" : "247.37.0.0 - - [2020-04-30T14:31:29-05:00] \"GET /images/hm_arw.gif HTTP/1.0\" 304 0"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:32-05:00", "message" : "247.37.0.0 - - [2020-04-30T14:31:32-05:00] \"GET /images/nav_bg_top.gif HTTP/1.0\" 200 929"}
|
|
|
-{ "index": {}}
|
|
|
-{ "@timestamp": "2020-04-30T14:31:43-05:00", "message" : "247.37.0.0 - - [2020-04-30T14:31:43-05:00] \"GET /french/images/nav_venue_off.gif HTTP/1.0\" 304 0"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:30:17-05:00","message":"40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:30:53-05:00","message":"232.0.0.0 - - [30/Apr/2020:14:30:53 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:31:12-05:00","message":"26.1.0.0 - - [30/Apr/2020:14:31:12 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:31:19-05:00","message":"247.37.0.0 - - [30/Apr/2020:14:31:19 -0500] \"GET /french/splash_inet.html HTTP/1.0\" 200 3781"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:31:22-05:00","message":"247.37.0.0 - - [30/Apr/2020:14:31:22 -0500] \"GET /images/hm_nbg.jpg HTTP/1.0\" 304 0"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:31:27-05:00","message":"252.0.0.0 - - [30/Apr/2020:14:31:27 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"}
|
|
|
+{"index":{}}
|
|
|
+{"timestamp":"2020-04-30T14:31:28-05:00","message":"not a valid apache log"}
|
|
|
----
|
|
|
// TEST[continued]
|
|
|
|
|
@@ -699,6 +700,9 @@ The mapping contains two fields: `@timestamp` and `message`.
|
|
|
},
|
|
|
"message" : {
|
|
|
"type" : "wildcard"
|
|
|
+ },
|
|
|
+ "timestamp" : {
|
|
|
+ "type" : "date"
|
|
|
}
|
|
|
}
|
|
|
},
|
|
@@ -708,39 +712,50 @@ The mapping contains two fields: `@timestamp` and `message`.
|
|
|
----
|
|
|
// TESTRESPONSE[s/\.\.\./"settings": $body.my-index.settings/]
|
|
|
|
|
|
-[[runtime-examples-runtime-field]]
|
|
|
-==== Define a runtime field to search by IP address
|
|
|
-If you want to retrieve results that include `clientip`, you can add that field
|
|
|
-as a runtime field in the mapping. The runtime script operates on the `clientip`
|
|
|
-field at runtime to calculate values for that field.
|
|
|
+[[runtime-examples-grok]]
|
|
|
+==== Define a runtime field with a grok pattern
|
|
|
+If you want to retrieve results that include `clientip`, you can add that
|
|
|
+field as a runtime field in the mapping. The following runtime script defines a
|
|
|
+grok pattern that extracts structured fields out of a single text
|
|
|
+field within a document. A grok pattern is like a regular expression that
|
|
|
+supports aliased expressions that you can reuse. See <<grok-basics,Grok basics>> to learn more about grok syntax.
|
|
|
+
|
|
|
+The script matches on the `%{COMMONAPACHELOG}` log pattern, which understands
|
|
|
+the structure of Apache logs. If the pattern matches, the script emits the
|
|
|
+value of the matching IP address. If the pattern doesn't match
|
|
|
+(`clientip == null`), the script emits nothing and exits without crashing.
|
|
|
|
|
|
[source,console]
|
|
|
----
|
|
|
-PUT /my-index/_mapping
|
|
|
+PUT my-index/_mappings
|
|
|
{
|
|
|
"runtime": {
|
|
|
- "clientip": {
|
|
|
+ "http.clientip": {
|
|
|
"type": "ip",
|
|
|
- "script" : {
|
|
|
- "source" : "String m = doc[\"message\"].value; int end = m.indexOf(\" \"); emit(m.substring(0, end));"
|
|
|
- }
|
|
|
+ "script": """
|
|
|
+ String clientip=grok('%{COMMONAPACHELOG}').extract(doc["message"].value)?.clientip;
|
|
|
+ if (clientip != null) emit(clientip); <1>
|
|
|
+ """
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
----
|
|
|
// TEST[continued]
|
|
|
+<1> This condition ensures that the script doesn't crash even if the pattern of
|
|
|
+the message doesn't match.
|
|
|
|
|
|
-Using the `clientip` runtime field, you can define a simple query to run a
|
|
|
+[[runtime-examples-grok-ip]]
|
|
|
+===== Search for a specific IP address
|
|
|
+Using the `http.clientip` runtime field, you can define a simple query to run a
|
|
|
search for a specific IP address and return all related fields.
|
|
|
|
|
|
[source,console]
|
|
|
----
|
|
|
GET my-index/_search
|
|
|
{
|
|
|
- "size": 1,
|
|
|
"query": {
|
|
|
"match": {
|
|
|
- "clientip": "211.11.9.0"
|
|
|
+ "http.clientip": "40.135.0.0"
|
|
|
}
|
|
|
},
|
|
|
"fields" : ["*"]
|
|
@@ -752,6 +767,79 @@ The API returns the following result. Without building your data structure in
|
|
|
advance, you can search and explore your data in meaningful ways to experiment
|
|
|
and determine which fields to index.
|
|
|
|
|
|
+Also, remember that `if` statement in the script?
|
|
|
+
|
|
|
+[source,painless]
|
|
|
+----
|
|
|
+if (clientip != null) emit(clientip);
|
|
|
+----
|
|
|
+
|
|
|
+If the script didn't include this condition, the query would fail on any shard
|
|
|
+containing a document that doesn't match the pattern. By including this condition, the query skips
|
|
|
+data that doesn't match the grok pattern.
|
|
|
+
|
|
|
+[source,console-result]
|
|
|
+----
|
|
|
+{
|
|
|
+ ...
|
|
|
+ "hits" : {
|
|
|
+ "total" : {
|
|
|
+ "value" : 1,
|
|
|
+ "relation" : "eq"
|
|
|
+ },
|
|
|
+ "max_score" : 1.0,
|
|
|
+ "hits" : [
|
|
|
+ {
|
|
|
+ "_index" : "my-index",
|
|
|
+ "_id" : "FdLqu3cBhqheMnFKd0gK",
|
|
|
+ "_score" : 1.0,
|
|
|
+ "_source" : {
|
|
|
+ "timestamp" : "2020-04-30T14:30:17-05:00",
|
|
|
+ "message" : "40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"
|
|
|
+ },
|
|
|
+ "fields" : {
|
|
|
+ "http.clientip" : [
|
|
|
+ "40.135.0.0"
|
|
|
+ ],
|
|
|
+ "message" : [
|
|
|
+ "40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"
|
|
|
+ ],
|
|
|
+ "timestamp" : [
|
|
|
+ "2020-04-30T19:30:17.000Z"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+}
|
|
|
+----
|
|
|
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
+// TESTRESPONSE[s/"_id" : "FdLqu3cBhqheMnFKd0gK"/"_id": $body.hits.hits.0._id/]
|
|
|
+
|
|
|
+[[runtime-examples-grok-range]]
|
|
|
+===== Search for documents in a specific range
|
|
|
+You can also run a <<query-dsl-range-query,range query>> that operates on the
|
|
|
+`timestamp` field. The following query returns any documents where the
|
|
|
+`timestamp` is greater than or equal to `2020-04-30T14:31:27-05:00`:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+----
|
|
|
+GET my-index/_search
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "range": {
|
|
|
+ "timestamp": {
|
|
|
+ "gte": "2020-04-30T14:31:27-05:00"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+----
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+The response includes the document where the log format doesn't match, but the
|
|
|
+timestamp falls within the defined range.
|
|
|
+
|
|
|
[source,console-result]
|
|
|
----
|
|
|
{
|
|
@@ -765,21 +853,131 @@ and determine which fields to index.
|
|
|
"hits" : [
|
|
|
{
|
|
|
"_index" : "my-index",
|
|
|
- "_id" : "oWs5KXYB-XyJbifr9mrz",
|
|
|
+ "_id" : "hdEhyncBRSB6iD-PoBqe",
|
|
|
"_score" : 1.0,
|
|
|
"_source" : {
|
|
|
- "@timestamp" : "2020-06-21T15:00:01-05:00",
|
|
|
- "message" : "211.11.9.0 - - [2020-06-21T15:00:01-05:00] \"GET /english/index.html HTTP/1.0\" 304 0"
|
|
|
+ "timestamp" : "2020-04-30T14:31:27-05:00",
|
|
|
+ "message" : "252.0.0.0 - - [30/Apr/2020:14:31:27 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "_index" : "my-index",
|
|
|
+ "_id" : "htEhyncBRSB6iD-PoBqe",
|
|
|
+ "_score" : 1.0,
|
|
|
+ "_source" : {
|
|
|
+ "timestamp" : "2020-04-30T14:31:28-05:00",
|
|
|
+ "message" : "not a valid apache log"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+}
|
|
|
+----
|
|
|
+// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
+// TESTRESPONSE[s/"_id" : "hdEhyncBRSB6iD-PoBqe"/"_id": $body.hits.hits.0._id/]
|
|
|
+// TESTRESPONSE[s/"_id" : "htEhyncBRSB6iD-PoBqe"/"_id": $body.hits.hits.1._id/]
|
|
|
+
|
|
|
+[[runtime-examples-dissect]]
|
|
|
+==== Define a runtime field with a dissect pattern
|
|
|
+If you don't need the power of regular expressions, you can use
|
|
|
+<<dissect-processor,dissect patterns>> instead of grok patterns. Dissect
|
|
|
+patterns match on fixed delimiters but are typically faster than grok.
|
|
|
+
|
|
|
+You can use dissect to achieve the same results as parsing the Apache logs with
|
|
|
+a <<runtime-examples-grok,grok pattern>>. Instead of matching on a log
|
|
|
+pattern, you include the parts of the string that you want to discard. Paying
|
|
|
+special attention to the parts of the string you want to discard will help build
|
|
|
+successful dissect patterns.
|
|
|
+
|
|
|
+[source,console]
|
|
|
+----
|
|
|
+PUT my-index/_mappings
|
|
|
+{
|
|
|
+ "runtime": {
|
|
|
+ "http.client.ip": {
|
|
|
+ "type": "ip",
|
|
|
+ "script": """
|
|
|
+ String clientip=dissect('%{clientip} %{ident} %{auth} [%{@timestamp}] "%{verb} %{request} HTTP/%{httpversion}" %{status} %{size}').extract(doc["message"].value)?.clientip;
|
|
|
+ if (clientip != null) emit(clientip);
|
|
|
+ """
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+----
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+Similarly, you can define a dissect pattern to extract the https://developer.mozilla.org/en-US/docs/Web/HTTP/Status[HTTP response code]:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+----
|
|
|
+PUT my-index/_mappings
|
|
|
+{
|
|
|
+ "runtime": {
|
|
|
+ "http.response": {
|
|
|
+ "type": "long",
|
|
|
+ "script": """
|
|
|
+ String response=dissect('%{clientip} %{ident} %{auth} [%{@timestamp}] "%{verb} %{request} HTTP/%{httpversion}" %{response} %{size}').extract(doc["message"].value)?.response;
|
|
|
+ if (response != null) emit(Integer.parseInt(response));
|
|
|
+ """
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+----
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+You can then run a query to retrieve a specific HTTP response using the
|
|
|
+`http.response` runtime field:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+----
|
|
|
+GET my-index/_search
|
|
|
+{
|
|
|
+ "query": {
|
|
|
+ "match": {
|
|
|
+ "http.response": "304"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "fields" : ["*"]
|
|
|
+}
|
|
|
+----
|
|
|
+// TEST[continued]
|
|
|
+
|
|
|
+The response includes a single document where the HTTP response is `304`:
|
|
|
+
|
|
|
+[source,console-result]
|
|
|
+----
|
|
|
+{
|
|
|
+ ...
|
|
|
+ "hits" : {
|
|
|
+ "total" : {
|
|
|
+ "value" : 1,
|
|
|
+ "relation" : "eq"
|
|
|
+ },
|
|
|
+ "max_score" : 1.0,
|
|
|
+ "hits" : [
|
|
|
+ {
|
|
|
+ "_index" : "my-index",
|
|
|
+ "_id" : "A2qDy3cBWRMvVAuI7F8M",
|
|
|
+ "_score" : 1.0,
|
|
|
+ "_source" : {
|
|
|
+ "timestamp" : "2020-04-30T14:31:22-05:00",
|
|
|
+ "message" : "247.37.0.0 - - [30/Apr/2020:14:31:22 -0500] \"GET /images/hm_nbg.jpg HTTP/1.0\" 304 0"
|
|
|
},
|
|
|
"fields" : {
|
|
|
- "@timestamp" : [
|
|
|
- "2020-06-21T20:00:01.000Z"
|
|
|
+ "http.clientip" : [
|
|
|
+ "247.37.0.0"
|
|
|
],
|
|
|
- "clientip" : [
|
|
|
- "211.11.9.0"
|
|
|
+ "http.response" : [
|
|
|
+ 304
|
|
|
],
|
|
|
"message" : [
|
|
|
- "211.11.9.0 - - [2020-06-21T15:00:01-05:00] \"GET /english/index.html HTTP/1.0\" 304 0"
|
|
|
+ "247.37.0.0 - - [30/Apr/2020:14:31:22 -0500] \"GET /images/hm_nbg.jpg HTTP/1.0\" 304 0"
|
|
|
+ ],
|
|
|
+ "http.client.ip" : [
|
|
|
+ "247.37.0.0"
|
|
|
+ ],
|
|
|
+ "timestamp" : [
|
|
|
+ "2020-04-30T19:31:22.000Z"
|
|
|
]
|
|
|
}
|
|
|
}
|
|
@@ -788,4 +986,4 @@ and determine which fields to index.
|
|
|
}
|
|
|
----
|
|
|
// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/]
|
|
|
-// TESTRESPONSE[s/"_id" : "oWs5KXYB-XyJbifr9mrz"/"_id": $body.hits.hits.0._id/]
|
|
|
+// TESTRESPONSE[s/"_id" : "A2qDy3cBWRMvVAuI7F8M"/"_id": $body.hits.hits.0._id/]
|