|
@@ -17,432 +17,623 @@ what the pipeline attempts to achieve.
|
|
|
The `processors` parameter defines a list of processors to be executed in
|
|
|
order.
|
|
|
|
|
|
-== Processors
|
|
|
+== Ingest APIs
|
|
|
|
|
|
-All processors are defined in the following way within a pipeline definition:
|
|
|
+=== Put pipeline API
|
|
|
+
|
|
|
+The put pipeline api adds pipelines and updates existing pipelines in the cluster.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
+PUT _ingest/pipeline/my-pipeline-id
|
|
|
{
|
|
|
- "PROCESSOR_NAME" : {
|
|
|
- ... processor configuration options ...
|
|
|
- }
|
|
|
+ "description" : "describe pipeline",
|
|
|
+ "processors" : [
|
|
|
+ {
|
|
|
+ "simple" : {
|
|
|
+ // settings
|
|
|
+ }
|
|
|
+ },
|
|
|
+ // other processors
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
+// AUTOSENSE
|
|
|
|
|
|
-Each processor defines its own configuration parameters, but all processors have
|
|
|
-the ability to declare `tag` and `on_failure` fields. These fields are optional.
|
|
|
-
|
|
|
-A `tag` is simply a string identifier of the specific instantiation of a certain
|
|
|
-processor in a pipeline. The `tag` field does not affect any processor's behavior,
|
|
|
-but is very useful for bookkeeping and tracing errors to specific processors.
|
|
|
-
|
|
|
-See <<handling-failure-in-pipelines>> to learn more about the `on_failure` field and error handling in pipelines.
|
|
|
+NOTE: The put pipeline api also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
+      pipeline changes take effect immediately.
|
|
|
|
|
|
-=== Set processor
|
|
|
-Sets one field and associates it with the specified value. If the field already exists,
|
|
|
-its value will be replaced with the provided one.
|
|
|
+=== Get pipeline API
|
|
|
|
|
|
-[[set-options]]
|
|
|
-.Set Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to insert, upsert, or update
|
|
|
-| `value` | yes | - | The value to be set for the field
|
|
|
-|======
|
|
|
+The get pipeline api returns pipelines based on id. This api always returns a local reference of the pipeline.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "set": {
|
|
|
- "field": "field1",
|
|
|
- "value": 582.1
|
|
|
- }
|
|
|
-}
|
|
|
+GET _ingest/pipeline/my-pipeline-id
|
|
|
--------------------------------------------------
|
|
|
+// AUTOSENSE
|
|
|
|
|
|
-=== Append processor
|
|
|
-Appends one or more values to an existing array if the field already exists and it is an array.
|
|
|
-Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar.
|
|
|
-Creates an array containing the provided values if the fields doesn't exist.
|
|
|
-Accepts a single value or an array of values.
|
|
|
-
|
|
|
-[[append-options]]
|
|
|
-.Append Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to be appended to
|
|
|
-| `value` | yes | - | The value to be appended
|
|
|
-|======
|
|
|
+Example response:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "append": {
|
|
|
- "field": "field1"
|
|
|
- "value": ["item2", "item3", "item4"]
|
|
|
- }
|
|
|
+ "my-pipeline-id": {
|
|
|
+ "_source" : {
|
|
|
+ "description": "describe pipeline",
|
|
|
+ "processors": [
|
|
|
+ {
|
|
|
+ "simple" : {
|
|
|
+ // settings
|
|
|
+ }
|
|
|
+ },
|
|
|
+ // other processors
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "_version" : 0
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-=== Remove processor
|
|
|
-Removes an existing field. If the field doesn't exist, an exception will be thrown
|
|
|
-
|
|
|
-[[remove-options]]
|
|
|
-.Remove Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to be removed
|
|
|
-|======
|
|
|
-
|
|
|
-[source,js]
|
|
|
---------------------------------------------------
|
|
|
-{
|
|
|
- "remove": {
|
|
|
- "field": "foo"
|
|
|
- }
|
|
|
-}
|
|
|
---------------------------------------------------
|
|
|
+For each returned pipeline, the source and the version are returned.
+The version is useful for knowing which version of the pipeline the node has.
+Multiple ids can be provided at the same time. Wildcards are also supported.
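+
+For example, assuming a second pipeline called `my-other-pipeline-id` also exists, both could be fetched in
+one request, or matched with a wildcard (a minimal sketch of the syntax described above):
+
+[source,js]
+--------------------------------------------------
+GET _ingest/pipeline/my-pipeline-id,my-other-pipeline-id
+
+GET _ingest/pipeline/my-pipeline-*
+--------------------------------------------------
+// AUTOSENSE
+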
|
|
|
|
|
|
-=== Rename processor
|
|
|
-Renames an existing field. If the field doesn't exist, an exception will be thrown. Also, the new field
|
|
|
-name must not exist.
|
|
|
+=== Delete pipeline API
|
|
|
|
|
|
-[[rename-options]]
|
|
|
-.Rename Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to be renamed
|
|
|
-| `to` | yes | - | The new name of the field
|
|
|
-|======
|
|
|
+The delete pipeline api deletes pipelines by id.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "rename": {
|
|
|
- "field": "foo",
|
|
|
- "to": "foobar"
|
|
|
- }
|
|
|
-}
|
|
|
+DELETE _ingest/pipeline/my-pipeline-id
|
|
|
--------------------------------------------------
|
|
|
+// AUTOSENSE
|
|
|
|
|
|
+=== Simulate pipeline API
|
|
|
|
|
|
-=== Convert processor
|
|
|
-Converts an existing field's value to a different type, like turning a string to an integer.
|
|
|
-If the field value is an array, all members will be converted.
|
|
|
-
|
|
|
-The supported types include: `integer`, `float`, `string`, and `boolean`.
|
|
|
+The simulate pipeline api executes a specific pipeline against
|
|
|
+the set of documents provided in the body of the request.
|
|
|
|
|
|
-`boolean` will set the field to true if its string value is equal to `true` (ignore case), to
|
|
|
-false if its string value is equal to `false` (ignore case) and it will throw exception otherwise.
|
|
|
+A simulate request may call upon an existing pipeline to be executed
|
|
|
+against the provided documents, or supply a pipeline definition in
|
|
|
+the body of the request.
|
|
|
|
|
|
-[[convert-options]]
|
|
|
-.Convert Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field whose value is to be converted
|
|
|
-| `type` | yes | - | The type to convert the existing value to
|
|
|
-|======
|
|
|
+Here is the structure of a simulate request with a provided pipeline:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
+POST _ingest/pipeline/_simulate
|
|
|
{
|
|
|
- "convert": {
|
|
|
- "field" : "foo"
|
|
|
- "type": "integer"
|
|
|
- }
|
|
|
+ "pipeline" : {
|
|
|
+ // pipeline definition here
|
|
|
+ },
|
|
|
+ "docs" : [
|
|
|
+ { /** first document **/ },
|
|
|
+ { /** second document **/ },
|
|
|
+ // ...
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-=== Gsub processor
|
|
|
-Converts a string field by applying a regular expression and a replacement.
|
|
|
-If the field is not a string, the processor will throw an exception.
|
|
|
-
|
|
|
-[[gsub-options]]
|
|
|
-.Gsub Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field apply the replacement for
|
|
|
-| `pattern` | yes | - | The pattern to be replaced
|
|
|
-| `replacement` | yes | - | The string to replace the matching patterns with.
|
|
|
-|======
|
|
|
+Here is the structure of a simulate request against a pre-existing pipeline:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
+POST _ingest/pipeline/my-pipeline-id/_simulate
|
|
|
{
|
|
|
- "gsub": {
|
|
|
- "field": "field1",
|
|
|
- "pattern": "\.",
|
|
|
- "replacement": "-"
|
|
|
- }
|
|
|
+ "docs" : [
|
|
|
+ { /** first document **/ },
|
|
|
+ { /** second document **/ },
|
|
|
+ // ...
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-=== Join processor
|
|
|
-Joins each element of an array into a single string using a separator character between each element.
|
|
|
-Throws error when the field is not an array.
|
|
|
|
|
|
-[[join-options]]
|
|
|
-.Join Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to be separated
|
|
|
-| `separator` | yes | - | The separator character
|
|
|
-|======
|
|
|
+Here is an example simulate request with a provided pipeline and its response:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
+POST _ingest/pipeline/_simulate
|
|
|
{
|
|
|
- "join": {
|
|
|
- "field": "joined_array_field",
|
|
|
- "separator": "-"
|
|
|
- }
|
|
|
+ "pipeline" :
|
|
|
+ {
|
|
|
+ "description": "_description",
|
|
|
+ "processors": [
|
|
|
+ {
|
|
|
+ "set" : {
|
|
|
+ "field" : "field2",
|
|
|
+ "value" : "_value"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "docs": [
|
|
|
+ {
|
|
|
+ "_index": "index",
|
|
|
+ "_type": "type",
|
|
|
+ "_id": "id",
|
|
|
+ "_source": {
|
|
|
+ "foo": "bar"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "_index": "index",
|
|
|
+ "_type": "type",
|
|
|
+ "_id": "id",
|
|
|
+ "_source": {
|
|
|
+ "foo": "rab"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
+// AUTOSENSE
|
|
|
|
|
|
-=== Split processor
|
|
|
-Split a field to an array using a separator character. Only works on string fields.
|
|
|
-
|
|
|
-[[split-options]]
|
|
|
-.Split Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to split
|
|
|
-|======
|
|
|
+response:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "split": {
|
|
|
- "field": ","
|
|
|
- }
|
|
|
-}
|
|
|
---------------------------------------------------
|
|
|
-
|
|
|
-=== Lowercase processor
|
|
|
-Converts a string to its lowercase equivalent.
|
|
|
-
|
|
|
-[[lowercase-options]]
|
|
|
-.Lowercase Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to lowercase
|
|
|
-|======
|
|
|
+ "docs": [
|
|
|
+ {
|
|
|
+ "doc": {
|
|
|
+ "_id": "id",
|
|
|
+ "_ttl": null,
|
|
|
+ "_parent": null,
|
|
|
+ "_index": "index",
|
|
|
+ "_routing": null,
|
|
|
+ "_type": "type",
|
|
|
+ "_timestamp": null,
|
|
|
+ "_source": {
|
|
|
+ "field2": "_value",
|
|
|
+ "foo": "bar"
|
|
|
+ },
|
|
|
+ "_ingest": {
|
|
|
+ "timestamp": "2016-01-04T23:53:27.186+0000"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "doc": {
|
|
|
+ "_id": "id",
|
|
|
+ "_ttl": null,
|
|
|
+ "_parent": null,
|
|
|
+ "_index": "index",
|
|
|
+ "_routing": null,
|
|
|
+ "_type": "type",
|
|
|
+ "_timestamp": null,
|
|
|
+ "_source": {
|
|
|
+ "field2": "_value",
|
|
|
+ "foo": "rab"
|
|
|
+ },
|
|
|
+ "_ingest": {
|
|
|
+ "timestamp": "2016-01-04T23:53:27.186+0000"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+It is often useful to see how each processor affects the ingest document
|
|
|
+as it is passed through the pipeline. To see the intermediate results of
|
|
|
+each processor in the simulate request, a `verbose` parameter may be added
|
|
|
+to the request.
|
|
|
+
|
|
|
+Here is an example verbose request and its response:
|
|
|
+
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
+POST _ingest/pipeline/_simulate?verbose
|
|
|
{
|
|
|
- "lowercase": {
|
|
|
- "field": "foo"
|
|
|
- }
|
|
|
+ "pipeline" :
|
|
|
+ {
|
|
|
+ "description": "_description",
|
|
|
+ "processors": [
|
|
|
+ {
|
|
|
+ "set" : {
|
|
|
+ "field" : "field2",
|
|
|
+ "value" : "_value2"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "set" : {
|
|
|
+ "field" : "field3",
|
|
|
+ "value" : "_value3"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "docs": [
|
|
|
+ {
|
|
|
+ "_index": "index",
|
|
|
+ "_type": "type",
|
|
|
+ "_id": "id",
|
|
|
+ "_source": {
|
|
|
+ "foo": "bar"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "_index": "index",
|
|
|
+ "_type": "type",
|
|
|
+ "_id": "id",
|
|
|
+ "_source": {
|
|
|
+ "foo": "rab"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
+// AUTOSENSE
|
|
|
|
|
|
-=== Uppercase processor
|
|
|
-Converts a string to its uppercase equivalent.
|
|
|
-
|
|
|
-[[uppercase-options]]
|
|
|
-.Uppercase Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The field to uppercase
|
|
|
-|======
|
|
|
+response:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "uppercase": {
|
|
|
- "field": "foo"
|
|
|
- }
|
|
|
+ "docs": [
|
|
|
+ {
|
|
|
+ "processor_results": [
|
|
|
+ {
|
|
|
+ "tag": "processor[set]-0",
|
|
|
+ "doc": {
|
|
|
+ "_id": "id",
|
|
|
+ "_ttl": null,
|
|
|
+ "_parent": null,
|
|
|
+ "_index": "index",
|
|
|
+ "_routing": null,
|
|
|
+ "_type": "type",
|
|
|
+ "_timestamp": null,
|
|
|
+ "_source": {
|
|
|
+ "field2": "_value2",
|
|
|
+ "foo": "bar"
|
|
|
+ },
|
|
|
+ "_ingest": {
|
|
|
+ "timestamp": "2016-01-05T00:02:51.383+0000"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "tag": "processor[set]-1",
|
|
|
+ "doc": {
|
|
|
+ "_id": "id",
|
|
|
+ "_ttl": null,
|
|
|
+ "_parent": null,
|
|
|
+ "_index": "index",
|
|
|
+ "_routing": null,
|
|
|
+ "_type": "type",
|
|
|
+ "_timestamp": null,
|
|
|
+ "_source": {
|
|
|
+ "field3": "_value3",
|
|
|
+ "field2": "_value2",
|
|
|
+ "foo": "bar"
|
|
|
+ },
|
|
|
+ "_ingest": {
|
|
|
+ "timestamp": "2016-01-05T00:02:51.383+0000"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "processor_results": [
|
|
|
+ {
|
|
|
+ "tag": "processor[set]-0",
|
|
|
+ "doc": {
|
|
|
+ "_id": "id",
|
|
|
+ "_ttl": null,
|
|
|
+ "_parent": null,
|
|
|
+ "_index": "index",
|
|
|
+ "_routing": null,
|
|
|
+ "_type": "type",
|
|
|
+ "_timestamp": null,
|
|
|
+ "_source": {
|
|
|
+ "field2": "_value2",
|
|
|
+ "foo": "rab"
|
|
|
+ },
|
|
|
+ "_ingest": {
|
|
|
+ "timestamp": "2016-01-05T00:02:51.384+0000"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "tag": "processor[set]-1",
|
|
|
+ "doc": {
|
|
|
+ "_id": "id",
|
|
|
+ "_ttl": null,
|
|
|
+ "_parent": null,
|
|
|
+ "_index": "index",
|
|
|
+ "_routing": null,
|
|
|
+ "_type": "type",
|
|
|
+ "_timestamp": null,
|
|
|
+ "_source": {
|
|
|
+ "field3": "_value3",
|
|
|
+ "field2": "_value2",
|
|
|
+ "foo": "rab"
|
|
|
+ },
|
|
|
+ "_ingest": {
|
|
|
+ "timestamp": "2016-01-05T00:02:51.384+0000"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-=== Trim processor
|
|
|
-Trims whitespace from field. NOTE: this only works on leading and trailing whitespaces.
|
|
|
+== Accessing data in pipelines
|
|
|
|
|
|
-[[trim-options]]
|
|
|
-.Trim Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `field` | yes | - | The string-valued field to trim whitespace from
|
|
|
-|======
|
|
|
+Processors in pipelines have read and write access to documents that pass through the pipeline.
|
|
|
+The fields in the source of a document and its metadata fields are accessible.
|
|
|
+
|
|
|
+Accessing a field in the source is straightforward and one can refer to fields by
|
|
|
+their name. For example:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "trim": {
|
|
|
- "field": "foo"
|
|
|
+ "set": {
|
|
|
+    "field": "my_field",
|
|
|
+ "value": 582.1
|
|
|
}
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-=== Grok Processor
|
|
|
+On top of this, fields from the source are always accessible via the `_source` prefix:
|
|
|
|
|
|
-The Grok Processor extracts structured fields out of a single text field within a document. You choose which field to
|
|
|
-extract matched fields from, as well as the Grok Pattern you expect will match. A Grok Pattern is like a regular
|
|
|
-expression that supports aliased expressions that can be reused.
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "set": {
|
|
|
+    "field": "_source.my_field",
|
|
|
+ "value": 582.1
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
|
|
|
-This tool is perfect for syslog logs, apache and other webserver logs, mysql logs, and in general, any log format
|
|
|
-that is generally written for humans and not computer consumption.
|
|
|
+Metadata fields can also be accessed in the same way as fields from the source. This
|
|
|
+is possible because Elasticsearch doesn't allow fields in the source that have the
|
|
|
+same name as metadata fields.
|
|
|
|
|
|
-The processor comes packaged with over 120 reusable patterns that are located at `$ES_HOME/config/ingest/grok/patterns`.
|
|
|
-Here, you can add your own custom grok pattern files with custom grok expressions to be used by the processor.
|
|
|
+The following example sets the id of a document to `1`:
|
|
|
|
|
|
-If you need help building patterns to match your logs, you will find the <http://grokdebug.herokuapp.com> and
|
|
|
-<http://grokconstructor.appspot.com/> applications quite useful!
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "set": {
|
|
|
+    "field": "_id",
|
|
|
+ "value": "1"
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
|
|
|
-==== Grok Basics
|
|
|
+The following metadata fields are accessible by a processor: `_index`, `_type`, `_id`, `_routing`, `_parent`,
|
|
|
+`_timestamp` and `_ttl`.
|
|
|
|
|
|
-Grok sits on top of regular expressions, so any regular expressions are valid in grok as well.
|
|
|
-The regular expression library is Oniguruma, and you can see the full supported regexp syntax
|
|
|
-https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Onigiruma site].
|
|
|
-
|
|
|
-Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more
|
|
|
-complex patterns that match your fields.
|
|
|
-
|
|
|
-The syntax for re-using a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`.
|
|
|
-
|
|
|
-The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER`
|
|
|
-pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both
|
|
|
-patterns that are provided within the default patterns set.
|
|
|
-
|
|
|
-The `SEMANTIC` is the identifier you give to the piece of text being matched. For example, `3.44` could be the
|
|
|
-duration of an event, so you could call it simply `duration`. Further, a string `55.3.244.1` might identify
|
|
|
-the `client` making a request.
|
|
|
-
|
|
|
-The `TYPE` is the type you wish to cast your named field. `int` and `float` are currently the only types supported for coercion.
|
|
|
+Beyond metadata fields and source fields, ingest also adds ingest metadata to documents being processed.
|
|
|
+These metadata properties are accessible under the `_ingest` key. Currently ingest adds the ingest timestamp
+under the `_ingest.timestamp` key, which is the time Elasticsearch received the index or bulk request to
+pre-process the document. Any processor is free to add more ingest-related metadata to it. Ingest metadata is transient
+and is lost after a document has been processed by the pipeline, so it won't be indexed.
|
|
|
|
|
|
-For example, here is a grok pattern that would match the above example given. We would like to match a text with the following
|
|
|
-contents:
|
|
|
+The following example adds a field with the name `received` whose value is the ingest timestamp:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-3.44 55.3.244.1
|
|
|
+{
|
|
|
+ "set": {
|
|
|
+    "field": "received",
|
|
|
+ "value": "{{_ingest.timestamp}}"
|
|
|
+ }
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-We may know that the above message is a number followed by an IP-address. We can match this text with the following
|
|
|
-Grok expression.
|
|
|
-
|
|
|
-[source,js]
|
|
|
---------------------------------------------------
|
|
|
-%{NUMBER:duration} %{IP:client}
|
|
|
---------------------------------------------------
|
|
|
+Unlike Elasticsearch metadata fields, the ingest metadata field name `_ingest` can also be used as a valid field name
+in the source of a document. Use `_source._ingest` to refer to that source field, otherwise `_ingest` will be interpreted
+as ingest metadata.
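+
+For example, to write to a field in the source that is itself named `_ingest` (rather than to the ingest
+metadata), the `_source` prefix would be used, as in this minimal sketch:
+
+[source,js]
+--------------------------------------------------
+{
+  "set": {
+    "field": "_source._ingest",
+    "value": "this value goes into a regular source field"
+  }
+}
+--------------------------------------------------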
|
|
|
|
|
|
-==== Custom Patterns and Pattern Files
|
|
|
+A number of processor settings also support templating. Settings that support templating can have zero or more
|
|
|
+template snippets. A template snippet begins with `{{` and ends with `}}`.
|
|
|
+Accessing fields and metadata fields in templates works exactly the same as via regular processor field settings.
|
|
|
|
|
|
-The Grok Processor comes pre-packaged with a base set of pattern files. These patterns may not always have
|
|
|
-what you are looking for. These pattern files have a very basic format. Each line describes a named pattern with
|
|
|
-the following format:
|
|
|
+In this example a field by the name `field_c` is added and its value is a concatenation of
|
|
|
+the values of `field_a` and `field_b`.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-NAME ' '+ PATTERN '\n'
|
|
|
+{
|
|
|
+ "set": {
|
|
|
+    "field": "field_c",
|
|
|
+ "value": "{{field_a}} {{field_b}}"
|
|
|
+ }
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-You can add this pattern to an existing file, or add your own file in the patterns directory here: `$ES_HOME/config/ingest/grok/patterns`.
|
|
|
-The Ingest Plugin will pick up files in this directory to be loaded into the grok processor's known patterns. These patterns are loaded
|
|
|
-at startup, so you will need to do a restart your ingest node if you wish to update these files while running.
|
|
|
-
|
|
|
-Example snippet of pattern definitions found in the `grok-patterns` patterns file:
|
|
|
+The following example changes the index a document is going to be indexed into. The index a document will be redirected
|
|
|
+to depends on the field in the source with name `geoip.country_iso_code`.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-YEAR (?>\d\d){1,2}
|
|
|
-HOUR (?:2[0123]|[01]?[0-9])
|
|
|
-MINUTE (?:[0-5][0-9])
|
|
|
-SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
|
|
|
-TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
|
|
|
+{
|
|
|
+ "set": {
|
|
|
+    "field": "_index",
|
|
|
+ "value": "{{geoip.country_iso_code}}"
|
|
|
+ }
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-==== Using Grok Processor in a Pipeline
|
|
|
+[[handling-failure-in-pipelines]]
|
|
|
+=== Handling Failure in Pipelines
|
|
|
|
|
|
-[[grok-options]]
|
|
|
-.Grok Options
|
|
|
-[options="header"]
|
|
|
-|======
|
|
|
-| Name | Required | Default | Description
|
|
|
-| `match_field` | yes | - | The field to use for grok expression parsing
|
|
|
-| `match_pattern` | yes | - | The grok expression to match and extract named captures with
|
|
|
-| `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
|
|
|
-|======
|
|
|
+In its simplest case, pipelines describe a list of processors which
|
|
|
+are executed sequentially and processing halts at the first exception. This
|
|
|
+may not be desirable when failures are expected. For example, not all your logs
|
|
|
+may match a certain grok expression and you may wish to index such documents into
|
|
|
+a separate index.
|
|
|
|
|
|
-Here is an example of using the provided patterns to extract out and name structured fields from a string field in
|
|
|
-a document.
|
|
|
+To enable this behavior, you can utilize the `on_failure` parameter. `on_failure`
|
|
|
+defines a list of processors to be executed immediately following the failed processor.
|
|
|
+This parameter can be supplied at the pipeline level, as well as at the processor
|
|
|
+level. If a processor has an `on_failure` configuration option provided, whether
|
|
|
+it is empty or not, any exceptions that are thrown by it will be caught and the
|
|
|
+pipeline will continue executing the remaining processors defined. Since the processors within an
+`on_failure` block are regular processors themselves, failure handling can be nested.
|
|
|
+
|
|
|
+Example: In the following example we define a pipeline that attempts to rename the field `foo` to `bar`.
+If the document does not contain the `foo` field, we
+attach an error message to the document for later analysis within
+Elasticsearch.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "message": "55.3.244.1 GET /index.html 15824 0.043"
|
|
|
+ "description" : "my first pipeline with handled exceptions",
|
|
|
+ "processors" : [
|
|
|
+ {
|
|
|
+ "rename" : {
|
|
|
+ "field" : "foo",
|
|
|
+ "to" : "bar",
|
|
|
+ "on_failure" : [
|
|
|
+ {
|
|
|
+ "set" : {
|
|
|
+ "field" : "error",
|
|
|
+ "value" : "field \"foo\" does not exist, cannot rename to \"bar\""
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
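+
+Because the processors listed inside an `on_failure` block are regular processors, they may in turn declare
+their own `on_failure` block. The following hypothetical sketch nests a second handler inside the first one
+(the error messages are arbitrary):
+
+[source,js]
+--------------------------------------------------
+{
+  "description" : "pipeline with nested failure handling",
+  "processors" : [
+    {
+      "rename" : {
+        "field" : "foo",
+        "to" : "bar",
+        "on_failure" : [
+          {
+            "set" : {
+              "field" : "error",
+              "value" : "rename of \"foo\" failed",
+              "on_failure" : [
+                {
+                  "set" : {
+                    "field" : "error",
+                    "value" : "failed to record the rename failure"
+                  }
+                }
+              ]
+            }
+          }
+        ]
+      }
+    }
+  ]
+}
+--------------------------------------------------
+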
|
|
|
|
|
|
-The pattern for this could be
|
|
|
+Example: Here we define an `on_failure` block on a whole pipeline to change
|
|
|
+the index to which failed documents are sent.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
|
|
|
+{
|
|
|
+ "description" : "my first pipeline with handled exceptions",
|
|
|
+ "processors" : [ ... ],
|
|
|
+ "on_failure" : [
|
|
|
+ {
|
|
|
+ "set" : {
|
|
|
+ "field" : "_index",
|
|
|
+ "value" : "failed-{{ _index }}"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-An example pipeline for processing the above document using Grok:
|
|
|
+
|
|
|
+==== Accessing Error Metadata From Processors Handling Exceptions
|
|
|
+
|
|
|
+Sometimes you may want to retrieve the actual error message that was thrown
|
|
|
+by a failed processor. To do so you can access metadata fields called
|
|
|
+`on_failure_message`, `on_failure_processor_type`, and `on_failure_processor_tag`. These fields are only accessible
|
|
|
+from within the context of an `on_failure` block. Here is an updated version of
|
|
|
+our first example which leverages these fields to provide the error message instead
|
|
|
+of manually setting it.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "description" : "...",
|
|
|
- "processors": [
|
|
|
+ "description" : "my first pipeline with handled exceptions",
|
|
|
+ "processors" : [
|
|
|
{
|
|
|
- "grok": {
|
|
|
- "match_field": "message",
|
|
|
- "match_pattern": "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
|
|
|
+ "rename" : {
|
|
|
+ "field" : "foo",
|
|
|
+ "to" : "bar",
|
|
|
+ "on_failure" : [
|
|
|
+ {
|
|
|
+ "set" : {
|
|
|
+ "field" : "error",
|
|
|
+ "value" : "{{ _ingest.on_failure_message }}"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ]
|
|
|
}
|
|
|
}
|
|
|
]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-This pipeline will insert these named captures as new fields within the document, like so:
|
|
|
+== Processors
|
|
|
+
|
|
|
+All processors are defined in the following way within a pipeline definition:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "message": "55.3.244.1 GET /index.html 15824 0.043",
|
|
|
- "client": "55.3.244.1",
|
|
|
- "method": "GET",
|
|
|
- "request": "/index.html",
|
|
|
- "bytes": 15824,
|
|
|
- "duration": "0.043"
|
|
|
+ "PROCESSOR_NAME" : {
|
|
|
+ ... processor configuration options ...
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-An example of a pipeline specifying custom pattern definitions:
|
|
|
+Each processor defines its own configuration parameters, but all processors have
|
|
|
+the ability to declare `tag` and `on_failure` fields. These fields are optional.
|
|
|
+
|
|
|
+A `tag` is simply a string identifier of the specific instantiation of a certain
|
|
|
+processor in a pipeline. The `tag` field does not affect any processor's behavior,
|
|
|
+but is very useful for bookkeeping and tracing errors to specific processors.
|
|
|
+
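+For example, a tag could be attached to a specific processor instance like this (a minimal sketch using the
+`set` processor described later in this section; the tag value is arbitrary):
+
+[source,js]
+--------------------------------------------------
+{
+  "set": {
+    "tag": "set-received-timestamp",
+    "field": "received",
+    "value": "{{_ingest.timestamp}}"
+  }
+}
+--------------------------------------------------
+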
|
|
|
+See <<handling-failure-in-pipelines>> to learn more about the `on_failure` field and error handling in pipelines.
|
|
|
+
|
|
|
+=== Append processor
|
|
|
+Appends one or more values to an existing array if the field already exists and it is an array.
|
|
|
+Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar.
|
|
|
+Creates an array containing the provided values if the field doesn't exist.
|
|
|
+Accepts a single value or an array of values.
|
|
|
+
|
|
|
+[[append-options]]
|
|
|
+.Append Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to be appended to
|
|
|
+| `value` | yes | - | The value to be appended
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "description" : "...",
|
|
|
- "processors": [
|
|
|
- {
|
|
|
- "grok": {
|
|
|
- "match_field": "message",
|
|
|
- "match_pattern": "my %{FAVORITE_DOG:dog} is colored %{RGB:color}"
|
|
|
- "pattern_definitions" : {
|
|
|
- "FAVORITE_DOG" : "beagle",
|
|
|
- "RGB" : "RED|GREEN|BLUE"
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+ "append": {
|
|
|
+    "field": "field1",
|
|
|
+ "value": ["item2", "item3", "item4"]
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+=== Convert processor
|
|
|
+Converts an existing field's value to a different type, like turning a string to an integer.
|
|
|
+If the field value is an array, all members will be converted.
|
|
|
+
|
|
|
+The supported types include: `integer`, `float`, `string`, and `boolean`.
|
|
|
+
|
|
|
+`boolean` will set the field to `true` if its string value is equal to `true` (ignoring case), to
+`false` if its string value is equal to `false` (ignoring case), and it will throw an exception otherwise.
|
|
|
+
|
|
|
+[[convert-options]]
|
|
|
+.Convert Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field whose value is to be converted
|
|
|
+| `type` | yes | - | The type to convert the existing value to
|
|
|
+|======
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "convert": {
|
|
|
+    "field" : "foo",
|
|
|
+ "type": "integer"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
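+
+For instance, a string field could be coerced to a boolean in the same way (a minimal sketch; `delivered` is
+a hypothetical field whose value is expected to be `"true"` or `"false"` in some case variant):
+
+[source,js]
+--------------------------------------------------
+{
+  "convert": {
+    "field" : "delivered",
+    "type": "boolean"
+  }
+}
+--------------------------------------------------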
|
|
|
|
|
@@ -652,548 +843,357 @@ In this example if the `remove` processor does fail then
|
|
|
the array elements that have been processed thus far will
|
|
|
be updated.
|
|
|
|
|
|
-== Accessing data in pipelines
|
|
|
+=== Grok Processor
|
|
|
|
|
|
-Processors in pipelines have read and write access to documents that pass through the pipeline.
|
|
|
-The fields in the source of a document and its metadata fields are accessible.
|
|
|
+The Grok Processor extracts structured fields out of a single text field within a document. You choose which field to
|
|
|
+extract matched fields from, as well as the Grok Pattern you expect will match. A Grok Pattern is like a regular
|
|
|
+expression that supports aliased expressions that can be reused.
|
|
|
|
|
|
-Accessing a field in the source is straightforward and one can refer to fields by
|
|
|
-their name. For example:
|
|
|
+This tool is perfect for syslog logs, Apache and other webserver logs, MySQL logs, and in general, any log format
+that is generally written for humans and not computer consumption.
|
|
|
+
|
|
|
+The processor comes packaged with over 120 reusable patterns that are located at `$ES_HOME/config/ingest/grok/patterns`.
|
|
|
+Here, you can add your own custom grok pattern files with custom grok expressions to be used by the processor.
|
|
|
+
|
|
|
+If you need help building patterns to match your logs, you will find the <http://grokdebug.herokuapp.com> and
|
|
|
+<http://grokconstructor.appspot.com/> applications quite useful!
|
|
|
+
|
|
|
+==== Grok Basics
|
|
|
+
|
|
|
+Grok sits on top of regular expressions, so any regular expressions are valid in grok as well.
|
|
|
+The regular expression library is Oniguruma, and you can see the full supported regexp syntax
|
|
|
+https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma site].
|
|
|
+
|
|
|
+Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more
|
|
|
+complex patterns that match your fields.
|
|
|
+
|
|
|
+The syntax for re-using a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`.
|
|
|
+
|
|
|
+The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER`
|
|
|
+pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both
|
|
|
+patterns that are provided within the default patterns set.
|
|
|
+
|
|
|
+The `SEMANTIC` is the identifier you give to the piece of text being matched. For example, `3.44` could be the
|
|
|
+duration of an event, so you could call it simply `duration`. Further, a string `55.3.244.1` might identify
|
|
|
+the `client` making a request.
|
|
|
+
|
|
|
+The `TYPE` is the type to which you wish to cast your named field. `int` and `float` are currently the only types supported for coercion.
|
|
|
+
|
|
|
+For example, building on the patterns above, suppose we would like to match text with the following
+contents:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "set": {
|
|
|
- "field": "my_field"
|
|
|
- "value": 582.1
|
|
|
- }
|
|
|
-}
|
|
|
+3.44 55.3.244.1
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-On top of this fields from the source are always accessible via the `_source` prefix:
|
|
|
+We may know that the above message is a number followed by an IP-address. We can match this text with the following
|
|
|
+Grok expression.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "set": {
|
|
|
- "field": "_source.my_field"
|
|
|
- "value": 582.1
|
|
|
- }
|
|
|
-}
|
|
|
+%{NUMBER:duration} %{IP:client}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-Metadata fields can also be accessed in the same way as fields from the source. This
|
|
|
-is possible because Elasticsearch doesn't allow fields in the source that have the
|
|
|
-same name as metadata fields.
|
|
|
+==== Custom Patterns and Pattern Files
|
|
|
|
|
|
-The following example sets the id of a document to `1`:
|
|
|
+The Grok Processor comes pre-packaged with a base set of pattern files. These patterns may not always have
|
|
|
+what you are looking for. These pattern files have a very basic format. Each line describes a named pattern with
|
|
|
+the following format:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "set": {
|
|
|
- "field": "_id"
|
|
|
- "value": "1"
|
|
|
- }
|
|
|
-}
|
|
|
+NAME ' '+ PATTERN '\n'
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-The following metadata fields are accessible by a processor: `_index`, `_type`, `_id`, `_routing`, `_parent`,
|
|
|
-`_timestamp` and `_ttl`.
|
|
|
-
|
|
|
-Beyond metadata fields and source fields, ingest also adds ingest metadata to documents being processed.
|
|
|
-These metadata properties are accessible under the `_ingest` key. Currently ingest adds the ingest timestamp
|
|
|
-under `_ingest.timestamp` key to the ingest metadata, which is the time ES received the index or bulk
|
|
|
-request to pre-process. But any processor is free to add more ingest related metadata to it. Ingest metadata is transient
|
|
|
-and is lost after a document has been processed by the pipeline and thus ingest metadata won't be indexed.
|
|
|
+You can add this pattern to an existing file, or add your own file in the patterns directory here: `$ES_HOME/config/ingest/grok/patterns`.
|
|
|
+The Ingest Plugin will pick up files in this directory to be loaded into the grok processor's known patterns. These patterns are loaded
|
|
|
+at startup, so you will need to restart your ingest node if you wish to update these files while it is running.
|
|
|
|
|
|
-The following example adds a field with the name `received` and the value is the ingest timestamp:
|
|
|
+Example snippet of pattern definitions found in the `grok-patterns` patterns file:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "set": {
|
|
|
- "field": "received"
|
|
|
- "value": "{{_ingest.timestamp}}"
|
|
|
- }
|
|
|
-}
|
|
|
+YEAR (?>\d\d){1,2}
|
|
|
+HOUR (?:2[0123]|[01]?[0-9])
|
|
|
+MINUTE (?:[0-5][0-9])
|
|
|
+SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
|
|
|
+TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-As opposed to Elasticsearch metadata fields, the ingest metadata field name _ingest can be used as a valid field name
|
|
|
-in the source of a document. Use _source._ingest to refer to it, otherwise _ingest will be interpreted as ingest
|
|
|
-metadata fields.
|
|
|
+==== Using Grok Processor in a Pipeline
|
|
|
|
|
|
-A number of processor settings also support templating. Settings that support templating can have zero or more
|
|
|
-template snippets. A template snippet begins with `{{` and ends with `}}`.
|
|
|
-Accessing fields and metafields in templates is exactly the same as via regular processor field settings.
|
|
|
+[[grok-options]]
|
|
|
+.Grok Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `match_field` | yes | - | The field to use for grok expression parsing
|
|
|
+| `match_pattern` | yes | - | The grok expression to match and extract named captures with
|
|
|
+| `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
|
|
|
+|======
|
|
|
|
|
|
-In this example a field by the name `field_c` is added and its value is a concatenation of
|
|
|
-the values of `field_a` and `field_b`.
|
|
|
+Here is an example of using the provided patterns to extract out and name structured fields from a string field in
|
|
|
+a document.
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "set": {
|
|
|
- "field": "field_c"
|
|
|
- "value": "{{field_a}} {{field_b}}"
|
|
|
- }
|
|
|
+ "message": "55.3.244.1 GET /index.html 15824 0.043"
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-The following example changes the index a document is going to be indexed into. The index a document will be redirected
|
|
|
-to depends on the field in the source with name `geoip.country_iso_code`.
|
|
|
+The pattern for this could be:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-{
|
|
|
- "set": {
|
|
|
- "field": "_index"
|
|
|
- "value": "{{geoip.country_iso_code}}"
|
|
|
- }
|
|
|
-}
|
|
|
+%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-[[handling-failure-in-pipelines]]
|
|
|
-=== Handling Failure in Pipelines
|
|
|
-
|
|
|
-In its simplest case, pipelines describe a list of processors which
|
|
|
-are executed sequentially and processing halts at the first exception. This
|
|
|
-may not be desirable when failures are expected. For example, not all your logs
|
|
|
-may match a certain grok expression and you may wish to index such documents into
|
|
|
-a separate index.
|
|
|
-
|
|
|
-To enable this behavior, you can utilize the `on_failure` parameter. `on_failure`
|
|
|
-defines a list of processors to be executed immediately following the failed processor.
|
|
|
-This parameter can be supplied at the pipeline level, as well as at the processor
|
|
|
-level. If a processor has an `on_failure` configuration option provided, whether
|
|
|
-it is empty or not, any exceptions that are thrown by it will be caught and the
|
|
|
-pipeline will continue executing the proceeding processors defined. Since further processors
|
|
|
-are defined within the scope of an `on_failure` statement, failure handling can be nested.
|
|
|
-
|
|
|
-Example: In the following example we define a pipeline that hopes to rename documents with
|
|
|
-a field named `foo` to `bar`. If the document does not contain the `foo` field, we
|
|
|
-go ahead and attach an error message within the document for later analysis within
|
|
|
-Elasticsearch.
|
|
|
+An example pipeline for processing the above document using Grok:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "description" : "my first pipeline with handled exceptions",
|
|
|
- "processors" : [
|
|
|
+ "description" : "...",
|
|
|
+ "processors": [
|
|
|
{
|
|
|
- "rename" : {
|
|
|
- "field" : "foo",
|
|
|
- "to" : "bar",
|
|
|
- "on_failure" : [
|
|
|
- {
|
|
|
- "set" : {
|
|
|
- "field" : "error",
|
|
|
- "value" : "field \"foo\" does not exist, cannot rename to \"bar\""
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+ "grok": {
|
|
|
+ "match_field": "message",
|
|
|
+ "match_pattern": "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
|
|
|
}
|
|
|
}
|
|
|
]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-Example: Here we define an `on_failure` block on a whole pipeline to change
|
|
|
-the index for which failed documents get sent.
|
|
|
+This pipeline will insert these named captures as new fields within the document, like so:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "description" : "my first pipeline with handled exceptions",
|
|
|
- "processors" : [ ... ],
|
|
|
- "on_failure" : [
|
|
|
- {
|
|
|
- "set" : {
|
|
|
- "field" : "_index",
|
|
|
- "value" : "failed-{{ _index }}"
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+ "message": "55.3.244.1 GET /index.html 15824 0.043",
|
|
|
+ "client": "55.3.244.1",
|
|
|
+ "method": "GET",
|
|
|
+ "request": "/index.html",
|
|
|
+ "bytes": 15824,
|
|
|
+ "duration": "0.043"
|
|
|
}
|
|
|
--------------------------------------------------
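+
+Note that `duration` is captured as a string here. If numeric values are desired, the `TYPE` form described
+above could be applied instead (assuming the coercion behaves as described earlier):
+
+[source,js]
+--------------------------------------------------
+%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes:int} %{NUMBER:duration:float}
+--------------------------------------------------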
|
|
|
|
|
|
-
|
|
|
-==== Accessing Error Metadata From Processors Handling Exceptions
|
|
|
-
|
|
|
-Sometimes you may want to retrieve the actual error message that was thrown
|
|
|
-by a failed processor. To do so you can access metadata fields called
|
|
|
-`on_failure_message`, `on_failure_processor_type`, `on_failure_processor_tag`. These fields are only accessible
|
|
|
-from within the context of an `on_failure` block. Here is an updated version of
|
|
|
-our first example which leverages these fields to provide the error message instead
|
|
|
-of manually setting it.
|
|
|
+An example of a pipeline specifying custom pattern definitions:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "description" : "my first pipeline with handled exceptions",
|
|
|
- "processors" : [
|
|
|
+ "description" : "...",
|
|
|
+ "processors": [
|
|
|
{
|
|
|
- "rename" : {
|
|
|
- "field" : "foo",
|
|
|
- "to" : "bar",
|
|
|
- "on_failure" : [
|
|
|
- {
|
|
|
- "set" : {
|
|
|
- "field" : "error",
|
|
|
- "value" : "{{ _ingest.on_failure_message }}"
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+ "grok": {
|
|
|
+ "match_field": "message",
|
|
|
+        "match_pattern": "my %{FAVORITE_DOG:dog} is colored %{RGB:color}",
|
|
|
+ "pattern_definitions" : {
|
|
|
+ "FAVORITE_DOG" : "beagle",
|
|
|
+ "RGB" : "RED|GREEN|BLUE"
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
+=== Gsub processor
|
|
|
+Converts a string field by applying a regular expression and a replacement.
|
|
|
+If the field is not a string, the processor will throw an exception.
|
|
|
|
|
|
-== Ingest APIs
|
|
|
-
|
|
|
-=== Put pipeline API
|
|
|
-
|
|
|
-The put pipeline api adds pipelines and updates existing pipelines in the cluster.
|
|
|
+[[gsub-options]]
|
|
|
+.Gsub Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to apply the replacement to
|
|
|
+| `pattern` | yes | - | The pattern to be replaced
|
|
|
+| `replacement` | yes | - | The string to replace the matching patterns with.
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-PUT _ingest/pipeline/my-pipeline-id
|
|
|
{
|
|
|
- "description" : "describe pipeline",
|
|
|
- "processors" : [
|
|
|
- {
|
|
|
- "simple" : {
|
|
|
- // settings
|
|
|
- }
|
|
|
- },
|
|
|
- // other processors
|
|
|
- ]
|
|
|
+ "gsub": {
|
|
|
+ "field": "field1",
|
|
|
+ "pattern": "\.",
|
|
|
+ "replacement": "-"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
-// AUTOSENSE
|
|
|
-
|
|
|
-NOTE: The put pipeline api also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
|
|
|
- pipeline changes take immediately in effect.
|
|
|
|
|
|
-=== Get pipeline API
|
|
|
+=== Join processor
|
|
|
+Joins each element of an array into a single string using a separator character between each element.
|
|
|
+Throws an error when the field is not an array.
|
|
|
|
|
|
-The get pipeline api returns pipelines based on id. This api always returns a local reference of the pipeline.
|
|
|
-
|
|
|
-[source,js]
|
|
|
---------------------------------------------------
|
|
|
-GET _ingest/pipeline/my-pipeline-id
|
|
|
---------------------------------------------------
|
|
|
-// AUTOSENSE
|
|
|
-
|
|
|
-Example response:
|
|
|
+[[join-options]]
|
|
|
+.Join Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to be separated
|
|
|
+| `separator` | yes | - | The separator character
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "my-pipeline-id": {
|
|
|
- "_source" : {
|
|
|
- "description": "describe pipeline",
|
|
|
- "processors": [
|
|
|
- {
|
|
|
- "simple" : {
|
|
|
- // settings
|
|
|
- }
|
|
|
- },
|
|
|
- // other processors
|
|
|
- ]
|
|
|
- },
|
|
|
- "_version" : 0
|
|
|
- }
|
|
|
+ "join": {
|
|
|
+ "field": "joined_array_field",
|
|
|
+ "separator": "-"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-For each returned pipeline the source and the version is returned.
|
|
|
-The version is useful for knowing what version of the pipeline the node has.
|
|
|
-Multiple ids can be provided at the same time. Also wildcards are supported.
|
|
|
-
|
|
|
-=== Delete pipeline API
|
|
|
+=== Lowercase processor
|
|
|
+Converts a string to its lowercase equivalent.
|
|
|
|
|
|
-The delete pipeline api deletes pipelines by id.
|
|
|
+[[lowercase-options]]
|
|
|
+.Lowercase Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to lowercase
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-DELETE _ingest/pipeline/my-pipeline-id
|
|
|
+{
|
|
|
+ "lowercase": {
|
|
|
+ "field": "foo"
|
|
|
+ }
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
-// AUTOSENSE
|
|
|
-
|
|
|
-=== Simulate pipeline API
|
|
|
|
|
|
-The simulate pipeline api executes a specific pipeline against
|
|
|
-the set of documents provided in the body of the request.
|
|
|
-
|
|
|
-A simulate request may call upon an existing pipeline to be executed
|
|
|
-against the provided documents, or supply a pipeline definition in
|
|
|
-the body of the request.
|
|
|
+=== Remove processor
|
|
|
+Removes an existing field. If the field doesn't exist, an exception will be thrown.
|
|
|
|
|
|
-Here is the structure of a simulate request with a provided pipeline:
|
|
|
+[[remove-options]]
|
|
|
+.Remove Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to be removed
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-POST _ingest/pipeline/_simulate
|
|
|
{
|
|
|
- "pipeline" : {
|
|
|
- // pipeline definition here
|
|
|
- },
|
|
|
- "docs" : [
|
|
|
- { /** first document **/ },
|
|
|
- { /** second document **/ },
|
|
|
- // ...
|
|
|
- ]
|
|
|
+ "remove": {
|
|
|
+ "field": "foo"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-Here is the structure of a simulate request against a pre-existing pipeline:
|
|
|
+=== Rename processor
|
|
|
+Renames an existing field. If the field doesn't exist, an exception will be thrown. Also, the new field
|
|
|
+name must not exist.
|
|
|
+
|
|
|
+[[rename-options]]
|
|
|
+.Rename Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to be renamed
|
|
|
+| `to` | yes | - | The new name of the field
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-POST _ingest/pipeline/my-pipeline-id/_simulate
|
|
|
{
|
|
|
- "docs" : [
|
|
|
- { /** first document **/ },
|
|
|
- { /** second document **/ },
|
|
|
- // ...
|
|
|
- ]
|
|
|
+ "rename": {
|
|
|
+ "field": "foo",
|
|
|
+ "to": "foobar"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
+=== Set processor
|
|
|
+Sets one field and associates it with the specified value. If the field already exists,
|
|
|
+its value will be replaced with the provided one.
|
|
|
|
|
|
-Here is an example simulate request with a provided pipeline and its response:
|
|
|
+[[set-options]]
|
|
|
+.Set Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to insert, upsert, or update
|
|
|
+| `value` | yes | - | The value to be set for the field
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-POST _ingest/pipeline/_simulate
|
|
|
{
|
|
|
- "pipeline" :
|
|
|
- {
|
|
|
- "description": "_description",
|
|
|
- "processors": [
|
|
|
- {
|
|
|
- "set" : {
|
|
|
- "field" : "field2",
|
|
|
- "value" : "_value"
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "docs": [
|
|
|
- {
|
|
|
- "_index": "index",
|
|
|
- "_type": "type",
|
|
|
- "_id": "id",
|
|
|
- "_source": {
|
|
|
- "foo": "bar"
|
|
|
- }
|
|
|
- },
|
|
|
- {
|
|
|
- "_index": "index",
|
|
|
- "_type": "type",
|
|
|
- "_id": "id",
|
|
|
- "_source": {
|
|
|
- "foo": "rab"
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+ "set": {
|
|
|
+ "field": "field1",
|
|
|
+ "value": 582.1
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
-// AUTOSENSE
|
|
|
|
|
|
-response:
|
|
|
+=== Split processor
|
|
|
+Splits a field into an array using a separator character. Only works on string fields.
|
|
|
+
|
|
|
+[[split-options]]
|
|
|
+.Split Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to split
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "docs": [
|
|
|
- {
|
|
|
- "doc": {
|
|
|
- "_id": "id",
|
|
|
- "_ttl": null,
|
|
|
- "_parent": null,
|
|
|
- "_index": "index",
|
|
|
- "_routing": null,
|
|
|
- "_type": "type",
|
|
|
- "_timestamp": null,
|
|
|
- "_source": {
|
|
|
- "field2": "_value",
|
|
|
- "foo": "bar"
|
|
|
- },
|
|
|
- "_ingest": {
|
|
|
- "timestamp": "2016-01-04T23:53:27.186+0000"
|
|
|
- }
|
|
|
- }
|
|
|
- },
|
|
|
- {
|
|
|
- "doc": {
|
|
|
- "_id": "id",
|
|
|
- "_ttl": null,
|
|
|
- "_parent": null,
|
|
|
- "_index": "index",
|
|
|
- "_routing": null,
|
|
|
- "_type": "type",
|
|
|
- "_timestamp": null,
|
|
|
- "_source": {
|
|
|
- "field2": "_value",
|
|
|
- "foo": "rab"
|
|
|
- },
|
|
|
- "_ingest": {
|
|
|
- "timestamp": "2016-01-04T23:53:27.186+0000"
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+  "split": {
+    "field": "my_field",
+    "separator": ","
+  }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
|
|
|
-It is often useful to see how each processor affects the ingest document
|
|
|
-as it is passed through the pipeline. To see the intermediate results of
|
|
|
-each processor in the simulate request, a `verbose` parameter may be added
|
|
|
-to the request
|
|
|
-
|
|
|
-Here is an example verbose request and its response:
|
|
|
+=== Trim processor
|
|
|
+Trims whitespace from a field. NOTE: this only works on leading and trailing whitespace.
|
|
|
|
|
|
+[[trim-options]]
|
|
|
+.Trim Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The string-valued field to trim whitespace from
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-POST _ingest/pipeline/_simulate?verbose
|
|
|
{
|
|
|
- "pipeline" :
|
|
|
- {
|
|
|
- "description": "_description",
|
|
|
- "processors": [
|
|
|
- {
|
|
|
- "set" : {
|
|
|
- "field" : "field2",
|
|
|
- "value" : "_value2"
|
|
|
- }
|
|
|
- },
|
|
|
- {
|
|
|
- "set" : {
|
|
|
- "field" : "field3",
|
|
|
- "value" : "_value3"
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- "docs": [
|
|
|
- {
|
|
|
- "_index": "index",
|
|
|
- "_type": "type",
|
|
|
- "_id": "id",
|
|
|
- "_source": {
|
|
|
- "foo": "bar"
|
|
|
- }
|
|
|
- },
|
|
|
- {
|
|
|
- "_index": "index",
|
|
|
- "_type": "type",
|
|
|
- "_id": "id",
|
|
|
- "_source": {
|
|
|
- "foo": "rab"
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
+ "trim": {
|
|
|
+ "field": "foo"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
-// AUTOSENSE
|
|
|
|
|
|
-response:
|
|
|
+=== Uppercase processor
|
|
|
+Converts a string to its uppercase equivalent.
|
|
|
+
|
|
|
+[[uppercase-options]]
|
|
|
+.Uppercase Options
|
|
|
+[options="header"]
|
|
|
+|======
|
|
|
+| Name | Required | Default | Description
|
|
|
+| `field` | yes | - | The field to uppercase
|
|
|
+|======
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "docs": [
|
|
|
- {
|
|
|
- "processor_results": [
|
|
|
- {
|
|
|
- "tag": "processor[set]-0",
|
|
|
- "doc": {
|
|
|
- "_id": "id",
|
|
|
- "_ttl": null,
|
|
|
- "_parent": null,
|
|
|
- "_index": "index",
|
|
|
- "_routing": null,
|
|
|
- "_type": "type",
|
|
|
- "_timestamp": null,
|
|
|
- "_source": {
|
|
|
- "field2": "_value2",
|
|
|
- "foo": "bar"
|
|
|
- },
|
|
|
- "_ingest": {
|
|
|
- "timestamp": "2016-01-05T00:02:51.383+0000"
|
|
|
- }
|
|
|
- }
|
|
|
- },
|
|
|
- {
|
|
|
- "tag": "processor[set]-1",
|
|
|
- "doc": {
|
|
|
- "_id": "id",
|
|
|
- "_ttl": null,
|
|
|
- "_parent": null,
|
|
|
- "_index": "index",
|
|
|
- "_routing": null,
|
|
|
- "_type": "type",
|
|
|
- "_timestamp": null,
|
|
|
- "_source": {
|
|
|
- "field3": "_value3",
|
|
|
- "field2": "_value2",
|
|
|
- "foo": "bar"
|
|
|
- },
|
|
|
- "_ingest": {
|
|
|
- "timestamp": "2016-01-05T00:02:51.383+0000"
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "processor_results": [
|
|
|
- {
|
|
|
- "tag": "processor[set]-0",
|
|
|
- "doc": {
|
|
|
- "_id": "id",
|
|
|
- "_ttl": null,
|
|
|
- "_parent": null,
|
|
|
- "_index": "index",
|
|
|
- "_routing": null,
|
|
|
- "_type": "type",
|
|
|
- "_timestamp": null,
|
|
|
- "_source": {
|
|
|
- "field2": "_value2",
|
|
|
- "foo": "rab"
|
|
|
- },
|
|
|
- "_ingest": {
|
|
|
- "timestamp": "2016-01-05T00:02:51.384+0000"
|
|
|
- }
|
|
|
- }
|
|
|
- },
|
|
|
- {
|
|
|
- "tag": "processor[set]-1",
|
|
|
- "doc": {
|
|
|
- "_id": "id",
|
|
|
- "_ttl": null,
|
|
|
- "_parent": null,
|
|
|
- "_index": "index",
|
|
|
- "_routing": null,
|
|
|
- "_type": "type",
|
|
|
- "_timestamp": null,
|
|
|
- "_source": {
|
|
|
- "field3": "_value3",
|
|
|
- "field2": "_value2",
|
|
|
- "foo": "rab"
|
|
|
- },
|
|
|
- "_ingest": {
|
|
|
- "timestamp": "2016-01-05T00:02:51.384+0000"
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- ]
|
|
|
- }
|
|
|
- ]
|
|
|
+ "uppercase": {
|
|
|
+ "field": "foo"
|
|
|
+ }
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
+
|
|
|
+
|