- [[pipeline]]
- == Pipeline Definition
- A pipeline is a definition of a series of <<ingest-processors, processors>> that are to be executed
- in the same order as they are declared. A pipeline consists of two main fields: a `description`
- and a list of `processors`:
- [source,js]
- --------------------------------------------------
- {
- "description" : "...",
- "processors" : [ ... ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- The `description` is a special field to store a helpful description of
- what the pipeline does.
- The `processors` parameter defines a list of processors to be executed in
- order.
- [[ingest-apis]]
- == Ingest APIs
- The following ingest APIs are available for managing pipelines:
- * <<put-pipeline-api>> to add or update a pipeline
- * <<get-pipeline-api>> to return a specific pipeline
- * <<delete-pipeline-api>> to delete a pipeline
- * <<simulate-pipeline-api>> to simulate a call to a pipeline
- [[put-pipeline-api]]
- === Put Pipeline API
- The put pipeline API adds pipelines and updates existing pipelines in the cluster.
- [source,js]
- --------------------------------------------------
- PUT _ingest/pipeline/my-pipeline-id
- {
- "description" : "describe pipeline",
- "processors" : [
- {
- "set" : {
- "field": "foo",
- "value": "bar"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- NOTE: The put pipeline API also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
- pipeline changes take effect immediately.
- [[get-pipeline-api]]
- === Get Pipeline API
- The get pipeline API returns pipelines based on ID. This API always returns a local reference of the pipeline.
- [source,js]
- --------------------------------------------------
- GET _ingest/pipeline/my-pipeline-id
- --------------------------------------------------
- // CONSOLE
- // TEST[continued]
- Example response:
- [source,js]
- --------------------------------------------------
- {
- "my-pipeline-id" : {
- "description" : "describe pipeline",
- "processors" : [
- {
- "set" : {
- "field" : "foo",
- "value" : "bar"
- }
- }
- ]
- }
- }
- --------------------------------------------------
- // TESTRESPONSE
- For each returned pipeline, the source and the version are returned.
- The version is useful for knowing which version of the pipeline the node has.
- You can specify multiple IDs to return more than one pipeline. Wildcards are also supported.
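- For example, assuming pipelines whose IDs start with `my-` exist (the names here are illustrative), a single wildcard request could return all of them:
- [source,js]
- --------------------------------------------------
- GET _ingest/pipeline/my-*
- --------------------------------------------------
- // NOTCONSOLE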
- [float]
- [[versioning-pipelines]]
- ==== Pipeline Versioning
- Pipelines can optionally add a `version` number, which can be any integer value,
- in order to simplify pipeline management by external systems. The `version`
- field is completely optional and it is meant solely for external management of
- pipelines. To unset a `version`, simply replace the pipeline without specifying
- one.
- [source,js]
- --------------------------------------------------
- PUT _ingest/pipeline/my-pipeline-id
- {
- "description" : "describe pipeline",
- "version" : 123,
- "processors" : [
- {
- "set" : {
- "field": "foo",
- "value": "bar"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- To check for the `version`, you can
- <<common-options-response-filtering, filter responses>>
- using `filter_path` to limit the response to just the `version`:
- [source,js]
- --------------------------------------------------
- GET /_ingest/pipeline/my-pipeline-id?filter_path=*.version
- --------------------------------------------------
- // CONSOLE
- // TEST[continued]
- This should give a small response that makes it both easy and inexpensive to parse:
- [source,js]
- --------------------------------------------------
- {
- "my-pipeline-id" : {
- "version" : 123
- }
- }
- --------------------------------------------------
- // TESTRESPONSE
- [[delete-pipeline-api]]
- === Delete Pipeline API
- The delete pipeline API deletes pipelines by ID or wildcard match (`my-*`, `*`).
- [source,js]
- --------------------------------------------------
- DELETE _ingest/pipeline/my-pipeline-id
- --------------------------------------------------
- // CONSOLE
- // TEST[continued]
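- Assuming several pipelines whose IDs start with `my-` exist (again, illustrative names), they could all be removed with a single wildcard request:
- [source,js]
- --------------------------------------------------
- DELETE _ingest/pipeline/my-*
- --------------------------------------------------
- // NOTCONSOLE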
- ////
- Hidden setup for wildcard test:
- [source,js]
- --------------------------------------------------
- PUT _ingest/pipeline/wild-one
- {
- "description" : "first pipeline to be wildcard deleted",
- "processors" : [ ]
- }
- PUT _ingest/pipeline/wild-two
- {
- "description" : "second pipeline to be wildcard deleted",
- "processors" : [ ]
- }
- DELETE _ingest/pipeline/*
- --------------------------------------------------
- // CONSOLE
- Hidden expected response:
- [source,js]
- --------------------------------------------------
- {
- "acknowledged": true
- }
- --------------------------------------------------
- // TESTRESPONSE
- ////
- [[simulate-pipeline-api]]
- === Simulate Pipeline API
- The simulate pipeline API executes a specific pipeline against
- the set of documents provided in the body of the request.
- You can either specify an existing pipeline to execute
- against the provided documents, or supply a pipeline definition in
- the body of the request.
- Here is the structure of a simulate request with a pipeline definition provided
- in the body of the request:
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/_simulate
- {
- "pipeline" : {
- // pipeline definition here
- },
- "docs" : [
- { "_source": {/** first document **/} },
- { "_source": {/** second document **/} },
- // ...
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- Here is the structure of a simulate request against an existing pipeline:
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/my-pipeline-id/_simulate
- {
- "docs" : [
- { "_source": {/** first document **/} },
- { "_source": {/** second document **/} },
- // ...
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- Here is an example of a simulate request with a pipeline defined in the request
- and its response:
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/_simulate
- {
- "pipeline" :
- {
- "description": "_description",
- "processors": [
- {
- "set" : {
- "field" : "field2",
- "value" : "_value"
- }
- }
- ]
- },
- "docs": [
- {
- "_index": "index",
- "_type": "_doc",
- "_id": "id",
- "_source": {
- "foo": "bar"
- }
- },
- {
- "_index": "index",
- "_type": "_doc",
- "_id": "id",
- "_source": {
- "foo": "rab"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- Response:
- [source,js]
- --------------------------------------------------
- {
- "docs": [
- {
- "doc": {
- "_id": "id",
- "_index": "index",
- "_type": "_doc",
- "_source": {
- "field2": "_value",
- "foo": "bar"
- },
- "_ingest": {
- "timestamp": "2017-05-04T22:30:03.187Z"
- }
- }
- },
- {
- "doc": {
- "_id": "id",
- "_index": "index",
- "_type": "_doc",
- "_source": {
- "field2": "_value",
- "foo": "rab"
- },
- "_ingest": {
- "timestamp": "2017-05-04T22:30:03.188Z"
- }
- }
- }
- ]
- }
- --------------------------------------------------
- // TESTRESPONSE[s/"2017-05-04T22:30:03.187Z"/$body.docs.0.doc._ingest.timestamp/]
- // TESTRESPONSE[s/"2017-05-04T22:30:03.188Z"/$body.docs.1.doc._ingest.timestamp/]
- [[ingest-verbose-param]]
- ==== Viewing Verbose Results
- You can use the simulate pipeline API to see how each processor affects the ingest document
- as it passes through the pipeline. To see the intermediate results of
- each processor in the simulate request, you can add the `verbose` parameter
- to the request.
- Here is an example of a verbose request and its response:
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/_simulate?verbose
- {
- "pipeline" :
- {
- "description": "_description",
- "processors": [
- {
- "set" : {
- "field" : "field2",
- "value" : "_value2"
- }
- },
- {
- "set" : {
- "field" : "field3",
- "value" : "_value3"
- }
- }
- ]
- },
- "docs": [
- {
- "_index": "index",
- "_type": "_doc",
- "_id": "id",
- "_source": {
- "foo": "bar"
- }
- },
- {
- "_index": "index",
- "_type": "_doc",
- "_id": "id",
- "_source": {
- "foo": "rab"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- Response:
- [source,js]
- --------------------------------------------------
- {
- "docs": [
- {
- "processor_results": [
- {
- "doc": {
- "_id": "id",
- "_index": "index",
- "_type": "_doc",
- "_source": {
- "field2": "_value2",
- "foo": "bar"
- },
- "_ingest": {
- "timestamp": "2017-05-04T22:46:09.674Z"
- }
- }
- },
- {
- "doc": {
- "_id": "id",
- "_index": "index",
- "_type": "_doc",
- "_source": {
- "field3": "_value3",
- "field2": "_value2",
- "foo": "bar"
- },
- "_ingest": {
- "timestamp": "2017-05-04T22:46:09.675Z"
- }
- }
- }
- ]
- },
- {
- "processor_results": [
- {
- "doc": {
- "_id": "id",
- "_index": "index",
- "_type": "_doc",
- "_source": {
- "field2": "_value2",
- "foo": "rab"
- },
- "_ingest": {
- "timestamp": "2017-05-04T22:46:09.676Z"
- }
- }
- },
- {
- "doc": {
- "_id": "id",
- "_index": "index",
- "_type": "_doc",
- "_source": {
- "field3": "_value3",
- "field2": "_value2",
- "foo": "rab"
- },
- "_ingest": {
- "timestamp": "2017-05-04T22:46:09.677Z"
- }
- }
- }
- ]
- }
- ]
- }
- --------------------------------------------------
- // TESTRESPONSE[s/"2017-05-04T22:46:09.674Z"/$body.docs.0.processor_results.0.doc._ingest.timestamp/]
- // TESTRESPONSE[s/"2017-05-04T22:46:09.675Z"/$body.docs.0.processor_results.1.doc._ingest.timestamp/]
- // TESTRESPONSE[s/"2017-05-04T22:46:09.676Z"/$body.docs.1.processor_results.0.doc._ingest.timestamp/]
- // TESTRESPONSE[s/"2017-05-04T22:46:09.677Z"/$body.docs.1.processor_results.1.doc._ingest.timestamp/]
- [[accessing-data-in-pipelines]]
- == Accessing Data in Pipelines
- The processors in a pipeline have read and write access to documents that pass through the pipeline.
- The processors can access fields in the source of a document and the document's metadata fields.
- [float]
- [[accessing-source-fields]]
- === Accessing Fields in the Source
- Accessing a field in the source is straightforward. You simply refer to fields by
- their name. For example:
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "my_field",
- "value": 582.1
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- On top of this, fields from the source are always accessible via the `_source` prefix:
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "_source.my_field",
- "value": 582.1
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [float]
- [[accessing-metadata-fields]]
- === Accessing Metadata Fields
- You can access metadata fields in the same way that you access fields in the source. This
- is possible because Elasticsearch doesn't allow fields in the source that have the
- same name as metadata fields.
- The following example sets the `_id` metadata field of a document to `1`:
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "_id",
- "value": "1"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- The following metadata fields are accessible by a processor: `_index`, `_type`, `_id`, `_routing`.
- [float]
- [[accessing-ingest-metadata]]
- === Accessing Ingest Metadata Fields
- Beyond metadata fields and source fields, ingest also adds ingest metadata to the documents that it processes.
- These metadata properties are accessible under the `_ingest` key. Currently ingest adds the ingest timestamp
- under the `_ingest.timestamp` key of the ingest metadata. The ingest timestamp is the time when Elasticsearch
- received the index or bulk request to pre-process the document.
- Any processor can add ingest-related metadata during document processing. Ingest metadata is transient
- and is lost after a document has been processed by the pipeline. Therefore, ingest metadata won't be indexed.
- The following example adds a field with the name `received`. The value is the ingest timestamp:
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "received",
- "value": "{{_ingest.timestamp}}"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- Unlike Elasticsearch metadata fields, the ingest metadata field name `_ingest` can be used as a valid field name
- in the source of a document. Use `_source._ingest` to refer to the field in the source document. Otherwise, `_ingest`
- will be interpreted as an ingest metadata field.
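- For example, a minimal sketch that writes into a source field literally named `_ingest` (the `received` sub-field is illustrative), rather than into ingest metadata:
- [source,js]
- --------------------------------------------------
- {
-   "set": {
-     "field": "_source._ingest.received",
-     "value": "{{_ingest.timestamp}}"
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE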
- [float]
- [[accessing-template-fields]]
- === Accessing Fields and Metafields in Templates
- A number of processor settings also support templating. Settings that support templating can have zero or more
- template snippets. A template snippet begins with `{{` and ends with `}}`.
- Accessing fields and metafields in templates is exactly the same as via regular processor field settings.
- The following example adds a field named `field_c`. Its value is a concatenation of
- the values of `field_a` and `field_b`.
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "field_c",
- "value": "{{field_a}} {{field_b}}"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- The following example uses the value of the `geoip.country_iso_code` field in the source
- to set the index that the document will be indexed into:
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "_index",
- "value": "{{geoip.country_iso_code}}"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- Dynamic field names are also supported. This example sets the field named after the
- value of `service` to the value of the field `code`:
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "{{service}}",
- "value": "{{code}}"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[handling-failure-in-pipelines]]
- == Handling Failures in Pipelines
- In its simplest use case, a pipeline defines a list of processors that
- are executed sequentially, and processing halts at the first exception. This
- behavior may not be desirable when failures are expected. For example, you may have logs
- that don't match the specified grok expression. Instead of halting execution, you may
- want to index such documents into a separate index.
- To enable this behavior, you can use the `on_failure` parameter. The `on_failure` parameter
- defines a list of processors to be executed immediately following the failed processor.
- You can specify this parameter at the pipeline level, as well as at the processor
- level. If a processor specifies an `on_failure` configuration, whether
- it is empty or not, any exceptions that are thrown by the processor are caught, and the
- pipeline continues executing the remaining processors. Because you can define further processors
- within the scope of an `on_failure` statement, you can nest failure handling.
- The following example defines a pipeline that renames the `foo` field in
- the processed document to `bar`. If the document does not contain the `foo` field, the processor
- attaches an error message to the document for later analysis within
- Elasticsearch.
- [source,js]
- --------------------------------------------------
- {
- "description" : "my first pipeline with handled exceptions",
- "processors" : [
- {
- "rename" : {
- "field" : "foo",
- "target_field" : "bar",
- "on_failure" : [
- {
- "set" : {
- "field" : "error",
- "value" : "field \"foo\" does not exist, cannot rename to \"bar\""
- }
- }
- ]
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- The following example defines an `on_failure` block on a whole pipeline to change
- the index to which failed documents get sent.
- [source,js]
- --------------------------------------------------
- {
- "description" : "my first pipeline with handled exceptions",
- "processors" : [ ... ],
- "on_failure" : [
- {
- "set" : {
- "field" : "_index",
- "value" : "failed-{{ _index }}"
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- Alternatively, instead of defining behaviour in case of processor failure, it is also possible
- to ignore a failure and continue with the next processor by specifying the `ignore_failure` setting.
- In the example below, if the field `foo` doesn't exist, the failure is caught and the pipeline
- continues to execute, which in this case means that the pipeline does nothing.
- [source,js]
- --------------------------------------------------
- {
- "description" : "my first pipeline with handled exceptions",
- "processors" : [
- {
- "rename" : {
- "field" : "foo",
- "target_field" : "bar",
- "ignore_failure" : true
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- The `ignore_failure` setting can be set on any processor and defaults to `false`.
- [float]
- [[accessing-error-metadata]]
- === Accessing Error Metadata From Processors Handling Exceptions
- You may want to retrieve the actual error message that was thrown
- by a failed processor. To do so you can access metadata fields called
- `on_failure_message`, `on_failure_processor_type`, and `on_failure_processor_tag`. These fields are only accessible
- from within the context of an `on_failure` block.
- Here is an updated version of the example that you
- saw earlier. But instead of setting the error message manually, the example leverages the `on_failure_message`
- metadata field to provide the error message.
- [source,js]
- --------------------------------------------------
- {
- "description" : "my first pipeline with handled exceptions",
- "processors" : [
- {
- "rename" : {
- "field" : "foo",
- "to" : "bar",
- "on_failure" : [
- {
- "set" : {
- "field" : "error",
- "value" : "{{ _ingest.on_failure_message }}"
- }
- }
- ]
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[ingest-processors]]
- == Processors
- All processors are defined in the following way within a pipeline definition:
- [source,js]
- --------------------------------------------------
- {
- "PROCESSOR_NAME" : {
- ... processor configuration options ...
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- Each processor defines its own configuration parameters, but all processors have
- the ability to declare `tag` and `on_failure` fields. These fields are optional.
- A `tag` is simply a string identifier of the specific instantiation of a certain
- processor in a pipeline. The `tag` field does not affect the processor's behavior,
- but is very useful for bookkeeping and tracing errors to specific processors.
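- For example, a sketch of a processor instance that carries a `tag` (the tag value is illustrative):
- [source,js]
- --------------------------------------------------
- {
-   "set": {
-     "tag": "set-foo-to-bar",
-     "field": "foo",
-     "value": "bar"
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE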
- See <<handling-failure-in-pipelines>> to learn more about the `on_failure` field and error handling in pipelines.
- The <<ingest-info,node info API>> can be used to figure out what processors are available in a cluster.
- It provides a per-node list of the available processors.
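- A sketch of such a request, using `filter_path` only to trim the response down to the per-node processor list:
- [source,js]
- --------------------------------------------------
- GET _nodes/ingest?filter_path=nodes.*.ingest.processors
- --------------------------------------------------
- // NOTCONSOLE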
- Custom processors must be installed on all nodes. The put pipeline API will fail if a processor specified in a pipeline
- doesn't exist on all nodes. If you rely on custom processor plugins, make sure to mark these plugins as mandatory by adding
- the `plugin.mandatory` setting to the `config/elasticsearch.yml` file, for example:
- [source,yaml]
- --------------------------------------------------
- plugin.mandatory: ingest-attachment,ingest-geoip
- --------------------------------------------------
- A node will not start if either of these plugins is not available.
- The <<ingest-stats,node stats API>> can be used to fetch ingest usage statistics, globally and on a per
- pipeline basis. This is useful for finding out which pipelines are used the most or spend the most time on preprocessing.
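- A sketch of such a request, trimmed down to the ingest section of the node stats:
- [source,js]
- --------------------------------------------------
- GET _nodes/stats/ingest?filter_path=nodes.*.ingest
- --------------------------------------------------
- // NOTCONSOLE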
- [[append-processor]]
- === Append Processor
- Appends one or more values to an existing array if the field already exists and it is an array.
- Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar.
- Creates an array containing the provided values if the field doesn't exist.
- Accepts a single value or an array of values.
- [[append-options]]
- .Append Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to be appended to
- | `value` | yes | - | The value to be appended
- |======
- [source,js]
- --------------------------------------------------
- {
- "append": {
- "field": "field1",
- "value": ["item2", "item3", "item4"]
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[convert-processor]]
- === Convert Processor
- Converts an existing field's value to a different type, such as converting a string to an integer.
- If the field value is an array, all members will be converted.
- The supported types include: `integer`, `long`, `float`, `double`, `string`, `boolean`, and `auto`.
- Specifying `boolean` will set the field to true if its string value is equal to `true` (ignore case), to
- false if its string value is equal to `false` (ignore case), or it will throw an exception otherwise.
- Specifying `auto` will attempt to convert the string-valued `field` into the closest non-string type.
- For example, a field whose value is `"true"` will be converted to its respective boolean type: `true`. Do note
- that float takes precedence over double in `auto`. A value of `"242.15"` will "automatically" be converted to
- `242.15` of type `float`. If a provided field cannot be appropriately converted, the Convert Processor will
- still process successfully and leave the field value as-is. In such a case, `target_field` will
- still be updated with the unconverted field value.
- [[convert-options]]
- .Convert Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field whose value is to be converted
- | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
- | `type` | yes | - | The type to convert the existing value to
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "convert": {
- "field" : "foo",
- "type": "integer"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
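- A sketch of using `auto` together with a separate `target_field` (the field names are illustrative):
- [source,js]
- --------------------------------------------------
- {
-   "convert": {
-     "field" : "measured_value",
-     "target_field" : "measured_value_converted",
-     "type": "auto"
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE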
- [[date-processor]]
- === Date Processor
- Parses dates from fields, and then uses the date or timestamp as the timestamp for the document.
- By default, the date processor adds the parsed date as a new field called `@timestamp`. You can specify a
- different field by setting the `target_field` configuration parameter. Multiple date formats are supported
- as part of the same date processor definition. They will be used sequentially to attempt parsing the date field,
- in the same order they were defined as part of the processor definition.
- [[date-options]]
- .Date options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to get the date from.
- | `target_field` | no | @timestamp | The field that will hold the parsed date.
- | `formats` | yes | - | An array of the expected date formats. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
- | `timezone` | no | UTC | The timezone to use when parsing the date.
- | `locale` | no | ENGLISH | The locale to use when parsing the date, relevant when parsing month names or week days.
- |======
- Here is an example that adds the parsed date to the `timestamp` field based on the `initial_date` field:
- [source,js]
- --------------------------------------------------
- {
- "description" : "...",
- "processors" : [
- {
- "date" : {
- "field" : "initial_date",
- "target_field" : "timestamp",
- "formats" : ["dd/MM/yyyy hh:mm:ss"],
- "timezone" : "Europe/Amsterdam"
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
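- Because `formats` is an array, several patterns can be listed and are tried in order; a sketch where ISO8601 acts as a fallback format:
- [source,js]
- --------------------------------------------------
- {
-   "date" : {
-     "field" : "initial_date",
-     "target_field" : "timestamp",
-     "formats" : ["dd/MM/yyyy hh:mm:ss", "ISO8601"]
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE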
- The `timezone` and `locale` processor parameters are templated. This means that their values can be
- extracted from fields within documents. The example below shows how to extract the locale/timezone
- details from existing fields, `my_timezone` and `my_locale`, in the ingested document that contain
- the timezone and locale values.
- [source,js]
- --------------------------------------------------
- {
- "description" : "...",
- "processors" : [
- {
- "date" : {
- "field" : "initial_date",
- "target_field" : "timestamp",
- "formats" : ["ISO8601"],
- "timezone" : "{{ my_timezone }}",
- "locale" : "{{ my_locale }}"
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[date-index-name-processor]]
- === Date Index Name Processor
- The purpose of this processor is to point documents to the right time-based index based
- on a date or timestamp field in a document by using the <<date-math-index-names, date math index name support>>.
- The processor sets the `_index` metadata field with a date math index name expression based on the provided index name
- prefix, a date or timestamp field in the documents being processed, and the provided date rounding.
- First, this processor fetches the date or timestamp from a field in the document being processed. Optionally,
- a date format can be configured to control how the field's value is parsed into a date. This date,
- the provided index name prefix, and the provided date rounding are then formatted into a date math index name expression.
- Here too, a date format can optionally be specified to control how the date is formatted into the date math index name
- expression.
- An example pipeline that points documents to a monthly index that starts with a `myindex-` prefix based on a
- date in the `date1` field:
- [source,js]
- --------------------------------------------------
- PUT _ingest/pipeline/monthlyindex
- {
- "description": "monthly date-time index naming",
- "processors" : [
- {
- "date_index_name" : {
- "field" : "date1",
- "index_name_prefix" : "myindex-",
- "date_rounding" : "M"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- Using that pipeline for an index request:
- [source,js]
- --------------------------------------------------
- PUT /myindex/_doc/1?pipeline=monthlyindex
- {
- "date1" : "2016-04-25T12:02:01.789Z"
- }
- --------------------------------------------------
- // CONSOLE
- // TEST[continued]
- [source,js]
- --------------------------------------------------
- {
- "_index" : "myindex-2016-04-01",
- "_type" : "_doc",
- "_id" : "1",
- "_version" : 1,
- "result" : "created",
- "_shards" : {
- "total" : 2,
- "successful" : 1,
- "failed" : 0
- },
- "_seq_no" : 0,
- "_primary_term" : 1
- }
- --------------------------------------------------
- // TESTRESPONSE
- The above request will not index this document into the `myindex` index, but into the `myindex-2016-04-01` index because
- it was rounded by month. This is because the date-index-name-processor overrides the `_index` property of the document.
- To see the date-math value of the index supplied in the actual index request which resulted in the above document being
- indexed into `myindex-2016-04-01` we can inspect the effects of the processor using a simulate request.
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/_simulate
- {
- "pipeline" :
- {
- "description": "monthly date-time index naming",
- "processors" : [
- {
- "date_index_name" : {
- "field" : "date1",
- "index_name_prefix" : "myindex-",
- "date_rounding" : "M"
- }
- }
- ]
- },
- "docs": [
- {
- "_source": {
- "date1": "2016-04-25T12:02:01.789Z"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- and the result:
- [source,js]
- --------------------------------------------------
- {
- "docs" : [
- {
- "doc" : {
- "_id" : "_id",
- "_index" : "<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>",
- "_type" : "_type",
- "_source" : {
- "date1" : "2016-04-25T12:02:01.789Z"
- },
- "_ingest" : {
- "timestamp" : "2016-11-08T19:43:03.850+0000"
- }
- }
- }
- ]
- }
- --------------------------------------------------
- // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
- The above example shows that `_index` was set to `<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>`. Elasticsearch
- understands this to mean `2016-04-01`, as explained in the <<date-math-index-names, date math index name documentation>>.
- [[date-index-name-options]]
- .Date index name options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to get the date or timestamp from.
- | `index_name_prefix` | no | - | A prefix of the index name to be prepended before the printed date.
- | `date_rounding` | yes | - | How to round the date when formatting the date into the index name. Valid values are: `y` (year), `M` (month), `w` (week), `d` (day), `h` (hour), `m` (minute) and `s` (second).
- | `date_formats` | no | yyyy-MM-dd'T'HH:mm:ss.SSSZ | An array of the expected date formats for parsing dates / timestamps in the document being preprocessed. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
- | `timezone` | no | UTC | The timezone to use when parsing the date and when the date math index name support resolves expressions into concrete index names.
- | `locale` | no | ENGLISH | The locale to use when parsing the date from the document being preprocessed, relevant when parsing month names or week days.
- | `index_name_format` | no | yyyy-MM-dd | The format to be used when printing the parsed date into the index name. A valid Joda pattern is expected here.
- |======
- [[fail-processor]]
- === Fail Processor
- Raises an exception. This is useful for when
- you expect a pipeline to fail and want to relay a specific message
- to the requester.
- [[fail-options]]
- .Fail Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `message` | yes | - | The error message of the `FailException` thrown by the processor
- |======
- [source,js]
- --------------------------------------------------
- {
- "fail": {
- "message": "an error message"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[foreach-processor]]
- === Foreach Processor
- Processes elements in an array of unknown length.
- All processors can operate on elements inside an array, but if all elements of an array need to
- be processed in the same way, defining a processor for each element becomes cumbersome and tricky
- because it is likely that the number of elements in an array is unknown. For this reason the `foreach`
- processor exists. By specifying the field holding array elements and a processor that
- defines what should happen to each element, array fields can easily be preprocessed.
- A processor inside the foreach processor works in the context of each array element, which is placed in the ingest metadata
- under the `_ingest._value` key. If the array element is a JSON object, `_ingest._value` holds all the immediate fields of that
- object; if the element is a scalar value, `_ingest._value` holds just that value. Note that if a processor prior to the
- `foreach` processor used the `_ingest._value` key, then that value will not be available to the processor inside
- the `foreach` processor. The `foreach` processor does restore the original value, so that value is available to processors
- after the `foreach` processor.
- Note that any other fields from the document are accessible and modifiable like with all other processors. This processor
- just puts the current array element being read into the `_ingest._value` ingest metadata attribute, so that it may be
- pre-processed.
- If the `foreach` processor fails to process an element inside the array, and no `on_failure` processor has been specified,
- then it aborts the execution and leaves the array unmodified.
- [[foreach-options]]
- .Foreach Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The array field
- | `processor` | yes | - | The processor to execute against each field
- |======
- Assume the following document:
- [source,js]
- --------------------------------------------------
- {
- "values" : ["foo", "bar", "baz"]
- }
- --------------------------------------------------
- // NOTCONSOLE
- When this `foreach` processor operates on this sample document:
- [source,js]
- --------------------------------------------------
- {
- "foreach" : {
- "field" : "values",
- "processor" : {
- "uppercase" : {
- "field" : "_ingest._value"
- }
- }
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- Then the document will look like this after preprocessing:
- [source,js]
- --------------------------------------------------
- {
- "values" : ["FOO", "BAR", "BAZ"]
- }
- --------------------------------------------------
- // NOTCONSOLE
- Let's take a look at another example:
- [source,js]
- --------------------------------------------------
- {
- "persons" : [
- {
- "id" : "1",
- "name" : "John Doe"
- },
- {
- "id" : "2",
- "name" : "Jane Doe"
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- In this case, the `id` field needs to be removed,
- so the following `foreach` processor is used:
- [source,js]
- --------------------------------------------------
- {
- "foreach" : {
- "field" : "persons",
- "processor" : {
- "remove" : {
- "field" : "_ingest._value.id"
- }
- }
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- After preprocessing the result is:
- [source,js]
- --------------------------------------------------
- {
- "persons" : [
- {
- "name" : "John Doe"
- },
- {
- "name" : "Jane Doe"
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- The wrapped processor can have an `on_failure` definition.
- For example, the `id` field may not exist on all person objects.
- Instead of failing the index request, you can use an `on_failure`
- block to send the document to the 'failure_index' index for later inspection:
- [source,js]
- --------------------------------------------------
- {
- "foreach" : {
- "field" : "persons",
- "processor" : {
- "remove" : {
- "field" : "_value.id",
- "on_failure" : [
- {
- "set" : {
- "field", "_index",
- "value", "failure_index"
- }
- }
- ]
- }
- }
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- In this example, if the `remove` processor does fail, then
- the array elements that have been processed thus far will
- be updated.
- Another advanced example can be found in the {plugins}/ingest-attachment-with-arrays.html[attachment processor documentation].
- [[grok-processor]]
- === Grok Processor
- Extracts structured fields out of a single text field within a document. You choose which field to
- extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular
- expression that supports aliased expressions that can be reused.
- This tool is perfect for syslog logs, apache and other webserver logs, mysql logs, and in general, any log format
- that is generally written for humans and not computer consumption.
- This processor comes packaged with many
- https://github.com/elastic/elasticsearch/blob/{branch}/libs/grok/src/main/resources/patterns[reusable patterns].
- If you need help building patterns to match your logs, you will find the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] tool quite useful! The Grok Debugger is an {xpack} feature under the Basic License and is therefore *free to use*. The Grok Constructor at <http://grokconstructor.appspot.com/> is also a useful tool.
- [[grok-basics]]
- ==== Grok Basics
- Grok sits on top of regular expressions, so any regular expressions are valid in grok as well.
- The regular expression library is Oniguruma, and you can see the full supported regexp syntax
- https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma site].
- Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more
- complex patterns that match your fields.
- The syntax for reusing a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`.
- The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER`
- pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both
- patterns that are provided within the default patterns set.
- The `SEMANTIC` is the identifier you give to the piece of text being matched. For example, `3.44` could be the
- duration of an event, so you could call it simply `duration`. Further, a string `55.3.244.1` might identify
- the `client` making a request.
- The `TYPE` is the type you wish to cast your named field. `int`, `long`, `double`, `float` and `boolean` are supported types for coercion.
- For example, you might want to match the following text:
- [source,txt]
- --------------------------------------------------
- 3.44 55.3.244.1
- --------------------------------------------------
- You may know that the message in the example is a number followed by an IP address. You can match this text by using the following
- Grok expression.
- [source,txt]
- --------------------------------------------------
- %{NUMBER:duration} %{IP:client}
- --------------------------------------------------
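- To also coerce the matched number into a numeric field, a `TYPE` suffix can be appended to the semantic, for example:
- [source,txt]
- --------------------------------------------------
- %{NUMBER:duration:float} %{IP:client}
- --------------------------------------------------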
- [[using-grok]]
- ==== Using the Grok Processor in a Pipeline
- [[grok-options]]
- .Grok Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to use for grok expression parsing
- | `patterns` | yes | - | An ordered list of grok expressions to match and extract named captures with. Returns on the first expression in the list that matches.
- | `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
- | `trace_match` | no | false | when true, `_ingest._grok_match_index` will be inserted into your matched document's metadata with the index into the pattern found in `patterns` that matched.
- | `ignore_missing` | no | false | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
- Here is an example of using the provided patterns to extract out and name structured fields from a string field in
- a document.
- [source,js]
- --------------------------------------------------
- {
- "message": "55.3.244.1 GET /index.html 15824 0.043"
- }
- --------------------------------------------------
- // NOTCONSOLE
- The pattern for this could be:
- [source,txt]
- --------------------------------------------------
- %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
- --------------------------------------------------
- Here is an example pipeline for processing the above document by using Grok:
- [source,js]
- --------------------------------------------------
- {
- "description" : "...",
- "processors": [
- {
- "grok": {
- "field": "message",
- "patterns": ["%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"]
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- This pipeline will insert these named captures as new fields within the document, like so:
- [source,js]
- --------------------------------------------------
- {
- "message": "55.3.244.1 GET /index.html 15824 0.043",
- "client": "55.3.244.1",
- "method": "GET",
- "request": "/index.html",
- "bytes": 15824,
- "duration": "0.043"
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[custom-patterns]]
- ==== Custom Patterns and Pattern Files
- The Grok processor comes pre-packaged with a base set of patterns. These patterns may not always have
- what you are looking for. Patterns have a very basic format. Each entry has a name and the pattern itself.
- You can add your own patterns to a processor definition under the `pattern_definitions` option.
- Here is an example of a pipeline specifying custom pattern definitions:
- [source,js]
- --------------------------------------------------
- {
- "description" : "...",
- "processors": [
- {
- "grok": {
- "field": "message",
- "patterns": ["my %{FAVORITE_DOG:dog} is colored %{RGB:color}"],
- "pattern_definitions" : {
- "FAVORITE_DOG" : "beagle",
- "RGB" : "RED|GREEN|BLUE"
- }
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[trace-match]]
- ==== Providing Multiple Match Patterns
- Sometimes one pattern is not enough to capture the potential structure of a field. Let's assume we
- want to match all messages that contain your favorite pet breeds of either cats or dogs. One way to accomplish
- this is to provide two distinct patterns that can be matched, instead of one really complicated expression capturing
- the same `or` behavior.
- Here is an example of such a configuration executed against the simulate API:
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/_simulate
- {
- "pipeline": {
- "description" : "parse multiple patterns",
- "processors": [
- {
- "grok": {
- "field": "message",
- "patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
- "pattern_definitions" : {
- "FAVORITE_DOG" : "beagle",
- "FAVORITE_CAT" : "burmese"
- }
- }
- }
- ]
- },
- "docs":[
- {
- "_source": {
- "message": "I love burmese cats!"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- Response:
- [source,js]
- --------------------------------------------------
- {
- "docs": [
- {
- "doc": {
- "_type": "_type",
- "_index": "_index",
- "_id": "_id",
- "_source": {
- "message": "I love burmese cats!",
- "pet": "burmese"
- },
- "_ingest": {
- "timestamp": "2016-11-08T19:43:03.850+0000"
- }
- }
- }
- ]
- }
- --------------------------------------------------
- // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
- Both patterns will set the field `pet` with the appropriate match, but what if we want to trace which of our
- patterns matched and populated our fields? We can do this with the `trace_match` parameter. Here is the output of
- that same pipeline, but with `"trace_match": true` configured:
- ////
- Hidden setup for example:
- [source,js]
- --------------------------------------------------
- POST _ingest/pipeline/_simulate
- {
- "pipeline": {
- "description" : "parse multiple patterns",
- "processors": [
- {
- "grok": {
- "field": "message",
- "patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
- "trace_match": true,
- "pattern_definitions" : {
- "FAVORITE_DOG" : "beagle",
- "FAVORITE_CAT" : "burmese"
- }
- }
- }
- ]
- },
- "docs":[
- {
- "_source": {
- "message": "I love burmese cats!"
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- ////
- [source,js]
- --------------------------------------------------
- {
- "docs": [
- {
- "doc": {
- "_type": "_type",
- "_index": "_index",
- "_id": "_id",
- "_source": {
- "message": "I love burmese cats!",
- "pet": "burmese"
- },
- "_ingest": {
- "_grok_match_index": "1",
- "timestamp": "2016-11-08T19:43:03.850+0000"
- }
- }
- }
- ]
- }
- --------------------------------------------------
- // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
- In the above response, you can see that the index of the pattern that matched was `"1"`. This is to say that it was the
- second (index starts at zero) pattern in `patterns` to match.
- This trace metadata enables debugging which of the patterns matched. This information is stored in the ingest
- metadata and will not be indexed.
- [[grok-processor-rest-get]]
- ==== Retrieving patterns from REST endpoint
- The Grok Processor comes packaged with its own REST endpoint for retrieving which patterns the processor is packaged with.
- [source,js]
- --------------------------------------------------
- GET _ingest/processor/grok
- --------------------------------------------------
- // CONSOLE
- The above request will return a response body containing a key-value representation of the built-in patterns dictionary.
- [source,js]
- --------------------------------------------------
- {
- "patterns" : {
- "BACULA_CAPACITY" : "%{INT}{1,3}(,%{INT}{3})*",
- "PATH" : "(?:%{UNIXPATH}|%{WINPATH})",
- ...
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- This can be useful to reference as the built-in patterns change across versions.
- [[gsub-processor]]
- === Gsub Processor
- Converts a string field by applying a regular expression and a replacement.
- If the field is not a string, the processor will throw an exception.
- [[gsub-options]]
- .Gsub Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to apply the replacement to
- | `pattern` | yes | - | The pattern to be replaced
- | `replacement` | yes | - | The string to replace the matching patterns with
- | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "gsub": {
- "field": "field1",
- "pattern": "\.",
- "replacement": "-"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[join-processor]]
- === Join Processor
- Joins each element of an array into a single string using a separator character between each element.
- Throws an error when the field is not an array.
- [[join-options]]
- .Join Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to be separated
- | `separator` | yes | - | The separator character
- | `target_field` | no | `field` | The field to assign the joined value to, by default `field` is updated in-place
- |======
- [source,js]
- --------------------------------------------------
- {
- "join": {
- "field": "joined_array_field",
- "separator": "-"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[json-processor]]
- === JSON Processor
- Converts a JSON string into a structured JSON object.
- [[json-options]]
- .Json Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to be parsed
- | `target_field` | no | `field` | The field to insert the converted structured object into
- | `add_to_root` | no | false | Flag that forces the serialized json to be injected into the top level of the document. `target_field` must not be set when this option is chosen.
- |======
- All JSON-supported types will be parsed (null, boolean, number, array, object, string).
- Suppose you provide this configuration of the `json` processor:
- [source,js]
- --------------------------------------------------
- {
- "json" : {
- "field" : "string_source",
- "target_field" : "json_target"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- If the following document is processed:
- [source,js]
- --------------------------------------------------
- {
- "string_source": "{\"foo\": 2000}"
- }
- --------------------------------------------------
- // NOTCONSOLE
- after the `json` processor operates on it, it will look like:
- [source,js]
- --------------------------------------------------
- {
- "string_source": "{\"foo\": 2000}",
- "json_target": {
- "foo": 2000
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- If the following configuration is provided, omitting the optional `target_field` setting:
- [source,js]
- --------------------------------------------------
- {
- "json" : {
- "field" : "source_and_target"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- then after the `json` processor operates on this document:
- [source,js]
- --------------------------------------------------
- {
- "source_and_target": "{\"foo\": 2000}"
- }
- --------------------------------------------------
- // NOTCONSOLE
- it will look like:
- [source,js]
- --------------------------------------------------
- {
- "source_and_target": {
- "foo": 2000
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- This illustrates that, unless it is explicitly named in the processor configuration, the `target_field`
- is the same field provided in the required `field` configuration.
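- When the parsed object should instead be merged into the top level of the document, a sketch using `add_to_root` (in which case `target_field` must not be set):
- [source,js]
- --------------------------------------------------
- {
-   "json" : {
-     "field" : "string_source",
-     "add_to_root" : true
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE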
- [[kv-processor]]
- === KV Processor
- This processor helps automatically parse messages (or specific event fields) which are of the foo=bar variety.
- For example, if you have a log message which contains `ip=1.2.3.4 error=REFUSED`, you can parse those automatically by configuring:
- [source,js]
- --------------------------------------------------
- {
- "kv": {
- "field": "message",
- "field_split": " ",
- "value_split": "="
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[kv-options]]
- .Kv Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to be parsed
- | `field_split` | yes | - | Regex pattern to use for splitting key-value pairs
- | `value_split` | yes | - | Regex pattern to use for splitting the key from the value within a key-value pair
- | `target_field` | no | `null` | The field to insert the extracted keys into. Defaults to the root of the document
- | `include_keys` | no | `null` | List of keys to filter and insert into document. Defaults to including all keys
- | `exclude_keys` | no | `null` | List of keys to exclude from document
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
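- With the configuration shown above, the example log line would be expected to produce a document
- similar to the following; the extracted keys land at the root of the document because `target_field`
- is not set:
- [source,js]
- --------------------------------------------------
- {
-   "message": "ip=1.2.3.4 error=REFUSED",
-   "ip": "1.2.3.4",
-   "error": "REFUSED"
- }
- --------------------------------------------------
- // NOTCONSOLE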
- [[lowercase-processor]]
- === Lowercase Processor
- Converts a string to its lowercase equivalent.
- [[lowercase-options]]
- .Lowercase Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to make lowercase
- | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "lowercase": {
- "field": "foo"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[remove-processor]]
- === Remove Processor
- Removes existing fields. If a specified field doesn't exist, an exception will be thrown.
- [[remove-options]]
- .Remove Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | Fields to be removed
- |======
- Here is an example to remove a single field:
- [source,js]
- --------------------------------------------------
- {
- "remove": {
- "field": "foo"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- To remove multiple fields, you can use the following configuration:
- [source,js]
- --------------------------------------------------
- {
- "remove": {
- "field": ["foo", "bar"]
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[rename-processor]]
- === Rename Processor
- Renames an existing field. If the field doesn't exist or the new name is already used, an exception will be thrown.
- [[rename-options]]
- .Rename Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to be renamed
- | `target_field` | yes | - | The new name of the field
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "rename": {
- "field": "foo",
- "target_field": "foobar"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[script-processor]]
- === Script Processor
- Allows inline and stored scripts to be executed within ingest pipelines.
- See <<modules-scripting-using, How to use scripts>> to learn more about writing scripts. The Script Processor
- leverages caching of compiled scripts for improved performance. Since the
- script specified within the processor is potentially re-compiled per document, it is important
- to understand how script caching works. To learn more about
- caching see <<modules-scripting-using-caching, Script Caching>>.
- [[script-options]]
- .Script Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `lang` | no | "painless" | The scripting language
- | `id` | no | - | The stored script id to refer to
- | `source` | no | - | An inline script to be executed
- | `params` | no | - | Script Parameters
- |======
- One of the `id` or `source` options must be provided to reference the script to execute.
- You can access the current ingest document from within the script context by using the `ctx` variable.
- The following example sets a new field called `field_a_plus_b_times_c` to be the sum of two existing
- numeric fields `field_a` and `field_b` multiplied by the parameter `param_c`:
- [source,js]
- --------------------------------------------------
- {
- "script": {
- "lang": "painless",
- "source": "ctx.field_a_plus_b_times_c = (ctx.field_a + ctx.field_b) * params.param_c",
- "params": {
- "param_c": 10
- }
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
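- As a sketch, assuming hypothetical input values `field_a: 5` and `field_b: 3`, the processor above
- would be expected to add `field_a_plus_b_times_c` with the value `(5 + 3) * 10 = 80`:
- [source,js]
- --------------------------------------------------
- {
-   "field_a": 5,
-   "field_b": 3,
-   "field_a_plus_b_times_c": 80
- }
- --------------------------------------------------
- // NOTCONSOLE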
- It is possible to use the Script Processor to manipulate document metadata like `_index` and `_type` during
- ingestion. Here is an example of an Ingest Pipeline that sets the index to `my_index` and the type to `_doc`,
- no matter what was provided in the original index request:
- [source,js]
- --------------------------------------------------
- PUT _ingest/pipeline/my_index
- {
- "description": "use index:my_index and type:_doc",
- "processors": [
- {
- "script": {
- "source": """
- ctx._index = 'my_index';
- ctx._type = '_doc';
- """
- }
- }
- ]
- }
- --------------------------------------------------
- // CONSOLE
- Using the above pipeline, we can attempt to index a document into the `any_index` index.
- [source,js]
- --------------------------------------------------
- PUT any_index/_doc/1?pipeline=my_index
- {
- "message": "text"
- }
- --------------------------------------------------
- // CONSOLE
- // TEST[continued]
- The response from the above index request:
- [source,js]
- --------------------------------------------------
- {
- "_index": "my_index",
- "_type": "_doc",
- "_id": "1",
- "_version": 1,
- "result": "created",
- "_shards": {
- "total": 2,
- "successful": 1,
- "failed": 0
- },
- "_seq_no": 0,
- "_primary_term": 1,
- }
- --------------------------------------------------
- // TESTRESPONSE
- In the above response, you can see that our document was actually indexed into `my_index` instead of
- `any_index`. This type of manipulation is often convenient in pipelines that have various branches of transformation
- and, depending on the branch taken, route documents into different indices.
- [[set-processor]]
- === Set Processor
- Sets one field and associates it with the specified value. If the field already exists,
- its value will be replaced with the provided one.
- [[set-options]]
- .Set Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to insert, upsert, or update
- | `value` | yes | - | The value to be set for the field
- | `override` | no | `true` | If `true`, the processor updates fields that already hold a non-null value. When set to `false`, such fields will not be touched.
- |======
- [source,js]
- --------------------------------------------------
- {
- "set": {
- "field": "field1",
- "value": 582.1
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
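- The `override` option can be added to the same configuration. The following sketch (the field name
- and value are illustrative) only sets `field1` when it is missing or holds a `null` value:
- [source,js]
- --------------------------------------------------
- {
-   "set": {
-     "field": "field1",
-     "value": 582.1,
-     "override": false
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE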
- [[split-processor]]
- === Split Processor
- Splits a field into an array using a separator character. Only works on string fields.
- [[split-options]]
- .Split Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to split
- | `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
- | `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "split": {
- "field": "my_field",
- "separator": "\\s+" <1>
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- <1> Treat all consecutive whitespace characters as a single separator
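- For illustration, a document with a hypothetical `my_field` value such as `"one  two three"` would
- be expected to come out as:
- [source,js]
- --------------------------------------------------
- {
-   "my_field": ["one", "two", "three"]
- }
- --------------------------------------------------
- // NOTCONSOLE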
- [[sort-processor]]
- === Sort Processor
- Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted
- numerically, while arrays of strings or heterogeneous arrays of strings + numbers will be sorted lexicographically.
- Throws an error when the field is not an array.
- [[sort-options]]
- .Sort Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to be sorted
- | `order` | no | `"asc"` | The sort order to use. Accepts `"asc"` or `"desc"`.
- | `target_field` | no | `field` | The field to assign the sorted value to, by default `field` is updated in-place
- |======
- [source,js]
- --------------------------------------------------
- {
- "sort": {
- "field": "field_to_sort",
- "order": "desc"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
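- As a sketch, a document whose hypothetical `field_to_sort` holds the homogeneous numeric array
- `[3, 11, 7]` would be expected to be sorted numerically in descending order:
- [source,js]
- --------------------------------------------------
- {
-   "field_to_sort": [11, 7, 3]
- }
- --------------------------------------------------
- // NOTCONSOLE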
- [[trim-processor]]
- === Trim Processor
- Trims whitespace from a field.
- NOTE: This only works on leading and trailing whitespace.
- [[trim-options]]
- .Trim Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The string-valued field to trim whitespace from
- | `target_field` | no | `field` | The field to assign the trimmed value to, by default `field` is updated in-place
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "trim": {
- "field": "foo"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
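- For illustration, a hypothetical `foo` value of `"  bar   baz "` would be expected to lose only its
- leading and trailing whitespace; the whitespace between the words is preserved:
- [source,js]
- --------------------------------------------------
- {
-   "foo": "bar   baz"
- }
- --------------------------------------------------
- // NOTCONSOLE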
- [[uppercase-processor]]
- === Uppercase Processor
- Converts a string to its uppercase equivalent.
- [[uppercase-options]]
- .Uppercase Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to make uppercase
- | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "uppercase": {
- "field": "foo"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- [[dot-expand-processor]]
- === Dot Expander Processor
- Expands a field with dots into an object field. This processor allows fields
- with dots in the name to be accessible by other processors in the pipeline.
- Otherwise these <<accessing-data-in-pipelines,fields>> can't be accessed by any processor.
- [[dot-expender-options]]
- .Dot Expand Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to expand into an object field
- | `path` | no | - | The field that contains the field to expand. Only required if the field to expand is part of another object field, because the `field` option can only understand leaf fields.
- |======
- [source,js]
- --------------------------------------------------
- {
- "dot_expander": {
- "field": "foo.bar"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- For example, the `dot_expander` processor would turn this document:
- [source,js]
- --------------------------------------------------
- {
- "foo.bar" : "value"
- }
- --------------------------------------------------
- // NOTCONSOLE
- into:
- [source,js]
- --------------------------------------------------
- {
- "foo" : {
- "bar" : "value"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- If there is already a `bar` field nested under `foo`, then
- this processor merges the `foo.bar` field into it. If the existing
- field holds a scalar value, the processor turns it into an array field.
- For example, the following document:
- [source,js]
- --------------------------------------------------
- {
- "foo.bar" : "value2",
- "foo" : {
- "bar" : "value1"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- is transformed by the `dot_expander` processor into:
- [source,js]
- --------------------------------------------------
- {
- "foo" : {
- "bar" : ["value1", "value2"]
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
- If any field outside of the leaf field conflicts with a pre-existing field of the same name,
- then that field needs to be renamed first.
- Consider the following document:
- [source,js]
- --------------------------------------------------
- {
- "foo": "value1",
- "foo.bar": "value2"
- }
- --------------------------------------------------
- // NOTCONSOLE
- The `foo` field needs to be renamed before the `dot_expander`
- processor is applied. So, in order for the `foo.bar` field to be properly
- expanded into the `bar` field under the `foo` field, the following
- pipeline should be used:
- [source,js]
- --------------------------------------------------
- {
- "processors" : [
- {
- "rename" : {
- "field" : "foo",
- "target_field" : "foo.bar""
- }
- },
- {
- "dot_expander": {
- "field": "foo.bar"
- }
- }
- ]
- }
- --------------------------------------------------
- // NOTCONSOLE
- The reason for this is that Ingest doesn't know how to automatically cast
- a scalar field to an object field.
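- The `path` option described above can be sketched as follows (the field names are illustrative).
- When the dotted field is nested inside another object field, `path` points the processor at the
- containing object:
- [source,js]
- --------------------------------------------------
- {
-   "dot_expander": {
-     "field": "address.city",
-     "path": "user"
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE
- With this configuration, a document containing the object field `user` with an inner
- `"address.city": "Paris"` key would be expected to become:
- [source,js]
- --------------------------------------------------
- {
-   "user": {
-     "address": {
-       "city": "Paris"
-     }
-   }
- }
- --------------------------------------------------
- // NOTCONSOLE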
- [[urldecode-processor]]
- === URL Decode Processor
- URL-decodes a string.
- [[urldecode-options]]
- .URL Decode Options
- [options="header"]
- |======
- | Name | Required | Default | Description
- | `field` | yes | - | The field to decode
- | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
- | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
- |======
- [source,js]
- --------------------------------------------------
- {
- "urldecode": {
- "field": "my_url_to_decode"
- }
- }
- --------------------------------------------------
- // NOTCONSOLE
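- For illustration, a hypothetical `my_url_to_decode` value of
- `"https%3A%2F%2Fexample.com%2Fsearch%3Fq%3Dhello%20world"` would be expected to decode to:
- [source,js]
- --------------------------------------------------
- {
-   "my_url_to_decode": "https://example.com/search?q=hello world"
- }
- --------------------------------------------------
- // NOTCONSOLE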