ingest-node.asciidoc 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986
  1. [[pipeline]]
  2. == Pipeline Definition
  3. A pipeline is a definition of a series of <<ingest-processors, processors>> that are to be executed
  4. in the same order as they are declared. A pipeline consists of two main fields: a `description`
  5. and a list of `processors`:
  6. [source,js]
  7. --------------------------------------------------
  8. {
  9. "description" : "...",
  10. "processors" : [ ... ]
  11. }
  12. --------------------------------------------------
  13. // NOTCONSOLE
  14. The `description` is a special field to store a helpful description of
  15. what the pipeline does.
  16. The `processors` parameter defines a list of processors to be executed in
  17. order.
  18. [[ingest-apis]]
  19. == Ingest APIs
  20. The following ingest APIs are available for managing pipelines:
  21. * <<put-pipeline-api>> to add or update a pipeline
  22. * <<get-pipeline-api>> to return a specific pipeline
  23. * <<delete-pipeline-api>> to delete a pipeline
  24. * <<simulate-pipeline-api>> to simulate a call to a pipeline
  25. [[put-pipeline-api]]
  26. === Put Pipeline API
  27. The put pipeline API adds pipelines and updates existing pipelines in the cluster.
  28. [source,js]
  29. --------------------------------------------------
  30. PUT _ingest/pipeline/my-pipeline-id
  31. {
  32. "description" : "describe pipeline",
  33. "processors" : [
  34. {
  35. "set" : {
  36. "field": "foo",
  37. "value": "bar"
  38. }
  39. }
  40. ]
  41. }
  42. --------------------------------------------------
  43. // CONSOLE
  44. NOTE: The put pipeline API also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
  45. pipeline changes take effect immediately.
  46. [[get-pipeline-api]]
  47. === Get Pipeline API
  48. The get pipeline API returns pipelines based on ID. This API always returns a local reference of the pipeline.
  49. [source,js]
  50. --------------------------------------------------
  51. GET _ingest/pipeline/my-pipeline-id
  52. --------------------------------------------------
  53. // CONSOLE
  54. // TEST[continued]
  55. Example response:
  56. [source,js]
  57. --------------------------------------------------
  58. {
  59. "my-pipeline-id" : {
  60. "description" : "describe pipeline",
  61. "processors" : [
  62. {
  63. "set" : {
  64. "field" : "foo",
  65. "value" : "bar"
  66. }
  67. }
  68. ]
  69. }
  70. }
  71. --------------------------------------------------
  72. // TESTRESPONSE
  73. For each returned pipeline, the source and the version are returned.
  74. The version is useful for knowing which version of the pipeline the node has.
  75. You can specify multiple IDs to return more than one pipeline. Wildcards are also supported.
  76. [float]
  77. [[versioning-pipelines]]
  78. ==== Pipeline Versioning
  79. Pipelines can optionally add a `version` number, which can be any integer value,
  80. in order to simplify pipeline management by external systems. The `version`
  81. field is completely optional and it is meant solely for external management of
  82. pipelines. To unset a `version`, simply replace the pipeline without specifying
  83. one.
  84. [source,js]
  85. --------------------------------------------------
  86. PUT _ingest/pipeline/my-pipeline-id
  87. {
  88. "description" : "describe pipeline",
  89. "version" : 123,
  90. "processors" : [
  91. {
  92. "set" : {
  93. "field": "foo",
  94. "value": "bar"
  95. }
  96. }
  97. ]
  98. }
  99. --------------------------------------------------
  100. // CONSOLE
  101. To check for the `version`, you can
  102. <<common-options-response-filtering, filter responses>>
  103. using `filter_path` to limit the response to just the `version`:
  104. [source,js]
  105. --------------------------------------------------
  106. GET /_ingest/pipeline/my-pipeline-id?filter_path=*.version
  107. --------------------------------------------------
  108. // CONSOLE
  109. // TEST[continued]
  110. This should give a small response that makes it both easy and inexpensive to parse:
  111. [source,js]
  112. --------------------------------------------------
  113. {
  114. "my-pipeline-id" : {
  115. "version" : 123
  116. }
  117. }
  118. --------------------------------------------------
  119. // TESTRESPONSE
  120. [[delete-pipeline-api]]
  121. === Delete Pipeline API
  122. The delete pipeline API deletes pipelines by ID or wildcard match (`my-*`, `*`).
  123. [source,js]
  124. --------------------------------------------------
  125. DELETE _ingest/pipeline/my-pipeline-id
  126. --------------------------------------------------
  127. // CONSOLE
  128. // TEST[continued]
  129. ////
  130. Hidden setup for wildcard test:
  131. [source,js]
  132. --------------------------------------------------
  133. PUT _ingest/pipeline/wild-one
  134. {
  135. "description" : "first pipeline to be wildcard deleted",
  136. "processors" : [ ]
  137. }
  138. PUT _ingest/pipeline/wild-two
  139. {
  140. "description" : "second pipeline to be wildcard deleted",
  141. "processors" : [ ]
  142. }
  143. DELETE _ingest/pipeline/*
  144. --------------------------------------------------
  145. // CONSOLE
  146. Hidden expected response:
  147. [source,js]
  148. --------------------------------------------------
  149. {
  150. "acknowledged": true
  151. }
  152. --------------------------------------------------
  153. // TESTRESPONSE
  154. ////
  155. [[simulate-pipeline-api]]
  156. === Simulate Pipeline API
  157. The simulate pipeline API executes a specific pipeline against
  158. the set of documents provided in the body of the request.
  159. You can either specify an existing pipeline to execute
  160. against the provided documents, or supply a pipeline definition in
  161. the body of the request.
  162. Here is the structure of a simulate request with a pipeline definition provided
  163. in the body of the request:
  164. [source,js]
  165. --------------------------------------------------
  166. POST _ingest/pipeline/_simulate
  167. {
  168. "pipeline" : {
  169. // pipeline definition here
  170. },
  171. "docs" : [
  172. { /** first document **/ },
  173. { /** second document **/ },
  174. // ...
  175. ]
  176. }
  177. --------------------------------------------------
  178. // NOTCONSOLE
  179. Here is the structure of a simulate request against an existing pipeline:
  180. [source,js]
  181. --------------------------------------------------
  182. POST _ingest/pipeline/my-pipeline-id/_simulate
  183. {
  184. "docs" : [
  185. { /** first document **/ },
  186. { /** second document **/ },
  187. // ...
  188. ]
  189. }
  190. --------------------------------------------------
  191. // NOTCONSOLE
  192. Here is an example of a simulate request with a pipeline defined in the request
  193. and its response:
  194. [source,js]
  195. --------------------------------------------------
  196. POST _ingest/pipeline/_simulate
  197. {
  198. "pipeline" :
  199. {
  200. "description": "_description",
  201. "processors": [
  202. {
  203. "set" : {
  204. "field" : "field2",
  205. "value" : "_value"
  206. }
  207. }
  208. ]
  209. },
  210. "docs": [
  211. {
  212. "_index": "index",
  213. "_type": "type",
  214. "_id": "id",
  215. "_source": {
  216. "foo": "bar"
  217. }
  218. },
  219. {
  220. "_index": "index",
  221. "_type": "type",
  222. "_id": "id",
  223. "_source": {
  224. "foo": "rab"
  225. }
  226. }
  227. ]
  228. }
  229. --------------------------------------------------
  230. // CONSOLE
  231. Response:
  232. [source,js]
  233. --------------------------------------------------
  234. {
  235. "docs": [
  236. {
  237. "doc": {
  238. "_id": "id",
  239. "_index": "index",
  240. "_type": "type",
  241. "_source": {
  242. "field2": "_value",
  243. "foo": "bar"
  244. },
  245. "_ingest": {
  246. "timestamp": "2017-05-04T22:30:03.187Z"
  247. }
  248. }
  249. },
  250. {
  251. "doc": {
  252. "_id": "id",
  253. "_index": "index",
  254. "_type": "type",
  255. "_source": {
  256. "field2": "_value",
  257. "foo": "rab"
  258. },
  259. "_ingest": {
  260. "timestamp": "2017-05-04T22:30:03.188Z"
  261. }
  262. }
  263. }
  264. ]
  265. }
  266. --------------------------------------------------
  267. // TESTRESPONSE[s/"2017-05-04T22:30:03.187Z"/$body.docs.0.doc._ingest.timestamp/]
  268. // TESTRESPONSE[s/"2017-05-04T22:30:03.188Z"/$body.docs.1.doc._ingest.timestamp/]
  269. [[ingest-verbose-param]]
  270. ==== Viewing Verbose Results
  271. You can use the simulate pipeline API to see how each processor affects the ingest document
  272. as it passes through the pipeline. To see the intermediate results of
  273. each processor in the simulate request, you can add the `verbose` parameter
  274. to the request.
  275. Here is an example of a verbose request and its response:
  276. [source,js]
  277. --------------------------------------------------
  278. POST _ingest/pipeline/_simulate?verbose
  279. {
  280. "pipeline" :
  281. {
  282. "description": "_description",
  283. "processors": [
  284. {
  285. "set" : {
  286. "field" : "field2",
  287. "value" : "_value2"
  288. }
  289. },
  290. {
  291. "set" : {
  292. "field" : "field3",
  293. "value" : "_value3"
  294. }
  295. }
  296. ]
  297. },
  298. "docs": [
  299. {
  300. "_index": "index",
  301. "_type": "type",
  302. "_id": "id",
  303. "_source": {
  304. "foo": "bar"
  305. }
  306. },
  307. {
  308. "_index": "index",
  309. "_type": "type",
  310. "_id": "id",
  311. "_source": {
  312. "foo": "rab"
  313. }
  314. }
  315. ]
  316. }
  317. --------------------------------------------------
  318. // CONSOLE
  319. Response:
  320. [source,js]
  321. --------------------------------------------------
  322. {
  323. "docs": [
  324. {
  325. "processor_results": [
  326. {
  327. "doc": {
  328. "_id": "id",
  329. "_index": "index",
  330. "_type": "type",
  331. "_source": {
  332. "field2": "_value2",
  333. "foo": "bar"
  334. },
  335. "_ingest": {
  336. "timestamp": "2017-05-04T22:46:09.674Z"
  337. }
  338. }
  339. },
  340. {
  341. "doc": {
  342. "_id": "id",
  343. "_index": "index",
  344. "_type": "type",
  345. "_source": {
  346. "field3": "_value3",
  347. "field2": "_value2",
  348. "foo": "bar"
  349. },
  350. "_ingest": {
  351. "timestamp": "2017-05-04T22:46:09.675Z"
  352. }
  353. }
  354. }
  355. ]
  356. },
  357. {
  358. "processor_results": [
  359. {
  360. "doc": {
  361. "_id": "id",
  362. "_index": "index",
  363. "_type": "type",
  364. "_source": {
  365. "field2": "_value2",
  366. "foo": "rab"
  367. },
  368. "_ingest": {
  369. "timestamp": "2017-05-04T22:46:09.676Z"
  370. }
  371. }
  372. },
  373. {
  374. "doc": {
  375. "_id": "id",
  376. "_index": "index",
  377. "_type": "type",
  378. "_source": {
  379. "field3": "_value3",
  380. "field2": "_value2",
  381. "foo": "rab"
  382. },
  383. "_ingest": {
  384. "timestamp": "2017-05-04T22:46:09.677Z"
  385. }
  386. }
  387. }
  388. ]
  389. }
  390. ]
  391. }
  392. --------------------------------------------------
  393. // TESTRESPONSE[s/"2017-05-04T22:46:09.674Z"/$body.docs.0.processor_results.0.doc._ingest.timestamp/]
  394. // TESTRESPONSE[s/"2017-05-04T22:46:09.675Z"/$body.docs.0.processor_results.1.doc._ingest.timestamp/]
  395. // TESTRESPONSE[s/"2017-05-04T22:46:09.676Z"/$body.docs.1.processor_results.0.doc._ingest.timestamp/]
  396. // TESTRESPONSE[s/"2017-05-04T22:46:09.677Z"/$body.docs.1.processor_results.1.doc._ingest.timestamp/]
  397. [[accessing-data-in-pipelines]]
  398. == Accessing Data in Pipelines
  399. The processors in a pipeline have read and write access to documents that pass through the pipeline.
  400. The processors can access fields in the source of a document and the document's metadata fields.
  401. [float]
  402. [[accessing-source-fields]]
  403. === Accessing Fields in the Source
  404. Accessing a field in the source is straightforward. You simply refer to fields by
  405. their name. For example:
  406. [source,js]
  407. --------------------------------------------------
  408. {
  409. "set": {
  410. "field": "my_field",
  411. "value": 582.1
  412. }
  413. }
  414. --------------------------------------------------
  415. // NOTCONSOLE
  416. On top of this, fields from the source are always accessible via the `_source` prefix:
  417. [source,js]
  418. --------------------------------------------------
  419. {
  420. "set": {
  421. "field": "_source.my_field",
  422. "value": 582.1
  423. }
  424. }
  425. --------------------------------------------------
  426. // NOTCONSOLE
  427. [float]
  428. [[accessing-metadata-fields]]
  429. === Accessing Metadata Fields
  430. You can access metadata fields in the same way that you access fields in the source. This
  431. is possible because Elasticsearch doesn't allow fields in the source that have the
  432. same name as metadata fields.
  433. The following example sets the `_id` metadata field of a document to `1`:
  434. [source,js]
  435. --------------------------------------------------
  436. {
  437. "set": {
  438. "field": "_id",
  439. "value": "1"
  440. }
  441. }
  442. --------------------------------------------------
  443. // NOTCONSOLE
  444. The following metadata fields are accessible by a processor: `_index`, `_type`, `_id`, `_routing`, `_parent`.
  445. [float]
  446. [[accessing-ingest-metadata]]
  447. === Accessing Ingest Metadata Fields
  448. Beyond metadata fields and source fields, ingest also adds ingest metadata to the documents that it processes.
  449. These metadata properties are accessible under the `_ingest` key. Currently ingest adds the ingest timestamp
  450. under the `_ingest.timestamp` key of the ingest metadata. The ingest timestamp is the time when Elasticsearch
  451. received the index or bulk request to pre-process the document.
  452. Any processor can add ingest-related metadata during document processing. Ingest metadata is transient
  453. and is lost after a document has been processed by the pipeline. Therefore, ingest metadata won't be indexed.
  454. The following example adds a field with the name `received`. The value is the ingest timestamp:
  455. [source,js]
  456. --------------------------------------------------
  457. {
  458. "set": {
  459. "field": "received",
  460. "value": "{{_ingest.timestamp}}"
  461. }
  462. }
  463. --------------------------------------------------
  464. // NOTCONSOLE
  465. Unlike Elasticsearch metadata fields, the ingest metadata field name `_ingest` can be used as a valid field name
  466. in the source of a document. Use `_source._ingest` to refer to the field in the source document. Otherwise, `_ingest`
  467. will be interpreted as an ingest metadata field.
  468. [float]
  469. [[accessing-template-fields]]
  470. === Accessing Fields and Metafields in Templates
  471. A number of processor settings also support templating. Settings that support templating can have zero or more
  472. template snippets. A template snippet begins with `{{` and ends with `}}`.
  473. Accessing fields and metafields in templates is exactly the same as via regular processor field settings.
  474. The following example adds a field named `field_c`. Its value is a concatenation of
  475. the values of `field_a` and `field_b`.
  476. [source,js]
  477. --------------------------------------------------
  478. {
  479. "set": {
  480. "field": "field_c",
  481. "value": "{{field_a}} {{field_b}}"
  482. }
  483. }
  484. --------------------------------------------------
  485. // NOTCONSOLE
  486. The following example uses the value of the `geoip.country_iso_code` field in the source
  487. to set the index that the document will be indexed into:
  488. [source,js]
  489. --------------------------------------------------
  490. {
  491. "set": {
  492. "field": "_index",
  493. "value": "{{geoip.country_iso_code}}"
  494. }
  495. }
  496. --------------------------------------------------
  497. // NOTCONSOLE
  498. [[handling-failure-in-pipelines]]
  499. == Handling Failures in Pipelines
  500. In its simplest use case, a pipeline defines a list of processors that
  501. are executed sequentially, and processing halts at the first exception. This
  502. behavior may not be desirable when failures are expected. For example, you may have logs
  503. that don't match the specified grok expression. Instead of halting execution, you may
  504. want to index such documents into a separate index.
  505. To enable this behavior, you can use the `on_failure` parameter. The `on_failure` parameter
  506. defines a list of processors to be executed immediately following the failed processor.
  507. You can specify this parameter at the pipeline level, as well as at the processor
  508. level. If a processor specifies an `on_failure` configuration, whether
  509. it is empty or not, any exceptions that are thrown by the processor are caught, and the
  510. pipeline continues executing the remaining processors. Because you can define further processors
  511. within the scope of an `on_failure` statement, you can nest failure handling.
  512. The following example defines a pipeline that renames the `foo` field in
  513. the processed document to `bar`. If the document does not contain the `foo` field, the processor
  514. attaches an error message to the document for later analysis within
  515. Elasticsearch.
  516. [source,js]
  517. --------------------------------------------------
  518. {
  519. "description" : "my first pipeline with handled exceptions",
  520. "processors" : [
  521. {
  522. "rename" : {
  523. "field" : "foo",
  524. "target_field" : "bar",
  525. "on_failure" : [
  526. {
  527. "set" : {
  528. "field" : "error",
  529. "value" : "field \"foo\" does not exist, cannot rename to \"bar\""
  530. }
  531. }
  532. ]
  533. }
  534. }
  535. ]
  536. }
  537. --------------------------------------------------
  538. // NOTCONSOLE
  539. The following example defines an `on_failure` block on a whole pipeline to change
  540. the index to which failed documents get sent.
  541. [source,js]
  542. --------------------------------------------------
  543. {
  544. "description" : "my first pipeline with handled exceptions",
  545. "processors" : [ ... ],
  546. "on_failure" : [
  547. {
  548. "set" : {
  549. "field" : "_index",
  550. "value" : "failed-{{ _index }}"
  551. }
  552. }
  553. ]
  554. }
  555. --------------------------------------------------
  556. // NOTCONSOLE
  557. Alternatively, instead of defining behaviour in case of processor failure, it is also possible
  558. to ignore a failure and continue with the next processor by specifying the `ignore_failure` setting.
  559. In the example below, if the field `foo` doesn't exist, the failure is caught and the pipeline
  560. continues to execute, which in this case means that the pipeline does nothing.
  561. [source,js]
  562. --------------------------------------------------
  563. {
  564. "description" : "my first pipeline with handled exceptions",
  565. "processors" : [
  566. {
  567. "rename" : {
  568. "field" : "foo",
  569. "target_field" : "bar",
  570. "ignore_failure" : true
  571. }
  572. }
  573. ]
  574. }
  575. --------------------------------------------------
  576. // NOTCONSOLE
  577. The `ignore_failure` setting can be set on any processor and defaults to `false`.
  578. [float]
  579. [[accessing-error-metadata]]
  580. === Accessing Error Metadata From Processors Handling Exceptions
  581. You may want to retrieve the actual error message that was thrown
  582. by a failed processor. To do so you can access metadata fields called
  583. `on_failure_message`, `on_failure_processor_type`, and `on_failure_processor_tag`. These fields are only accessible
  584. from within the context of an `on_failure` block.
  585. Here is an updated version of the example that you
  586. saw earlier. But instead of setting the error message manually, the example leverages the `on_failure_message`
  587. metadata field to provide the error message.
  588. [source,js]
  589. --------------------------------------------------
  590. {
  591. "description" : "my first pipeline with handled exceptions",
  592. "processors" : [
  593. {
  594. "rename" : {
  595. "field" : "foo",
  596. "target_field" : "bar",
  597. "on_failure" : [
  598. {
  599. "set" : {
  600. "field" : "error",
  601. "value" : "{{ _ingest.on_failure_message }}"
  602. }
  603. }
  604. ]
  605. }
  606. }
  607. ]
  608. }
  609. --------------------------------------------------
  610. // NOTCONSOLE
  611. [[ingest-processors]]
  612. == Processors
  613. All processors are defined in the following way within a pipeline definition:
  614. [source,js]
  615. --------------------------------------------------
  616. {
  617. "PROCESSOR_NAME" : {
  618. ... processor configuration options ...
  619. }
  620. }
  621. --------------------------------------------------
  622. // NOTCONSOLE
  623. Each processor defines its own configuration parameters, but all processors have
  624. the ability to declare `tag` and `on_failure` fields. These fields are optional.
  625. A `tag` is simply a string identifier of the specific instantiation of a certain
  626. processor in a pipeline. The `tag` field does not affect the processor's behavior,
  627. but is very useful for bookkeeping and tracing errors to specific processors.
  628. See <<handling-failure-in-pipelines>> to learn more about the `on_failure` field and error handling in pipelines.
  629. The <<ingest-info,node info API>> can be used to figure out what processors are available in a cluster.
  630. The <<ingest-info,node info API>> will provide a per node list of what processors are available.
  631. Custom processors must be installed on all nodes. The put pipeline API will fail if a processor specified in a pipeline
  632. doesn't exist on all nodes. If you rely on custom processor plugins, make sure to mark these plugins as mandatory by adding
  633. the `plugin.mandatory` setting to the `config/elasticsearch.yml` file, for example:
  634. [source,yaml]
  635. --------------------------------------------------
  636. plugin.mandatory: ingest-attachment,ingest-geoip
  637. --------------------------------------------------
  638. A node will not start if either of these plugins is not available.
  639. The <<ingest-stats,node stats API>> can be used to fetch ingest usage statistics, globally and on a per
  640. pipeline basis. This is useful for finding out which pipelines are used the most or spend the most time on preprocessing.
  641. [[append-processor]]
  642. === Append Processor
  643. Appends one or more values to an existing array if the field already exists and it is an array.
  644. Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar.
  645. Creates an array containing the provided values if the field doesn't exist.
  646. Accepts a single value or an array of values.
  647. [[append-options]]
  648. .Append Options
  649. [options="header"]
  650. |======
  651. | Name | Required | Default | Description
  652. | `field` | yes | - | The field to be appended to
  653. | `value` | yes | - | The value to be appended
  654. |======
  655. [source,js]
  656. --------------------------------------------------
  657. {
  658. "append": {
  659. "field": "field1",
  660. "value": ["item2", "item3", "item4"]
  661. }
  662. }
  663. --------------------------------------------------
  664. // NOTCONSOLE
  665. [[convert-processor]]
  666. === Convert Processor
  667. Converts an existing field's value to a different type, such as converting a string to an integer.
  668. If the field value is an array, all members will be converted.
  669. The supported types include: `integer`, `float`, `string`, `boolean`, and `auto`.
  670. Specifying `boolean` will set the field to true if its string value is equal to `true` (ignore case), to
  671. false if its string value is equal to `false` (ignore case), or it will throw an exception otherwise.
  672. Specifying `auto` will attempt to convert the string-valued `field` into the closest non-string type.
  673. For example, a field whose value is `"true"` will be converted to its respective boolean type: `true`. And
  674. a value of `"242.15"` will "automatically" be converted to `242.15` of type `float`. If a provided field cannot
  675. be appropriately converted, the Convert Processor will still process successfully and leave the field value as-is. In
  676. such a case, `target_field` will still be updated with the unconverted field value.
  677. [[convert-options]]
  678. .Convert Options
  679. [options="header"]
  680. |======
  681. | Name | Required | Default | Description
  682. | `field` | yes | - | The field whose value is to be converted
  683. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  684. | `type` | yes | - | The type to convert the existing value to
  685. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  686. |======
  687. [source,js]
  688. --------------------------------------------------
  689. {
  690. "convert": {
  691. "field" : "foo",
  692. "type": "integer"
  693. }
  694. }
  695. --------------------------------------------------
  696. // NOTCONSOLE
  697. [[date-processor]]
  698. === Date Processor
  699. Parses dates from fields, and then uses the date or timestamp as the timestamp for the document.
  700. By default, the date processor adds the parsed date as a new field called `@timestamp`. You can specify a
  701. different field by setting the `target_field` configuration parameter. Multiple date formats are supported
  702. as part of the same date processor definition. They will be used sequentially to attempt parsing the date field,
  703. in the same order they were defined as part of the processor definition.
  704. [[date-options]]
  705. .Date options
  706. [options="header"]
  707. |======
  708. | Name | Required | Default | Description
  709. | `field` | yes | - | The field to get the date from.
  710. | `target_field` | no | @timestamp | The field that will hold the parsed date.
  711. | `formats` | yes | - | An array of the expected date formats. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
  712. | `timezone` | no | UTC | The timezone to use when parsing the date.
  713. | `locale` | no | ENGLISH | The locale to use when parsing the date, relevant when parsing month names or week days.
  714. |======
  715. Here is an example that adds the parsed date to the `timestamp` field based on the `initial_date` field:
  716. [source,js]
  717. --------------------------------------------------
  718. {
  719. "description" : "...",
  720. "processors" : [
  721. {
  722. "date" : {
  723. "field" : "initial_date",
  724. "target_field" : "timestamp",
  725. "formats" : ["dd/MM/yyyy hh:mm:ss"],
  726. "timezone" : "Europe/Amsterdam"
  727. }
  728. }
  729. ]
  730. }
  731. --------------------------------------------------
  732. // NOTCONSOLE
  733. [[date-index-name-processor]]
  734. === Date Index Name Processor
  735. The purpose of this processor is to point documents to the right time-based index based
  736. on a date or timestamp field in a document by using the <<date-math-index-names, date math index name support>>.
  737. The processor sets the `_index` meta field with a date math index name expression based on the provided index name
  738. prefix, a date or timestamp field in the documents being processed and the provided date rounding.
  739. First, this processor fetches the date or timestamp from a field in the document being processed. Optionally,
  740. date formatting can be configured on how the field's value should be parsed into a date. Then this date,
  741. the provided index name prefix and the provided date rounding get formatted into a date math index name expression.
  742. Optionally, date formatting can also be specified here to control how the date is formatted into the date math index name
  743. expression.
  744. An example pipeline that points documents to a monthly index that starts with a `myindex-` prefix based on a
  745. date in the `date1` field:
  746. [source,js]
  747. --------------------------------------------------
  748. PUT _ingest/pipeline/monthlyindex
  749. {
  750. "description": "monthly date-time index naming",
  751. "processors" : [
  752. {
  753. "date_index_name" : {
  754. "field" : "date1",
  755. "index_name_prefix" : "myindex-",
  756. "date_rounding" : "M"
  757. }
  758. }
  759. ]
  760. }
  761. --------------------------------------------------
  762. // CONSOLE
  763. Using that pipeline for an index request:
  764. [source,js]
  765. --------------------------------------------------
  766. PUT /myindex/type/1?pipeline=monthlyindex
  767. {
  768. "date1" : "2016-04-25T12:02:01.789Z"
  769. }
  770. --------------------------------------------------
  771. // CONSOLE
  772. // TEST[continued]
  773. [source,js]
  774. --------------------------------------------------
  775. {
  776. "_index" : "myindex-2016-04-01",
  777. "_type" : "type",
  778. "_id" : "1",
  779. "_version" : 1,
  780. "result" : "created",
  781. "_shards" : {
  782. "total" : 2,
  783. "successful" : 1,
  784. "failed" : 0
  785. },
  786. "created" : true,
  787. "_seq_no" : 0,
  788. "_primary_term" : 1
  789. }
  790. --------------------------------------------------
  791. // TESTRESPONSE
  792. The above request will not index this document into the `myindex` index, but into the `myindex-2016-04-01` index because
  793. it was rounded by month. This is because the date-index-name-processor overrides the `_index` property of the document.
  794. To see the date-math value of the index supplied in the actual index request which resulted in the above document being
  795. indexed into `myindex-2016-04-01` we can inspect the effects of the processor using a simulate request.
  796. [source,js]
  797. --------------------------------------------------
  798. POST _ingest/pipeline/_simulate
  799. {
  800. "pipeline" :
  801. {
  802. "description": "monthly date-time index naming",
  803. "processors" : [
  804. {
  805. "date_index_name" : {
  806. "field" : "date1",
  807. "index_name_prefix" : "myindex-",
  808. "date_rounding" : "M"
  809. }
  810. }
  811. ]
  812. },
  813. "docs": [
  814. {
  815. "_source": {
  816. "date1": "2016-04-25T12:02:01.789Z"
  817. }
  818. }
  819. ]
  820. }
  821. --------------------------------------------------
  822. // CONSOLE
  823. and the result:
  824. [source,js]
  825. --------------------------------------------------
  826. {
  827. "docs" : [
  828. {
  829. "doc" : {
  830. "_id" : "_id",
  831. "_index" : "<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>",
  832. "_type" : "_type",
  833. "_source" : {
  834. "date1" : "2016-04-25T12:02:01.789Z"
  835. },
  836. "_ingest" : {
  837. "timestamp" : "2016-11-08T19:43:03.850+0000"
  838. }
  839. }
  840. }
  841. ]
  842. }
  843. --------------------------------------------------
  844. // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
  845. The above example shows that `_index` was set to `<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>`. Elasticsearch
  846. understands this to mean `2016-04-01` as is explained in the <<date-math-index-names, date math index name documentation>>.
  847. [[date-index-name-options]]
  848. .Date index name options
  849. [options="header"]
  850. |======
  851. | Name | Required | Default | Description
  852. | `field` | yes | - | The field to get the date or timestamp from.
  853. | `index_name_prefix` | no | - | A prefix of the index name to be prepended before the printed date.
  854. | `date_rounding` | yes | - | How to round the date when formatting the date into the index name. Valid values are: `y` (year), `M` (month), `w` (week), `d` (day), `h` (hour), `m` (minute) and `s` (second).
  855. | `date_formats` | no | yyyy-MM-dd'T'HH:mm:ss.SSSZ | An array of the expected date formats for parsing dates / timestamps in the document being preprocessed. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
  856. | `timezone` | no | UTC | The timezone to use when parsing the date and when date math index support resolves expressions into concrete index names.
  857. | `locale` | no | ENGLISH | The locale to use when parsing the date from the document being preprocessed, relevant when parsing month names or week days.
  858. | `index_name_format` | no | yyyy-MM-dd | The format to be used when printing the parsed date into the index name. A valid Joda pattern is expected here.
  859. |======
  860. [[fail-processor]]
  861. === Fail Processor
  862. Raises an exception. This is useful for when
  863. you expect a pipeline to fail and want to relay a specific message
  864. to the requester.
  865. [[fail-options]]
  866. .Fail Options
  867. [options="header"]
  868. |======
  869. | Name | Required | Default | Description
  870. | `message` | yes | - | The error message of the `FailException` thrown by the processor
  871. |======
  872. [source,js]
  873. --------------------------------------------------
  874. {
  875. "fail": {
  876. "message": "an error message"
  877. }
  878. }
  879. --------------------------------------------------
  880. // NOTCONSOLE
  881. [[foreach-processor]]
  882. === Foreach Processor
  883. experimental[This processor may change or be replaced by something else that provides similar functionality. This
  884. processor executes in its own context, which makes it different compared to all other processors and for features like
  885. verbose simulation the subprocessor isn't visible. The reason we still expose this processor, is that it is the only
  886. processor that can operate on an array]
  887. Processes elements in an array of unknown length.
  888. All processors can operate on elements inside an array, but if all elements of an array need to
  889. be processed in the same way, defining a processor for each element becomes cumbersome and tricky
  890. because it is likely that the number of elements in an array is unknown. For this reason the `foreach`
  891. processor exists. By specifying the field holding array elements and a processor that
  892. defines what should happen to each element, array fields can easily be preprocessed.
  893. A processor inside the foreach processor works in the array element context and puts that in the ingest metadata
  894. under the `_ingest._value` key. If the array element is a JSON object, `_ingest._value` holds all immediate fields of that JSON object,
  895. and if the array element is a plain value, `_ingest._value` just holds that value. Note that if a processor prior to the
  896. `foreach` processor used `_ingest._value` key then the specified value will not be available to the processor inside
  897. the `foreach` processor. The `foreach` processor does restore the original value, so that value is available to processors
  898. after the `foreach` processor.
  899. Note that any other fields from the document are accessible and modifiable like with all other processors. This processor
  900. just puts the current array element being read into `_ingest._value` ingest metadata attribute, so that it may be
  901. pre-processed.
  902. If the `foreach` processor fails to process an element inside the array, and no `on_failure` processor has been specified,
  903. then it aborts the execution and leaves the array unmodified.
  904. [[foreach-options]]
  905. .Foreach Options
  906. [options="header"]
  907. |======
  908. | Name | Required | Default | Description
  909. | `field` | yes | - | The array field
  910. | `processor` | yes | - | The processor to execute against each field
  911. |======
  912. Assume the following document:
  913. [source,js]
  914. --------------------------------------------------
  915. {
  916. "values" : ["foo", "bar", "baz"]
  917. }
  918. --------------------------------------------------
  919. // NOTCONSOLE
  920. When this `foreach` processor operates on this sample document:
  921. [source,js]
  922. --------------------------------------------------
  923. {
  924. "foreach" : {
  925. "field" : "values",
  926. "processor" : {
  927. "uppercase" : {
  928. "field" : "_ingest._value"
  929. }
  930. }
  931. }
  932. }
  933. --------------------------------------------------
  934. // NOTCONSOLE
  935. Then the document will look like this after preprocessing:
  936. [source,js]
  937. --------------------------------------------------
  938. {
  939. "values" : ["FOO", "BAR", "BAZ"]
  940. }
  941. --------------------------------------------------
  942. // NOTCONSOLE
  943. Let's take a look at another example:
  944. [source,js]
  945. --------------------------------------------------
  946. {
  947. "persons" : [
  948. {
  949. "id" : "1",
  950. "name" : "John Doe"
  951. },
  952. {
  953. "id" : "2",
  954. "name" : "Jane Doe"
  955. }
  956. ]
  957. }
  958. --------------------------------------------------
  959. // NOTCONSOLE
  960. In this case, the `id` field needs to be removed,
  961. so the following `foreach` processor is used:
  962. [source,js]
  963. --------------------------------------------------
  964. {
  965. "foreach" : {
  966. "field" : "persons",
  967. "processor" : {
  968. "remove" : {
  969. "field" : "_ingest._value.id"
  970. }
  971. }
  972. }
  973. }
  974. --------------------------------------------------
  975. // NOTCONSOLE
  976. After preprocessing the result is:
  977. [source,js]
  978. --------------------------------------------------
  979. {
  980. "persons" : [
  981. {
  982. "name" : "John Doe"
  983. },
  984. {
  985. "name" : "Jane Doe"
  986. }
  987. ]
  988. }
  989. --------------------------------------------------
  990. // NOTCONSOLE
  991. The wrapped processor can have an `on_failure` definition.
  992. For example, the `id` field may not exist on all person objects.
  993. Instead of failing the index request, you can use an `on_failure`
  994. block to send the document to the 'failure_index' index for later inspection:
  995. [source,js]
  996. --------------------------------------------------
  997. {
  998. "foreach" : {
  999. "field" : "persons",
  1000. "processor" : {
  1001. "remove" : {
  1002. "field" : "_ingest._value.id",
  1003. "on_failure" : [
  1004. {
  1005. "set" : {
  1006. "field": "_index",
  1007. "value": "failure_index"
  1008. }
  1009. }
  1010. ]
  1011. }
  1012. }
  1013. }
  1014. }
  1015. --------------------------------------------------
  1016. // NOTCONSOLE
  1017. In this example, if the `remove` processor does fail, then
  1018. the array elements that have been processed thus far will
  1019. be updated.
  1020. Another advanced example can be found in the {plugins}/ingest-attachment-with-arrays.html[attachment processor documentation].
  1021. [[grok-processor]]
  1022. === Grok Processor
  1023. Extracts structured fields out of a single text field within a document. You choose which field to
  1024. extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular
  1025. expression that supports aliased expressions that can be reused.
  1026. This tool is perfect for syslog logs, apache and other webserver logs, mysql logs, and in general, any log format
  1027. that is generally written for humans and not computer consumption.
  1028. This processor comes packaged with over
  1029. https://github.com/elastic/elasticsearch/tree/master/modules/ingest-common/src/main/resources/patterns[120 reusable patterns].
  1030. If you need help building patterns to match your logs, you will find the <http://grokdebug.herokuapp.com> and
  1031. <http://grokconstructor.appspot.com/> applications quite useful!
  1032. [[grok-basics]]
  1033. ==== Grok Basics
  1034. Grok sits on top of regular expressions, so any regular expressions are valid in grok as well.
  1035. The regular expression library is Oniguruma, and you can see the full supported regexp syntax
  1036. https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma site].
  1037. Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more
  1038. complex patterns that match your fields.
  1039. The syntax for reusing a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`.
  1040. The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER`
  1041. pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both
  1042. patterns that are provided within the default patterns set.
  1043. The `SEMANTIC` is the identifier you give to the piece of text being matched. For example, `3.44` could be the
  1044. duration of an event, so you could call it simply `duration`. Further, a string `55.3.244.1` might identify
  1045. the `client` making a request.
  1046. The `TYPE` is the type you wish to cast your named field. `int` and `float` are currently the only types supported for coercion.
  1047. For example, you might want to match the following text:
  1048. [source,txt]
  1049. --------------------------------------------------
  1050. 3.44 55.3.244.1
  1051. --------------------------------------------------
  1052. You may know that the message in the example is a number followed by an IP address. You can match this text by using the following
  1053. Grok expression.
  1054. [source,txt]
  1055. --------------------------------------------------
  1056. %{NUMBER:duration} %{IP:client}
  1057. --------------------------------------------------
  1058. [[using-grok]]
  1059. ==== Using the Grok Processor in a Pipeline
  1060. [[grok-options]]
  1061. .Grok Options
  1062. [options="header"]
  1063. |======
  1064. | Name | Required | Default | Description
  1065. | `field` | yes | - | The field to use for grok expression parsing
  1066. | `patterns` | yes | - | An ordered list of grok expressions to match and extract named captures with. Returns on the first expression in the list that matches.
  1067. | `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
  1068. | `trace_match` | no | false | When true, `_ingest._grok_match_index` will be inserted into your matched document's metadata with the index into the pattern found in `patterns` that matched.
  1069. | `ignore_missing` | no | false | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1070. |======
  1071. Here is an example of using the provided patterns to extract out and name structured fields from a string field in
  1072. a document.
  1073. [source,js]
  1074. --------------------------------------------------
  1075. {
  1076. "message": "55.3.244.1 GET /index.html 15824 0.043"
  1077. }
  1078. --------------------------------------------------
  1079. // NOTCONSOLE
  1080. The pattern for this could be:
  1081. [source,txt]
  1082. --------------------------------------------------
  1083. %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
  1084. --------------------------------------------------
  1085. Here is an example pipeline for processing the above document by using Grok:
  1086. [source,js]
  1087. --------------------------------------------------
  1088. {
  1089. "description" : "...",
  1090. "processors": [
  1091. {
  1092. "grok": {
  1093. "field": "message",
  1094. "patterns": ["%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"]
  1095. }
  1096. }
  1097. ]
  1098. }
  1099. --------------------------------------------------
  1100. // NOTCONSOLE
  1101. This pipeline will insert these named captures as new fields within the document, like so:
  1102. [source,js]
  1103. --------------------------------------------------
  1104. {
  1105. "message": "55.3.244.1 GET /index.html 15824 0.043",
  1106. "client": "55.3.244.1",
  1107. "method": "GET",
  1108. "request": "/index.html",
  1109. "bytes": "15824",
  1110. "duration": "0.043"
  1111. }
  1112. --------------------------------------------------
  1113. // NOTCONSOLE
  1114. [[custom-patterns]]
  1115. ==== Custom Patterns and Pattern Files
  1116. The Grok processor comes pre-packaged with a base set of patterns. These patterns may not always have
  1117. what you are looking for. Patterns have a very basic format. Each entry has a name and the pattern itself.
  1118. You can add your own patterns to a processor definition under the `pattern_definitions` option.
  1119. Here is an example of a pipeline specifying custom pattern definitions:
  1120. [source,js]
  1121. --------------------------------------------------
  1122. {
  1123. "description" : "...",
  1124. "processors": [
  1125. {
  1126. "grok": {
  1127. "field": "message",
  1128. "patterns": ["my %{FAVORITE_DOG:dog} is colored %{RGB:color}"],
  1129. "pattern_definitions" : {
  1130. "FAVORITE_DOG" : "beagle",
  1131. "RGB" : "RED|GREEN|BLUE"
  1132. }
  1133. }
  1134. }
  1135. ]
  1136. }
  1137. --------------------------------------------------
  1138. // NOTCONSOLE
  1139. [[trace-match]]
  1140. ==== Providing Multiple Match Patterns
  1141. Sometimes one pattern is not enough to capture the potential structure of a field. Let's assume we
  1142. want to match all messages that contain your favorite pet breeds of either cats or dogs. One way to accomplish
  1143. this is to provide two distinct patterns that can be matched, instead of one really complicated expression capturing
  1144. the same `or` behavior.
  1145. Here is an example of such a configuration executed against the simulate API:
  1146. [source,js]
  1147. --------------------------------------------------
  1148. POST _ingest/pipeline/_simulate
  1149. {
  1150. "pipeline": {
  1151. "description" : "parse multiple patterns",
  1152. "processors": [
  1153. {
  1154. "grok": {
  1155. "field": "message",
  1156. "patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
  1157. "pattern_definitions" : {
  1158. "FAVORITE_DOG" : "beagle",
  1159. "FAVORITE_CAT" : "burmese"
  1160. }
  1161. }
  1162. }
  1163. ]
  1164. },
  1165. "docs":[
  1166. {
  1167. "_source": {
  1168. "message": "I love burmese cats!"
  1169. }
  1170. }
  1171. ]
  1172. }
  1173. --------------------------------------------------
  1174. // CONSOLE
  1175. response:
  1176. [source,js]
  1177. --------------------------------------------------
  1178. {
  1179. "docs": [
  1180. {
  1181. "doc": {
  1182. "_type": "_type",
  1183. "_index": "_index",
  1184. "_id": "_id",
  1185. "_source": {
  1186. "message": "I love burmese cats!",
  1187. "pet": "burmese"
  1188. },
  1189. "_ingest": {
  1190. "timestamp": "2016-11-08T19:43:03.850+0000"
  1191. }
  1192. }
  1193. }
  1194. ]
  1195. }
  1196. --------------------------------------------------
  1197. // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
  1198. Both patterns will set the field `pet` with the appropriate match, but what if we want to trace which of our
  1199. patterns matched and populated our fields? We can do this with the `trace_match` parameter. Here is the output of
  1200. that same pipeline, but with `"trace_match": true` configured:
  1201. ////
  1202. Hidden setup for example:
  1203. [source,js]
  1204. --------------------------------------------------
  1205. POST _ingest/pipeline/_simulate
  1206. {
  1207. "pipeline": {
  1208. "description" : "parse multiple patterns",
  1209. "processors": [
  1210. {
  1211. "grok": {
  1212. "field": "message",
  1213. "patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
  1214. "trace_match": true,
  1215. "pattern_definitions" : {
  1216. "FAVORITE_DOG" : "beagle",
  1217. "FAVORITE_CAT" : "burmese"
  1218. }
  1219. }
  1220. }
  1221. ]
  1222. },
  1223. "docs":[
  1224. {
  1225. "_source": {
  1226. "message": "I love burmese cats!"
  1227. }
  1228. }
  1229. ]
  1230. }
  1231. --------------------------------------------------
  1232. // CONSOLE
  1233. ////
  1234. [source,js]
  1235. --------------------------------------------------
  1236. {
  1237. "docs": [
  1238. {
  1239. "doc": {
  1240. "_type": "_type",
  1241. "_index": "_index",
  1242. "_id": "_id",
  1243. "_source": {
  1244. "message": "I love burmese cats!",
  1245. "pet": "burmese"
  1246. },
  1247. "_ingest": {
  1248. "_grok_match_index": "1",
  1249. "timestamp": "2016-11-08T19:43:03.850+0000"
  1250. }
  1251. }
  1252. }
  1253. ]
  1254. }
  1255. --------------------------------------------------
  1256. // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
  1257. In the above response, you can see that the index of the pattern that matched was `"1"`. This is to say that it was the
  1258. second (index starts at zero) pattern in `patterns` to match.
  1259. This trace metadata enables debugging which of the patterns matched. This information is stored in the ingest
  1260. metadata and will not be indexed.
  1261. [[gsub-processor]]
  1262. === Gsub Processor
  1263. Converts a string field by applying a regular expression and a replacement.
  1264. If the field is not a string, the processor will throw an exception.
  1265. [[gsub-options]]
  1266. .Gsub Options
  1267. [options="header"]
  1268. |======
  1269. | Name | Required | Default | Description
  1270. | `field` | yes | - | The field to apply the replacement to
  1271. | `pattern` | yes | - | The pattern to be replaced
  1272. | `replacement` | yes | - | The string to replace the matching patterns with
  1273. |======
  1274. [source,js]
  1275. --------------------------------------------------
  1276. {
  1277. "gsub": {
  1278. "field": "field1",
  1279. "pattern": "\\.",
  1280. "replacement": "-"
  1281. }
  1282. }
  1283. --------------------------------------------------
  1284. // NOTCONSOLE
  1285. [[join-processor]]
  1286. === Join Processor
  1287. Joins each element of an array into a single string using a separator character between each element.
  1288. Throws an error when the field is not an array.
  1289. [[join-options]]
  1290. .Join Options
  1291. [options="header"]
  1292. |======
  1293. | Name | Required | Default | Description
  1294. | `field` | yes | - | The array field whose elements are to be joined
  1295. | `separator` | yes | - | The separator character
  1296. |======
  1297. [source,js]
  1298. --------------------------------------------------
  1299. {
  1300. "join": {
  1301. "field": "joined_array_field",
  1302. "separator": "-"
  1303. }
  1304. }
  1305. --------------------------------------------------
  1306. // NOTCONSOLE
  1307. [[json-processor]]
  1308. === JSON Processor
  1309. Converts a JSON string into a structured JSON object.
  1310. [[json-options]]
  1311. .Json Options
  1312. [options="header"]
  1313. |======
  1314. | Name | Required | Default | Description
  1315. | `field` | yes | - | The field to be parsed
  1316. | `target_field` | no | `field` | The field to insert the converted structured object into
  1317. | `add_to_root` | no | false | Flag that forces the serialized json to be injected into the top level of the document. `target_field` must not be set when this option is chosen.
  1318. |======
  1319. Suppose you provide this configuration of the `json` processor:
  1320. [source,js]
  1321. --------------------------------------------------
  1322. {
  1323. "json" : {
  1324. "field" : "string_source",
  1325. "target_field" : "json_target"
  1326. }
  1327. }
  1328. --------------------------------------------------
  1329. // NOTCONSOLE
  1330. If the following document is processed:
  1331. [source,js]
  1332. --------------------------------------------------
  1333. {
  1334. "string_source": "{\"foo\": 2000}"
  1335. }
  1336. --------------------------------------------------
  1337. // NOTCONSOLE
  1338. after the `json` processor operates on it, it will look like:
  1339. [source,js]
  1340. --------------------------------------------------
  1341. {
  1342. "string_source": "{\"foo\": 2000}",
  1343. "json_target": {
  1344. "foo": 2000
  1345. }
  1346. }
  1347. --------------------------------------------------
  1348. // NOTCONSOLE
  1349. If the following configuration is provided, omitting the optional `target_field` setting:
  1350. [source,js]
  1351. --------------------------------------------------
  1352. {
  1353. "json" : {
  1354. "field" : "source_and_target"
  1355. }
  1356. }
  1357. --------------------------------------------------
  1358. // NOTCONSOLE
  1359. then after the `json` processor operates on this document:
  1360. [source,js]
  1361. --------------------------------------------------
  1362. {
  1363. "source_and_target": "{\"foo\": 2000}"
  1364. }
  1365. --------------------------------------------------
  1366. // NOTCONSOLE
  1367. it will look like:
  1368. [source,js]
  1369. --------------------------------------------------
  1370. {
  1371. "source_and_target": {
  1372. "foo": 2000
  1373. }
  1374. }
  1375. --------------------------------------------------
  1376. // NOTCONSOLE
  1377. This illustrates that, unless it is explicitly named in the processor configuration, the `target_field`
  1378. is the same field provided in the required `field` configuration.
  1379. [[kv-processor]]
  1380. === KV Processor
  1381. This processor helps automatically parse messages (or specific event fields) which are of the foo=bar variety.
  1382. For example, if you have a log message which contains `ip=1.2.3.4 error=REFUSED`, you can parse those automatically by configuring:
  1383. [source,js]
  1384. --------------------------------------------------
  1385. {
  1386. "kv": {
  1387. "field": "message",
  1388. "field_split": " ",
  1389. "value_split": "="
  1390. }
  1391. }
  1392. --------------------------------------------------
  1393. // NOTCONSOLE
  1394. [[kv-options]]
  1395. .Kv Options
  1396. [options="header"]
  1397. |======
  1398. | Name | Required | Default | Description
  1399. | `field` | yes | - | The field to be parsed
  1400. | `field_split` | yes | - | Regex pattern to use for splitting key-value pairs
  1401. | `value_split` | yes | - | Regex pattern to use for splitting the key from the value within a key-value pair
  1402. | `target_field` | no | `null` | The field to insert the extracted keys into. Defaults to the root of the document
  1403. | `include_keys` | no | `null` | List of keys to filter and insert into document. Defaults to including all keys
  1404. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1405. |======
  1406. [[lowercase-processor]]
  1407. === Lowercase Processor
  1408. Converts a string to its lowercase equivalent.
  1409. [[lowercase-options]]
  1410. .Lowercase Options
  1411. [options="header"]
  1412. |======
  1413. | Name | Required | Default | Description
  1414. | `field` | yes | - | The field to make lowercase
  1415. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1416. |======
  1417. [source,js]
  1418. --------------------------------------------------
  1419. {
  1420. "lowercase": {
  1421. "field": "foo"
  1422. }
  1423. }
  1424. --------------------------------------------------
  1425. // NOTCONSOLE
  1426. [[remove-processor]]
  1427. === Remove Processor
  1428. Removes an existing field. If the field doesn't exist, an exception will be thrown.
  1429. [[remove-options]]
  1430. .Remove Options
  1431. [options="header"]
  1432. |======
  1433. | Name | Required | Default | Description
  1434. | `field` | yes | - | The field to be removed
  1435. |======
  1436. [source,js]
  1437. --------------------------------------------------
  1438. {
  1439. "remove": {
  1440. "field": "foo"
  1441. }
  1442. }
  1443. --------------------------------------------------
  1444. // NOTCONSOLE
  1445. [[rename-processor]]
  1446. === Rename Processor
  1447. Renames an existing field. If the field doesn't exist or the new name is already used, an exception will be thrown.
  1448. [[rename-options]]
  1449. .Rename Options
  1450. [options="header"]
  1451. |======
  1452. | Name | Required | Default | Description
  1453. | `field` | yes | - | The field to be renamed
  1454. | `target_field` | yes | - | The new name of the field
  1455. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
  1456. |======
  1457. [source,js]
  1458. --------------------------------------------------
  1459. {
  1460. "rename": {
  1461. "field": "foo",
  1462. "target_field": "foobar"
  1463. }
  1464. }
  1465. --------------------------------------------------
  1466. // NOTCONSOLE
  1467. [[script-processor]]
  1468. === Script Processor
  1469. Allows inline, stored, and file scripts to be executed within ingest pipelines.
  1470. See <<modules-scripting-using, How to use scripts>> to learn more about writing scripts. The Script Processor
  1471. leverages caching of compiled scripts for improved performance. Since the
  1472. script specified within the processor is potentially re-compiled per document, it is important
  1473. to understand how script caching works. To learn more about
  1474. caching see <<modules-scripting-using-caching, Script Caching>>.
  1475. [[script-options]]
  1476. .Script Options
  1477. [options="header"]
  1478. |======
  1479. | Name | Required | Default | Description
  1480. | `lang` | no | "painless" | The scripting language
  1481. | `file` | no | - | The script file to refer to
  1482. | `id` | no | - | The stored script id to refer to
  1483. | `inline` | no | - | An inline script to be executed
  1484. | `params` | no | - | Script Parameters
  1485. |======
  1486. One of `file`, `id`, `inline` options must be provided in order to properly reference a script to execute.
  1487. You can access the current ingest document from within the script context by using the `ctx` variable.
  1488. The following example sets a new field called `field_a_plus_b_times_c` to be the sum of two existing
  1489. numeric fields `field_a` and `field_b` multiplied by the parameter `param_c`:
  1490. [source,js]
  1491. --------------------------------------------------
  1492. {
  1493. "script": {
  1494. "lang": "painless",
  1495. "inline": "ctx.field_a_plus_b_times_c = (ctx.field_a + ctx.field_b) * params.param_c",
  1496. "params": {
  1497. "param_c": 10
  1498. }
  1499. }
  1500. }
  1501. --------------------------------------------------
  1502. // NOTCONSOLE
  1503. [[set-processor]]
  1504. === Set Processor
  1505. Sets one field and associates it with the specified value. If the field already exists,
  1506. its value will be replaced with the provided one.
  1507. [[set-options]]
  1508. .Set Options
  1509. [options="header"]
  1510. |======
  1511. | Name | Required | Default | Description
  1512. | `field` | yes | - | The field to insert, upsert, or update
  1513. | `value` | yes | - | The value to be set for the field
  1514. | `override`| no | `true` | If `true`, the processor will update fields that already have a non-null value. When set to `false`, such fields will not be touched.
  1515. |======
  1516. [source,js]
  1517. --------------------------------------------------
  1518. {
  1519. "set": {
  1520. "field": "field1",
  1521. "value": 582.1
  1522. }
  1523. }
  1524. --------------------------------------------------
  1525. // NOTCONSOLE
  1526. [[split-processor]]
  1527. === Split Processor
  1528. Splits a field into an array using a separator character. Only works on string fields.
  1529. [[split-options]]
  1530. .Split Options
  1531. [options="header"]
  1532. |======
  1533. | Name | Required | Default | Description
  1534. | `field` | yes | - | The field to split
  1535. | `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
  1536. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
  1537. |======
  1538. [source,js]
  1539. --------------------------------------------------
  1540. {
  1541. "split": {
  1542. "field": "my_field",
  1543. "separator": "\\s+" <1>
  1544. }
  1545. }
  1546. --------------------------------------------------
  1547. // NOTCONSOLE
  1548. <1> Treat all consecutive whitespace characters as a single separator
  1549. [[sort-processor]]
  1550. === Sort Processor
  1551. Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted
  1552. numerically, while arrays of strings or heterogeneous arrays of strings + numbers will be sorted lexicographically.
  1553. Throws an error when the field is not an array.
  1554. [[sort-options]]
  1555. .Sort Options
  1556. [options="header"]
  1557. |======
  1558. | Name | Required | Default | Description
  1559. | `field` | yes | - | The field to be sorted
  1560. | `order` | no | `"asc"` | The sort order to use. Accepts `"asc"` or `"desc"`.
  1561. |======
  1562. [source,js]
  1563. --------------------------------------------------
  1564. {
  1565. "sort": {
  1566. "field": "field_to_sort",
  1567. "order": "desc"
  1568. }
  1569. }
  1570. --------------------------------------------------
  1571. // NOTCONSOLE
  1572. [[trim-processor]]
  1573. === Trim Processor
  1574. Trims whitespace from a field.
  1575. NOTE: This only works on leading and trailing whitespace.
  1576. [[trim-options]]
  1577. .Trim Options
  1578. [options="header"]
  1579. |======
  1580. | Name | Required | Default | Description
  1581. | `field` | yes | - | The string-valued field to trim whitespace from
  1582. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
  1583. |======
  1584. [source,js]
  1585. --------------------------------------------------
  1586. {
  1587. "trim": {
  1588. "field": "foo"
  1589. }
  1590. }
  1591. --------------------------------------------------
  1592. // NOTCONSOLE
  1593. [[uppercase-processor]]
  1594. === Uppercase Processor
  1595. Converts a string to its uppercase equivalent.
  1596. [[uppercase-options]]
  1597. .Uppercase Options
  1598. [options="header"]
  1599. |======
  1600. | Name | Required | Default | Description
  1601. | `field` | yes | - | The field to make uppercase
  1602. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1603. |======
  1604. [source,js]
  1605. --------------------------------------------------
  1606. {
  1607. "uppercase": {
  1608. "field": "foo"
  1609. }
  1610. }
  1611. --------------------------------------------------
  1612. // NOTCONSOLE
  1613. [[dot-expand-processor]]
  1614. === Dot Expander Processor
  1615. Expands a field with dots into an object field. This processor allows fields
  1616. with dots in the name to be accessible by other processors in the pipeline.
  1617. Otherwise these <<accessing-data-in-pipelines,fields>> can't be accessed by any processor.
  1618. [[dot-expender-options]]
  1619. .Dot Expand Options
  1620. [options="header"]
  1621. |======
  1622. | Name | Required | Default | Description
  1623. | `field` | yes | - | The field to expand into an object field
  1624. | `path` | no | - | The field that contains the field to expand. Only required if the field to expand is part of another object field, because the `field` option can only understand leaf fields.
  1625. |======
  1626. [source,js]
  1627. --------------------------------------------------
  1628. {
  1629. "dot_expander": {
  1630. "field": "foo.bar"
  1631. }
  1632. }
  1633. --------------------------------------------------
  1634. // NOTCONSOLE
  1635. For example the dot expand processor would turn this document:
  1636. [source,js]
  1637. --------------------------------------------------
  1638. {
  1639. "foo.bar" : "value"
  1640. }
  1641. --------------------------------------------------
  1642. // NOTCONSOLE
  1643. into:
  1644. [source,js]
  1645. --------------------------------------------------
  1646. {
  1647. "foo" : {
  1648. "bar" : "value"
  1649. }
  1650. }
  1651. --------------------------------------------------
  1652. // NOTCONSOLE
  1653. If there is already a `bar` field nested under `foo` then
  1654. this processor merges the `foo.bar` field into it. If the field is
  1655. a scalar value then it will turn that field into an array field.
  1656. For example, the following document:
  1657. [source,js]
  1658. --------------------------------------------------
  1659. {
  1660. "foo.bar" : "value2",
  1661. "foo" : {
  1662. "bar" : "value1"
  1663. }
  1664. }
  1665. --------------------------------------------------
  1666. // NOTCONSOLE
  1667. is transformed by the `dot_expander` processor into:
  1668. [source,js]
  1669. --------------------------------------------------
  1670. {
  1671. "foo" : {
  1672. "bar" : ["value1", "value2"]
  1673. }
  1674. }
  1675. --------------------------------------------------
  1676. // NOTCONSOLE
  1677. If any field outside of the leaf field conflicts with a pre-existing field of the same name,
  1678. then that field needs to be renamed first.
  1679. Consider the following document:
  1680. [source,js]
  1681. --------------------------------------------------
  1682. {
  1683. "foo": "value1",
  1684. "foo.bar": "value2"
  1685. }
  1686. --------------------------------------------------
  1687. // NOTCONSOLE
  1688. Then the `foo` field needs to be renamed first before the `dot_expander`
  1689. processor is applied. So in order for the `foo.bar` field to properly
  1690. be expanded into the `bar` field under the `foo` field the following
  1691. pipeline should be used:
  1692. [source,js]
  1693. --------------------------------------------------
  1694. {
  1695. "processors" : [
  1696. {
  1697. "rename" : {
  1698. "field" : "foo",
  1699. "target_field" : "foo.bar"
  1700. }
  1701. },
  1702. {
  1703. "dot_expander": {
  1704. "field": "foo.bar"
  1705. }
  1706. }
  1707. ]
  1708. }
  1709. --------------------------------------------------
  1710. // NOTCONSOLE
  1711. The reason for this is that Ingest doesn't know how to automatically cast
  1712. a scalar field to an object field.