ingest-node.asciidoc
  1. [[pipeline]]
  2. == Pipeline Definition
  3. A pipeline is a definition of a series of <<ingest-processors, processors>> that are to be executed
  4. in the same order as they are declared. A pipeline consists of two main fields: a `description`
  5. and a list of `processors`:
  6. [source,js]
  7. --------------------------------------------------
  8. {
  9. "description" : "...",
  10. "processors" : [ ... ]
  11. }
  12. --------------------------------------------------
  13. // NOTCONSOLE
  14. The `description` is a special field to store a helpful description of
  15. what the pipeline does.
  16. The `processors` parameter defines a list of processors to be executed in
  17. order.
  18. [[ingest-apis]]
  19. == Ingest APIs
  20. The following ingest APIs are available for managing pipelines:
  21. * <<put-pipeline-api>> to add or update a pipeline
  22. * <<get-pipeline-api>> to return a specific pipeline
  23. * <<delete-pipeline-api>> to delete a pipeline
  24. * <<simulate-pipeline-api>> to simulate a call to a pipeline
  25. [[put-pipeline-api]]
  26. === Put Pipeline API
  27. The put pipeline API adds pipelines and updates existing pipelines in the cluster.
  28. [source,js]
  29. --------------------------------------------------
  30. PUT _ingest/pipeline/my-pipeline-id
  31. {
  32. "description" : "describe pipeline",
  33. "processors" : [
  34. {
  35. "set" : {
  36. "field": "foo",
  37. "value": "bar"
  38. }
  39. }
  40. ]
  41. }
  42. --------------------------------------------------
  43. // CONSOLE
  44. NOTE: The put pipeline API also instructs all ingest nodes to reload their in-memory representation of pipelines, so that
  45. pipeline changes take effect immediately.
  46. [[get-pipeline-api]]
  47. === Get Pipeline API
  48. The get pipeline API returns pipelines based on ID. This API always returns a local reference of the pipeline.
  49. [source,js]
  50. --------------------------------------------------
  51. GET _ingest/pipeline/my-pipeline-id
  52. --------------------------------------------------
  53. // CONSOLE
  54. // TEST[continued]
  55. Example response:
  56. [source,js]
  57. --------------------------------------------------
  58. {
  59. "my-pipeline-id" : {
  60. "description" : "describe pipeline",
  61. "processors" : [
  62. {
  63. "set" : {
  64. "field" : "foo",
  65. "value" : "bar"
  66. }
  67. }
  68. ]
  69. }
  70. }
  71. --------------------------------------------------
  72. // TESTRESPONSE
  73. For each returned pipeline, the source and the version are returned.
  74. The version is useful for knowing which version of the pipeline the node has.
  75. You can specify multiple IDs to return more than one pipeline. Wildcards are also supported.
  76. [float]
  77. [[versioning-pipelines]]
  78. ==== Pipeline Versioning
  79. Pipelines can optionally add a `version` number, which can be any integer value,
  80. in order to simplify pipeline management by external systems. The `version`
  81. field is completely optional and it is meant solely for external management of
  82. pipelines. To unset a `version`, simply replace the pipeline without specifying
  83. one.
  84. [source,js]
  85. --------------------------------------------------
  86. PUT _ingest/pipeline/my-pipeline-id
  87. {
  88. "description" : "describe pipeline",
  89. "version" : 123,
  90. "processors" : [
  91. {
  92. "set" : {
  93. "field": "foo",
  94. "value": "bar"
  95. }
  96. }
  97. ]
  98. }
  99. --------------------------------------------------
  100. // CONSOLE
  101. To check for the `version`, you can
  102. <<common-options-response-filtering, filter responses>>
  103. using `filter_path` to limit the response to just the `version`:
  104. [source,js]
  105. --------------------------------------------------
  106. GET /_ingest/pipeline/my-pipeline-id?filter_path=*.version
  107. --------------------------------------------------
  108. // CONSOLE
  109. // TEST[continued]
  110. This should give a small response that makes it both easy and inexpensive to parse:
  111. [source,js]
  112. --------------------------------------------------
  113. {
  114. "my-pipeline-id" : {
  115. "version" : 123
  116. }
  117. }
  118. --------------------------------------------------
  119. // TESTRESPONSE
  120. [[delete-pipeline-api]]
  121. === Delete Pipeline API
  122. The delete pipeline API deletes pipelines by ID or wildcard match (`my-*`, `*`).
  123. [source,js]
  124. --------------------------------------------------
  125. DELETE _ingest/pipeline/my-pipeline-id
  126. --------------------------------------------------
  127. // CONSOLE
  128. // TEST[continued]
  129. ////
  130. Hidden setup for wildcard test:
  131. [source,js]
  132. --------------------------------------------------
  133. PUT _ingest/pipeline/wild-one
  134. {
  135. "description" : "first pipeline to be wildcard deleted",
  136. "processors" : [ ]
  137. }
  138. PUT _ingest/pipeline/wild-two
  139. {
  140. "description" : "second pipeline to be wildcard deleted",
  141. "processors" : [ ]
  142. }
  143. DELETE _ingest/pipeline/*
  144. --------------------------------------------------
  145. // CONSOLE
  146. Hidden expected response:
  147. [source,js]
  148. --------------------------------------------------
  149. {
  150. "acknowledged": true
  151. }
  152. --------------------------------------------------
  153. // TESTRESPONSE
  154. ////
  155. [[simulate-pipeline-api]]
  156. === Simulate Pipeline API
  157. The simulate pipeline API executes a specific pipeline against
  158. the set of documents provided in the body of the request.
  159. You can either specify an existing pipeline to execute
  160. against the provided documents, or supply a pipeline definition in
  161. the body of the request.
  162. Here is the structure of a simulate request with a pipeline definition provided
  163. in the body of the request:
  164. [source,js]
  165. --------------------------------------------------
  166. POST _ingest/pipeline/_simulate
  167. {
  168. "pipeline" : {
  169. // pipeline definition here
  170. },
  171. "docs" : [
  172. { "_source": {/** first document **/} },
  173. { "_source": {/** second document **/} },
  174. // ...
  175. ]
  176. }
  177. --------------------------------------------------
  178. // NOTCONSOLE
  179. Here is the structure of a simulate request against an existing pipeline:
  180. [source,js]
  181. --------------------------------------------------
  182. POST _ingest/pipeline/my-pipeline-id/_simulate
  183. {
  184. "docs" : [
  185. { "_source": {/** first document **/} },
  186. { "_source": {/** second document **/} },
  187. // ...
  188. ]
  189. }
  190. --------------------------------------------------
  191. // NOTCONSOLE
  192. Here is an example of a simulate request with a pipeline defined in the request
  193. and its response:
  194. [source,js]
  195. --------------------------------------------------
  196. POST _ingest/pipeline/_simulate
  197. {
  198. "pipeline" :
  199. {
  200. "description": "_description",
  201. "processors": [
  202. {
  203. "set" : {
  204. "field" : "field2",
  205. "value" : "_value"
  206. }
  207. }
  208. ]
  209. },
  210. "docs": [
  211. {
  212. "_index": "index",
  213. "_type": "_doc",
  214. "_id": "id",
  215. "_source": {
  216. "foo": "bar"
  217. }
  218. },
  219. {
  220. "_index": "index",
  221. "_type": "_doc",
  222. "_id": "id",
  223. "_source": {
  224. "foo": "rab"
  225. }
  226. }
  227. ]
  228. }
  229. --------------------------------------------------
  230. // CONSOLE
  231. Response:
  232. [source,js]
  233. --------------------------------------------------
  234. {
  235. "docs": [
  236. {
  237. "doc": {
  238. "_id": "id",
  239. "_index": "index",
  240. "_type": "_doc",
  241. "_source": {
  242. "field2": "_value",
  243. "foo": "bar"
  244. },
  245. "_ingest": {
  246. "timestamp": "2017-05-04T22:30:03.187Z"
  247. }
  248. }
  249. },
  250. {
  251. "doc": {
  252. "_id": "id",
  253. "_index": "index",
  254. "_type": "_doc",
  255. "_source": {
  256. "field2": "_value",
  257. "foo": "rab"
  258. },
  259. "_ingest": {
  260. "timestamp": "2017-05-04T22:30:03.188Z"
  261. }
  262. }
  263. }
  264. ]
  265. }
  266. --------------------------------------------------
  267. // TESTRESPONSE[s/"2017-05-04T22:30:03.187Z"/$body.docs.0.doc._ingest.timestamp/]
  268. // TESTRESPONSE[s/"2017-05-04T22:30:03.188Z"/$body.docs.1.doc._ingest.timestamp/]
  269. [[ingest-verbose-param]]
  270. ==== Viewing Verbose Results
  271. You can use the simulate pipeline API to see how each processor affects the ingest document
  272. as it passes through the pipeline. To see the intermediate results of
  273. each processor in the simulate request, you can add the `verbose` parameter
  274. to the request.
  275. Here is an example of a verbose request and its response:
  276. [source,js]
  277. --------------------------------------------------
  278. POST _ingest/pipeline/_simulate?verbose
  279. {
  280. "pipeline" :
  281. {
  282. "description": "_description",
  283. "processors": [
  284. {
  285. "set" : {
  286. "field" : "field2",
  287. "value" : "_value2"
  288. }
  289. },
  290. {
  291. "set" : {
  292. "field" : "field3",
  293. "value" : "_value3"
  294. }
  295. }
  296. ]
  297. },
  298. "docs": [
  299. {
  300. "_index": "index",
  301. "_type": "_doc",
  302. "_id": "id",
  303. "_source": {
  304. "foo": "bar"
  305. }
  306. },
  307. {
  308. "_index": "index",
  309. "_type": "_doc",
  310. "_id": "id",
  311. "_source": {
  312. "foo": "rab"
  313. }
  314. }
  315. ]
  316. }
  317. --------------------------------------------------
  318. // CONSOLE
  319. Response:
  320. [source,js]
  321. --------------------------------------------------
  322. {
  323. "docs": [
  324. {
  325. "processor_results": [
  326. {
  327. "doc": {
  328. "_id": "id",
  329. "_index": "index",
  330. "_type": "_doc",
  331. "_source": {
  332. "field2": "_value2",
  333. "foo": "bar"
  334. },
  335. "_ingest": {
  336. "timestamp": "2017-05-04T22:46:09.674Z"
  337. }
  338. }
  339. },
  340. {
  341. "doc": {
  342. "_id": "id",
  343. "_index": "index",
  344. "_type": "_doc",
  345. "_source": {
  346. "field3": "_value3",
  347. "field2": "_value2",
  348. "foo": "bar"
  349. },
  350. "_ingest": {
  351. "timestamp": "2017-05-04T22:46:09.675Z"
  352. }
  353. }
  354. }
  355. ]
  356. },
  357. {
  358. "processor_results": [
  359. {
  360. "doc": {
  361. "_id": "id",
  362. "_index": "index",
  363. "_type": "_doc",
  364. "_source": {
  365. "field2": "_value2",
  366. "foo": "rab"
  367. },
  368. "_ingest": {
  369. "timestamp": "2017-05-04T22:46:09.676Z"
  370. }
  371. }
  372. },
  373. {
  374. "doc": {
  375. "_id": "id",
  376. "_index": "index",
  377. "_type": "_doc",
  378. "_source": {
  379. "field3": "_value3",
  380. "field2": "_value2",
  381. "foo": "rab"
  382. },
  383. "_ingest": {
  384. "timestamp": "2017-05-04T22:46:09.677Z"
  385. }
  386. }
  387. }
  388. ]
  389. }
  390. ]
  391. }
  392. --------------------------------------------------
  393. // TESTRESPONSE[s/"2017-05-04T22:46:09.674Z"/$body.docs.0.processor_results.0.doc._ingest.timestamp/]
  394. // TESTRESPONSE[s/"2017-05-04T22:46:09.675Z"/$body.docs.0.processor_results.1.doc._ingest.timestamp/]
  395. // TESTRESPONSE[s/"2017-05-04T22:46:09.676Z"/$body.docs.1.processor_results.0.doc._ingest.timestamp/]
  396. // TESTRESPONSE[s/"2017-05-04T22:46:09.677Z"/$body.docs.1.processor_results.1.doc._ingest.timestamp/]
  397. [[accessing-data-in-pipelines]]
  398. == Accessing Data in Pipelines
  399. The processors in a pipeline have read and write access to documents that pass through the pipeline.
  400. The processors can access fields in the source of a document and the document's metadata fields.
  401. [float]
  402. [[accessing-source-fields]]
  403. === Accessing Fields in the Source
  404. Accessing a field in the source is straightforward. You simply refer to fields by
  405. their name. For example:
  406. [source,js]
  407. --------------------------------------------------
  408. {
  409. "set": {
  410. "field": "my_field",
  411. "value": 582.1
  412. }
  413. }
  414. --------------------------------------------------
  415. // NOTCONSOLE
  416. On top of this, fields from the source are always accessible via the `_source` prefix:
  417. [source,js]
  418. --------------------------------------------------
  419. {
  420. "set": {
  421. "field": "_source.my_field",
  422. "value": 582.1
  423. }
  424. }
  425. --------------------------------------------------
  426. // NOTCONSOLE
  427. [float]
  428. [[accessing-metadata-fields]]
  429. === Accessing Metadata Fields
  430. You can access metadata fields in the same way that you access fields in the source. This
  431. is possible because Elasticsearch doesn't allow fields in the source that have the
  432. same name as metadata fields.
  433. The following example sets the `_id` metadata field of a document to `1`:
  434. [source,js]
  435. --------------------------------------------------
  436. {
  437. "set": {
  438. "field": "_id",
  439. "value": "1"
  440. }
  441. }
  442. --------------------------------------------------
  443. // NOTCONSOLE
  444. The following metadata fields are accessible by a processor: `_index`, `_type`, `_id`, `_routing`.
  445. [float]
  446. [[accessing-ingest-metadata]]
  447. === Accessing Ingest Metadata Fields
  448. Beyond metadata fields and source fields, ingest also adds ingest metadata to the documents that it processes.
  449. These metadata properties are accessible under the `_ingest` key. Currently ingest adds the ingest timestamp
  450. under the `_ingest.timestamp` key of the ingest metadata. The ingest timestamp is the time when Elasticsearch
  451. received the index or bulk request to pre-process the document.
  452. Any processor can add ingest-related metadata during document processing. Ingest metadata is transient
  453. and is lost after a document has been processed by the pipeline. Therefore, ingest metadata won't be indexed.
  454. The following example adds a field with the name `received`. The value is the ingest timestamp:
  455. [source,js]
  456. --------------------------------------------------
  457. {
  458. "set": {
  459. "field": "received",
  460. "value": "{{_ingest.timestamp}}"
  461. }
  462. }
  463. --------------------------------------------------
  464. // NOTCONSOLE
  465. Unlike Elasticsearch metadata fields, the ingest metadata field name `_ingest` can be used as a valid field name
  466. in the source of a document. Use `_source._ingest` to refer to the field in the source document. Otherwise, `_ingest`
  467. will be interpreted as an ingest metadata field.
  468. [float]
  469. [[accessing-template-fields]]
  470. === Accessing Fields and Metafields in Templates
  471. A number of processor settings also support templating. Settings that support templating can have zero or more
  472. template snippets. A template snippet begins with `{{` and ends with `}}`.
  473. Accessing fields and metafields in templates is exactly the same as via regular processor field settings.
  474. The following example adds a field named `field_c`. Its value is a concatenation of
  475. the values of `field_a` and `field_b`.
  476. [source,js]
  477. --------------------------------------------------
  478. {
  479. "set": {
  480. "field": "field_c",
  481. "value": "{{field_a}} {{field_b}}"
  482. }
  483. }
  484. --------------------------------------------------
  485. // NOTCONSOLE
  486. The following example uses the value of the `geoip.country_iso_code` field in the source
  487. to set the index that the document will be indexed into:
  488. [source,js]
  489. --------------------------------------------------
  490. {
  491. "set": {
  492. "field": "_index",
  493. "value": "{{geoip.country_iso_code}}"
  494. }
  495. }
  496. --------------------------------------------------
  497. // NOTCONSOLE
  498. Dynamic field names are also supported. This example sets the field named after the
  499. value of `service` to the value of the field `code`:
  500. [source,js]
  501. --------------------------------------------------
  502. {
  503. "set": {
  504. "field": "{{service}}",
  505. "value": "{{code}}"
  506. }
  507. }
  508. --------------------------------------------------
  509. // NOTCONSOLE
  510. [[ingest-conditionals]]
  511. == Conditional Execution in Pipelines
  512. Each processor allows for an optional `if` condition to determine if that
  513. processor should be executed or skipped. The value of the `if` is a
  514. <<modules-scripting-painless, Painless>> script that needs to evaluate
  515. to `true` or `false`.
  516. For example the following processor will <<drop-processor,drop>> the document
  517. (i.e. not index it) if the input document has a field named `network_name`
  518. and it is equal to `Guest`.
  519. [source,js]
  520. --------------------------------------------------
  521. PUT _ingest/pipeline/drop_guests_network
  522. {
  523. "processors": [
  524. {
  525. "drop": {
  526. "if": "ctx.network_name == 'Guest'"
  527. }
  528. }
  529. ]
  530. }
  531. --------------------------------------------------
  532. // CONSOLE
  533. Using that pipeline for an index request:
  534. [source,js]
  535. --------------------------------------------------
  536. POST test/_doc/1?pipeline=drop_guests_network
  537. {
  538. "network_name" : "Guest"
  539. }
  540. --------------------------------------------------
  541. // CONSOLE
  542. // TEST[continued]
  543. Results in nothing indexed since the conditional evaluated to `true`.
  544. [source,js]
  545. --------------------------------------------------
  546. {
  547. "_index": "test",
  548. "_type": "_doc",
  549. "_id": "1",
  550. "_version": -3,
  551. "result": "noop",
  552. "_shards": {
  553. "total": 0,
  554. "successful": 0,
  555. "failed": 0
  556. }
  557. }
  558. --------------------------------------------------
  559. // TESTRESPONSE
  560. [[ingest-conditional-nullcheck]]
  561. === Handling Nested Fields in Conditionals
  562. Source documents often contain nested fields. Care should be taken
  563. to avoid NullPointerExceptions if the parent object does not exist
  564. in the document. For example, `ctx.a.b.c` can throw a NullPointerException
  565. if the source document does not have a top-level `a` object, or a
  566. second-level `b` object.
  567. To help protect against NullPointerExceptions, null safe operations should be used.
  568. Fortunately, Painless makes {painless}/painless-operators-reference.html#null-safe-operator[null safe]
  569. operations easy with the `?.` operator.
  570. [source,js]
  571. --------------------------------------------------
  572. PUT _ingest/pipeline/drop_guests_network
  573. {
  574. "processors": [
  575. {
  576. "drop": {
  577. "if": "ctx.network?.name == 'Guest'"
  578. }
  579. }
  580. ]
  581. }
  582. --------------------------------------------------
  583. // CONSOLE
  584. The following document will get <<drop-processor,dropped>> correctly:
  585. [source,js]
  586. --------------------------------------------------
  587. POST test/_doc/1?pipeline=drop_guests_network
  588. {
  589. "network": {
  590. "name": "Guest"
  591. }
  592. }
  593. --------------------------------------------------
  594. // CONSOLE
  595. // TEST[continued]
  596. ////
  597. Hidden example assertion:
  598. [source,js]
  599. --------------------------------------------------
  600. GET test/_doc/1
  601. --------------------------------------------------
  602. // CONSOLE
  603. // TEST[continued]
  604. // TEST[catch:missing]
  605. [source,js]
  606. --------------------------------------------------
  607. {
  608. "_index": "test",
  609. "_type": "_doc",
  610. "_id": "1",
  611. "found": false
  612. }
  613. --------------------------------------------------
  614. // TESTRESPONSE
  615. ////
  616. Thanks to the `?.` operator, the following document will not throw an error.
  617. If the pipeline used a `.`, the following document would throw a NullPointerException
  618. since the `network` object is not part of the source document.
  619. [source,js]
  620. --------------------------------------------------
  621. POST test/_doc/2?pipeline=drop_guests_network
  622. {
  623. "foo" : "bar"
  624. }
  625. --------------------------------------------------
  626. // CONSOLE
  627. // TEST[continued]
  628. ////
  629. Hidden example assertion:
  630. [source,js]
  631. --------------------------------------------------
  632. GET test/_doc/2
  633. --------------------------------------------------
  634. // CONSOLE
  635. // TEST[continued]
  636. [source,js]
  637. --------------------------------------------------
  638. {
  639. "_index": "test",
  640. "_type": "_doc",
  641. "_id": "2",
  642. "_version": 1,
  643. "found": true,
  644. "_source": {
  645. "foo": "bar"
  646. }
  647. }
  648. --------------------------------------------------
  649. // TESTRESPONSE
  650. ////
  651. The source document can also use dot delimited fields to represent nested fields.
652. For example, instead of the source document defining the fields as nested:
  653. [source,js]
  654. --------------------------------------------------
  655. {
  656. "network": {
  657. "name": "Guest"
  658. }
  659. }
  660. --------------------------------------------------
  661. // NOTCONSOLE
  662. The source document may have the nested fields flattened as such:
  663. [source,js]
  664. --------------------------------------------------
  665. {
  666. "network.name": "Guest"
  667. }
  668. --------------------------------------------------
  669. // NOTCONSOLE
  670. If this is the case, use the <<dot-expand-processor, Dot Expand Processor>>
  671. so that the nested fields may be used in a conditional.
  672. [source,js]
  673. --------------------------------------------------
  674. PUT _ingest/pipeline/drop_guests_network
  675. {
  676. "processors": [
  677. {
  678. "dot_expander": {
  679. "field": "network.name"
  680. }
  681. },
  682. {
  683. "drop": {
  684. "if": "ctx.network?.name == 'Guest'"
  685. }
  686. }
  687. ]
  688. }
  689. --------------------------------------------------
  690. // CONSOLE
  691. Now the following input document can be used with a conditional in the pipeline.
  692. [source,js]
  693. --------------------------------------------------
  694. POST test/_doc/3?pipeline=drop_guests_network
  695. {
  696. "network.name": "Guest"
  697. }
  698. --------------------------------------------------
  699. // CONSOLE
  700. // TEST[continued]
  701. ////
  702. Hidden example assertion:
  703. [source,js]
  704. --------------------------------------------------
  705. GET test/_doc/3
  706. --------------------------------------------------
  707. // CONSOLE
  708. // TEST[continued]
  709. // TEST[catch:missing]
  710. [source,js]
  711. --------------------------------------------------
  712. {
  713. "_index": "test",
  714. "_type": "_doc",
  715. "_id": "3",
  716. "found": false
  717. }
  718. --------------------------------------------------
  719. // TESTRESPONSE
  720. ////
721. The `?.` operator works well for use in the `if` conditional
  722. because the {painless}/painless-operators-reference.html#null-safe-operator[null safe operator]
  723. returns null if the object is null and `==` is null safe (as well as many other
  724. {painless}/painless-operators.html[painless operators]).
  725. However, calling a method such as `.equalsIgnoreCase` is not null safe
  726. and can result in a NullPointerException.
727. Some situations allow for the same functionality to be achieved in a null safe manner.
  728. For example: `'Guest'.equalsIgnoreCase(ctx.network?.name)` is null safe because
  729. `Guest` is always non null, but `ctx.network?.name.equalsIgnoreCase('Guest')` is not null safe
  730. since `ctx.network?.name` can return null.
731. Some situations require an explicit null check. In the following example there
732. is no null safe alternative, so an explicit null check is needed.
  733. [source,js]
  734. --------------------------------------------------
  735. {
  736. "drop": {
  737. "if": "ctx.network?.name != null && ctx.network.name.contains('Guest')"
  738. }
  739. }
  740. --------------------------------------------------
  741. // NOTCONSOLE
  742. [[ingest-conditional-complex]]
  743. === Complex Conditionals
744. The `if` condition can be more than a simple equality check.
  745. The full power of the <<modules-scripting-painless, Painless Scripting Language>> is available and
  746. running in the {painless}/painless-ingest-processor-context.html[ingest processor context].
  747. IMPORTANT: The value of ctx is read-only in `if` conditions.
  748. A more complex `if` condition that drops the document (i.e. not index it)
  749. unless it has a multi-valued tag field with at least one value that contains the characters
  750. `prod` (case insensitive).
  751. [source,js]
  752. --------------------------------------------------
  753. PUT _ingest/pipeline/not_prod_dropper
  754. {
  755. "processors": [
  756. {
  757. "drop": {
  758. "if": "Collection tags = ctx.tags;if(tags != null){for (String tag : tags) {if (tag.toLowerCase().contains('prod')) { return false;}}} return true;"
  759. }
  760. }
  761. ]
  762. }
  763. --------------------------------------------------
  764. // CONSOLE
  765. The conditional needs to be all on one line since JSON does not
  766. support new line characters. However, Kibana's console supports
  767. a triple quote syntax to help with writing and debugging
  768. scripts like these.
  769. [source,js]
  770. --------------------------------------------------
  771. PUT _ingest/pipeline/not_prod_dropper
  772. {
  773. "processors": [
  774. {
  775. "drop": {
  776. "if": """
  777. Collection tags = ctx.tags;
  778. if(tags != null){
  779. for (String tag : tags) {
  780. if (tag.toLowerCase().contains('prod')) {
  781. return false;
  782. }
  783. }
  784. }
  785. return true;
  786. """
  787. }
  788. }
  789. ]
  790. }
  791. --------------------------------------------------
  792. // NOTCONSOLE
  793. // TEST[continued]
  794. [source,js]
  795. --------------------------------------------------
  796. POST test/_doc/1?pipeline=not_prod_dropper
  797. {
  798. "tags": ["application:myapp", "env:Stage"]
  799. }
  800. --------------------------------------------------
  801. // CONSOLE
  802. // TEST[continued]
  803. The document is <<drop-processor,dropped>> since `prod` (case insensitive)
  804. is not found in the tags.
  805. ////
  806. Hidden example assertion:
  807. [source,js]
  808. --------------------------------------------------
  809. GET test/_doc/1
  810. --------------------------------------------------
  811. // CONSOLE
  812. // TEST[continued]
  813. // TEST[catch:missing]
  814. [source,js]
  815. --------------------------------------------------
  816. {
  817. "_index": "test",
  818. "_type": "_doc",
  819. "_id": "1",
  820. "found": false
  821. }
  822. --------------------------------------------------
  823. // TESTRESPONSE
  824. ////
  825. The following document is indexed (i.e. not dropped) since
  826. `prod` (case insensitive) is found in the tags.
  827. [source,js]
  828. --------------------------------------------------
  829. POST test/_doc/2?pipeline=not_prod_dropper
  830. {
  831. "tags": ["application:myapp", "env:Production"]
  832. }
  833. --------------------------------------------------
  834. // CONSOLE
  835. // TEST[continued]
  836. ////
  837. Hidden example assertion:
  838. [source,js]
  839. --------------------------------------------------
  840. GET test/_doc/2
  841. --------------------------------------------------
  842. // CONSOLE
  843. // TEST[continued]
  844. [source,js]
  845. --------------------------------------------------
  846. {
  847. "_index": "test",
  848. "_type": "_doc",
  849. "_id": "2",
  850. "_version": 1,
  851. "found": true,
  852. "_source": {
  853. "tags": [
  854. "application:myapp",
  855. "env:Production"
  856. ]
  857. }
  858. }
  859. --------------------------------------------------
  860. // TESTRESPONSE
  861. ////
  862. The <<simulate-pipeline-api>> with verbose can be used to help build out
  863. complex conditionals. If the conditional evaluates to false it will be
  864. omitted from the verbose results of the simulation since the document will not change.
  865. Care should be taken to avoid overly complex or expensive conditional checks
  866. since the condition needs to be checked for each and every document.
  867. [[conditionals-with-multiple-pipelines]]
  868. === Conditionals with the Pipeline Processor
  869. The combination of the `if` conditional and the <<pipeline-processor>> can result in a simple,
  870. yet powerful means to process heterogeneous input. For example, you can define a single pipeline
  871. that delegates to other pipelines based on some criteria.
  872. [source,js]
  873. --------------------------------------------------
  874. PUT _ingest/pipeline/logs_pipeline
  875. {
  876. "description": "A pipeline of pipelines for log files",
  877. "version": 1,
  878. "processors": [
  879. {
  880. "pipeline": {
  881. "if": "ctx.service?.name == 'apache_httpd'",
  882. "name": "httpd_pipeline"
  883. }
  884. },
  885. {
  886. "pipeline": {
  887. "if": "ctx.service?.name == 'syslog'",
  888. "name": "syslog_pipeline"
  889. }
  890. },
  891. {
  892. "fail": {
  893. "message": "This pipeline requires service.name to be either `syslog` or `apache_httpd`"
  894. }
  895. }
  896. ]
  897. }
  898. --------------------------------------------------
  899. // CONSOLE
  900. The above example allows consumers to point to a single pipeline for all log based index requests.
  901. Based on the conditional, the correct pipeline will be called to process that type of data.
  902. This pattern works well with a <<dynamic-index-settings, default pipeline>> defined in an index mapping
  903. template for all indexes that hold data that needs pre-index processing.
  904. [[conditionals-with-regex]]
905. === Conditionals with Regular Expressions
  906. The `if` conditional is implemented as a Painless script, which requires
907. {painless}/painless-examples.html#modules-scripting-painless-regex[explicit support for regular expressions].
  908. `script.painless.regex.enabled: true` must be set in `elasticsearch.yml` to use regular
  909. expressions in the `if` condition.
  910. If regular expressions are enabled, operators such as `=~` can be used against a `/pattern/` for conditions.
  911. For example:
  912. [source,js]
  913. --------------------------------------------------
  914. PUT _ingest/pipeline/check_url
  915. {
  916. "processors": [
  917. {
  918. "set": {
  919. "if": "ctx.href?.url =~ /^http[^s]/",
  920. "field": "href.insecure",
  921. "value": true
  922. }
  923. }
  924. ]
  925. }
  926. --------------------------------------------------
  927. // CONSOLE
  928. [source,js]
  929. --------------------------------------------------
  930. POST test/_doc/1?pipeline=check_url
  931. {
  932. "href": {
  933. "url": "http://www.elastic.co/"
  934. }
  935. }
  936. --------------------------------------------------
  937. // CONSOLE
  938. // TEST[continued]
  939. Results in:
  940. ////
  941. Hidden example assertion:
  942. [source,js]
  943. --------------------------------------------------
  944. GET test/_doc/1
  945. --------------------------------------------------
  946. // CONSOLE
  947. // TEST[continued]
  948. ////
  949. [source,js]
  950. --------------------------------------------------
  951. {
  952. "_index": "test",
  953. "_type": "_doc",
  954. "_id": "1",
  955. "_version": 1,
  956. "found": true,
  957. "_source": {
  958. "href": {
  959. "insecure": true,
  960. "url": "http://www.elastic.co/"
  961. }
  962. }
  963. }
  964. --------------------------------------------------
  965. // TESTRESPONSE
  966. Regular expressions can be expensive and should be avoided if viable
  967. alternatives exist.
  968. For example in this case `startsWith` can be used to get the same result
  969. without using a regular expression:
  970. [source,js]
  971. --------------------------------------------------
  972. PUT _ingest/pipeline/check_url
  973. {
  974. "processors": [
  975. {
  976. "set": {
  977. "if": "ctx.href?.url != null && ctx.href.url.startsWith('http://')",
  978. "field": "href.insecure",
  979. "value": true
  980. }
  981. }
  982. ]
  983. }
  984. --------------------------------------------------
  985. // CONSOLE
  986. [[handling-failure-in-pipelines]]
  987. == Handling Failures in Pipelines
  988. In its simplest use case, a pipeline defines a list of processors that
  989. are executed sequentially, and processing halts at the first exception. This
  990. behavior may not be desirable when failures are expected. For example, you may have logs
  991. that don't match the specified grok expression. Instead of halting execution, you may
  992. want to index such documents into a separate index.
  993. To enable this behavior, you can use the `on_failure` parameter. The `on_failure` parameter
  994. defines a list of processors to be executed immediately following the failed processor.
  995. You can specify this parameter at the pipeline level, as well as at the processor
  996. level. If a processor specifies an `on_failure` configuration, whether
  997. it is empty or not, any exceptions that are thrown by the processor are caught, and the
  998. pipeline continues executing the remaining processors. Because you can define further processors
  999. within the scope of an `on_failure` statement, you can nest failure handling.
  1000. The following example defines a pipeline that renames the `foo` field in
  1001. the processed document to `bar`. If the document does not contain the `foo` field, the processor
  1002. attaches an error message to the document for later analysis within
  1003. Elasticsearch.
  1004. [source,js]
  1005. --------------------------------------------------
  1006. {
  1007. "description" : "my first pipeline with handled exceptions",
  1008. "processors" : [
  1009. {
  1010. "rename" : {
  1011. "field" : "foo",
  1012. "target_field" : "bar",
  1013. "on_failure" : [
  1014. {
  1015. "set" : {
  1016. "field" : "error",
  1017. "value" : "field \"foo\" does not exist, cannot rename to \"bar\""
  1018. }
  1019. }
  1020. ]
  1021. }
  1022. }
  1023. ]
  1024. }
  1025. --------------------------------------------------
  1026. // NOTCONSOLE
  1027. The following example defines an `on_failure` block on a whole pipeline to change
  1028. the index to which failed documents get sent.
  1029. [source,js]
  1030. --------------------------------------------------
  1031. {
  1032. "description" : "my first pipeline with handled exceptions",
  1033. "processors" : [ ... ],
  1034. "on_failure" : [
  1035. {
  1036. "set" : {
  1037. "field" : "_index",
  1038. "value" : "failed-{{ _index }}"
  1039. }
  1040. }
  1041. ]
  1042. }
  1043. --------------------------------------------------
  1044. // NOTCONSOLE
1045. Alternatively, instead of defining behaviour in case of processor failure, it is also possible
1046. to ignore a failure and continue with the next processor by specifying the `ignore_failure` setting.
1047. In the example below, if the field `foo` doesn't exist, the failure will be caught and the pipeline
1048. continues to execute, which in this case means that the pipeline does nothing.
  1049. [source,js]
  1050. --------------------------------------------------
  1051. {
  1052. "description" : "my first pipeline with handled exceptions",
  1053. "processors" : [
  1054. {
  1055. "rename" : {
  1056. "field" : "foo",
  1057. "target_field" : "bar",
  1058. "ignore_failure" : true
  1059. }
  1060. }
  1061. ]
  1062. }
  1063. --------------------------------------------------
  1064. // NOTCONSOLE
  1065. The `ignore_failure` can be set on any processor and defaults to `false`.
  1066. [float]
  1067. [[accessing-error-metadata]]
  1068. === Accessing Error Metadata From Processors Handling Exceptions
  1069. You may want to retrieve the actual error message that was thrown
  1070. by a failed processor. To do so you can access metadata fields called
  1071. `on_failure_message`, `on_failure_processor_type`, and `on_failure_processor_tag`. These fields are only accessible
  1072. from within the context of an `on_failure` block.
  1073. Here is an updated version of the example that you
  1074. saw earlier. But instead of setting the error message manually, the example leverages the `on_failure_message`
  1075. metadata field to provide the error message.
  1076. [source,js]
  1077. --------------------------------------------------
  1078. {
  1079. "description" : "my first pipeline with handled exceptions",
  1080. "processors" : [
  1081. {
  1082. "rename" : {
  1083. "field" : "foo",
1084. "target_field" : "bar",
  1085. "on_failure" : [
  1086. {
  1087. "set" : {
  1088. "field" : "error",
  1089. "value" : "{{ _ingest.on_failure_message }}"
  1090. }
  1091. }
  1092. ]
  1093. }
  1094. }
  1095. ]
  1096. }
  1097. --------------------------------------------------
  1098. // NOTCONSOLE
  1099. [[ingest-processors]]
  1100. == Processors
  1101. All processors are defined in the following way within a pipeline definition:
  1102. [source,js]
  1103. --------------------------------------------------
  1104. {
  1105. "PROCESSOR_NAME" : {
  1106. ... processor configuration options ...
  1107. }
  1108. }
  1109. --------------------------------------------------
  1110. // NOTCONSOLE
  1111. Each processor defines its own configuration parameters, but all processors have
  1112. the ability to declare `tag`, `on_failure` and `if` fields. These fields are optional.
  1113. A `tag` is simply a string identifier of the specific instantiation of a certain
  1114. processor in a pipeline. The `tag` field does not affect the processor's behavior,
  1115. but is very useful for bookkeeping and tracing errors to specific processors.
  1116. The `if` field must contain a script that returns a boolean value. If the script evaluates to `true`
  1117. then the processor will be executed for the given document otherwise it will be skipped.
  1118. The `if` field takes an object with the script fields defined in <<script-processor, script-options>>
  1119. and accesses a read only version of the document via the same `ctx` variable used by scripts in the
  1120. <<script-processor>>.
  1121. [source,js]
  1122. --------------------------------------------------
  1123. {
  1124. "set": {
  1125. "if": "ctx.foo == 'someValue'",
  1126. "field": "found",
  1127. "value": true
  1128. }
  1129. }
  1130. --------------------------------------------------
  1131. // NOTCONSOLE
  1132. See <<ingest-conditionals>> to learn more about the `if` field and conditional execution.
  1133. See <<handling-failure-in-pipelines>> to learn more about the `on_failure` field and error handling in pipelines.
  1134. The <<ingest-info,node info API>> can be used to figure out what processors are available in a cluster.
  1135. The <<ingest-info,node info API>> will provide a per node list of what processors are available.
  1136. Custom processors must be installed on all nodes. The put pipeline API will fail if a processor specified in a pipeline
  1137. doesn't exist on all nodes. If you rely on custom processor plugins make sure to mark these plugins as mandatory by adding
  1138. `plugin.mandatory` setting to the `config/elasticsearch.yml` file, for example:
  1139. [source,yaml]
  1140. --------------------------------------------------
  1141. plugin.mandatory: ingest-attachment,ingest-geoip
  1142. --------------------------------------------------
  1143. A node will not start if either of these plugins are not available.
  1144. The <<ingest-stats,node stats API>> can be used to fetch ingest usage statistics, globally and on a per
  1145. pipeline basis. Useful to find out which pipelines are used the most or spent the most time on preprocessing.
  1146. [float]
  1147. === Ingest Processor Plugins
  1148. Additional ingest processors can be implemented and installed as Elasticsearch {plugins}/intro.html[plugins].
  1149. See {plugins}/ingest.html[Ingest plugins] for information about the available ingest plugins.
  1150. [[append-processor]]
  1151. === Append Processor
  1152. Appends one or more values to an existing array if the field already exists and it is an array.
  1153. Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar.
  1154. Creates an array containing the provided values if the field doesn't exist.
  1155. Accepts a single value or an array of values.
  1156. [[append-options]]
  1157. .Append Options
  1158. [options="header"]
  1159. |======
  1160. | Name | Required | Default | Description
  1161. | `field` | yes | - | The field to be appended to. Supports <<accessing-template-fields,template snippets>>.
  1162. | `value` | yes | - | The value to be appended. Supports <<accessing-template-fields,template snippets>>.
  1163. include::ingest-node-common-processor.asciidoc[]
  1164. |======
  1165. [source,js]
  1166. --------------------------------------------------
  1167. {
  1168. "append": {
  1169. "field": "tags",
  1170. "value": ["production", "{{app}}", "{{owner}}"]
  1171. }
  1172. }
  1173. --------------------------------------------------
  1174. // NOTCONSOLE
  1175. [[bytes-processor]]
  1176. === Bytes Processor
  1177. Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024).
1178. Supported human readable units are "b", "kb", "mb", "gb", "tb", "pb" case insensitive. An error will occur if
1179. the field is not in a supported format or the resultant value exceeds 2^63.
  1180. [[bytes-options]]
  1181. .Bytes Options
  1182. [options="header"]
  1183. |======
  1184. | Name | Required | Default | Description
  1185. | `field` | yes | - | The field to convert
  1186. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  1187. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1188. include::ingest-node-common-processor.asciidoc[]
  1189. |======
  1190. [source,js]
  1191. --------------------------------------------------
  1192. {
  1193. "bytes": {
  1194. "field": "file.size"
  1195. }
  1196. }
  1197. --------------------------------------------------
  1198. // NOTCONSOLE
  1199. [[convert-processor]]
  1200. === Convert Processor
  1201. Converts a field in the currently ingested document to a different type, such as converting a string to an integer.
  1202. If the field value is an array, all members will be converted.
  1203. The supported types include: `integer`, `long`, `float`, `double`, `string`, `boolean`, and `auto`.
  1204. Specifying `boolean` will set the field to true if its string value is equal to `true` (ignore case), to
  1205. false if its string value is equal to `false` (ignore case), or it will throw an exception otherwise.
  1206. Specifying `auto` will attempt to convert the string-valued `field` into the closest non-string type.
  1207. For example, a field whose value is `"true"` will be converted to its respective boolean type: `true`. Do note
1208. that float takes precedence over double in `auto`. A value of `"242.15"` will "automatically" be converted to
  1209. `242.15` of type `float`. If a provided field cannot be appropriately converted, the Convert Processor will
  1210. still process successfully and leave the field value as-is. In such a case, `target_field` will
  1211. still be updated with the unconverted field value.
  1212. [[convert-options]]
  1213. .Convert Options
  1214. [options="header"]
  1215. |======
  1216. | Name | Required | Default | Description
  1217. | `field` | yes | - | The field whose value is to be converted
  1218. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  1219. | `type` | yes | - | The type to convert the existing value to
  1220. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1221. include::ingest-node-common-processor.asciidoc[]
  1222. |======
  1223. [source,js]
  1224. --------------------------------------------------
  1225. PUT _ingest/pipeline/my-pipeline-id
  1226. {
  1227. "description": "converts the content of the id field to an integer",
  1228. "processors" : [
  1229. {
  1230. "convert" : {
  1231. "field" : "id",
  1232. "type": "integer"
  1233. }
  1234. }
  1235. ]
  1236. }
  1237. --------------------------------------------------
  1238. // NOTCONSOLE
  1239. [[date-processor]]
  1240. === Date Processor
  1241. Parses dates from fields, and then uses the date or timestamp as the timestamp for the document.
  1242. By default, the date processor adds the parsed date as a new field called `@timestamp`. You can specify a
  1243. different field by setting the `target_field` configuration parameter. Multiple date formats are supported
  1244. as part of the same date processor definition. They will be used sequentially to attempt parsing the date field,
  1245. in the same order they were defined as part of the processor definition.
  1246. [[date-options]]
  1247. .Date options
  1248. [options="header"]
  1249. |======
  1250. | Name | Required | Default | Description
  1251. | `field` | yes | - | The field to get the date from.
  1252. | `target_field` | no | @timestamp | The field that will hold the parsed date.
  1253. | `formats` | yes | - | An array of the expected date formats. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
  1254. | `timezone` | no | UTC | The timezone to use when parsing the date. Supports <<accessing-template-fields,template snippets>>.
  1255. | `locale` | no | ENGLISH | The locale to use when parsing the date, relevant when parsing month names or week days. Supports <<accessing-template-fields,template snippets>>.
  1256. include::ingest-node-common-processor.asciidoc[]
  1257. |======
  1258. Here is an example that adds the parsed date to the `timestamp` field based on the `initial_date` field:
  1259. [source,js]
  1260. --------------------------------------------------
  1261. {
  1262. "description" : "...",
  1263. "processors" : [
  1264. {
  1265. "date" : {
  1266. "field" : "initial_date",
  1267. "target_field" : "timestamp",
  1268. "formats" : ["dd/MM/yyyy hh:mm:ss"],
  1269. "timezone" : "Europe/Amsterdam"
  1270. }
  1271. }
  1272. ]
  1273. }
  1274. --------------------------------------------------
  1275. // NOTCONSOLE
  1276. The `timezone` and `locale` processor parameters are templated. This means that their values can be
  1277. extracted from fields within documents. The example below shows how to extract the locale/timezone
  1278. details from existing fields, `my_timezone` and `my_locale`, in the ingested document that contain
  1279. the timezone and locale values.
  1280. [source,js]
  1281. --------------------------------------------------
  1282. {
  1283. "description" : "...",
  1284. "processors" : [
  1285. {
  1286. "date" : {
  1287. "field" : "initial_date",
  1288. "target_field" : "timestamp",
  1289. "formats" : ["ISO8601"],
  1290. "timezone" : "{{my_timezone}}",
  1291. "locale" : "{{my_locale}}"
  1292. }
  1293. }
  1294. ]
  1295. }
  1296. --------------------------------------------------
  1297. // NOTCONSOLE
  1298. [[date-index-name-processor]]
  1299. === Date Index Name Processor
  1300. The purpose of this processor is to point documents to the right time based index based
  1301. on a date or timestamp field in a document by using the <<date-math-index-names, date math index name support>>.
  1302. The processor sets the `_index` meta field with a date math index name expression based on the provided index name
  1303. prefix, a date or timestamp field in the documents being processed and the provided date rounding.
  1304. First, this processor fetches the date or timestamp from a field in the document being processed. Optionally,
  1305. date formatting can be configured on how the field's value should be parsed into a date. Then this date,
  1306. the provided index name prefix and the provided date rounding get formatted into a date math index name expression.
  1307. Also here optionally date formatting can be specified on how the date should be formatted into a date math index name
  1308. expression.
  1309. An example pipeline that points documents to a monthly index that starts with a `myindex-` prefix based on a
  1310. date in the `date1` field:
  1311. [source,js]
  1312. --------------------------------------------------
  1313. PUT _ingest/pipeline/monthlyindex
  1314. {
  1315. "description": "monthly date-time index naming",
  1316. "processors" : [
  1317. {
  1318. "date_index_name" : {
  1319. "field" : "date1",
  1320. "index_name_prefix" : "myindex-",
  1321. "date_rounding" : "M"
  1322. }
  1323. }
  1324. ]
  1325. }
  1326. --------------------------------------------------
  1327. // CONSOLE
  1328. Using that pipeline for an index request:
  1329. [source,js]
  1330. --------------------------------------------------
  1331. PUT /myindex/_doc/1?pipeline=monthlyindex
  1332. {
  1333. "date1" : "2016-04-25T12:02:01.789Z"
  1334. }
  1335. --------------------------------------------------
  1336. // CONSOLE
  1337. // TEST[continued]
  1338. [source,js]
  1339. --------------------------------------------------
  1340. {
  1341. "_index" : "myindex-2016-04-01",
  1342. "_type" : "_doc",
  1343. "_id" : "1",
  1344. "_version" : 1,
  1345. "result" : "created",
  1346. "_shards" : {
  1347. "total" : 2,
  1348. "successful" : 1,
  1349. "failed" : 0
  1350. },
  1351. "_seq_no" : 0,
  1352. "_primary_term" : 1
  1353. }
  1354. --------------------------------------------------
  1355. // TESTRESPONSE
  1356. The above request will not index this document into the `myindex` index, but into the `myindex-2016-04-01` index because
  1357. it was rounded by month. This is because the date-index-name-processor overrides the `_index` property of the document.
  1358. To see the date-math value of the index supplied in the actual index request which resulted in the above document being
  1359. indexed into `myindex-2016-04-01` we can inspect the effects of the processor using a simulate request.
  1360. [source,js]
  1361. --------------------------------------------------
  1362. POST _ingest/pipeline/_simulate
  1363. {
  1364. "pipeline" :
  1365. {
  1366. "description": "monthly date-time index naming",
  1367. "processors" : [
  1368. {
  1369. "date_index_name" : {
  1370. "field" : "date1",
  1371. "index_name_prefix" : "myindex-",
  1372. "date_rounding" : "M"
  1373. }
  1374. }
  1375. ]
  1376. },
  1377. "docs": [
  1378. {
  1379. "_source": {
  1380. "date1": "2016-04-25T12:02:01.789Z"
  1381. }
  1382. }
  1383. ]
  1384. }
  1385. --------------------------------------------------
  1386. // CONSOLE
  1387. and the result:
  1388. [source,js]
  1389. --------------------------------------------------
  1390. {
  1391. "docs" : [
  1392. {
  1393. "doc" : {
  1394. "_id" : "_id",
  1395. "_index" : "<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>",
  1396. "_type" : "_type",
  1397. "_source" : {
  1398. "date1" : "2016-04-25T12:02:01.789Z"
  1399. },
  1400. "_ingest" : {
  1401. "timestamp" : "2016-11-08T19:43:03.850+0000"
  1402. }
  1403. }
  1404. }
  1405. ]
  1406. }
  1407. --------------------------------------------------
  1408. // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
  1409. The above example shows that `_index` was set to `<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>`. Elasticsearch
1410. understands this to mean `2016-04-01` as is explained in the <<date-math-index-names, date math index name documentation>>.
  1411. [[date-index-name-options]]
  1412. .Date index name options
  1413. [options="header"]
  1414. |======
  1415. | Name | Required | Default | Description
  1416. | `field` | yes | - | The field to get the date or timestamp from.
  1417. | `index_name_prefix` | no | - | A prefix of the index name to be prepended before the printed date. Supports <<accessing-template-fields,template snippets>>.
  1418. | `date_rounding` | yes | - | How to round the date when formatting the date into the index name. Valid values are: `y` (year), `M` (month), `w` (week), `d` (day), `h` (hour), `m` (minute) and `s` (second). Supports <<accessing-template-fields,template snippets>>.
  1419. | `date_formats` | no | yyyy-MM-dd'T'HH:mm:ss.SSSZ | An array of the expected date formats for parsing dates / timestamps in the document being preprocessed. Can be a Joda pattern or one of the following formats: ISO8601, UNIX, UNIX_MS, or TAI64N.
1420. | `timezone` | no | UTC | The timezone to use when parsing the date and when date math index name expressions are resolved into concrete index names.
  1421. | `locale` | no | ENGLISH | The locale to use when parsing the date from the document being preprocessed, relevant when parsing month names or week days.
1422. | `index_name_format` | no | yyyy-MM-dd | The format to be used when printing the parsed date into the index name. A valid Joda pattern is expected here. Supports <<accessing-template-fields,template snippets>>.
  1423. include::ingest-node-common-processor.asciidoc[]
  1424. |======
  1425. [[dissect-processor]]
  1426. === Dissect Processor
  1427. Similar to the <<grok-processor,Grok Processor>>, dissect also extracts structured fields out of a single text field
  1428. within a document. However unlike the <<grok-processor,Grok Processor>>, dissect does not use
  1429. https://en.wikipedia.org/wiki/Regular_expression[Regular Expressions]. This allows dissect's syntax to be simple and for
  1430. some cases faster than the <<grok-processor,Grok Processor>>.
  1431. Dissect matches a single text field against a defined pattern.
  1432. For example the following pattern:
  1433. [source,txt]
  1434. --------------------------------------------------
  1435. %{clientip} %{ident} %{auth} [%{@timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{status} %{size}
  1436. --------------------------------------------------
  1437. will match a log line of this format:
  1438. [source,txt]
  1439. --------------------------------------------------
  1440. 1.2.3.4 - - [30/Apr/1998:22:00:52 +0000] \"GET /english/venues/cities/images/montpellier/18.gif HTTP/1.0\" 200 3171
  1441. --------------------------------------------------
  1442. and result in a document with the following fields:
  1443. [source,js]
  1444. --------------------------------------------------
  1445. "doc": {
  1446. "_index": "_index",
  1447. "_type": "_type",
  1448. "_id": "_id",
  1449. "_source": {
  1450. "request": "/english/venues/cities/images/montpellier/18.gif",
  1451. "auth": "-",
  1452. "ident": "-",
  1453. "verb": "GET",
  1454. "@timestamp": "30/Apr/1998:22:00:52 +0000",
  1455. "size": "3171",
  1456. "clientip": "1.2.3.4",
  1457. "httpversion": "1.0",
  1458. "status": "200"
  1459. }
  1460. }
  1461. --------------------------------------------------
  1462. // NOTCONSOLE
  1463. A dissect pattern is defined by the parts of the string that will be discarded. In the example above the first part
1464. to be discarded is a single space. Dissect finds this space, then assigns the value of `clientip` to everything up
  1465. until that space.
  1466. Later dissect matches the `[` and then `]` and then assigns `@timestamp` to everything in-between `[` and `]`.
1467. Paying special attention to the parts of the string to discard will help build successful dissect patterns.
  1468. Successful matches require all keys in a pattern to have a value. If any of the `%{keyname}` defined in the pattern do
1469. not have a value, then an exception is thrown and may be handled by the <<handling-failure-in-pipelines,on_failure>> directive.
  1470. An empty key `%{}` or a <<dissect-modifier-named-skip-key, named skip key>> can be used to match values, but exclude the value from
  1471. the final document. All matched values are represented as string data types. The <<convert-processor, convert processor>>
1472. may be used to convert to the expected data type.
  1473. Dissect also supports <<dissect-key-modifiers,key modifiers>> that can change dissect's default
  1474. behavior. For example you can instruct dissect to ignore certain fields, append fields, skip over padding, etc.
  1475. See <<dissect-key-modifiers, below>> for more information.
  1476. [[dissect-options]]
  1477. .Dissect Options
  1478. [options="header"]
  1479. |======
  1480. | Name | Required | Default | Description
  1481. | `field` | yes | - | The field to dissect
  1482. | `pattern` | yes | - | The pattern to apply to the field
  1483. | `append_separator`| no | "" (empty string) | The character(s) that separate the appended fields.
  1484. | `ignore_missing` | no | false | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1485. include::ingest-node-common-processor.asciidoc[]
  1486. |======
  1487. [source,js]
  1488. --------------------------------------------------
  1489. {
  1490. "dissect": {
  1491. "field": "message",
  1492. "pattern" : "%{clientip} %{ident} %{auth} [%{@timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{status} %{size}"
  1493. }
  1494. }
  1495. --------------------------------------------------
  1496. // NOTCONSOLE
  1497. [[dissect-key-modifiers]]
  1498. ==== Dissect key modifiers
  1499. Key modifiers can change the default behavior for dissection. Key modifiers may be found on the left or right
  1500. of the `%{keyname}` always inside the `%{` and `}`. For example `%{+keyname ->}` has the append and right padding
  1501. modifiers.
  1502. .Dissect Key Modifiers
  1503. [options="header"]
  1504. |======
  1505. | Modifier | Name | Position | Example | Description | Details
  1506. | `->` | Skip right padding | (far) right | `%{keyname1->}` | Skips any repeated characters to the right | <<dissect-modifier-skip-right-padding,link>>
  1507. | `+` | Append | left | `%{+keyname} %{+keyname}` | Appends two or more fields together | <<dissect-modifier-append-key,link>>
  1508. | `+` with `/n` | Append with order | left and right | `%{+keyname/2} %{+keyname/1}` | Appends two or more fields together in the order specified | <<dissect-modifier-append-key-with-order,link>>
  1509. | `?` | Named skip key | left | `%{?ignoreme}` | Skips the matched value in the output. Same behavior as `%{}`| <<dissect-modifier-named-skip-key,link>>
  1510. | `*` and `&` | Reference keys | left | `%{*r1} %{&r1}` | Sets the output key as value of `*` and output value of `&` | <<dissect-modifier-reference-keys,link>>
  1511. |======
  1512. [[dissect-modifier-skip-right-padding]]
  1513. ===== Right padding modifier (`->`)
  1514. The algorithm that performs the dissection is very strict in that it requires all characters in the pattern to match
  1515. the source string. For example, the pattern `%{fookey} %{barkey}` (1 space), will match the string "foo{nbsp}bar"
  1516. (1 space), but will not match the string "foo{nbsp}{nbsp}bar" (2 spaces) since the pattern has only 1 space and the
  1517. source string has 2 spaces.
  1518. The right padding modifier helps with this case. Adding the right padding modifier to the pattern `%{fookey->} %{barkey}`,
1519. it will now match "foo{nbsp}bar" (1 space) and "foo{nbsp}{nbsp}bar" (2 spaces)
  1520. and even "foo{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}bar" (10 spaces).
  1521. Use the right padding modifier to allow for repetition of the characters after a `%{keyname->}`.
  1522. The right padding modifier may be placed on any key with any other modifiers. It should always be the furthest right
  1523. modifier. For example: `%{+keyname/1->}` and `%{->}`
  1524. Right padding modifier example
  1525. |======
  1526. | *Pattern* | `%{ts->} %{level}`
  1527. | *Input* | 1998-08-10T17:15:42,466{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}WARN
  1528. | *Result* a|
  1529. * ts = 1998-08-10T17:15:42,466
  1530. * level = WARN
  1531. |======
  1532. The right padding modifier may be used with an empty key to help skip unwanted data. For example, the same input string, but wrapped with brackets requires the use of an empty right padded key to achieve the same result.
  1533. Right padding modifier with empty key example
  1534. |======
  1535. | *Pattern* | `[%{ts}]%{->}[%{level}]`
  1536. | *Input* | [1998-08-10T17:15:42,466]{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}{nbsp}[WARN]
  1537. | *Result* a|
  1538. * ts = 1998-08-10T17:15:42,466
  1539. * level = WARN
  1540. |======
  1541. ===== Append modifier (`+`)
  1542. [[dissect-modifier-append-key]]
  1543. Dissect supports appending two or more results together for the output.
  1544. Values are appended left to right. An append separator can be specified.
  1545. In this example the append_separator is defined as a space.
  1546. Append modifier example
  1547. |======
  1548. | *Pattern* | `%{+name} %{+name} %{+name} %{+name}`
  1549. | *Input* | john jacob jingleheimer schmidt
  1550. | *Result* a|
  1551. * name = john jacob jingleheimer schmidt
  1552. |======
  1553. ===== Append with order modifier (`+` and `/n`)
  1554. [[dissect-modifier-append-key-with-order]]
  1555. Dissect supports appending two or more results together for the output.
  1556. Values are appended based on the order defined (`/n`). An append separator can be specified.
  1557. In this example the append_separator is defined as a comma.
  1558. Append with order modifier example
  1559. |======
  1560. | *Pattern* | `%{+name/2} %{+name/4} %{+name/3} %{+name/1}`
  1561. | *Input* | john jacob jingleheimer schmidt
  1562. | *Result* a|
  1563. * name = schmidt,john,jingleheimer,jacob
  1564. |======
  1565. ===== Named skip key (`?`)
  1566. [[dissect-modifier-named-skip-key]]
  1567. Dissect supports ignoring matches in the final result. This can be done with an empty key `%{}`, but for readability
  1568. it may be desired to give that empty key a name.
  1569. Named skip key modifier example
  1570. |======
  1571. | *Pattern* | `%{clientip} %{?ident} %{?auth} [%{@timestamp}]`
  1572. | *Input* | 1.2.3.4 - - [30/Apr/1998:22:00:52 +0000]
  1573. | *Result* a|
1574. * clientip = 1.2.3.4
  1575. * @timestamp = 30/Apr/1998:22:00:52 +0000
  1576. |======
  1577. ===== Reference keys (`*` and `&`)
  1578. [[dissect-modifier-reference-keys]]
1579. Dissect supports using parsed values as the key/value pairings for the structured content. Imagine a system that
  1580. partially logs in key/value pairs. Reference keys allow you to maintain that key/value relationship.
  1581. Reference key modifier example
  1582. |======
  1583. | *Pattern* | `[%{ts}] [%{level}] %{*p1}:%{&p1} %{*p2}:%{&p2}`
  1584. | *Input* | [2018-08-10T17:15:42,466] [ERR] ip:1.2.3.4 error:REFUSED
  1585. | *Result* a|
1586. * ts = 2018-08-10T17:15:42,466
  1587. * level = ERR
  1588. * ip = 1.2.3.4
  1589. * error = REFUSED
  1590. |======
  1591. [[drop-processor]]
  1592. === Drop Processor
  1593. Drops the document without raising any errors. This is useful to prevent the document from
  1594. getting indexed based on some condition.
  1595. [[drop-options]]
  1596. .Drop Options
  1597. [options="header"]
  1598. |======
  1599. | Name | Required | Default | Description
  1600. include::ingest-node-common-processor.asciidoc[]
  1601. |======
  1602. [source,js]
  1603. --------------------------------------------------
  1604. {
  1605. "drop": {
  1606. "if" : "ctx.network_name == 'Guest'"
  1607. }
  1608. }
  1609. --------------------------------------------------
  1610. // NOTCONSOLE
  1611. [[dot-expand-processor]]
  1612. === Dot Expander Processor
  1613. Expands a field with dots into an object field. This processor allows fields
  1614. with dots in the name to be accessible by other processors in the pipeline.
  1615. Otherwise these <<accessing-data-in-pipelines,fields>> can't be accessed by any processor.
  1616. [[dot-expender-options]]
  1617. .Dot Expand Options
  1618. [options="header"]
  1619. |======
  1620. | Name | Required | Default | Description
  1621. | `field` | yes | - | The field to expand into an object field
1622. | `path` | no | - | The field that contains the field to expand. Only required if the field to expand is part of another object field, because the `field` option can only understand leaf fields.
  1623. include::ingest-node-common-processor.asciidoc[]
  1624. |======
  1625. [source,js]
  1626. --------------------------------------------------
  1627. {
  1628. "dot_expander": {
  1629. "field": "foo.bar"
  1630. }
  1631. }
  1632. --------------------------------------------------
  1633. // NOTCONSOLE
  1634. For example the dot expand processor would turn this document:
  1635. [source,js]
  1636. --------------------------------------------------
  1637. {
  1638. "foo.bar" : "value"
  1639. }
  1640. --------------------------------------------------
  1641. // NOTCONSOLE
  1642. into:
  1643. [source,js]
  1644. --------------------------------------------------
  1645. {
  1646. "foo" : {
  1647. "bar" : "value"
  1648. }
  1649. }
  1650. --------------------------------------------------
  1651. // NOTCONSOLE
  1652. If there is already a `bar` field nested under `foo` then
  1653. this processor merges the `foo.bar` field into it. If the field is
  1654. a scalar value then it will turn that field into an array field.
  1655. For example, the following document:
  1656. [source,js]
  1657. --------------------------------------------------
  1658. {
  1659. "foo.bar" : "value2",
  1660. "foo" : {
  1661. "bar" : "value1"
  1662. }
  1663. }
  1664. --------------------------------------------------
  1665. // NOTCONSOLE
  1666. is transformed by the `dot_expander` processor into:
  1667. [source,js]
  1668. --------------------------------------------------
  1669. {
  1670. "foo" : {
  1671. "bar" : ["value1", "value2"]
  1672. }
  1673. }
  1674. --------------------------------------------------
  1675. // NOTCONSOLE
  1676. If any field outside of the leaf field conflicts with a pre-existing field of the same name,
  1677. then that field needs to be renamed first.
  1678. Consider the following document:
  1679. [source,js]
  1680. --------------------------------------------------
  1681. {
  1682. "foo": "value1",
  1683. "foo.bar": "value2"
  1684. }
  1685. --------------------------------------------------
  1686. // NOTCONSOLE
  1687. Then the `foo` needs to be renamed first before the `dot_expander`
  1688. processor is applied. So in order for the `foo.bar` field to properly
  1689. be expanded into the `bar` field under the `foo` field the following
  1690. pipeline should be used:
  1691. [source,js]
  1692. --------------------------------------------------
  1693. {
  1694. "processors" : [
  1695. {
  1696. "rename" : {
  1697. "field" : "foo",
1698. "target_field" : "foo.bar"
  1699. }
  1700. },
  1701. {
  1702. "dot_expander": {
  1703. "field": "foo.bar"
  1704. }
  1705. }
  1706. ]
  1707. }
  1708. --------------------------------------------------
  1709. // NOTCONSOLE
  1710. The reason for this is that Ingest doesn't know how to automatically cast
  1711. a scalar field to an object field.
  1712. [[fail-processor]]
  1713. === Fail Processor
  1714. Raises an exception. This is useful for when
  1715. you expect a pipeline to fail and want to relay a specific message
  1716. to the requester.
  1717. [[fail-options]]
  1718. .Fail Options
  1719. [options="header"]
  1720. |======
  1721. | Name | Required | Default | Description
  1722. | `message` | yes | - | The error message thrown by the processor. Supports <<accessing-template-fields,template snippets>>.
  1723. include::ingest-node-common-processor.asciidoc[]
  1724. |======
  1725. [source,js]
  1726. --------------------------------------------------
  1727. {
  1728. "fail": {
  1729. "if" : "ctx.tags.contains('production') != true",
  1730. "message": "The production tag is not present, found tags: {{tags}}"
  1731. }
  1732. }
  1733. --------------------------------------------------
  1734. // NOTCONSOLE
  1735. [[foreach-processor]]
  1736. === Foreach Processor
  1737. Processes elements in an array of unknown length.
  1738. All processors can operate on elements inside an array, but if all elements of an array need to
  1739. be processed in the same way, defining a processor for each element becomes cumbersome and tricky
  1740. because it is likely that the number of elements in an array is unknown. For this reason the `foreach`
  1741. processor exists. By specifying the field holding array elements and a processor that
  1742. defines what should happen to each element, array fields can easily be preprocessed.
  1743. A processor inside the foreach processor works in the array element context and puts that in the ingest metadata
  1744. under the `_ingest._value` key. If the array element is a json object it holds all immediate fields of that json object.
1745. If the array element is a scalar value, `_ingest._value` just holds that value. Note that if a processor prior to the
  1746. `foreach` processor used `_ingest._value` key then the specified value will not be available to the processor inside
  1747. the `foreach` processor. The `foreach` processor does restore the original value, so that value is available to processors
  1748. after the `foreach` processor.
1749. Note that any other fields from the document are accessible and modifiable like with all other processors. This processor
  1750. just puts the current array element being read into `_ingest._value` ingest metadata attribute, so that it may be
  1751. pre-processed.
  1752. If the `foreach` processor fails to process an element inside the array, and no `on_failure` processor has been specified,
  1753. then it aborts the execution and leaves the array unmodified.
  1754. [[foreach-options]]
  1755. .Foreach Options
  1756. [options="header"]
  1757. |======
  1758. | Name | Required | Default | Description
  1759. | `field` | yes | - | The array field
  1760. | `processor` | yes | - | The processor to execute against each field
  1761. | `ignore_missing` | no | false | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1762. include::ingest-node-common-processor.asciidoc[]
  1763. |======
  1764. Assume the following document:
  1765. [source,js]
  1766. --------------------------------------------------
  1767. {
  1768. "values" : ["foo", "bar", "baz"]
  1769. }
  1770. --------------------------------------------------
  1771. // NOTCONSOLE
  1772. When this `foreach` processor operates on this sample document:
  1773. [source,js]
  1774. --------------------------------------------------
  1775. {
  1776. "foreach" : {
  1777. "field" : "values",
  1778. "processor" : {
  1779. "uppercase" : {
  1780. "field" : "_ingest._value"
  1781. }
  1782. }
  1783. }
  1784. }
  1785. --------------------------------------------------
  1786. // NOTCONSOLE
  1787. Then the document will look like this after preprocessing:
  1788. [source,js]
  1789. --------------------------------------------------
  1790. {
  1791. "values" : ["FOO", "BAR", "BAZ"]
  1792. }
  1793. --------------------------------------------------
  1794. // NOTCONSOLE
  1795. Let's take a look at another example:
  1796. [source,js]
  1797. --------------------------------------------------
  1798. {
  1799. "persons" : [
  1800. {
  1801. "id" : "1",
  1802. "name" : "John Doe"
  1803. },
  1804. {
  1805. "id" : "2",
  1806. "name" : "Jane Doe"
  1807. }
  1808. ]
  1809. }
  1810. --------------------------------------------------
  1811. // NOTCONSOLE
  1812. In this case, the `id` field needs to be removed,
  1813. so the following `foreach` processor is used:
  1814. [source,js]
  1815. --------------------------------------------------
  1816. {
  1817. "foreach" : {
  1818. "field" : "persons",
  1819. "processor" : {
  1820. "remove" : {
  1821. "field" : "_ingest._value.id"
  1822. }
  1823. }
  1824. }
  1825. }
  1826. --------------------------------------------------
  1827. // NOTCONSOLE
  1828. After preprocessing the result is:
  1829. [source,js]
  1830. --------------------------------------------------
  1831. {
  1832. "persons" : [
  1833. {
  1834. "name" : "John Doe"
  1835. },
  1836. {
  1837. "name" : "Jane Doe"
  1838. }
  1839. ]
  1840. }
  1841. --------------------------------------------------
  1842. // NOTCONSOLE
1843. The wrapped processor can have an `on_failure` definition.
  1844. For example, the `id` field may not exist on all person objects.
  1845. Instead of failing the index request, you can use an `on_failure`
  1846. block to send the document to the 'failure_index' index for later inspection:
  1847. [source,js]
  1848. --------------------------------------------------
  1849. {
  1850. "foreach" : {
  1851. "field" : "persons",
  1852. "processor" : {
  1853. "remove" : {
1854. "field" : "_ingest._value.id",
  1855. "on_failure" : [
  1856. {
  1857. "set" : {
1858. "field" : "_index",
1859. "value" : "failure_index"
  1860. }
  1861. }
  1862. ]
  1863. }
  1864. }
  1865. }
  1866. }
  1867. --------------------------------------------------
  1868. // NOTCONSOLE
  1869. In this example, if the `remove` processor does fail, then
  1870. the array elements that have been processed thus far will
  1871. be updated.
  1872. Another advanced example can be found in the {plugins}/ingest-attachment-with-arrays.html[attachment processor documentation].
  1873. [[grok-processor]]
  1874. === Grok Processor
  1875. Extracts structured fields out of a single text field within a document. You choose which field to
  1876. extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular
  1877. expression that supports aliased expressions that can be reused.
  1878. This tool is perfect for syslog logs, apache and other webserver logs, mysql logs, and in general, any log format
  1879. that is generally written for humans and not computer consumption.
  1880. This processor comes packaged with many
  1881. https://github.com/elastic/elasticsearch/blob/{branch}/libs/grok/src/main/resources/patterns[reusable patterns].
  1882. If you need help building patterns to match your logs, you will find the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] tool quite useful! The Grok Debugger is an {xpack} feature under the Basic License and is therefore *free to use*. The Grok Constructor at <http://grokconstructor.appspot.com/> is also a useful tool.
  1883. [[grok-basics]]
  1884. ==== Grok Basics
  1885. Grok sits on top of regular expressions, so any regular expressions are valid in grok as well.
  1886. The regular expression library is Oniguruma, and you can see the full supported regexp syntax
1887. https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma site].
  1888. Grok works by leveraging this regular expression language to allow naming existing patterns and combining them into more
  1889. complex patterns that match your fields.
  1890. The syntax for reusing a grok pattern comes in three forms: `%{SYNTAX:SEMANTIC}`, `%{SYNTAX}`, `%{SYNTAX:SEMANTIC:TYPE}`.
  1891. The `SYNTAX` is the name of the pattern that will match your text. For example, `3.44` will be matched by the `NUMBER`
  1892. pattern and `55.3.244.1` will be matched by the `IP` pattern. The syntax is how you match. `NUMBER` and `IP` are both
  1893. patterns that are provided within the default patterns set.
  1894. The `SEMANTIC` is the identifier you give to the piece of text being matched. For example, `3.44` could be the
  1895. duration of an event, so you could call it simply `duration`. Further, a string `55.3.244.1` might identify
  1896. the `client` making a request.
  1897. The `TYPE` is the type you wish to cast your named field. `int`, `long`, `double`, `float` and `boolean` are supported types for coercion.
  1898. For example, you might want to match the following text:
  1899. [source,txt]
  1900. --------------------------------------------------
  1901. 3.44 55.3.244.1
  1902. --------------------------------------------------
  1903. You may know that the message in the example is a number followed by an IP address. You can match this text by using the following
  1904. Grok expression.
  1905. [source,txt]
  1906. --------------------------------------------------
  1907. %{NUMBER:duration} %{IP:client}
  1908. --------------------------------------------------
  1909. [[using-grok]]
  1910. ==== Using the Grok Processor in a Pipeline
  1911. [[grok-options]]
  1912. .Grok Options
  1913. [options="header"]
  1914. |======
  1915. | Name | Required | Default | Description
  1916. | `field` | yes | - | The field to use for grok expression parsing
1917. | `patterns` | yes | - | An ordered list of grok expressions to match and extract named captures with. Returns on the first expression in the list that matches.
  1918. | `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
  1919. | `trace_match` | no | false | when true, `_ingest._grok_match_index` will be inserted into your matched document's metadata with the index into the pattern found in `patterns` that matched.
  1920. | `ignore_missing` | no | false | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  1921. include::ingest-node-common-processor.asciidoc[]
  1922. |======
  1923. Here is an example of using the provided patterns to extract out and name structured fields from a string field in
  1924. a document.
  1925. [source,js]
  1926. --------------------------------------------------
  1927. {
  1928. "message": "55.3.244.1 GET /index.html 15824 0.043"
  1929. }
  1930. --------------------------------------------------
  1931. // NOTCONSOLE
  1932. The pattern for this could be:
  1933. [source,txt]
  1934. --------------------------------------------------
  1935. %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
  1936. --------------------------------------------------
  1937. Here is an example pipeline for processing the above document by using Grok:
  1938. [source,js]
  1939. --------------------------------------------------
  1940. {
  1941. "description" : "...",
  1942. "processors": [
  1943. {
  1944. "grok": {
  1945. "field": "message",
  1946. "patterns": ["%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"]
  1947. }
  1948. }
  1949. ]
  1950. }
  1951. --------------------------------------------------
  1952. // NOTCONSOLE
  1953. This pipeline will insert these named captures as new fields within the document, like so:
  1954. [source,js]
  1955. --------------------------------------------------
  1956. {
  1957. "message": "55.3.244.1 GET /index.html 15824 0.043",
  1958. "client": "55.3.244.1",
  1959. "method": "GET",
  1960. "request": "/index.html",
  1961. "bytes": 15824,
  1962. "duration": "0.043"
  1963. }
  1964. --------------------------------------------------
  1965. // NOTCONSOLE
  1966. [[custom-patterns]]
  1967. ==== Custom Patterns
1968. The Grok processor comes pre-packaged with a base set of patterns. These patterns may not always have
1969. what you are looking for. Patterns have a very basic format. Each entry has a name and the pattern itself.
  1970. You can add your own patterns to a processor definition under the `pattern_definitions` option.
  1971. Here is an example of a pipeline specifying custom pattern definitions:
  1972. [source,js]
  1973. --------------------------------------------------
  1974. {
  1975. "description" : "...",
  1976. "processors": [
  1977. {
  1978. "grok": {
  1979. "field": "message",
  1980. "patterns": ["my %{FAVORITE_DOG:dog} is colored %{RGB:color}"],
  1981. "pattern_definitions" : {
  1982. "FAVORITE_DOG" : "beagle",
  1983. "RGB" : "RED|GREEN|BLUE"
  1984. }
  1985. }
  1986. }
  1987. ]
  1988. }
  1989. --------------------------------------------------
  1990. // NOTCONSOLE
  1991. [[trace-match]]
  1992. ==== Providing Multiple Match Patterns
  1993. Sometimes one pattern is not enough to capture the potential structure of a field. Let's assume we
  1994. want to match all messages that contain your favorite pet breeds of either cats or dogs. One way to accomplish
  1995. this is to provide two distinct patterns that can be matched, instead of one really complicated expression capturing
  1996. the same `or` behavior.
  1997. Here is an example of such a configuration executed against the simulate API:
  1998. [source,js]
  1999. --------------------------------------------------
  2000. POST _ingest/pipeline/_simulate
  2001. {
  2002. "pipeline": {
  2003. "description" : "parse multiple patterns",
  2004. "processors": [
  2005. {
  2006. "grok": {
  2007. "field": "message",
  2008. "patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
  2009. "pattern_definitions" : {
  2010. "FAVORITE_DOG" : "beagle",
  2011. "FAVORITE_CAT" : "burmese"
  2012. }
  2013. }
  2014. }
  2015. ]
  2016. },
  2017. "docs":[
  2018. {
  2019. "_source": {
  2020. "message": "I love burmese cats!"
  2021. }
  2022. }
  2023. ]
  2024. }
  2025. --------------------------------------------------
  2026. // CONSOLE
  2027. response:
  2028. [source,js]
  2029. --------------------------------------------------
  2030. {
  2031. "docs": [
  2032. {
  2033. "doc": {
  2034. "_type": "_type",
  2035. "_index": "_index",
  2036. "_id": "_id",
  2037. "_source": {
  2038. "message": "I love burmese cats!",
  2039. "pet": "burmese"
  2040. },
  2041. "_ingest": {
  2042. "timestamp": "2016-11-08T19:43:03.850+0000"
  2043. }
  2044. }
  2045. }
  2046. ]
  2047. }
  2048. --------------------------------------------------
  2049. // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
  2050. Both patterns will set the field `pet` with the appropriate match, but what if we want to trace which of our
  2051. patterns matched and populated our fields? We can do this with the `trace_match` parameter. Here is the output of
  2052. that same pipeline, but with `"trace_match": true` configured:
  2053. ////
  2054. Hidden setup for example:
  2055. [source,js]
  2056. --------------------------------------------------
  2057. POST _ingest/pipeline/_simulate
  2058. {
  2059. "pipeline": {
  2060. "description" : "parse multiple patterns",
  2061. "processors": [
  2062. {
  2063. "grok": {
  2064. "field": "message",
  2065. "patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
  2066. "trace_match": true,
  2067. "pattern_definitions" : {
  2068. "FAVORITE_DOG" : "beagle",
  2069. "FAVORITE_CAT" : "burmese"
  2070. }
  2071. }
  2072. }
  2073. ]
  2074. },
  2075. "docs":[
  2076. {
  2077. "_source": {
  2078. "message": "I love burmese cats!"
  2079. }
  2080. }
  2081. ]
  2082. }
  2083. --------------------------------------------------
  2084. // CONSOLE
  2085. ////
  2086. [source,js]
  2087. --------------------------------------------------
  2088. {
  2089. "docs": [
  2090. {
  2091. "doc": {
  2092. "_type": "_type",
  2093. "_index": "_index",
  2094. "_id": "_id",
  2095. "_source": {
  2096. "message": "I love burmese cats!",
  2097. "pet": "burmese"
  2098. },
  2099. "_ingest": {
  2100. "_grok_match_index": "1",
  2101. "timestamp": "2016-11-08T19:43:03.850+0000"
  2102. }
  2103. }
  2104. }
  2105. ]
  2106. }
  2107. --------------------------------------------------
  2108. // TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
  2109. In the above response, you can see that the index of the pattern that matched was `"1"`. This is to say that it was the
  2110. second (index starts at zero) pattern in `patterns` to match.
  2111. This trace metadata enables debugging which of the patterns matched. This information is stored in the ingest
  2112. metadata and will not be indexed.
  2113. [[grok-processor-rest-get]]
  2114. ==== Retrieving patterns from REST endpoint
  2115. The Grok Processor comes packaged with its own REST endpoint for retrieving which patterns the processor is packaged with.
  2116. [source,js]
  2117. --------------------------------------------------
  2118. GET _ingest/processor/grok
  2119. --------------------------------------------------
  2120. // CONSOLE
  2121. The above request will return a response body containing a key-value representation of the built-in patterns dictionary.
  2122. [source,js]
  2123. --------------------------------------------------
  2124. {
  2125. "patterns" : {
  2126. "BACULA_CAPACITY" : "%{INT}{1,3}(,%{INT}{3})*",
  2127. "PATH" : "(?:%{UNIXPATH}|%{WINPATH})",
  2128. ...
  2129.   }
}
  2130. --------------------------------------------------
  2131. // NOTCONSOLE
  2132. This can be useful to reference as the built-in patterns change across versions.
  2133. [[grok-watchdog]]
  2134. ==== Grok watchdog
  2135. Grok expressions that take too long to execute are interrupted and
  2136. the grok processor then fails with an exception. The grok
  2137. processor has a watchdog thread that determines when evaluation of
  2138. a grok expression takes too long and is controlled by the following
  2139. settings:
  2140. [[grok-watchdog-options]]
  2141. .Grok watchdog settings
  2142. [options="header"]
  2143. |======
  2144. | Name | Default | Description
  2145. | `ingest.grok.watchdog.interval` | 1s | How often to check whether there are grok evaluations that take longer than the maximum allowed execution time.
  2146. | `ingest.grok.watchdog.max_execution_time` | 1s | The maximum allowed execution time for a grok expression evaluation.
  2147. |======
  2148. [[gsub-processor]]
  2149. === Gsub Processor
  2150. Converts a string field by applying a regular expression and a replacement.
  2151. If the field is not a string, the processor will throw an exception.
  2152. [[gsub-options]]
  2153. .Gsub Options
  2154. [options="header"]
  2155. |======
  2156. | Name | Required | Default | Description
  2157. | `field` | yes | - | The field to apply the replacement to
  2158. | `pattern` | yes | - | The pattern to be replaced
  2159. | `replacement` | yes | - | The string to replace the matching patterns with
  2160. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  2161. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  2162. include::ingest-node-common-processor.asciidoc[]
  2163. |======
  2164. [source,js]
  2165. --------------------------------------------------
  2166. {
  2167. "gsub": {
  2168. "field": "field1",
  2169. "pattern": "\\.",
  2170. "replacement": "-"
  2171. }
  2172. }
  2173. --------------------------------------------------
  2174. // NOTCONSOLE
  2175. [[join-processor]]
  2176. === Join Processor
  2177. Joins each element of an array into a single string using a separator character between each element.
  2178. Throws an error when the field is not an array.
  2179. [[join-options]]
  2180. .Join Options
  2181. [options="header"]
  2182. |======
  2183. | Name | Required | Default | Description
  2184. | `field` | yes | - | The field to be separated
  2185. | `separator` | yes | - | The separator character
  2186. | `target_field` | no | `field` | The field to assign the joined value to, by default `field` is updated in-place
  2187. include::ingest-node-common-processor.asciidoc[]
  2188. |======
  2189. [source,js]
  2190. --------------------------------------------------
  2191. {
  2192. "join": {
  2193. "field": "joined_array_field",
  2194. "separator": "-"
  2195. }
  2196. }
  2197. --------------------------------------------------
  2198. // NOTCONSOLE
  2199. [[json-processor]]
  2200. === JSON Processor
  2201. Converts a JSON string into a structured JSON object.
  2202. [[json-options]]
  2203. .Json Options
  2204. [options="header"]
  2205. |======
  2206. | Name | Required | Default | Description
  2207. | `field` | yes | - | The field to be parsed
  2208. | `target_field` | no | `field` | The field to insert the converted structured object into
  2209. | `add_to_root` | no | false | Flag that forces the serialized json to be injected into the top level of the document. `target_field` must not be set when this option is chosen.
  2210. include::ingest-node-common-processor.asciidoc[]
  2211. |======
  2212. All JSON-supported types will be parsed (null, boolean, number, array, object, string).
  2213. Suppose you provide this configuration of the `json` processor:
  2214. [source,js]
  2215. --------------------------------------------------
  2216. {
  2217. "json" : {
  2218. "field" : "string_source",
  2219. "target_field" : "json_target"
  2220. }
  2221. }
  2222. --------------------------------------------------
  2223. // NOTCONSOLE
  2224. If the following document is processed:
  2225. [source,js]
  2226. --------------------------------------------------
  2227. {
  2228. "string_source": "{\"foo\": 2000}"
  2229. }
  2230. --------------------------------------------------
  2231. // NOTCONSOLE
  2232. after the `json` processor operates on it, it will look like:
  2233. [source,js]
  2234. --------------------------------------------------
  2235. {
  2236. "string_source": "{\"foo\": 2000}",
  2237. "json_target": {
  2238. "foo": 2000
  2239. }
  2240. }
  2241. --------------------------------------------------
  2242. // NOTCONSOLE
  2243. If the following configuration is provided, omitting the optional `target_field` setting:
  2244. [source,js]
  2245. --------------------------------------------------
  2246. {
  2247. "json" : {
  2248. "field" : "source_and_target"
  2249. }
  2250. }
  2251. --------------------------------------------------
  2252. // NOTCONSOLE
  2253. then after the `json` processor operates on this document:
  2254. [source,js]
  2255. --------------------------------------------------
  2256. {
  2257. "source_and_target": "{\"foo\": 2000}"
  2258. }
  2259. --------------------------------------------------
  2260. // NOTCONSOLE
  2261. it will look like:
  2262. [source,js]
  2263. --------------------------------------------------
  2264. {
  2265. "source_and_target": {
  2266. "foo": 2000
  2267. }
  2268. }
  2269. --------------------------------------------------
  2270. // NOTCONSOLE
  2271. This illustrates that, unless it is explicitly named in the processor configuration, the `target_field`
  2272. is the same field provided in the required `field` configuration.
  2273. [[kv-processor]]
  2274. === KV Processor
  2275. This processor helps automatically parse messages (or specific event fields) which are of the foo=bar variety.
  2276. For example, if you have a log message which contains `ip=1.2.3.4 error=REFUSED`, you can parse those automatically by configuring:
  2277. [source,js]
  2278. --------------------------------------------------
  2279. {
  2280. "kv": {
  2281. "field": "message",
  2282. "field_split": " ",
  2283. "value_split": "="
  2284. }
  2285. }
  2286. --------------------------------------------------
  2287. // NOTCONSOLE
  2288. [[kv-options]]
  2289. .Kv Options
  2290. [options="header"]
  2291. |======
  2292. | Name | Required | Default | Description
  2293. | `field` | yes | - | The field to be parsed
  2294. | `field_split` | yes | - | Regex pattern to use for splitting key-value pairs
  2295. | `value_split` | yes | - | Regex pattern to use for splitting the key from the value within a key-value pair
  2296. | `target_field` | no | `null` | The field to insert the extracted keys into. Defaults to the root of the document
  2297. | `include_keys` | no | `null` | List of keys to filter and insert into document. Defaults to including all keys
  2298. | `exclude_keys` | no | `null` | List of keys to exclude from document
  2299. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  2300. | `prefix` | no | `null` | Prefix to be added to extracted keys
  2301. | `trim_key` | no | `null` | String of characters to trim from extracted keys
  2302. | `trim_value` | no | `null` | String of characters to trim from extracted values
  2303. | `strip_brackets` | no | `false` | If `true` strip brackets `()`, `<>`, `[]` as well as quotes `'` and `"` from extracted values
  2304. include::ingest-node-common-processor.asciidoc[]
  2305. |======
  2306. [[lowercase-processor]]
  2307. === Lowercase Processor
  2308. Converts a string to its lowercase equivalent.
  2309. [[lowercase-options]]
  2310. .Lowercase Options
  2311. [options="header"]
  2312. |======
  2313. | Name | Required | Default | Description
  2314. | `field` | yes | - | The field to make lowercase
  2315. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  2316. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  2317. include::ingest-node-common-processor.asciidoc[]
  2318. |======
  2319. [source,js]
  2320. --------------------------------------------------
  2321. {
  2322. "lowercase": {
  2323. "field": "foo"
  2324. }
  2325. }
  2326. --------------------------------------------------
  2327. // NOTCONSOLE
  2328. [[pipeline-processor]]
  2329. === Pipeline Processor
  2330. Executes another pipeline.
  2331. [[pipeline-options]]
  2332. .Pipeline Options
  2333. [options="header"]
  2334. |======
  2335. | Name | Required | Default | Description
  2336. | `name` | yes | - | The name of the pipeline to execute
  2337. include::ingest-node-common-processor.asciidoc[]
  2338. |======
  2339. [source,js]
  2340. --------------------------------------------------
  2341. {
  2342. "pipeline": {
  2343. "name": "inner-pipeline"
  2344. }
  2345. }
  2346. --------------------------------------------------
  2347. // NOTCONSOLE
  2348. An example of using this processor for nesting pipelines would be:
  2349. Define an inner pipeline:
  2350. [source,js]
  2351. --------------------------------------------------
  2352. PUT _ingest/pipeline/pipelineA
  2353. {
  2354. "description" : "inner pipeline",
  2355. "processors" : [
  2356. {
  2357. "set" : {
  2358. "field": "inner_pipeline_set",
  2359. "value": "inner"
  2360. }
  2361. }
  2362. ]
  2363. }
  2364. --------------------------------------------------
  2365. // CONSOLE
  2366. Define another pipeline that uses the previously defined inner pipeline:
  2367. [source,js]
  2368. --------------------------------------------------
  2369. PUT _ingest/pipeline/pipelineB
  2370. {
  2371. "description" : "outer pipeline",
  2372. "processors" : [
  2373. {
  2374. "pipeline" : {
  2375. "name": "pipelineA"
  2376. }
  2377. },
  2378. {
  2379. "set" : {
  2380. "field": "outer_pipeline_set",
  2381. "value": "outer"
  2382. }
  2383. }
  2384. ]
  2385. }
  2386. --------------------------------------------------
  2387. // CONSOLE
  2388. // TEST[continued]
  2389. Now indexing a document while applying the outer pipeline will see the inner pipeline executed
  2390. from the outer pipeline:
  2391. [source,js]
  2392. --------------------------------------------------
  2393. PUT /myindex/_doc/1?pipeline=pipelineB
  2394. {
  2395. "field": "value"
  2396. }
  2397. --------------------------------------------------
  2398. // CONSOLE
  2399. // TEST[continued]
  2400. Response from the index request:
  2401. [source,js]
  2402. --------------------------------------------------
  2403. {
  2404. "_index": "myindex",
  2405. "_type": "_doc",
  2406. "_id": "1",
  2407. "_version": 1,
  2408. "result": "created",
  2409. "_shards": {
  2410. "total": 2,
  2411. "successful": 1,
  2412. "failed": 0
  2413. },
  2414. "_seq_no": 0,
  2415. "_primary_term": 1
  2416. }
  2417. --------------------------------------------------
  2418. // TESTRESPONSE
  2419. Indexed document:
  2420. [source,js]
  2421. --------------------------------------------------
  2422. {
  2423. "field": "value",
  2424. "inner_pipeline_set": "inner",
  2425. "outer_pipeline_set": "outer"
  2426. }
  2427. --------------------------------------------------
  2428. // NOTCONSOLE
  2429. [[remove-processor]]
  2430. === Remove Processor
  2431. Removes existing fields. If one field doesn't exist, an exception will be thrown.
  2432. [[remove-options]]
  2433. .Remove Options
  2434. [options="header"]
  2435. |======
  2436. | Name | Required | Default | Description
  2437. | `field` | yes | - | Fields to be removed. Supports <<accessing-template-fields,template snippets>>.
  2438. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  2439. include::ingest-node-common-processor.asciidoc[]
  2440. |======
  2441. Here is an example to remove a single field:
  2442. [source,js]
  2443. --------------------------------------------------
  2444. {
  2445. "remove": {
  2446. "field": "user_agent"
  2447. }
  2448. }
  2449. --------------------------------------------------
  2450. // NOTCONSOLE
  2451. To remove multiple fields, you can use the following query:
  2452. [source,js]
  2453. --------------------------------------------------
  2454. {
  2455. "remove": {
  2456. "field": ["user_agent", "url"]
  2457. }
  2458. }
  2459. --------------------------------------------------
  2460. // NOTCONSOLE
  2461. [[rename-processor]]
  2462. === Rename Processor
  2463. Renames an existing field. If the field doesn't exist or the new name is already used, an exception will be thrown.
  2464. [[rename-options]]
  2465. .Rename Options
  2466. [options="header"]
  2467. |======
  2468. | Name | Required | Default | Description
  2469. | `field` | yes | - | The field to be renamed. Supports <<accessing-template-fields,template snippets>>.
  2470. | `target_field` | yes | - | The new name of the field. Supports <<accessing-template-fields,template snippets>>.
  2471. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
  2472. include::ingest-node-common-processor.asciidoc[]
  2473. |======
  2474. [source,js]
  2475. --------------------------------------------------
  2476. {
  2477. "rename": {
  2478. "field": "provider",
  2479. "target_field": "cloud.provider"
  2480. }
  2481. }
  2482. --------------------------------------------------
  2483. // NOTCONSOLE
  2484. [[script-processor]]
  2485. === Script Processor
  2486. Allows inline and stored scripts to be executed within ingest pipelines.
  2487. See <<modules-scripting-using, How to use scripts>> to learn more about writing scripts. The Script Processor
  2488. leverages caching of compiled scripts for improved performance. Since the
  2489. script specified within the processor is potentially re-compiled per document, it is important
  2490. to understand how script caching works. To learn more about
  2491. caching see <<modules-scripting-using-caching, Script Caching>>.
  2492. [[script-options]]
  2493. .Script Options
  2494. [options="header"]
  2495. |======
  2496. | Name | Required | Default | Description
  2497. | `lang` | no | "painless" | The scripting language
  2498. | `id` | no | - | The stored script id to refer to
  2499. | `source` | no | - | An inline script to be executed
  2500. | `params` | no | - | Script Parameters
  2501. include::ingest-node-common-processor.asciidoc[]
  2502. |======
  2503. One of `id` or `source` options must be provided in order to properly reference a script to execute.
  2504. You can access the current ingest document from within the script context by using the `ctx` variable.
  2505. The following example sets a new field called `field_a_plus_b_times_c` to be the sum of two existing
  2506. numeric fields `field_a` and `field_b` multiplied by the parameter param_c:
  2507. [source,js]
  2508. --------------------------------------------------
  2509. {
  2510. "script": {
  2511. "lang": "painless",
  2512. "source": "ctx.field_a_plus_b_times_c = (ctx.field_a + ctx.field_b) * params.param_c",
  2513. "params": {
  2514. "param_c": 10
  2515. }
  2516. }
  2517. }
  2518. --------------------------------------------------
  2519. // NOTCONSOLE
  2520. It is possible to use the Script Processor to manipulate document metadata like `_index` and `_type` during
  2521. ingestion. Here is an example of an Ingest Pipeline that sets the index to `my_index` and the type to `_doc`, no
  2522. matter what was provided in the original index request:
  2523. [source,js]
  2524. --------------------------------------------------
  2525. PUT _ingest/pipeline/my_index
  2526. {
  2527. "description": "use index:my_index and type:_doc",
  2528. "processors": [
  2529. {
  2530. "script": {
  2531. "source": """
  2532. ctx._index = 'my_index';
  2533. ctx._type = '_doc';
  2534. """
  2535. }
  2536. }
  2537. ]
  2538. }
  2539. --------------------------------------------------
  2540. // CONSOLE
  2541. Using the above pipeline, we can attempt to index a document into the `any_index` index.
  2542. [source,js]
  2543. --------------------------------------------------
  2544. PUT any_index/_doc/1?pipeline=my_index
  2545. {
  2546. "message": "text"
  2547. }
  2548. --------------------------------------------------
  2549. // CONSOLE
  2550. // TEST[continued]
  2551. The response from the above index request:
  2552. [source,js]
  2553. --------------------------------------------------
  2554. {
  2555. "_index": "my_index",
  2556. "_type": "_doc",
  2557. "_id": "1",
  2558. "_version": 1,
  2559. "result": "created",
  2560. "_shards": {
  2561. "total": 2,
  2562. "successful": 1,
  2563. "failed": 0
  2564. },
  2565. "_seq_no": 0,
  2566. "_primary_term": 1
  2567. }
  2568. --------------------------------------------------
  2569. // TESTRESPONSE
  2570. In the above response, you can see that our document was actually indexed into `my_index` instead of
  2571. `any_index`. This type of manipulation is often convenient in pipelines that have various branches of transformation,
  2572. and depending on the progress made, indexed into different indices.
  2573. [[set-processor]]
  2574. === Set Processor
  2575. Sets one field and associates it with the specified value. If the field already exists,
  2576. its value will be replaced with the provided one.
  2577. [[set-options]]
  2578. .Set Options
  2579. [options="header"]
  2580. |======
  2581. | Name | Required | Default | Description
  2582. | `field` | yes | - | The field to insert, upsert, or update. Supports <<accessing-template-fields,template snippets>>.
  2583. | `value` | yes | - | The value to be set for the field. Supports <<accessing-template-fields,template snippets>>.
  2584. | `override` | no | true | If `true`, the processor updates fields that have a pre-existing non-null value. When set to `false`, such fields will not be touched.
  2585. include::ingest-node-common-processor.asciidoc[]
  2586. |======
  2587. [source,js]
  2588. --------------------------------------------------
  2589. {
  2590. "set": {
  2591. "field": "host.os.name",
  2592. "value": "{{os}}"
  2593. }
  2594. }
  2595. --------------------------------------------------
  2596. // NOTCONSOLE
  2597. [[ingest-node-set-security-user-processor]]
  2598. === Set Security User Processor
  2599. Sets user-related details (such as `username`, `roles`, `email`, `full_name`
  2600. and `metadata`) from the current
  2601. authenticated user to the current document by pre-processing the ingest.
  2602. IMPORTANT: Requires an authenticated user for the index request.
  2603. [[set-security-user-options]]
  2604. .Set Security User Options
  2605. [options="header"]
  2606. |======
  2607. | Name | Required | Default | Description
  2608. | `field` | yes | - | The field to store the user information into.
  2609. | `properties` | no | [`username`, `roles`, `email`, `full_name`, `metadata`] | Controls what user related properties are added to the `field`.
  2610. include::ingest-node-common-processor.asciidoc[]
  2611. |======
  2612. The following example adds all user details for the current authenticated user
  2613. to the `user` field for all documents that are processed by this pipeline:
  2614. [source,js]
  2615. --------------------------------------------------
  2616. {
  2617. "processors" : [
  2618. {
  2619. "set_security_user": {
  2620. "field": "user"
  2621. }
  2622. }
  2623. ]
  2624. }
  2625. --------------------------------------------------
  2626. // NOTCONSOLE
  2627. [[split-processor]]
  2628. === Split Processor
  2629. Splits a field into an array using a separator character. Only works on string fields.
  2630. [[split-options]]
  2631. .Split Options
  2632. [options="header"]
  2633. |======
  2634. | Name | Required | Default | Description
  2635. | `field` | yes | - | The field to split
  2636. | `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
  2637. | `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
  2638. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
  2639. include::ingest-node-common-processor.asciidoc[]
  2640. |======
  2641. [source,js]
  2642. --------------------------------------------------
  2643. {
  2644. "split": {
  2645. "field": "my_field",
  2646. "separator": "\\s+" <1>
  2647. }
  2648. }
  2649. --------------------------------------------------
  2650. // NOTCONSOLE
  2651. <1> Treat all consecutive whitespace characters as a single separator
  2652. [[sort-processor]]
  2653. === Sort Processor
  2654. Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted
  2655. numerically, while arrays of strings or heterogeneous arrays of strings + numbers will be sorted lexicographically.
  2656. Throws an error when the field is not an array.
  2657. [[sort-options]]
  2658. .Sort Options
  2659. [options="header"]
  2660. |======
  2661. | Name | Required | Default | Description
  2662. | `field` | yes | - | The field to be sorted
  2663. | `order` | no | `"asc"` | The sort order to use. Accepts `"asc"` or `"desc"`.
  2664. | `target_field` | no | `field` | The field to assign the sorted value to, by default `field` is updated in-place
  2665. include::ingest-node-common-processor.asciidoc[]
  2666. |======
  2667. [source,js]
  2668. --------------------------------------------------
  2669. {
  2670. "sort": {
  2671. "field": "array_field_to_sort",
  2672. "order": "desc"
  2673. }
  2674. }
  2675. --------------------------------------------------
  2676. // NOTCONSOLE
  2677. [[trim-processor]]
  2678. === Trim Processor
  2679. Trims whitespace from field.
  2680. NOTE: This only works on leading and trailing whitespace.
  2681. [[trim-options]]
  2682. .Trim Options
  2683. [options="header"]
  2684. |======
  2685. | Name | Required | Default | Description
  2686. | `field` | yes | - | The string-valued field to trim whitespace from
  2687. | `target_field` | no | `field` | The field to assign the trimmed value to, by default `field` is updated in-place
  2688. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
  2689. include::ingest-node-common-processor.asciidoc[]
  2690. |======
  2691. [source,js]
  2692. --------------------------------------------------
  2693. {
  2694. "trim": {
  2695. "field": "foo"
  2696. }
  2697. }
  2698. --------------------------------------------------
  2699. // NOTCONSOLE
  2700. [[uppercase-processor]]
  2701. === Uppercase Processor
  2702. Converts a string to its uppercase equivalent.
  2703. [[uppercase-options]]
  2704. .Uppercase Options
  2705. [options="header"]
  2706. |======
  2707. | Name | Required | Default | Description
  2708. | `field` | yes | - | The field to make uppercase
  2709. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  2710. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  2711. include::ingest-node-common-processor.asciidoc[]
  2712. |======
  2713. [source,js]
  2714. --------------------------------------------------
  2715. {
  2716. "uppercase": {
  2717. "field": "foo"
  2718. }
  2719. }
  2720. --------------------------------------------------
  2721. // NOTCONSOLE
  2722. [[urldecode-processor]]
  2723. === URL Decode Processor
  2724. URL-decodes a string.
  2725. [[urldecode-options]]
  2726. .URL Decode Options
  2727. [options="header"]
  2728. |======
  2729. | Name | Required | Default | Description
  2730. | `field` | yes | - | The field to decode
  2731. | `target_field` | no | `field` | The field to assign the converted value to, by default `field` is updated in-place
  2732. | `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
  2733. include::ingest-node-common-processor.asciidoc[]
  2734. |======
  2735. [source,js]
  2736. --------------------------------------------------
  2737. {
  2738. "urldecode": {
  2739. "field": "my_url_to_decode"
  2740. }
  2741. }
  2742. --------------------------------------------------
  2743. // NOTCONSOLE