[[analysis-lang-analyzer]]
=== Language Analyzers

A set of analyzers aimed at analyzing specific language text. The
following types are supported:
<<arabic-analyzer,`arabic`>>,
<<armenian-analyzer,`armenian`>>,
<<basque-analyzer,`basque`>>,
<<brazilian-analyzer,`brazilian`>>,
<<bulgarian-analyzer,`bulgarian`>>,
<<catalan-analyzer,`catalan`>>,
<<cjk-analyzer,`cjk`>>,
<<czech-analyzer,`czech`>>,
<<danish-analyzer,`danish`>>,
<<dutch-analyzer,`dutch`>>,
<<english-analyzer,`english`>>,
<<finnish-analyzer,`finnish`>>,
<<french-analyzer,`french`>>,
<<galician-analyzer,`galician`>>,
<<german-analyzer,`german`>>,
<<greek-analyzer,`greek`>>,
<<hindi-analyzer,`hindi`>>,
<<hungarian-analyzer,`hungarian`>>,
<<indonesian-analyzer,`indonesian`>>,
<<irish-analyzer,`irish`>>,
<<italian-analyzer,`italian`>>,
<<latvian-analyzer,`latvian`>>,
<<lithuanian-analyzer,`lithuanian`>>,
<<norwegian-analyzer,`norwegian`>>,
<<persian-analyzer,`persian`>>,
<<portuguese-analyzer,`portuguese`>>,
<<romanian-analyzer,`romanian`>>,
<<russian-analyzer,`russian`>>,
<<sorani-analyzer,`sorani`>>,
<<spanish-analyzer,`spanish`>>,
<<swedish-analyzer,`swedish`>>,
<<turkish-analyzer,`turkish`>>,
<<thai-analyzer,`thai`>>.

==== Configuring language analyzers

===== Stopwords

All analyzers support setting custom `stopwords` either internally in
the config, or by using an external stopwords file by setting
`stopwords_path`. Check <<analysis-stop-analyzer,Stop Analyzer>> for
more details.
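
For example, the `english` analyzer can be declared in the index settings with
its own stopword list. The index name and the word list below are only
placeholders for this illustration:

[source,js]
----------------------------------------------------
PUT /stopword_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_english": {
          "type": "english",
          "stopwords": [ "a", "an", "the" ] <1>
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> An inline list of placeholder stopwords. Alternatively, `stopwords_path`
    can point to a stopwords file in the config directory.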

===== Excluding words from stemming

The `stem_exclusion` parameter allows you to specify an array
of lowercase words that should not be stemmed. Internally, this
functionality is implemented by adding the
<<analysis-keyword-marker-tokenfilter,`keyword_marker` token filter>>
with the `keywords` set to the value of the `stem_exclusion` parameter.

The following analyzers support setting a custom `stem_exclusion` list:
`arabic`, `armenian`, `basque`, `bulgarian`, `catalan`, `czech`,
`dutch`, `english`, `finnish`, `french`, `galician`,
`german`, `hindi`, `hungarian`, `indonesian`, `irish`, `italian`, `latvian`,
`lithuanian`, `norwegian`, `portuguese`, `romanian`, `russian`, `sorani`,
`spanish`, `swedish`, `turkish`.
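
As a sketch of how this looks in practice, the `english` analyzer can be
configured so that a handful of lowercase terms bypass the stemmer. The index
name and the excluded words are only placeholders:

[source,js]
----------------------------------------------------
PUT /stem_exclusion_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_english": {
          "type": "english",
          "stem_exclusion": [ "organization", "organizations" ] <1>
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> Placeholder words that will be indexed as-is instead of being stemmed.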

==== Reimplementing language analyzers

The built-in language analyzers can be reimplemented as `custom` analyzers
(as described below) in order to customize their behaviour.

NOTE: If you do not intend to exclude words from being stemmed (the
equivalent of the `stem_exclusion` parameter above), then you should remove
the `keyword_marker` token filter from the custom analyzer configuration.

[[arabic-analyzer]]
===== `arabic` analyzer

The `arabic` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /arabic_example
{
  "settings": {
    "analysis": {
      "filter": {
        "arabic_stop": {
          "type": "stop",
          "stopwords": "_arabic_" <1>
        },
        "arabic_keywords": {
          "type": "keyword_marker",
          "keywords": ["مثال"] <2>
        },
        "arabic_stemmer": {
          "type": "stemmer",
          "language": "arabic"
        }
      },
      "analyzer": {
        "arabic": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "arabic_stop",
            "arabic_normalization",
            "arabic_keywords",
            "arabic_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[armenian-analyzer]]
===== `armenian` analyzer

The `armenian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /armenian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "armenian_stop": {
          "type": "stop",
          "stopwords": "_armenian_" <1>
        },
        "armenian_keywords": {
          "type": "keyword_marker",
          "keywords": ["օրինակ"] <2>
        },
        "armenian_stemmer": {
          "type": "stemmer",
          "language": "armenian"
        }
      },
      "analyzer": {
        "armenian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "armenian_stop",
            "armenian_keywords",
            "armenian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[basque-analyzer]]
===== `basque` analyzer

The `basque` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /basque_example
{
  "settings": {
    "analysis": {
      "filter": {
        "basque_stop": {
          "type": "stop",
          "stopwords": "_basque_" <1>
        },
        "basque_keywords": {
          "type": "keyword_marker",
          "keywords": ["Adibidez"] <2>
        },
        "basque_stemmer": {
          "type": "stemmer",
          "language": "basque"
        }
      },
      "analyzer": {
        "basque": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "basque_stop",
            "basque_keywords",
            "basque_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[brazilian-analyzer]]
===== `brazilian` analyzer

The `brazilian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /brazilian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_" <1>
        },
        "brazilian_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplo"] <2>
        },
        "brazilian_stemmer": {
          "type": "stemmer",
          "language": "brazilian"
        }
      },
      "analyzer": {
        "brazilian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "brazilian_stop",
            "brazilian_keywords",
            "brazilian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[bulgarian-analyzer]]
===== `bulgarian` analyzer

The `bulgarian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /bulgarian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "bulgarian_stop": {
          "type": "stop",
          "stopwords": "_bulgarian_" <1>
        },
        "bulgarian_keywords": {
          "type": "keyword_marker",
          "keywords": ["пример"] <2>
        },
        "bulgarian_stemmer": {
          "type": "stemmer",
          "language": "bulgarian"
        }
      },
      "analyzer": {
        "bulgarian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "bulgarian_stop",
            "bulgarian_keywords",
            "bulgarian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[catalan-analyzer]]
===== `catalan` analyzer

The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /catalan_example
{
  "settings": {
    "analysis": {
      "filter": {
        "catalan_elision": {
          "type": "elision",
          "articles": [ "d", "l", "m", "n", "s", "t" ]
        },
        "catalan_stop": {
          "type": "stop",
          "stopwords": "_catalan_" <1>
        },
        "catalan_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemple"] <2>
        },
        "catalan_stemmer": {
          "type": "stemmer",
          "language": "catalan"
        }
      },
      "analyzer": {
        "catalan": {
          "tokenizer": "standard",
          "filter": [
            "catalan_elision",
            "lowercase",
            "catalan_stop",
            "catalan_keywords",
            "catalan_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[cjk-analyzer]]
===== `cjk` analyzer

The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /cjk_example
{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_" <1>
        }
      },
      "analyzer": {
        "cjk": {
          "tokenizer": "standard",
          "filter": [
            "cjk_width",
            "lowercase",
            "cjk_bigram",
            "english_stop"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.

[[czech-analyzer]]
===== `czech` analyzer

The `czech` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /czech_example
{
  "settings": {
    "analysis": {
      "filter": {
        "czech_stop": {
          "type": "stop",
          "stopwords": "_czech_" <1>
        },
        "czech_keywords": {
          "type": "keyword_marker",
          "keywords": ["příklad"] <2>
        },
        "czech_stemmer": {
          "type": "stemmer",
          "language": "czech"
        }
      },
      "analyzer": {
        "czech": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "czech_stop",
            "czech_keywords",
            "czech_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[danish-analyzer]]
===== `danish` analyzer

The `danish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /danish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "danish_stop": {
          "type": "stop",
          "stopwords": "_danish_" <1>
        },
        "danish_keywords": {
          "type": "keyword_marker",
          "keywords": ["eksempel"] <2>
        },
        "danish_stemmer": {
          "type": "stemmer",
          "language": "danish"
        }
      },
      "analyzer": {
        "danish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "danish_stop",
            "danish_keywords",
            "danish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[dutch-analyzer]]
===== `dutch` analyzer

The `dutch` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /dutch_example
{
  "settings": {
    "analysis": {
      "filter": {
        "dutch_stop": {
          "type": "stop",
          "stopwords": "_dutch_" <1>
        },
        "dutch_keywords": {
          "type": "keyword_marker",
          "keywords": ["voorbeeld"] <2>
        },
        "dutch_stemmer": {
          "type": "stemmer",
          "language": "dutch"
        },
        "dutch_override": {
          "type": "stemmer_override",
          "rules": [
            "fiets=>fiets",
            "bromfiets=>bromfiets",
            "ei=>eier",
            "kind=>kinder"
          ]
        }
      },
      "analyzer": {
        "dutch": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "dutch_stop",
            "dutch_keywords",
            "dutch_override",
            "dutch_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[english-analyzer]]
===== `english` analyzer

The `english` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /english_example
{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_" <1>
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": ["example"] <2>
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        }
      },
      "analyzer": {
        "english": {
          "tokenizer": "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.
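
A reimplemented analyzer can be checked by running text through it with the
`_analyze` API. As a quick sanity check against the index created above (the
sample sentence is arbitrary), stopwords should be dropped and the remaining
terms lowercased and stemmed:

[source,js]
----------------------------------------------------
POST /english_example/_analyze
{
  "analyzer": "english",
  "text": "The QUICK brown foxes jumped over the lazy dog"
}
----------------------------------------------------
// CONSOLE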

[[finnish-analyzer]]
===== `finnish` analyzer

The `finnish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /finnish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "finnish_stop": {
          "type": "stop",
          "stopwords": "_finnish_" <1>
        },
        "finnish_keywords": {
          "type": "keyword_marker",
          "keywords": ["esimerkki"] <2>
        },
        "finnish_stemmer": {
          "type": "stemmer",
          "language": "finnish"
        }
      },
      "analyzer": {
        "finnish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "finnish_stop",
            "finnish_keywords",
            "finnish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[french-analyzer]]
===== `french` analyzer

The `french` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /french_example
{
  "settings": {
    "analysis": {
      "filter": {
        "french_elision": {
          "type": "elision",
          "articles_case": true,
          "articles": [
            "l", "m", "t", "qu", "n", "s",
            "j", "d", "c", "jusqu", "quoiqu",
            "lorsqu", "puisqu"
          ]
        },
        "french_stop": {
          "type": "stop",
          "stopwords": "_french_" <1>
        },
        "french_keywords": {
          "type": "keyword_marker",
          "keywords": ["Exemple"] <2>
        },
        "french_stemmer": {
          "type": "stemmer",
          "language": "light_french"
        }
      },
      "analyzer": {
        "french": {
          "tokenizer": "standard",
          "filter": [
            "french_elision",
            "lowercase",
            "french_stop",
            "french_keywords",
            "french_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[galician-analyzer]]
===== `galician` analyzer

The `galician` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /galician_example
{
  "settings": {
    "analysis": {
      "filter": {
        "galician_stop": {
          "type": "stop",
          "stopwords": "_galician_" <1>
        },
        "galician_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplo"] <2>
        },
        "galician_stemmer": {
          "type": "stemmer",
          "language": "galician"
        }
      },
      "analyzer": {
        "galician": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "galician_stop",
            "galician_keywords",
            "galician_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[german-analyzer]]
===== `german` analyzer

The `german` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /german_example
{
  "settings": {
    "analysis": {
      "filter": {
        "german_stop": {
          "type": "stop",
          "stopwords": "_german_" <1>
        },
        "german_keywords": {
          "type": "keyword_marker",
          "keywords": ["Beispiel"] <2>
        },
        "german_stemmer": {
          "type": "stemmer",
          "language": "light_german"
        }
      },
      "analyzer": {
        "german": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "german_stop",
            "german_keywords",
            "german_normalization",
            "german_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[greek-analyzer]]
===== `greek` analyzer

The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /greek_example
{
  "settings": {
    "analysis": {
      "filter": {
        "greek_stop": {
          "type": "stop",
          "stopwords": "_greek_" <1>
        },
        "greek_lowercase": {
          "type": "lowercase",
          "language": "greek"
        },
        "greek_keywords": {
          "type": "keyword_marker",
          "keywords": ["παράδειγμα"] <2>
        },
        "greek_stemmer": {
          "type": "stemmer",
          "language": "greek"
        }
      },
      "analyzer": {
        "greek": {
          "tokenizer": "standard",
          "filter": [
            "greek_lowercase",
            "greek_stop",
            "greek_keywords",
            "greek_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[hindi-analyzer]]
===== `hindi` analyzer

The `hindi` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /hindi_example
{
  "settings": {
    "analysis": {
      "filter": {
        "hindi_stop": {
          "type": "stop",
          "stopwords": "_hindi_" <1>
        },
        "hindi_keywords": {
          "type": "keyword_marker",
          "keywords": ["उदाहरण"] <2>
        },
        "hindi_stemmer": {
          "type": "stemmer",
          "language": "hindi"
        }
      },
      "analyzer": {
        "hindi": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "indic_normalization",
            "hindi_normalization",
            "hindi_stop",
            "hindi_keywords",
            "hindi_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[hungarian-analyzer]]
===== `hungarian` analyzer

The `hungarian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /hungarian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "hungarian_stop": {
          "type": "stop",
          "stopwords": "_hungarian_" <1>
        },
        "hungarian_keywords": {
          "type": "keyword_marker",
          "keywords": ["példa"] <2>
        },
        "hungarian_stemmer": {
          "type": "stemmer",
          "language": "hungarian"
        }
      },
      "analyzer": {
        "hungarian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "hungarian_stop",
            "hungarian_keywords",
            "hungarian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[indonesian-analyzer]]
===== `indonesian` analyzer

The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /indonesian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "indonesian_stop": {
          "type": "stop",
          "stopwords": "_indonesian_" <1>
        },
        "indonesian_keywords": {
          "type": "keyword_marker",
          "keywords": ["contoh"] <2>
        },
        "indonesian_stemmer": {
          "type": "stemmer",
          "language": "indonesian"
        }
      },
      "analyzer": {
        "indonesian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "indonesian_stop",
            "indonesian_keywords",
            "indonesian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[irish-analyzer]]
===== `irish` analyzer

The `irish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /irish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "irish_elision": {
          "type": "elision",
          "articles": [ "h", "n", "t" ]
        },
        "irish_stop": {
          "type": "stop",
          "stopwords": "_irish_" <1>
        },
        "irish_lowercase": {
          "type": "lowercase",
          "language": "irish"
        },
        "irish_keywords": {
          "type": "keyword_marker",
          "keywords": ["sampla"] <2>
        },
        "irish_stemmer": {
          "type": "stemmer",
          "language": "irish"
        }
      },
      "analyzer": {
        "irish": {
          "tokenizer": "standard",
          "filter": [
            "irish_stop",
            "irish_elision",
            "irish_lowercase",
            "irish_keywords",
            "irish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[italian-analyzer]]
===== `italian` analyzer

The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /italian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "italian_elision": {
          "type": "elision",
          "articles": [
            "c", "l", "all", "dall", "dell",
            "nell", "sull", "coll", "pell",
            "gl", "agl", "dagl", "degl", "negl",
            "sugl", "un", "m", "t", "s", "v", "d"
          ]
        },
        "italian_stop": {
          "type": "stop",
          "stopwords": "_italian_" <1>
        },
        "italian_keywords": {
          "type": "keyword_marker",
          "keywords": ["esempio"] <2>
        },
        "italian_stemmer": {
          "type": "stemmer",
          "language": "light_italian"
        }
      },
      "analyzer": {
        "italian": {
          "tokenizer": "standard",
          "filter": [
            "italian_elision",
            "lowercase",
            "italian_stop",
            "italian_keywords",
            "italian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[latvian-analyzer]]
===== `latvian` analyzer

The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /latvian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "latvian_stop": {
          "type": "stop",
          "stopwords": "_latvian_" <1>
        },
        "latvian_keywords": {
          "type": "keyword_marker",
          "keywords": ["piemērs"] <2>
        },
        "latvian_stemmer": {
          "type": "stemmer",
          "language": "latvian"
        }
      },
      "analyzer": {
        "latvian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "latvian_stop",
            "latvian_keywords",
            "latvian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[lithuanian-analyzer]]
===== `lithuanian` analyzer

The `lithuanian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /lithuanian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "lithuanian_stop": {
          "type": "stop",
          "stopwords": "_lithuanian_" <1>
        },
        "lithuanian_keywords": {
          "type": "keyword_marker",
          "keywords": ["pavyzdys"] <2>
        },
        "lithuanian_stemmer": {
          "type": "stemmer",
          "language": "lithuanian"
        }
      },
      "analyzer": {
        "lithuanian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "lithuanian_stop",
            "lithuanian_keywords",
            "lithuanian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[norwegian-analyzer]]
===== `norwegian` analyzer

The `norwegian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /norwegian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "norwegian_stop": {
          "type": "stop",
          "stopwords": "_norwegian_" <1>
        },
        "norwegian_keywords": {
          "type": "keyword_marker",
          "keywords": ["eksempel"] <2>
        },
        "norwegian_stemmer": {
          "type": "stemmer",
          "language": "norwegian"
        }
      },
      "analyzer": {
        "norwegian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "norwegian_stop",
            "norwegian_keywords",
            "norwegian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[persian-analyzer]]
===== `persian` analyzer

The `persian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /persian_example
{
  "settings": {
    "analysis": {
      "char_filter": {
        "zero_width_spaces": {
          "type": "mapping",
          "mappings": [ "\\u200C=> "] <1>
        }
      },
      "filter": {
        "persian_stop": {
          "type": "stop",
          "stopwords": "_persian_" <2>
        }
      },
      "analyzer": {
        "persian": {
          "tokenizer": "standard",
          "char_filter": [ "zero_width_spaces" ],
          "filter": [
            "lowercase",
            "arabic_normalization",
            "persian_normalization",
            "persian_stop"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> Replaces zero-width non-joiners with an ASCII space.
<2> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.

[[portuguese-analyzer]]
===== `portuguese` analyzer

The `portuguese` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /portuguese_example
{
  "settings": {
    "analysis": {
      "filter": {
        "portuguese_stop": {
          "type": "stop",
          "stopwords": "_portuguese_" <1>
        },
        "portuguese_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplo"] <2>
        },
        "portuguese_stemmer": {
          "type": "stemmer",
          "language": "light_portuguese"
        }
      },
      "analyzer": {
        "portuguese": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "portuguese_stop",
            "portuguese_keywords",
            "portuguese_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[romanian-analyzer]]
===== `romanian` analyzer

The `romanian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /romanian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "romanian_stop": {
          "type": "stop",
          "stopwords": "_romanian_" <1>
        },
        "romanian_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplu"] <2>
        },
        "romanian_stemmer": {
          "type": "stemmer",
          "language": "romanian"
        }
      },
      "analyzer": {
        "romanian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "romanian_stop",
            "romanian_keywords",
            "romanian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[russian-analyzer]]
===== `russian` analyzer

The `russian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /russian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "russian_stop": {
          "type": "stop",
          "stopwords": "_russian_" <1>
        },
        "russian_keywords": {
          "type": "keyword_marker",
          "keywords": ["пример"] <2>
        },
        "russian_stemmer": {
          "type": "stemmer",
          "language": "russian"
        }
      },
      "analyzer": {
        "russian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "russian_stop",
            "russian_keywords",
            "russian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[sorani-analyzer]]
===== `sorani` analyzer

The `sorani` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /sorani_example
{
  "settings": {
    "analysis": {
      "filter": {
        "sorani_stop": {
          "type": "stop",
          "stopwords": "_sorani_" <1>
        },
        "sorani_keywords": {
          "type": "keyword_marker",
          "keywords": ["mînak"] <2>
        },
        "sorani_stemmer": {
          "type": "stemmer",
          "language": "sorani"
        }
      },
      "analyzer": {
        "sorani": {
          "tokenizer": "standard",
          "filter": [
            "sorani_normalization",
            "lowercase",
            "sorani_stop",
            "sorani_keywords",
            "sorani_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[spanish-analyzer]]
===== `spanish` analyzer

The `spanish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /spanish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "spanish_stop": {
          "type": "stop",
          "stopwords": "_spanish_" <1>
        },
        "spanish_keywords": {
          "type": "keyword_marker",
          "keywords": ["ejemplo"] <2>
        },
        "spanish_stemmer": {
          "type": "stemmer",
          "language": "light_spanish"
        }
      },
      "analyzer": {
        "spanish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "spanish_stop",
            "spanish_keywords",
            "spanish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[swedish-analyzer]]
===== `swedish` analyzer

The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /swedish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "swedish_stop": {
          "type": "stop",
          "stopwords": "_swedish_" <1>
        },
        "swedish_keywords": {
          "type": "keyword_marker",
          "keywords": ["exempel"] <2>
        },
        "swedish_stemmer": {
          "type": "stemmer",
          "language": "swedish"
        }
      },
      "analyzer": {
        "swedish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "swedish_stop",
            "swedish_keywords",
            "swedish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[turkish-analyzer]]
===== `turkish` analyzer

The `turkish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /turkish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "turkish_stop": {
          "type": "stop",
          "stopwords": "_turkish_" <1>
        },
        "turkish_lowercase": {
          "type": "lowercase",
          "language": "turkish"
        },
        "turkish_keywords": {
          "type": "keyword_marker",
          "keywords": ["örnek"] <2>
        },
        "turkish_stemmer": {
          "type": "stemmer",
          "language": "turkish"
        }
      },
      "analyzer": {
        "turkish": {
          "tokenizer": "standard",
          "filter": [
            "apostrophe",
            "turkish_lowercase",
            "turkish_stop",
            "turkish_keywords",
            "turkish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
    be excluded from stemming.

[[thai-analyzer]]
===== `thai` analyzer

The `thai` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /thai_example
{
  "settings": {
    "analysis": {
      "filter": {
        "thai_stop": {
          "type": "stop",
          "stopwords": "_thai_" <1>
        }
      },
      "analyzer": {
        "thai": {
          "tokenizer": "thai",
          "filter": [
            "lowercase",
            "thai_stop"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
    or `stopwords_path` parameters.