[[analysis-lang-analyzer]]
=== Language Analyzers

A set of analyzers aimed at analyzing specific language text. The
following types are supported:
<<arabic-analyzer,`arabic`>>,
<<armenian-analyzer,`armenian`>>,
<<basque-analyzer,`basque`>>,
<<bengali-analyzer,`bengali`>>,
<<brazilian-analyzer,`brazilian`>>,
<<bulgarian-analyzer,`bulgarian`>>,
<<catalan-analyzer,`catalan`>>,
<<cjk-analyzer,`cjk`>>,
<<czech-analyzer,`czech`>>,
<<danish-analyzer,`danish`>>,
<<dutch-analyzer,`dutch`>>,
<<english-analyzer,`english`>>,
<<finnish-analyzer,`finnish`>>,
<<french-analyzer,`french`>>,
<<galician-analyzer,`galician`>>,
<<german-analyzer,`german`>>,
<<greek-analyzer,`greek`>>,
<<hindi-analyzer,`hindi`>>,
<<hungarian-analyzer,`hungarian`>>,
<<indonesian-analyzer,`indonesian`>>,
<<irish-analyzer,`irish`>>,
<<italian-analyzer,`italian`>>,
<<latvian-analyzer,`latvian`>>,
<<lithuanian-analyzer,`lithuanian`>>,
<<norwegian-analyzer,`norwegian`>>,
<<persian-analyzer,`persian`>>,
<<portuguese-analyzer,`portuguese`>>,
<<romanian-analyzer,`romanian`>>,
<<russian-analyzer,`russian`>>,
<<sorani-analyzer,`sorani`>>,
<<spanish-analyzer,`spanish`>>,
<<swedish-analyzer,`swedish`>>,
<<turkish-analyzer,`turkish`>>,
<<thai-analyzer,`thai`>>.

==== Configuring language analyzers

===== Stopwords

All analyzers support setting custom `stopwords` either internally in
the config, or by using an external stopwords file by setting
`stopwords_path`. Check <<analysis-stop-analyzer,Stop Analyzer>> for
more details.
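
For example, a minimal sketch of an `english` analyzer configured with a
custom stopword list (the index name `custom_stopwords_example`, the analyzer
name `my_english`, and the word list are placeholders):

[source,js]
----------------------------------------------------
PUT /custom_stopwords_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_english": {
          "type": "english",
          "stopwords": ["a", "an", "the"] <1>
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> Alternatively, `stopwords_path` could point at a file containing one
stopword per line.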

===== Excluding words from stemming

The `stem_exclusion` parameter allows you to specify an array
of lowercase words that should not be stemmed. Internally, this
functionality is implemented by adding the
<<analysis-keyword-marker-tokenfilter,`keyword_marker` token filter>>
with the `keywords` set to the value of the `stem_exclusion` parameter.
See the example after the following list.

The following analyzers support setting a custom `stem_exclusion` list:
`arabic`, `armenian`, `basque`, `bengali`, `bulgarian`, `catalan`, `czech`,
`dutch`, `english`, `finnish`, `french`, `galician`,
`german`, `hindi`, `hungarian`, `indonesian`, `irish`, `italian`, `latvian`,
`lithuanian`, `norwegian`, `portuguese`, `romanian`, `russian`, `sorani`,
`spanish`, `swedish`, `turkish`.
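
As an illustration, a minimal sketch that keeps two words from being stemmed
by the `english` analyzer (the index name `stem_exclusion_example`, the
analyzer name `my_english`, and the word list are placeholders):

[source,js]
----------------------------------------------------
PUT /stem_exclusion_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_english": {
          "type": "english",
          "stem_exclusion": ["skies", "running"] <1>
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> These lowercase words are indexed exactly as written rather than in their
stemmed forms.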

==== Reimplementing language analyzers

The built-in language analyzers can be reimplemented as `custom` analyzers
(as described below) in order to customize their behaviour.

NOTE: If you do not intend to exclude words from being stemmed (the
equivalent of the `stem_exclusion` parameter above), then you should remove
the `keyword_marker` token filter from the custom analyzer configuration.

[[arabic-analyzer]]
===== `arabic` analyzer

The `arabic` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /arabic_example
{
  "settings": {
    "analysis": {
      "filter": {
        "arabic_stop": {
          "type": "stop",
          "stopwords": "_arabic_" <1>
        },
        "arabic_keywords": {
          "type": "keyword_marker",
          "keywords": ["مثال"] <2>
        },
        "arabic_stemmer": {
          "type": "stemmer",
          "language": "arabic"
        }
      },
      "analyzer": {
        "arabic": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "arabic_stop",
            "arabic_normalization",
            "arabic_keywords",
            "arabic_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.
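
To check that a reimplemented analyzer behaves as expected, sample text can be
run through it with the `_analyze` API. A minimal sketch against the
`arabic_example` index created above (the sample text is arbitrary); the same
check applies to any of the analyzers rebuilt below:

[source,js]
----------------------------------------------------
POST /arabic_example/_analyze
{
  "analyzer": "arabic",
  "text": "هذا مثال"
}
----------------------------------------------------
// CONSOLE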

[[armenian-analyzer]]
===== `armenian` analyzer

The `armenian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /armenian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "armenian_stop": {
          "type": "stop",
          "stopwords": "_armenian_" <1>
        },
        "armenian_keywords": {
          "type": "keyword_marker",
          "keywords": ["օրինակ"] <2>
        },
        "armenian_stemmer": {
          "type": "stemmer",
          "language": "armenian"
        }
      },
      "analyzer": {
        "armenian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "armenian_stop",
            "armenian_keywords",
            "armenian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[basque-analyzer]]
===== `basque` analyzer

The `basque` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /basque_example
{
  "settings": {
    "analysis": {
      "filter": {
        "basque_stop": {
          "type": "stop",
          "stopwords": "_basque_" <1>
        },
        "basque_keywords": {
          "type": "keyword_marker",
          "keywords": ["Adibidez"] <2>
        },
        "basque_stemmer": {
          "type": "stemmer",
          "language": "basque"
        }
      },
      "analyzer": {
        "basque": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "basque_stop",
            "basque_keywords",
            "basque_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[bengali-analyzer]]
===== `bengali` analyzer

The `bengali` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /bengali_example
{
  "settings": {
    "analysis": {
      "filter": {
        "bengali_stop": {
          "type": "stop",
          "stopwords": "_bengali_" <1>
        },
        "bengali_keywords": {
          "type": "keyword_marker",
          "keywords": ["উদাহরণ"] <2>
        },
        "bengali_stemmer": {
          "type": "stemmer",
          "language": "bengali"
        }
      },
      "analyzer": {
        "bengali": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "indic_normalization",
            "bengali_normalization",
            "bengali_stop",
            "bengali_keywords",
            "bengali_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[brazilian-analyzer]]
===== `brazilian` analyzer

The `brazilian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /brazilian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_" <1>
        },
        "brazilian_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplo"] <2>
        },
        "brazilian_stemmer": {
          "type": "stemmer",
          "language": "brazilian"
        }
      },
      "analyzer": {
        "brazilian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "brazilian_stop",
            "brazilian_keywords",
            "brazilian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[bulgarian-analyzer]]
===== `bulgarian` analyzer

The `bulgarian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /bulgarian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "bulgarian_stop": {
          "type": "stop",
          "stopwords": "_bulgarian_" <1>
        },
        "bulgarian_keywords": {
          "type": "keyword_marker",
          "keywords": ["пример"] <2>
        },
        "bulgarian_stemmer": {
          "type": "stemmer",
          "language": "bulgarian"
        }
      },
      "analyzer": {
        "bulgarian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "bulgarian_stop",
            "bulgarian_keywords",
            "bulgarian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[catalan-analyzer]]
===== `catalan` analyzer

The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /catalan_example
{
  "settings": {
    "analysis": {
      "filter": {
        "catalan_elision": {
          "type": "elision",
          "articles": [ "d", "l", "m", "n", "s", "t"]
        },
        "catalan_stop": {
          "type": "stop",
          "stopwords": "_catalan_" <1>
        },
        "catalan_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemple"] <2>
        },
        "catalan_stemmer": {
          "type": "stemmer",
          "language": "catalan"
        }
      },
      "analyzer": {
        "catalan": {
          "tokenizer": "standard",
          "filter": [
            "catalan_elision",
            "lowercase",
            "catalan_stop",
            "catalan_keywords",
            "catalan_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[cjk-analyzer]]
===== `cjk` analyzer

The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /cjk_example
{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_" <1>
        }
      },
      "analyzer": {
        "cjk": {
          "tokenizer": "standard",
          "filter": [
            "cjk_width",
            "lowercase",
            "cjk_bigram",
            "english_stop"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.

[[czech-analyzer]]
===== `czech` analyzer

The `czech` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /czech_example
{
  "settings": {
    "analysis": {
      "filter": {
        "czech_stop": {
          "type": "stop",
          "stopwords": "_czech_" <1>
        },
        "czech_keywords": {
          "type": "keyword_marker",
          "keywords": ["příklad"] <2>
        },
        "czech_stemmer": {
          "type": "stemmer",
          "language": "czech"
        }
      },
      "analyzer": {
        "czech": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "czech_stop",
            "czech_keywords",
            "czech_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[danish-analyzer]]
===== `danish` analyzer

The `danish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /danish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "danish_stop": {
          "type": "stop",
          "stopwords": "_danish_" <1>
        },
        "danish_keywords": {
          "type": "keyword_marker",
          "keywords": ["eksempel"] <2>
        },
        "danish_stemmer": {
          "type": "stemmer",
          "language": "danish"
        }
      },
      "analyzer": {
        "danish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "danish_stop",
            "danish_keywords",
            "danish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[dutch-analyzer]]
===== `dutch` analyzer

The `dutch` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /dutch_example
{
  "settings": {
    "analysis": {
      "filter": {
        "dutch_stop": {
          "type": "stop",
          "stopwords": "_dutch_" <1>
        },
        "dutch_keywords": {
          "type": "keyword_marker",
          "keywords": ["voorbeeld"] <2>
        },
        "dutch_stemmer": {
          "type": "stemmer",
          "language": "dutch"
        },
        "dutch_override": {
          "type": "stemmer_override",
          "rules": [
            "fiets=>fiets",
            "bromfiets=>bromfiets",
            "ei=>eier",
            "kind=>kinder"
          ]
        }
      },
      "analyzer": {
        "dutch": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "dutch_stop",
            "dutch_keywords",
            "dutch_override",
            "dutch_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[english-analyzer]]
===== `english` analyzer

The `english` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /english_example
{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_" <1>
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": ["example"] <2>
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        }
      },
      "analyzer": {
        "english": {
          "tokenizer": "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[finnish-analyzer]]
===== `finnish` analyzer

The `finnish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /finnish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "finnish_stop": {
          "type": "stop",
          "stopwords": "_finnish_" <1>
        },
        "finnish_keywords": {
          "type": "keyword_marker",
          "keywords": ["esimerkki"] <2>
        },
        "finnish_stemmer": {
          "type": "stemmer",
          "language": "finnish"
        }
      },
      "analyzer": {
        "finnish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "finnish_stop",
            "finnish_keywords",
            "finnish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[french-analyzer]]
===== `french` analyzer

The `french` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /french_example
{
  "settings": {
    "analysis": {
      "filter": {
        "french_elision": {
          "type": "elision",
          "articles_case": true,
          "articles": [
            "l", "m", "t", "qu", "n", "s",
            "j", "d", "c", "jusqu", "quoiqu",
            "lorsqu", "puisqu"
          ]
        },
        "french_stop": {
          "type": "stop",
          "stopwords": "_french_" <1>
        },
        "french_keywords": {
          "type": "keyword_marker",
          "keywords": ["Exemple"] <2>
        },
        "french_stemmer": {
          "type": "stemmer",
          "language": "light_french"
        }
      },
      "analyzer": {
        "french": {
          "tokenizer": "standard",
          "filter": [
            "french_elision",
            "lowercase",
            "french_stop",
            "french_keywords",
            "french_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[galician-analyzer]]
===== `galician` analyzer

The `galician` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /galician_example
{
  "settings": {
    "analysis": {
      "filter": {
        "galician_stop": {
          "type": "stop",
          "stopwords": "_galician_" <1>
        },
        "galician_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplo"] <2>
        },
        "galician_stemmer": {
          "type": "stemmer",
          "language": "galician"
        }
      },
      "analyzer": {
        "galician": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "galician_stop",
            "galician_keywords",
            "galician_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[german-analyzer]]
===== `german` analyzer

The `german` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /german_example
{
  "settings": {
    "analysis": {
      "filter": {
        "german_stop": {
          "type": "stop",
          "stopwords": "_german_" <1>
        },
        "german_keywords": {
          "type": "keyword_marker",
          "keywords": ["Beispiel"] <2>
        },
        "german_stemmer": {
          "type": "stemmer",
          "language": "light_german"
        }
      },
      "analyzer": {
        "german": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "german_stop",
            "german_keywords",
            "german_normalization",
            "german_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[greek-analyzer]]
===== `greek` analyzer

The `greek` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /greek_example
{
  "settings": {
    "analysis": {
      "filter": {
        "greek_stop": {
          "type": "stop",
          "stopwords": "_greek_" <1>
        },
        "greek_lowercase": {
          "type": "lowercase",
          "language": "greek"
        },
        "greek_keywords": {
          "type": "keyword_marker",
          "keywords": ["παράδειγμα"] <2>
        },
        "greek_stemmer": {
          "type": "stemmer",
          "language": "greek"
        }
      },
      "analyzer": {
        "greek": {
          "tokenizer": "standard",
          "filter": [
            "greek_lowercase",
            "greek_stop",
            "greek_keywords",
            "greek_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[hindi-analyzer]]
===== `hindi` analyzer

The `hindi` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /hindi_example
{
  "settings": {
    "analysis": {
      "filter": {
        "hindi_stop": {
          "type": "stop",
          "stopwords": "_hindi_" <1>
        },
        "hindi_keywords": {
          "type": "keyword_marker",
          "keywords": ["उदाहरण"] <2>
        },
        "hindi_stemmer": {
          "type": "stemmer",
          "language": "hindi"
        }
      },
      "analyzer": {
        "hindi": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "indic_normalization",
            "hindi_normalization",
            "hindi_stop",
            "hindi_keywords",
            "hindi_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[hungarian-analyzer]]
===== `hungarian` analyzer

The `hungarian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /hungarian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "hungarian_stop": {
          "type": "stop",
          "stopwords": "_hungarian_" <1>
        },
        "hungarian_keywords": {
          "type": "keyword_marker",
          "keywords": ["példa"] <2>
        },
        "hungarian_stemmer": {
          "type": "stemmer",
          "language": "hungarian"
        }
      },
      "analyzer": {
        "hungarian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "hungarian_stop",
            "hungarian_keywords",
            "hungarian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[indonesian-analyzer]]
===== `indonesian` analyzer

The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /indonesian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "indonesian_stop": {
          "type": "stop",
          "stopwords": "_indonesian_" <1>
        },
        "indonesian_keywords": {
          "type": "keyword_marker",
          "keywords": ["contoh"] <2>
        },
        "indonesian_stemmer": {
          "type": "stemmer",
          "language": "indonesian"
        }
      },
      "analyzer": {
        "indonesian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "indonesian_stop",
            "indonesian_keywords",
            "indonesian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[irish-analyzer]]
===== `irish` analyzer

The `irish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /irish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "irish_elision": {
          "type": "elision",
          "articles": [ "h", "n", "t" ]
        },
        "irish_stop": {
          "type": "stop",
          "stopwords": "_irish_" <1>
        },
        "irish_lowercase": {
          "type": "lowercase",
          "language": "irish"
        },
        "irish_keywords": {
          "type": "keyword_marker",
          "keywords": ["sampla"] <2>
        },
        "irish_stemmer": {
          "type": "stemmer",
          "language": "irish"
        }
      },
      "analyzer": {
        "irish": {
          "tokenizer": "standard",
          "filter": [
            "irish_stop",
            "irish_elision",
            "irish_lowercase",
            "irish_keywords",
            "irish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[italian-analyzer]]
===== `italian` analyzer

The `italian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /italian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "italian_elision": {
          "type": "elision",
          "articles": [
            "c", "l", "all", "dall", "dell",
            "nell", "sull", "coll", "pell",
            "gl", "agl", "dagl", "degl", "negl",
            "sugl", "un", "m", "t", "s", "v", "d"
          ]
        },
        "italian_stop": {
          "type": "stop",
          "stopwords": "_italian_" <1>
        },
        "italian_keywords": {
          "type": "keyword_marker",
          "keywords": ["esempio"] <2>
        },
        "italian_stemmer": {
          "type": "stemmer",
          "language": "light_italian"
        }
      },
      "analyzer": {
        "italian": {
          "tokenizer": "standard",
          "filter": [
            "italian_elision",
            "lowercase",
            "italian_stop",
            "italian_keywords",
            "italian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[latvian-analyzer]]
===== `latvian` analyzer

The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /latvian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "latvian_stop": {
          "type": "stop",
          "stopwords": "_latvian_" <1>
        },
        "latvian_keywords": {
          "type": "keyword_marker",
          "keywords": ["piemērs"] <2>
        },
        "latvian_stemmer": {
          "type": "stemmer",
          "language": "latvian"
        }
      },
      "analyzer": {
        "latvian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "latvian_stop",
            "latvian_keywords",
            "latvian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[lithuanian-analyzer]]
===== `lithuanian` analyzer

The `lithuanian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /lithuanian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "lithuanian_stop": {
          "type": "stop",
          "stopwords": "_lithuanian_" <1>
        },
        "lithuanian_keywords": {
          "type": "keyword_marker",
          "keywords": ["pavyzdys"] <2>
        },
        "lithuanian_stemmer": {
          "type": "stemmer",
          "language": "lithuanian"
        }
      },
      "analyzer": {
        "lithuanian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "lithuanian_stop",
            "lithuanian_keywords",
            "lithuanian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[norwegian-analyzer]]
===== `norwegian` analyzer

The `norwegian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /norwegian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "norwegian_stop": {
          "type": "stop",
          "stopwords": "_norwegian_" <1>
        },
        "norwegian_keywords": {
          "type": "keyword_marker",
          "keywords": ["eksempel"] <2>
        },
        "norwegian_stemmer": {
          "type": "stemmer",
          "language": "norwegian"
        }
      },
      "analyzer": {
        "norwegian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "norwegian_stop",
            "norwegian_keywords",
            "norwegian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[persian-analyzer]]
===== `persian` analyzer

The `persian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /persian_example
{
  "settings": {
    "analysis": {
      "char_filter": {
        "zero_width_spaces": {
          "type": "mapping",
          "mappings": [ "\\u200C=> "] <1>
        }
      },
      "filter": {
        "persian_stop": {
          "type": "stop",
          "stopwords": "_persian_" <2>
        }
      },
      "analyzer": {
        "persian": {
          "tokenizer": "standard",
          "char_filter": [ "zero_width_spaces" ],
          "filter": [
            "lowercase",
            "arabic_normalization",
            "persian_normalization",
            "persian_stop"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> Replaces zero-width non-joiners with an ASCII space.
<2> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.

[[portuguese-analyzer]]
===== `portuguese` analyzer

The `portuguese` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /portuguese_example
{
  "settings": {
    "analysis": {
      "filter": {
        "portuguese_stop": {
          "type": "stop",
          "stopwords": "_portuguese_" <1>
        },
        "portuguese_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplo"] <2>
        },
        "portuguese_stemmer": {
          "type": "stemmer",
          "language": "light_portuguese"
        }
      },
      "analyzer": {
        "portuguese": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "portuguese_stop",
            "portuguese_keywords",
            "portuguese_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[romanian-analyzer]]
===== `romanian` analyzer

The `romanian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /romanian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "romanian_stop": {
          "type": "stop",
          "stopwords": "_romanian_" <1>
        },
        "romanian_keywords": {
          "type": "keyword_marker",
          "keywords": ["exemplu"] <2>
        },
        "romanian_stemmer": {
          "type": "stemmer",
          "language": "romanian"
        }
      },
      "analyzer": {
        "romanian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "romanian_stop",
            "romanian_keywords",
            "romanian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[russian-analyzer]]
===== `russian` analyzer

The `russian` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /russian_example
{
  "settings": {
    "analysis": {
      "filter": {
        "russian_stop": {
          "type": "stop",
          "stopwords": "_russian_" <1>
        },
        "russian_keywords": {
          "type": "keyword_marker",
          "keywords": ["пример"] <2>
        },
        "russian_stemmer": {
          "type": "stemmer",
          "language": "russian"
        }
      },
      "analyzer": {
        "russian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "russian_stop",
            "russian_keywords",
            "russian_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[sorani-analyzer]]
===== `sorani` analyzer

The `sorani` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /sorani_example
{
  "settings": {
    "analysis": {
      "filter": {
        "sorani_stop": {
          "type": "stop",
          "stopwords": "_sorani_" <1>
        },
        "sorani_keywords": {
          "type": "keyword_marker",
          "keywords": ["mînak"] <2>
        },
        "sorani_stemmer": {
          "type": "stemmer",
          "language": "sorani"
        }
      },
      "analyzer": {
        "sorani": {
          "tokenizer": "standard",
          "filter": [
            "sorani_normalization",
            "lowercase",
            "sorani_stop",
            "sorani_keywords",
            "sorani_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[spanish-analyzer]]
===== `spanish` analyzer

The `spanish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /spanish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "spanish_stop": {
          "type": "stop",
          "stopwords": "_spanish_" <1>
        },
        "spanish_keywords": {
          "type": "keyword_marker",
          "keywords": ["ejemplo"] <2>
        },
        "spanish_stemmer": {
          "type": "stemmer",
          "language": "light_spanish"
        }
      },
      "analyzer": {
        "spanish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "spanish_stop",
            "spanish_keywords",
            "spanish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[swedish-analyzer]]
===== `swedish` analyzer

The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /swedish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "swedish_stop": {
          "type": "stop",
          "stopwords": "_swedish_" <1>
        },
        "swedish_keywords": {
          "type": "keyword_marker",
          "keywords": ["exempel"] <2>
        },
        "swedish_stemmer": {
          "type": "stemmer",
          "language": "swedish"
        }
      },
      "analyzer": {
        "swedish": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "swedish_stop",
            "swedish_keywords",
            "swedish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[turkish-analyzer]]
===== `turkish` analyzer

The `turkish` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /turkish_example
{
  "settings": {
    "analysis": {
      "filter": {
        "turkish_stop": {
          "type": "stop",
          "stopwords": "_turkish_" <1>
        },
        "turkish_lowercase": {
          "type": "lowercase",
          "language": "turkish"
        },
        "turkish_keywords": {
          "type": "keyword_marker",
          "keywords": ["örnek"] <2>
        },
        "turkish_stemmer": {
          "type": "stemmer",
          "language": "turkish"
        }
      },
      "analyzer": {
        "turkish": {
          "tokenizer": "standard",
          "filter": [
            "apostrophe",
            "turkish_lowercase",
            "turkish_stop",
            "turkish_keywords",
            "turkish_stemmer"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[thai-analyzer]]
===== `thai` analyzer

The `thai` analyzer could be reimplemented as a `custom` analyzer as follows:

[source,js]
----------------------------------------------------
PUT /thai_example
{
  "settings": {
    "analysis": {
      "filter": {
        "thai_stop": {
          "type": "stop",
          "stopwords": "_thai_" <1>
        }
      },
      "analyzer": {
        "thai": {
          "tokenizer": "thai",
          "filter": [
            "lowercase",
            "thai_stop"
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// CONSOLE
<1> The default stopwords can be overridden with the `stopwords`
or `stopwords_path` parameters.