
[[analysis-stop-analyzer]]
=== Stop Analyzer

The `stop` analyzer is the same as the <<analysis-simple-analyzer,`simple` analyzer>>
but adds support for removing stop words. It defaults to using the
`_english_` stop words.

[float]
=== Definition

It consists of:

Tokenizer::
* <<analysis-lowercase-tokenizer,Lower Case Tokenizer>>

Token filters::
* <<analysis-stop-tokenfilter,Stop Token Filter>>
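
If the configuration parameters below are not flexible enough, one way to
customize the `stop` analyzer further is to rebuild it from the parts listed
above as a custom analyzer. The following is a minimal sketch, not taken from
this page; the index name `stop_example` and the names `english_stop` and
`rebuilt_stop` are hypothetical:

[source,js]
----------------------------
PUT stop_example
{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {          <1>
          "type": "stop",
          "stopwords": "_english_"
        }
      },
      "analyzer": {
        "rebuilt_stop": {          <2>
          "tokenizer": "lowercase",
          "filter": ["english_stop"]
        }
      }
    }
  }
}
----------------------------
<1> Hypothetical filter name; a `stop` token filter configured with the default `_english_` stop words.
<2> Hypothetical analyzer name; combines the Lower Case Tokenizer with the stop token filter defined above.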
[float]
=== Example output

[source,js]
---------------------------
POST _analyze
{
  "analyzer": "stop",
  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
}
---------------------------
// CONSOLE
/////////////////////

[source,js]
----------------------------
{
  "tokens": [
    {
      "token": "quick",
      "start_offset": 6,
      "end_offset": 11,
      "type": "word",
      "position": 1
    },
    {
      "token": "brown",
      "start_offset": 12,
      "end_offset": 17,
      "type": "word",
      "position": 2
    },
    {
      "token": "foxes",
      "start_offset": 18,
      "end_offset": 23,
      "type": "word",
      "position": 3
    },
    {
      "token": "jumped",
      "start_offset": 24,
      "end_offset": 30,
      "type": "word",
      "position": 4
    },
    {
      "token": "over",
      "start_offset": 31,
      "end_offset": 35,
      "type": "word",
      "position": 5
    },
    {
      "token": "lazy",
      "start_offset": 40,
      "end_offset": 44,
      "type": "word",
      "position": 7
    },
    {
      "token": "dog",
      "start_offset": 45,
      "end_offset": 48,
      "type": "word",
      "position": 8
    },
    {
      "token": "s",
      "start_offset": 49,
      "end_offset": 50,
      "type": "word",
      "position": 9
    },
    {
      "token": "bone",
      "start_offset": 51,
      "end_offset": 55,
      "type": "word",
      "position": 10
    }
  ]
}
----------------------------
// TESTRESPONSE

/////////////////////
The above sentence would produce the following terms:

[source,text]
---------------------------
[ quick, brown, foxes, jumped, over, lazy, dog, s, bone ]
---------------------------
[float]
=== Configuration

The `stop` analyzer accepts the following parameters:

[horizontal]
`stopwords`::

    A pre-defined stop words list like `_english_` or an array containing a
    list of stop words. Defaults to `_english_`.

`stopwords_path`::

    The path to a file containing stop words.

See the <<analysis-stop-tokenfilter,Stop Token Filter>> for more information
about stop word configuration.
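
For example, stop words can be loaded from a file instead of being listed
inline. The following is a minimal sketch, not taken from this page; the index
name `my_file_index` and the path `stopwords/my_stopwords.txt` are
hypothetical, and `stopwords_path` is resolved relative to the Elasticsearch
`config` directory:

[source,js]
----------------------------
PUT my_file_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_file_stop_analyzer": {
          "type": "stop",
          "stopwords_path": "stopwords/my_stopwords.txt" <1>
        }
      }
    }
  }
}
----------------------------
<1> Hypothetical path, relative to the `config` directory; the file should contain one stop word per line.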
[float]
=== Example configuration

In this example, we configure the `stop` analyzer to use a specified list of
words as stop words:

[source,js]
----------------------------
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_stop_analyzer": {
          "type": "stop",
          "stopwords": ["the", "over"]
        }
      }
    }
  }
}

GET _cluster/health?wait_for_status=yellow

POST my_index/_analyze
{
  "analyzer": "my_stop_analyzer",
  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
}
----------------------------
// CONSOLE
/////////////////////

[source,js]
----------------------------
{
  "tokens": [
    {
      "token": "quick",
      "start_offset": 6,
      "end_offset": 11,
      "type": "word",
      "position": 1
    },
    {
      "token": "brown",
      "start_offset": 12,
      "end_offset": 17,
      "type": "word",
      "position": 2
    },
    {
      "token": "foxes",
      "start_offset": 18,
      "end_offset": 23,
      "type": "word",
      "position": 3
    },
    {
      "token": "jumped",
      "start_offset": 24,
      "end_offset": 30,
      "type": "word",
      "position": 4
    },
    {
      "token": "lazy",
      "start_offset": 40,
      "end_offset": 44,
      "type": "word",
      "position": 7
    },
    {
      "token": "dog",
      "start_offset": 45,
      "end_offset": 48,
      "type": "word",
      "position": 8
    },
    {
      "token": "s",
      "start_offset": 49,
      "end_offset": 50,
      "type": "word",
      "position": 9
    },
    {
      "token": "bone",
      "start_offset": 51,
      "end_offset": 55,
      "type": "word",
      "position": 10
    }
  ]
}
----------------------------
// TESTRESPONSE

/////////////////////
The above example produces the following terms:

[source,text]
---------------------------
[ quick, brown, foxes, jumped, lazy, dog, s, bone ]
---------------------------
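
To apply the configured analyzer at index time, it would typically be
referenced from a field mapping. The following is a minimal sketch, not taken
from this page; the index name `my_mapped_index`, the mapping type `_doc`, and
the field name `my_text` are hypothetical, and whether a type level is
required in the mapping depends on the Elasticsearch version:

[source,js]
----------------------------
PUT my_mapped_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_stop_analyzer": {
          "type": "stop",
          "stopwords": ["the", "over"]
        }
      }
    }
  },
  "mappings": {
    "_doc": {                     <1>
      "properties": {
        "my_text": {              <2>
          "type": "text",
          "analyzer": "my_stop_analyzer"
        }
      }
    }
  }
}
----------------------------
<1> Hypothetical mapping type; omit this level on versions without mapping types.
<2> Hypothetical field name; text indexed into this field is analyzed with `my_stop_analyzer`.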