simple-analyzer.asciidoc 3.1 KB

  1. [[analysis-simple-analyzer]]
  2. === Simple Analyzer
  3. The `simple` analyzer breaks text into terms whenever it encounters a
  4. character that is not a letter (such as a number, space, hyphen, or
  5. apostrophe), discards the non-letter characters, and lowercases all terms.
  6. ==== Example
  7. [source,js]
  8. ---------------------------
  9. POST _analyze
  10. {
  11. "analyzer": "simple",
  12. "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
  13. }
  14. ---------------------------
  15. // CONSOLE
  16. /////////////////////
  17. [source,console-result]
  18. ----------------------------
  19. {
  20. "tokens": [
  21. {
  22. "token": "the",
  23. "start_offset": 0,
  24. "end_offset": 3,
  25. "type": "word",
  26. "position": 0
  27. },
  28. {
  29. "token": "quick",
  30. "start_offset": 6,
  31. "end_offset": 11,
  32. "type": "word",
  33. "position": 1
  34. },
  35. {
  36. "token": "brown",
  37. "start_offset": 12,
  38. "end_offset": 17,
  39. "type": "word",
  40. "position": 2
  41. },
  42. {
  43. "token": "foxes",
  44. "start_offset": 18,
  45. "end_offset": 23,
  46. "type": "word",
  47. "position": 3
  48. },
  49. {
  50. "token": "jumped",
  51. "start_offset": 24,
  52. "end_offset": 30,
  53. "type": "word",
  54. "position": 4
  55. },
  56. {
  57. "token": "over",
  58. "start_offset": 31,
  59. "end_offset": 35,
  60. "type": "word",
  61. "position": 5
  62. },
  63. {
  64. "token": "the",
  65. "start_offset": 36,
  66. "end_offset": 39,
  67. "type": "word",
  68. "position": 6
  69. },
  70. {
  71. "token": "lazy",
  72. "start_offset": 40,
  73. "end_offset": 44,
  74. "type": "word",
  75. "position": 7
  76. },
  77. {
  78. "token": "dog",
  79. "start_offset": 45,
  80. "end_offset": 48,
  81. "type": "word",
  82. "position": 8
  83. },
  84. {
  85. "token": "s",
  86. "start_offset": 49,
  87. "end_offset": 50,
  88. "type": "word",
  89. "position": 9
  90. },
  91. {
  92. "token": "bone",
  93. "start_offset": 51,
  94. "end_offset": 55,
  95. "type": "word",
  96. "position": 10
  97. }
  98. ]
  99. }
  100. ----------------------------
  101. /////////////////////
  102. The `simple` analyzer parses the sentence and produces the following
  103. terms:
  104. [source,text]
  105. ---------------------------
  106. [ the, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ]
  107. ---------------------------
  108. ==== Configure parameters
  109. The `simple` analyzer does not have any configurable parameters.
  110. ==== Customize
  111. The `simple` analyzer is defined by one tokenizer:
  112. Tokenizer::
  113. * <<analysis-lowercase-tokenizer,Lower Case Tokenizer>>
  114. To customize the `simple` analyzer, duplicate it to create the basis for
  115. a `custom` analyzer. The new analyzer can be modified as required, usually by
  116. adding token filters.
  117. ===== Example
  118. [source,js]
  119. ----------------------------------------------------
  120. PUT /simple_example
  121. {
  122. "settings": {
  123. "analysis": {
  124. "analyzer": {
  125. "rebuilt_simple": {
  126. "tokenizer": "lowercase",
  127. "filter": [ <1>
  128. ]
  129. }
  130. }
  131. }
  132. }
  133. }
  134. ----------------------------------------------------
  135. // CONSOLE
  136. // TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: simple_example, first: simple, second: rebuilt_simple}\nendyaml\n/]
  137. <1> Add token filters here.