// simple-analyzer.asciidoc
[[analysis-simple-analyzer]]
=== Simple Analyzer

The `simple` analyzer breaks text into terms at any non-letter character, such
as numbers, spaces, hyphens and apostrophes, discards non-letter characters,
and changes uppercase to lowercase.

==== Example
[source,console]
---------------------------
POST _analyze
{
  "analyzer": "simple",
  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
}
---------------------------
/////////////////////

[source,console-result]
----------------------------
{
  "tokens": [
    {
      "token": "the",
      "start_offset": 0,
      "end_offset": 3,
      "type": "word",
      "position": 0
    },
    {
      "token": "quick",
      "start_offset": 6,
      "end_offset": 11,
      "type": "word",
      "position": 1
    },
    {
      "token": "brown",
      "start_offset": 12,
      "end_offset": 17,
      "type": "word",
      "position": 2
    },
    {
      "token": "foxes",
      "start_offset": 18,
      "end_offset": 23,
      "type": "word",
      "position": 3
    },
    {
      "token": "jumped",
      "start_offset": 24,
      "end_offset": 30,
      "type": "word",
      "position": 4
    },
    {
      "token": "over",
      "start_offset": 31,
      "end_offset": 35,
      "type": "word",
      "position": 5
    },
    {
      "token": "the",
      "start_offset": 36,
      "end_offset": 39,
      "type": "word",
      "position": 6
    },
    {
      "token": "lazy",
      "start_offset": 40,
      "end_offset": 44,
      "type": "word",
      "position": 7
    },
    {
      "token": "dog",
      "start_offset": 45,
      "end_offset": 48,
      "type": "word",
      "position": 8
    },
    {
      "token": "s",
      "start_offset": 49,
      "end_offset": 50,
      "type": "word",
      "position": 9
    },
    {
      "token": "bone",
      "start_offset": 51,
      "end_offset": 55,
      "type": "word",
      "position": 10
    }
  ]
}
----------------------------

/////////////////////
The `simple` analyzer parses the sentence and produces the following
terms:

[source,text]
---------------------------
[ the, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ]
---------------------------
==== Configure parameters

The `simple` analyzer does not contain configurable parameters.

==== Customize

The `simple` analyzer is defined by one tokenizer:

Tokenizer::
* <<analysis-lowercase-tokenizer,Lower Case Tokenizer>>

To customize the `simple` analyzer, duplicate it to create the basis for
a `custom` analyzer. The new analyzer can be modified as required, usually by
adding token filters.
===== Example

[source,console]
----------------------------------------------------
PUT /simple_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "rebuilt_simple": {
          "tokenizer": "lowercase",
          "filter": [ <1>
          ]
        }
      }
    }
  }
}
----------------------------------------------------
// TEST[s/\n$/\nstartyaml\n  - compare_analyzers: {index: simple_example, first: simple, second: rebuilt_simple}\nendyaml\n/]

<1> Add token filters here.