configuring.asciidoc 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. [[configuring-analyzers]]
  2. === Configuring built-in analyzers
  3. The built-in analyzers can be used directly without any configuration. Some
  4. of them, however, support configuration options to alter their behaviour. For
  5. instance, the <<analysis-standard-analyzer,`standard` analyzer>> can be configured
  6. to remove a list of stop words:
  7. [source,js]
  8. --------------------------------
  9. PUT my_index
  10. {
  11. "settings": {
  12. "analysis": {
  13. "analyzer": {
  14. "std_english": { <1>
  15. "type": "standard",
  16. "stopwords": "_english_"
  17. }
  18. }
  19. }
  20. },
  21. "mappings": {
  22. "properties": {
  23. "my_text": {
  24. "type": "text",
  25. "analyzer": "standard", <2>
  26. "fields": {
  27. "english": {
  28. "type": "text",
  29. "analyzer": "std_english" <3>
  30. }
  31. }
  32. }
  33. }
  34. }
  35. }
  36. POST my_index/_analyze
  37. {
  38. "field": "my_text", <2>
  39. "text": "The old brown cow"
  40. }
  41. POST my_index/_analyze
  42. {
  43. "field": "my_text.english", <3>
  44. "text": "The old brown cow"
  45. }
  46. --------------------------------
  47. // CONSOLE
  48. <1> We define the `std_english` analyzer to be based on the `standard`
  49. analyzer, but configured to remove the pre-defined list of English stop words.
  50. <2> The `my_text` field uses the `standard` analyzer directly, without
  51. any configuration. No stop words will be removed from this field.
  52. The resulting terms are: `[ the, old, brown, cow ]`
  53. <3> The `my_text.english` field uses the `std_english` analyzer, so
  54. English stop words will be removed. The resulting terms are:
  55. `[ old, brown, cow ]`
  56. /////////////////////
  57. [source,console-result]
  58. ----------------------------
  59. {
  60. "tokens": [
  61. {
  62. "token": "old",
  63. "start_offset": 4,
  64. "end_offset": 7,
  65. "type": "<ALPHANUM>",
  66. "position": 1
  67. },
  68. {
  69. "token": "brown",
  70. "start_offset": 8,
  71. "end_offset": 13,
  72. "type": "<ALPHANUM>",
  73. "position": 2
  74. },
  75. {
  76. "token": "cow",
  77. "start_offset": 14,
  78. "end_offset": 17,
  79. "type": "<ALPHANUM>",
  80. "position": 3
  81. }
  82. ]
  83. }
  84. ----------------------------
  85. /////////////////////