configuring.asciidoc 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. [[configuring-analyzers]]
  2. === Configuring built-in analyzers
  3. The built-in analyzers can be used directly without any configuration. Some
  4. of them, however, support configuration options to alter their behaviour. For
  5. instance, the <<analysis-standard-analyzer,`standard` analyzer>> can be configured
  6. to support a list of stop words:
  7. [source,console]
  8. --------------------------------
  9. PUT my-index-000001
  10. {
  11. "settings": {
  12. "analysis": {
  13. "analyzer": {
  14. "std_english": { <1>
  15. "type": "standard",
  16. "stopwords": "_english_"
  17. }
  18. }
  19. }
  20. },
  21. "mappings": {
  22. "properties": {
  23. "my_text": {
  24. "type": "text",
  25. "analyzer": "standard", <2>
  26. "fields": {
  27. "english": {
  28. "type": "text",
  29. "analyzer": "std_english" <3>
  30. }
  31. }
  32. }
  33. }
  34. }
  35. }
  36. POST my-index-000001/_analyze
  37. {
  38. "field": "my_text", <2>
  39. "text": "The old brown cow"
  40. }
  41. POST my-index-000001/_analyze
  42. {
  43. "field": "my_text.english", <3>
  44. "text": "The old brown cow"
  45. }
  46. --------------------------------
  47. <1> We define the `std_english` analyzer to be based on the `standard`
  48. analyzer, but configured to remove the predefined list of English stop words.
  49. <2> The `my_text` field uses the `standard` analyzer directly, without
  50. any configuration. No stop words will be removed from this field.
  51. The resulting terms are: `[ the, old, brown, cow ]`
  52. <3> The `my_text.english` field uses the `std_english` analyzer, so
  53. English stop words will be removed. The resulting terms are:
  54. `[ old, brown, cow ]`
  55. /////////////////////
  56. [source,console-result]
  57. ----------------------------
  58. {
  59. "tokens": [
  60. {
  61. "token": "old",
  62. "start_offset": 4,
  63. "end_offset": 7,
  64. "type": "<ALPHANUM>",
  65. "position": 1
  66. },
  67. {
  68. "token": "brown",
  69. "start_offset": 8,
  70. "end_offset": 13,
  71. "type": "<ALPHANUM>",
  72. "position": 2
  73. },
  74. {
  75. "token": "cow",
  76. "start_offset": 14,
  77. "end_offset": 17,
  78. "type": "<ALPHANUM>",
  79. "position": 3
  80. }
  81. ]
  82. }
  83. ----------------------------
  84. /////////////////////