[[analysis-normalizers]]
== Normalizers

Normalizers are similar to analyzers except that they may only emit a single
token. As a consequence, they do not have a tokenizer and only accept a subset
of the available char filters and token filters. Only the filters that work on
a per-character basis are allowed. For instance, a lowercasing filter would be
allowed, but not a stemming filter, which needs to look at the keyword as a
whole. The filters that can currently be used in a normalizer are the
following: `arabic_normalization`, `asciifolding`, `bengali_normalization`,
`cjk_width`, `decimal_digit`, `elision`, `german_normalization`,
`hindi_normalization`, `indic_normalization`, `lowercase`,
`persian_normalization`, `scandinavian_folding`, `serbian_normalization`,
`sorani_normalization`, `uppercase`.
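
A quick way to observe this single-token behaviour is the `_analyze` API:
pairing the `keyword` tokenizer with per-character token filters approximates
what a normalizer does, since the whole input is kept as one token. This is
only a sketch for illustration; the sample text and the filter selection are
arbitrary:

[source,js]
--------------------------------
GET _analyze
{
  "tokenizer": "keyword",
  "filter": ["lowercase", "asciifolding"],
  "text": "Ça Va Bien"
}
--------------------------------
// CONSOLE

This should return the single token `ca va bien`.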

[float]
=== Custom normalizers

Elasticsearch does not ship with built-in normalizers so far, so the only way
to get one is by building a custom one. Custom normalizers take a list of
<<analysis-charfilters,character filters>> and a list of
<<analysis-tokenfilters,token filters>>.

[source,js]
--------------------------------
PUT index
{
  "settings": {
    "analysis": {
      "char_filter": {
        "quote": {
          "type": "mapping",
          "mappings": [
            "« => \"",
            "» => \""
          ]
        }
      },
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "char_filter": ["quote"],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        "foo": {
          "type": "keyword",
          "normalizer": "my_normalizer"
        }
      }
    }
  }
}
--------------------------------
// CONSOLE
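
Once the index exists, the normalizer can be exercised by name through the
`_analyze` API. The request below is only illustrative; the sample text is
arbitrary:

[source,js]
--------------------------------
GET index/_analyze
{
  "normalizer": "my_normalizer",
  "text": "«Déjà Vu»"
}
--------------------------------
// CONSOLE

This should return a single token, `"deja vu"`: the guillemets are mapped to
straight quotes by the `quote` char filter, and the text is then lowercased
and ASCII-folded.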