normalizer.asciidoc 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. [[normalizer]]
  2. === `normalizer`
  3. The `normalizer` property of <<keyword,`keyword`>> fields is similar to
  4. <<analyzer,`analyzer`>> except that it guarantees that the analysis chain
  5. produces a single token.
  6. The `normalizer` is applied prior to indexing the keyword, as well as at
  7. search-time when the `keyword` field is searched via a query parser such as
  8. the <<query-dsl-match-query,`match`>> query.
  9. [source,js]
  10. --------------------------------
  11. PUT index
  12. {
  13. "settings": {
  14. "analysis": {
  15. "normalizer": {
  16. "my_normalizer": {
  17. "type": "custom",
  18. "char_filter": [],
  19. "filter": ["lowercase", "asciifolding"]
  20. }
  21. }
  22. }
  23. },
  24. "mappings": {
  25. "type": {
  26. "properties": {
  27. "foo": {
  28. "type": "keyword",
  29. "normalizer": "my_normalizer"
  30. }
  31. }
  32. }
  33. }
  34. }
  35. PUT index/type/1
  36. {
  37. "foo": "BÀR"
  38. }
  39. PUT index/type/2
  40. {
  41. "foo": "bar"
  42. }
  43. PUT index/type/3
  44. {
  45. "foo": "baz"
  46. }
  47. POST index/_refresh
  48. GET index/_search
  49. {
  50. "query": {
  51. "match": {
  52. "foo": "BAR"
  53. }
  54. }
  55. }
  56. --------------------------------
  57. // CONSOLE
  58. The above query matches documents 1 and 2 because both the indexed value `BÀR`
  59. and the query string `BAR` are normalized to `bar`.
  60. [source,js]
  61. ----------------------------
  62. {
  63. "took": 43,
  64. "timed_out": false,
  65. "_shards": {
  66. "total": 5,
  67. "successful": 5,
  68. "skipped" : 0,
  69. "failed": 0
  70. },
  71. "hits": {
  72. "total": 2,
  73. "max_score": 0.2876821,
  74. "hits": [
  75. {
  76. "_index": "index",
  77. "_type": "type",
  78. "_id": "2",
  79. "_score": 0.2876821,
  80. "_source": {
  81. "foo": "bar"
  82. }
  83. },
  84. {
  85. "_index": "index",
  86. "_type": "type",
  87. "_id": "1",
  88. "_score": 0.2876821,
  89. "_source": {
  90. "foo": "BÀR"
  91. }
  92. }
  93. ]
  94. }
  95. }
  96. ----------------------------
  97. // TESTRESPONSE[s/"took".*/"took": "$body.took",/]
  98. The fact that keywords are normalized prior to indexing also means that
  99. aggregations return normalized values:
  100. [source,js]
  101. ----------------------------
  102. GET index/_search
  103. {
  104. "size": 0,
  105. "aggs": {
  106. "foo_terms": {
  107. "terms": {
  108. "field": "foo"
  109. }
  110. }
  111. }
  112. }
  113. ----------------------------
  114. // CONSOLE
  115. // TEST[continued]
  116. The above request returns:
  117. [source,js]
  118. ----------------------------
  119. {
  120. "took": 43,
  121. "timed_out": false,
  122. "_shards": {
  123. "total": 5,
  124. "successful": 5,
  125. "skipped" : 0,
  126. "failed": 0
  127. },
  128. "hits": {
  129. "total": 3,
  130. "max_score": 0.0,
  131. "hits": []
  132. },
  133. "aggregations": {
  134. "foo_terms": {
  135. "doc_count_error_upper_bound": 0,
  136. "sum_other_doc_count": 0,
  137. "buckets": [
  138. {
  139. "key": "bar",
  140. "doc_count": 2
  141. },
  142. {
  143. "key": "baz",
  144. "doc_count": 1
  145. }
  146. ]
  147. }
  148. }
  149. }
  150. ----------------------------
  151. // TESTRESPONSE[s/"took".*/"took": "$body.took",/]