normalizer.asciidoc 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. [[normalizer]]
  2. === `normalizer`
  3. The `normalizer` property of <<keyword,`keyword`>> fields is similar to
  4. <<analyzer,`analyzer`>> except that it guarantees that the analysis chain
  5. produces a single token.
  6. The `normalizer` is applied prior to indexing the keyword, as well as at
  7. search-time when the `keyword` field is searched via a query parser such as
  8. the <<query-dsl-match-query,`match`>> query or via a term-level query
  9. such as the <<query-dsl-term-query,`term`>> query.
  10. [source,js]
  11. --------------------------------
  12. PUT index
  13. {
  14. "settings": {
  15. "analysis": {
  16. "normalizer": {
  17. "my_normalizer": {
  18. "type": "custom",
  19. "char_filter": [],
  20. "filter": ["lowercase", "asciifolding"]
  21. }
  22. }
  23. }
  24. },
  25. "mappings": {
  26. "properties": {
  27. "foo": {
  28. "type": "keyword",
  29. "normalizer": "my_normalizer"
  30. }
  31. }
  32. }
  33. }
  34. PUT index/_doc/1
  35. {
  36. "foo": "BÀR"
  37. }
  38. PUT index/_doc/2
  39. {
  40. "foo": "bar"
  41. }
  42. PUT index/_doc/3
  43. {
  44. "foo": "baz"
  45. }
  46. POST index/_refresh
  47. GET index/_search
  48. {
  49. "query": {
  50. "term": {
  51. "foo": "BAR"
  52. }
  53. }
  54. }
  55. GET index/_search
  56. {
  57. "query": {
  58. "match": {
  59. "foo": "BAR"
  60. }
  61. }
  62. }
  63. --------------------------------
  64. // CONSOLE
  65. The above queries match documents 1 and 2 since `BÀR` is converted to `bar` at
  66. index time and the query term `BAR` is converted to `bar` at query time.
  67. [source,js]
  68. ----------------------------
  69. {
  70. "took": 13,
  71. "timed_out": false,
  72. "_shards": {
  73. "total": 1,
  74. "successful": 1,
  75. "skipped" : 0,
  76. "failed": 0
  77. },
  78. "hits": {
  79. "total" : {
  80. "value": 2,
  81. "relation": "eq"
  82. },
  83. "max_score": 0.47000363,
  84. "hits": [
  85. {
  86. "_index": "index",
  87. "_type": "_doc",
  88. "_id": "1",
  89. "_score": 0.47000363,
  90. "_source": {
  91. "foo": "BÀR"
  92. }
  93. },
  94. {
  95. "_index": "index",
  96. "_type": "_doc",
  97. "_id": "2",
  98. "_score": 0.47000363,
  99. "_source": {
  100. "foo": "bar"
  101. }
  102. }
  103. ]
  104. }
  105. }
  106. ----------------------------
  107. // TESTRESPONSE[s/"took".*/"took": "$body.took",/]
  108. The fact that keywords are converted prior to indexing also means that
  109. aggregations return normalized values:
  110. [source,js]
  111. ----------------------------
  112. GET index/_search
  113. {
  114. "size": 0,
  115. "aggs": {
  116. "foo_terms": {
  117. "terms": {
  118. "field": "foo"
  119. }
  120. }
  121. }
  122. }
  123. ----------------------------
  124. // CONSOLE
  125. // TEST[continued]
  126. The above request returns:
  127. [source,js]
  128. ----------------------------
  129. {
  130. "took": 43,
  131. "timed_out": false,
  132. "_shards": {
  133. "total": 1,
  134. "successful": 1,
  135. "skipped" : 0,
  136. "failed": 0
  137. },
  138. "hits": {
  139. "total" : {
  140. "value": 3,
  141. "relation": "eq"
  142. },
  143. "max_score": null,
  144. "hits": []
  145. },
  146. "aggregations": {
  147. "foo_terms": {
  148. "doc_count_error_upper_bound": 0,
  149. "sum_other_doc_count": 0,
  150. "buckets": [
  151. {
  152. "key": "bar",
  153. "doc_count": 2
  154. },
  155. {
  156. "key": "baz",
  157. "doc_count": 1
  158. }
  159. ]
  160. }
  161. }
  162. }
  163. ----------------------------
  164. // TESTRESPONSE[s/"took".*/"took": "$body.took",/]