normalizer.asciidoc 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. [[normalizer]]
  2. === `normalizer`
  3. The `normalizer` property of <<keyword,`keyword`>> fields is similar to
  4. <<analyzer,`analyzer`>> except that it guarantees that the analysis chain
  5. produces a single token.
  6. The `normalizer` is applied prior to indexing the keyword, as well as at
  7. search time when the `keyword` field is searched via a query parser such as
  8. the <<query-dsl-match-query,`match`>> query or via a term-level query
  9. such as the <<query-dsl-term-query,`term`>> query.
  10. [source,js]
  11. --------------------------------
  12. PUT index
  13. {
  14. "settings": {
  15. "analysis": {
  16. "normalizer": {
  17. "my_normalizer": {
  18. "type": "custom",
  19. "char_filter": [],
  20. "filter": ["lowercase", "asciifolding"]
  21. }
  22. }
  23. }
  24. },
  25. "mappings": {
  26. "_doc": {
  27. "properties": {
  28. "foo": {
  29. "type": "keyword",
  30. "normalizer": "my_normalizer"
  31. }
  32. }
  33. }
  34. }
  35. }
  36. PUT index/_doc/1
  37. {
  38. "foo": "BÀR"
  39. }
  40. PUT index/_doc/2
  41. {
  42. "foo": "bar"
  43. }
  44. PUT index/_doc/3
  45. {
  46. "foo": "baz"
  47. }
  48. POST index/_refresh
  49. GET index/_search
  50. {
  51. "query": {
  52. "term": {
  53. "foo": "BAR"
  54. }
  55. }
  56. }
  57. GET index/_search
  58. {
  59. "query": {
  60. "match": {
  61. "foo": "BAR"
  62. }
  63. }
  64. }
  65. --------------------------------
  66. // CONSOLE
  67. The above queries match documents 1 and 2 since `BÀR` is converted to `bar` at
  68. both index and query time.
  69. [source,js]
  70. ----------------------------
  71. {
  72. "took": 43,
  73. "timed_out": false,
  74. "_shards": {
  75. "total": 1,
  76. "successful": 1,
  77. "skipped" : 0,
  78. "failed": 0
  79. },
  80. "hits": {
  81. "total" : {
  82. "value": 2,
  83. "relation": "eq"
  84. },
  85. "max_score": 0.47000363,
  86. "hits": [
  87. {
  88. "_index": "index",
  89. "_type": "_doc",
  90. "_id": "1",
  91. "_score": 0.47000363,
  92. "_source": {
  93. "foo": "BÀR"
  94. }
  95. },
  96. {
  97. "_index": "index",
  98. "_type": "_doc",
  99. "_id": "2",
  100. "_score": 0.47000363,
  101. "_source": {
  102. "foo": "bar"
  103. }
  104. }
  105. ]
  106. }
  107. }
  108. ----------------------------
  109. // TESTRESPONSE[s/"took".*/"took": "$body.took",/]
  110. The fact that keywords are converted prior to indexing also means that
  111. aggregations return normalized values:
  112. [source,js]
  113. ----------------------------
  114. GET index/_search
  115. {
  116. "size": 0,
  117. "aggs": {
  118. "foo_terms": {
  119. "terms": {
  120. "field": "foo"
  121. }
  122. }
  123. }
  124. }
  125. ----------------------------
  126. // CONSOLE
  127. // TEST[continued]
  128. returns
  129. [source,js]
  130. ----------------------------
  131. {
  132. "took": 43,
  133. "timed_out": false,
  134. "_shards": {
  135. "total": 1,
  136. "successful": 1,
  137. "skipped" : 0,
  138. "failed": 0
  139. },
  140. "hits": {
  141. "total" : {
  142. "value": 3,
  143. "relation": "eq"
  144. },
  145. "max_score": null,
  146. "hits": []
  147. },
  148. "aggregations": {
  149. "foo_terms": {
  150. "doc_count_error_upper_bound": 0,
  151. "sum_other_doc_count": 0,
  152. "buckets": [
  153. {
  154. "key": "bar",
  155. "doc_count": 2
  156. },
  157. {
  158. "key": "baz",
  159. "doc_count": 1
  160. }
  161. ]
  162. }
  163. }
  164. }
  165. ----------------------------
  166. // TESTRESPONSE[s/"took".*/"took": "$body.took",/]