multi-terms-aggregation.asciidoc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. [role="xpack"]
  2. [testenv="basic"]
  3. [[search-aggregations-bucket-multi-terms-aggregation]]
  4. === Multi Terms aggregation
  5. ++++
  6. <titleabbrev>Multi Terms</titleabbrev>
  7. ++++
  8. A multi-bucket value source based aggregation where buckets are dynamically built - one per unique set of values. The multi terms
  9. aggregation is very similar to the <<search-aggregations-bucket-terms-aggregation-order,`terms aggregation`>>, however in most cases
  10. it will be slower than the terms aggregation and will consume more memory. Therefore, if the same set of fields is constantly used,
  11. it would be more efficient to index a combined key for these fields as a separate field and use the terms aggregation on this field.
  12. //////////////////////////
  13. [source,js]
  14. --------------------------------------------------
  15. PUT /products
  16. {
  17. "mappings": {
  18. "properties": {
  19. "genre": {
  20. "type": "keyword"
  21. },
  22. "product": {
  23. "type": "keyword"
  24. },
  25. "quantity": {
  26. "type": "integer"
  27. }
  28. }
  29. }
  30. }
  31. POST /products/_bulk?refresh
  32. {"index":{"_id":0}}
  33. {"genre": "rock", "product": "Product A", "quantity": 4}
  34. {"index":{"_id":1}}
  35. {"genre": "rock", "product": "Product A", "quantity": 5}
  36. {"index":{"_id":2}}
  37. {"genre": "rock", "product": "Product B", "quantity": 1}
  38. {"index":{"_id":3}}
  39. {"genre": "jazz", "product": "Product B", "quantity": 10}
  40. {"index":{"_id":4}}
  41. {"genre": "electronic", "product": "Product B", "quantity": 3}
  42. {"index":{"_id":5}}
  43. {"genre": "electronic"}
  44. -------------------------------------------------
  45. // NOTCONSOLE
  46. // TESTSETUP
  47. //////////////////////////
  48. Example:
  49. [source,console,id=multi-terms-aggregation-example]
  50. --------------------------------------------------
  51. GET /products/_search
  52. {
  53. "aggs": {
  54. "genres_and_products": {
  55. "multi_terms": {
  56. "terms": [{
  57. "field": "genre" <1>
  58. }, {
  59. "field": "product"
  60. }]
  61. }
  62. }
  63. }
  64. }
  65. --------------------------------------------------
  66. // TEST[s/_search/_search\?filter_path=aggregations/]
  67. <1> `multi_terms` aggregation can work with the same field types as a
  68. <<search-aggregations-bucket-terms-aggregation-order,`terms aggregation`>> and supports most of the terms aggregation parameters.
  69. Response:
  70. [source,console-result]
  71. --------------------------------------------------
  72. {
  73. ...
  74. "aggregations" : {
  75. "genres_and_products" : {
  76. "doc_count_error_upper_bound" : 0, <1>
  77. "sum_other_doc_count" : 0, <2>
  78. "buckets" : [ <3>
  79. {
  80. "key" : [ <4>
  81. "rock",
  82. "Product A"
  83. ],
  84. "key_as_string" : "rock|Product A",
  85. "doc_count" : 2
  86. },
  87. {
  88. "key" : [
  89. "electronic",
  90. "Product B"
  91. ],
  92. "key_as_string" : "electronic|Product B",
  93. "doc_count" : 1
  94. },
  95. {
  96. "key" : [
  97. "jazz",
  98. "Product B"
  99. ],
  100. "key_as_string" : "jazz|Product B",
  101. "doc_count" : 1
  102. },
  103. {
  104. "key" : [
  105. "rock",
  106. "Product B"
  107. ],
  108. "key_as_string" : "rock|Product B",
  109. "doc_count" : 1
  110. }
  111. ]
  112. }
  113. }
  114. }
  115. --------------------------------------------------
  116. // TESTRESPONSE[s/\.\.\.//]
  117. <1> an upper bound of the error on the document counts for each term, see <<search-aggregations-bucket-multi-terms-aggregation-approximate-counts,below>>
  118. <2> when there are lots of unique terms, Elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response
  119. <3> the list of the top buckets.
  120. <4> the keys are arrays of values ordered the same way as the expressions in the `terms` parameter of the aggregation
  121. By default, the `multi_terms` aggregation will return the buckets for the top ten terms ordered by the `doc_count`. One can
  122. change this default behaviour by setting the `size` parameter.
  123. [[search-aggregations-bucket-multi-terms-aggregation-parameters]]
  124. ==== Aggregation Parameters
  125. The following parameters are supported. See <<search-aggregations-bucket-terms-aggregation-order,`terms aggregation`>> for more detailed
  126. explanation of these parameters.
  127. [horizontal]
  128. size:: Optional. Defines how many term buckets should be returned out of the overall terms list. Defaults to 10.
  129. shard_size:: Optional. The higher the requested `size` is, the more accurate the results will be, but also, the more
  130. expensive it will be to compute the final results. The default `shard_size` is `(size * 1.5 + 10)`.
  131. show_term_doc_count_error:: Optional. Calculates the doc count error on a per-term basis. Defaults to `false`.
  132. order:: Optional. Specifies the order of the buckets. Defaults to the number of documents per bucket. The bucket terms
  133. value is used as a tiebreaker for buckets with the same document count.
  134. min_doc_count:: Optional. The minimal number of documents in a bucket for it to be returned. Defaults to 1.
  135. shard_min_doc_count:: Optional. The minimal number of documents in a bucket on each shard for it to be returned. Defaults to
  136. `min_doc_count`.
  137. collect_mode:: Optional. Specifies the strategy for data collection. The `depth_first` or `breadth_first` modes are
  138. supported. Defaults to `breadth_first`.
  139. [[search-aggregations-bucket-multi-terms-aggregation-script]]
  140. ==== Script
  141. Generating the terms using a script:
  142. [source,console,id=multi-terms-aggregation-script-example]
  143. --------------------------------------------------
  144. GET /products/_search
  145. {
  146. "aggs": {
  147. "genres_and_products": {
  148. "multi_terms": {
  149. "terms": [
  150. {
  151. "script": {
  152. "source": "doc['genre'].value.length()",
  153. "lang": "painless"
  154. }
  155. },
  156. {
  157. "field": "product"
  158. }
  159. ]
  160. }
  161. }
  162. }
  163. }
  164. --------------------------------------------------
  165. // TEST[s/_search/_search\?filter_path=aggregations/]
  166. Response:
  167. [source,console-result]
  168. --------------------------------------------------
  169. {
  170. ...
  171. "aggregations" : {
  172. "genres_and_products" : {
  173. "doc_count_error_upper_bound" : 0,
  174. "sum_other_doc_count" : 0,
  175. "buckets" : [
  176. {
  177. "key" : [
  178. "4",
  179. "Product A"
  180. ],
  181. "key_as_string" : "4|Product A",
  182. "doc_count" : 2
  183. },
  184. {
  185. "key" : [
  186. "4",
  187. "Product B"
  188. ],
  189. "key_as_string" : "4|Product B",
  190. "doc_count" : 2
  191. },
  192. {
  193. "key" : [
  194. "10",
  195. "Product B"
  196. ],
  197. "key_as_string" : "10|Product B",
  198. "doc_count" : 1
  199. }
  200. ]
  201. }
  202. }
  203. }
  204. --------------------------------------------------
  205. // TESTRESPONSE[s/\.\.\.//]
  206. ==== Missing value
  207. The `missing` parameter defines how documents that are missing a value should be treated.
  208. By default, if any of the key components are missing, the entire document will be ignored,
  209. but it is also possible to treat them as if they had a value by using the `missing` parameter.
  210. [source,console,id=multi-terms-aggregation-missing-example]
  211. --------------------------------------------------
  212. GET /products/_search
  213. {
  214. "aggs": {
  215. "genres_and_products": {
  216. "multi_terms": {
  217. "terms": [
  218. {
  219. "field": "genre"
  220. },
  221. {
  222. "field": "product",
  223. "missing": "Product Z"
  224. }
  225. ]
  226. }
  227. }
  228. }
  229. }
  230. --------------------------------------------------
  231. // TEST[s/_search/_search\?filter_path=aggregations/]
  232. Response:
  233. [source,console-result]
  234. --------------------------------------------------
  235. {
  236. ...
  237. "aggregations" : {
  238. "genres_and_products" : {
  239. "doc_count_error_upper_bound" : 0,
  240. "sum_other_doc_count" : 0,
  241. "buckets" : [
  242. {
  243. "key" : [
  244. "rock",
  245. "Product A"
  246. ],
  247. "key_as_string" : "rock|Product A",
  248. "doc_count" : 2
  249. },
  250. {
  251. "key" : [
  252. "electronic",
  253. "Product B"
  254. ],
  255. "key_as_string" : "electronic|Product B",
  256. "doc_count" : 1
  257. },
  258. {
  259. "key" : [
  260. "electronic",
  261. "Product Z"
  262. ],
  263. "key_as_string" : "electronic|Product Z", <1>
  264. "doc_count" : 1
  265. },
  266. {
  267. "key" : [
  268. "jazz",
  269. "Product B"
  270. ],
  271. "key_as_string" : "jazz|Product B",
  272. "doc_count" : 1
  273. },
  274. {
  275. "key" : [
  276. "rock",
  277. "Product B"
  278. ],
  279. "key_as_string" : "rock|Product B",
  280. "doc_count" : 1
  281. }
  282. ]
  283. }
  284. }
  285. }
  286. --------------------------------------------------
  287. // TESTRESPONSE[s/\.\.\.//]
  288. <1> Documents without a value in the `product` field will fall into the same bucket as documents that have the value `Product Z`.
  289. ==== Mixing field types
  290. WARNING: When aggregating on multiple indices the type of the aggregated field may not be the same in all indices.
  291. Some types are compatible with each other (`integer` and `long` or `float` and `double`) but when the types are a mix
  292. of decimal and non-decimal numbers, the terms aggregation will promote the non-decimal numbers to decimal numbers.
  293. This can result in a loss of precision in the bucket values.
  294. ==== Sub aggregation and sorting examples
  295. Like most bucket aggregations, the `multi_terms` aggregation supports sub-aggregations and ordering the buckets by a metrics sub-aggregation:
  296. [source,console,id=multi-terms-aggregation-subaggregation-example]
  297. --------------------------------------------------
  298. GET /products/_search
  299. {
  300. "aggs": {
  301. "genres_and_products": {
  302. "multi_terms": {
  303. "terms": [
  304. {
  305. "field": "genre"
  306. },
  307. {
  308. "field": "product"
  309. }
  310. ],
  311. "order": {
  312. "total_quantity": "desc"
  313. }
  314. },
  315. "aggs": {
  316. "total_quantity": {
  317. "sum": {
  318. "field": "quantity"
  319. }
  320. }
  321. }
  322. }
  323. }
  324. }
  325. --------------------------------------------------
  326. // TEST[s/_search/_search\?filter_path=aggregations/]
  327. [source,console-result]
  328. --------------------------------------------------
  329. {
  330. ...
  331. "aggregations" : {
  332. "genres_and_products" : {
  333. "doc_count_error_upper_bound" : 0,
  334. "sum_other_doc_count" : 0,
  335. "buckets" : [
  336. {
  337. "key" : [
  338. "jazz",
  339. "Product B"
  340. ],
  341. "key_as_string" : "jazz|Product B",
  342. "doc_count" : 1,
  343. "total_quantity" : {
  344. "value" : 10.0
  345. }
  346. },
  347. {
  348. "key" : [
  349. "rock",
  350. "Product A"
  351. ],
  352. "key_as_string" : "rock|Product A",
  353. "doc_count" : 2,
  354. "total_quantity" : {
  355. "value" : 9.0
  356. }
  357. },
  358. {
  359. "key" : [
  360. "electronic",
  361. "Product B"
  362. ],
  363. "key_as_string" : "electronic|Product B",
  364. "doc_count" : 1,
  365. "total_quantity" : {
  366. "value" : 3.0
  367. }
  368. },
  369. {
  370. "key" : [
  371. "rock",
  372. "Product B"
  373. ],
  374. "key_as_string" : "rock|Product B",
  375. "doc_count" : 1,
  376. "total_quantity" : {
  377. "value" : 1.0
  378. }
  379. }
  380. ]
  381. }
  382. }
  383. }
  384. --------------------------------------------------
  385. // TESTRESPONSE[s/\.\.\.//]