bucket-correlation-aggregation.asciidoc 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. [role="xpack"]
  2. [[search-aggregations-bucket-correlation-aggregation]]
  3. === Bucket correlation aggregation
  4. ++++
  5. <titleabbrev>Bucket correlation</titleabbrev>
  6. ++++
  7. experimental::[]
  8. A sibling pipeline aggregation which executes a correlation function on the
  9. configured sibling multi-bucket aggregation.
  10. [[bucket-correlation-agg-syntax]]
  11. ==== Parameters
  12. `buckets_path`::
  13. (Required, string)
  14. Path to the buckets that contain one set of values to correlate.
  15. For syntax, see <<buckets-path-syntax>>.
  16. `function`::
  17. (Required, object)
  18. The correlation function to execute.
  19. +
  20. .Properties of `function`
  21. [%collapsible%open]
  22. ====
  23. `count_correlation`:::
  24. (Required^*^, object)
  25. The configuration to calculate a count correlation. This function is designed for
  26. determining the correlation of a term value and a given metric. Consequently, it
  27. needs to meet the following requirements.
  28. +
  29. --
  30. * The `buckets_path` must point to a `_count` metric.
  31. * The total count of all the `buckets_path` count values must be less than or equal to `indicator.doc_count`.
  32. * Before using this function, you must run an initial calculation to gather the required `indicator` values.
  33. --
  34. +
  35. .Properties of `count_correlation`
  36. [%collapsible%open]
  37. =====
  38. `indicator`:::
  39. (Required, object)
  40. The indicator with which to correlate the configured `buckets_path` values.
  41. +
  42. .Properties of `indicator`
  43. [%collapsible%open]
  44. ======
  45. `doc_count`:::
  46. (Required, integer)
  47. The total number of documents that initially created the `expectations`. It's required to be greater than or equal to the sum
  48. of all values in the `buckets_path` as this is the originating superset of data to which the term values are correlated.
  49. `expectations`:::
  50. (Required, array)
  51. An array of numbers with which to correlate the configured `buckets_path` values. The length of this value must always equal
  52. the number of buckets returned by the `buckets_path`.
  53. `fractions`:::
  54. (Optional, array)
  55. An array of fractions to use when averaging and calculating variance. This should be used if the pre-calculated data and the
  56. `buckets_path` have known gaps. The length of `fractions`, if provided, must equal the length of `expectations`.
  57. ======
  58. =====
  59. ====
  60. ==== Syntax
  61. A `bucket_correlation` aggregation looks like this in isolation:
  62. [source,js]
  63. --------------------------------------------------
  64. {
  65. "bucket_correlation": {
  66. "buckets_path": "range_values>_count", <1>
  67. "function": {
  68. "count_correlation": { <2>
  69. "indicator": {
  70. "expectations": [...],
  71. "doc_count": 10000
  72. }
  73. }
  74. }
  75. }
  76. }
  77. --------------------------------------------------
  78. // NOTCONSOLE
  79. <1> The buckets containing the values to correlate against.
  80. <2> The correlation function definition.
  81. [[bucket-correlation-agg-example]]
  82. ==== Example
  83. The following snippet correlates the individual terms in the field `version` with the `latency` metric. Not shown
  84. is the pre-calculation of the `latency` indicator values, which was done utilizing the
  85. <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation.
  86. This example is only using the 10s percentiles.
  87. [source,console]
  88. -------------------------------------------------
  89. POST correlate_latency/_search?size=0&filter_path=aggregations
  90. {
  91. "aggs": {
  92. "buckets": {
  93. "terms": { <1>
  94. "field": "version",
  95. "size": 2
  96. },
  97. "aggs": {
  98. "latency_ranges": {
  99. "range": { <2>
  100. "field": "latency",
  101. "ranges": [
  102. { "to": 0.0 },
  103. { "from": 0, "to": 105 },
  104. { "from": 105, "to": 225 },
  105. { "from": 225, "to": 445 },
  106. { "from": 445, "to": 665 },
  107. { "from": 665, "to": 885 },
  108. { "from": 885, "to": 1115 },
  109. { "from": 1115, "to": 1335 },
  110. { "from": 1335, "to": 1555 },
  111. { "from": 1555, "to": 1775 },
  112. { "from": 1775 }
  113. ]
  114. }
  115. },
  116. "bucket_correlation": { <3>
  117. "bucket_correlation": {
  118. "buckets_path": "latency_ranges>_count",
  119. "function": {
  120. "count_correlation": {
  121. "indicator": {
  122. "expectations": [0, 52.5, 165, 335, 555, 775, 1000, 1225, 1445, 1665, 1775],
  123. "doc_count": 200
  124. }
  125. }
  126. }
  127. }
  128. }
  129. }
  130. }
  131. }
  132. }
  133. -------------------------------------------------
  134. // TEST[setup:correlate_latency]
  135. <1> The term buckets containing a range aggregation and the bucket correlation aggregation. Both are utilized to calculate
  136. the correlation of the term values with the latency.
  137. <2> The range aggregation on the latency field. The ranges were created referencing the percentiles of the latency field.
  138. <3> The bucket correlation aggregation that calculates the correlation of the number of term values within each range
  139. and the previously calculated indicator values.
  140. And the following may be the response:
  141. [source,console-result]
  142. ----
  143. {
  144. "aggregations" : {
  145. "buckets" : {
  146. "doc_count_error_upper_bound" : 0,
  147. "sum_other_doc_count" : 0,
  148. "buckets" : [
  149. {
  150. "key" : "1.0",
  151. "doc_count" : 100,
  152. "latency_ranges" : {
  153. "buckets" : [
  154. {
  155. "key" : "*-0.0",
  156. "to" : 0.0,
  157. "doc_count" : 0
  158. },
  159. {
  160. "key" : "0.0-105.0",
  161. "from" : 0.0,
  162. "to" : 105.0,
  163. "doc_count" : 1
  164. },
  165. {
  166. "key" : "105.0-225.0",
  167. "from" : 105.0,
  168. "to" : 225.0,
  169. "doc_count" : 9
  170. },
  171. {
  172. "key" : "225.0-445.0",
  173. "from" : 225.0,
  174. "to" : 445.0,
  175. "doc_count" : 0
  176. },
  177. {
  178. "key" : "445.0-665.0",
  179. "from" : 445.0,
  180. "to" : 665.0,
  181. "doc_count" : 0
  182. },
  183. {
  184. "key" : "665.0-885.0",
  185. "from" : 665.0,
  186. "to" : 885.0,
  187. "doc_count" : 0
  188. },
  189. {
  190. "key" : "885.0-1115.0",
  191. "from" : 885.0,
  192. "to" : 1115.0,
  193. "doc_count" : 10
  194. },
  195. {
  196. "key" : "1115.0-1335.0",
  197. "from" : 1115.0,
  198. "to" : 1335.0,
  199. "doc_count" : 20
  200. },
  201. {
  202. "key" : "1335.0-1555.0",
  203. "from" : 1335.0,
  204. "to" : 1555.0,
  205. "doc_count" : 20
  206. },
  207. {
  208. "key" : "1555.0-1775.0",
  209. "from" : 1555.0,
  210. "to" : 1775.0,
  211. "doc_count" : 20
  212. },
  213. {
  214. "key" : "1775.0-*",
  215. "from" : 1775.0,
  216. "doc_count" : 20
  217. }
  218. ]
  219. },
  220. "bucket_correlation" : {
  221. "value" : 0.8402398981360937
  222. }
  223. },
  224. {
  225. "key" : "2.0",
  226. "doc_count" : 100,
  227. "latency_ranges" : {
  228. "buckets" : [
  229. {
  230. "key" : "*-0.0",
  231. "to" : 0.0,
  232. "doc_count" : 0
  233. },
  234. {
  235. "key" : "0.0-105.0",
  236. "from" : 0.0,
  237. "to" : 105.0,
  238. "doc_count" : 19
  239. },
  240. {
  241. "key" : "105.0-225.0",
  242. "from" : 105.0,
  243. "to" : 225.0,
  244. "doc_count" : 11
  245. },
  246. {
  247. "key" : "225.0-445.0",
  248. "from" : 225.0,
  249. "to" : 445.0,
  250. "doc_count" : 20
  251. },
  252. {
  253. "key" : "445.0-665.0",
  254. "from" : 445.0,
  255. "to" : 665.0,
  256. "doc_count" : 20
  257. },
  258. {
  259. "key" : "665.0-885.0",
  260. "from" : 665.0,
  261. "to" : 885.0,
  262. "doc_count" : 20
  263. },
  264. {
  265. "key" : "885.0-1115.0",
  266. "from" : 885.0,
  267. "to" : 1115.0,
  268. "doc_count" : 10
  269. },
  270. {
  271. "key" : "1115.0-1335.0",
  272. "from" : 1115.0,
  273. "to" : 1335.0,
  274. "doc_count" : 0
  275. },
  276. {
  277. "key" : "1335.0-1555.0",
  278. "from" : 1335.0,
  279. "to" : 1555.0,
  280. "doc_count" : 0
  281. },
  282. {
  283. "key" : "1555.0-1775.0",
  284. "from" : 1555.0,
  285. "to" : 1775.0,
  286. "doc_count" : 0
  287. },
  288. {
  289. "key" : "1775.0-*",
  290. "from" : 1775.0,
  291. "doc_count" : 0
  292. }
  293. ]
  294. },
  295. "bucket_correlation" : {
  296. "value" : -0.5759855613334943
  297. }
  298. }
  299. ]
  300. }
  301. }
  302. }
  303. ----