aggs.asciidoc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. [role="xpack"]
  2. [testenv="basic"]
  3. [[sql-functions-aggs]]
  4. === Aggregate Functions
  5. Functions for computing a _single_ result from a set of input values.
  6. {es-sql} supports aggregate functions only alongside <<sql-syntax-group-by,grouping>> (implicit or explicit).
  7. [[sql-functions-aggs-general]]
  8. [float]
  9. === General Purpose
  10. [[sql-functions-aggs-avg]]
  11. ==== `AVG`
  12. .Synopsis:
  13. [source, sql]
  14. --------------------------------------------------
  15. AVG(numeric_field) <1>
  16. --------------------------------------------------
  17. *Input*:
  18. <1> numeric field
  19. *Output*: `double` numeric value
  20. *Description*: Returns the https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (arithmetic mean) of input values.
  21. ["source","sql",subs="attributes,macros"]
  22. --------------------------------------------------
  23. include-tagged::{sql-specs}/docs/docs.csv-spec[aggAvg]
  24. --------------------------------------------------
  25. [[sql-functions-aggs-count]]
  26. ==== `COUNT`
  27. .Synopsis:
  28. [source, sql]
  29. --------------------------------------------------
  30. COUNT(expression) <1>
  31. --------------------------------------------------
  32. *Input*:
  33. <1> a field name, wildcard (`*`) or any numeric value
  34. *Output*: numeric value
  35. *Description*: Returns the total number (count) of input values.
  36. In case of `COUNT(*)` or `COUNT(<literal>)`, _all_ values are considered (including `null` or missing ones).
  37. In case of `COUNT(<field_name>)` `null` values are not considered.
  38. ["source","sql",subs="attributes,macros"]
  39. --------------------------------------------------
  40. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountStar]
  41. --------------------------------------------------
  42. [[sql-functions-aggs-count-all]]
  43. ==== `COUNT(ALL)`
  44. .Synopsis:
  45. [source, sql]
  46. --------------------------------------------------
  47. COUNT(ALL field_name) <1>
  48. --------------------------------------------------
  49. *Input*:
  50. <1> a field name
  51. *Output*: numeric value
  52. *Description*: Returns the total number (count) of all _non-null_ input values. `COUNT(<field_name>)` and `COUNT(ALL <field_name>)` are equivalent.
  53. ["source","sql",subs="attributes,macros"]
  54. --------------------------------------------------
  55. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountAll]
  56. --------------------------------------------------
  57. [[sql-functions-aggs-count-distinct]]
  58. ==== `COUNT(DISTINCT)`
  59. .Synopsis:
  60. [source, sql]
  61. --------------------------------------------------
  62. COUNT(DISTINCT field_name) <1>
  63. --------------------------------------------------
  64. *Input*:
  65. <1> a field name
  66. *Output*: numeric value
  67. *Description*: Returns the total number of _distinct non-null_ values in input values.
  68. ["source","sql",subs="attributes,macros"]
  69. --------------------------------------------------
  70. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountDistinct]
  71. --------------------------------------------------
  72. [[sql-functions-aggs-first]]
  73. ==== `FIRST/FIRST_VALUE`
  74. .Synopsis:
  75. [source, sql]
  76. ----------------------------------------------
  77. FIRST(
  78. field_name <1>
  79. [, ordering_field_name]) <2>
  80. ----------------------------------------------
  81. *Input*:
  82. <1> target field for the aggregation
  83. <2> optional field used for ordering
  84. *Output*: same type as the input
  85. *Description*: Returns the first **non-NULL** value (if such exists) of the `field_name` input column sorted by
  86. the `ordering_field_name` column. If `ordering_field_name` is not provided, only the `field_name`
  87. column is used for the sorting. E.g.:
  88. [cols="<,<"]
  89. |===
  90. s| a | b
  91. | 100 | 1
  92. | 200 | 1
  93. | 1 | 2
  94. | 2 | 2
  95. | 10 | null
  96. | 20 | null
  97. | null | null
  98. |===
  99. [source, sql]
  100. ----------------------
  101. SELECT FIRST(a) FROM t
  102. ----------------------
  103. will result in:
  104. [cols="<"]
  105. |===
  106. s| FIRST(a)
  107. | 1
  108. |===
  109. and
  110. [source, sql]
  111. -------------------------
  112. SELECT FIRST(a, b) FROM t
  113. -------------------------
  114. will result in:
  115. [cols="<"]
  116. |===
  117. s| FIRST(a, b)
  118. | 100
  119. |===
  120. ["source","sql",subs="attributes,macros"]
  121. -----------------------------------------------------------
  122. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArg]
  123. -----------------------------------------------------------
  124. ["source","sql",subs="attributes,macros"]
  125. --------------------------------------------------------------------
  126. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArgAndGroupBy]
  127. --------------------------------------------------------------------
  128. ["source","sql",subs="attributes,macros"]
  129. -----------------------------------------------------------
  130. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgs]
  131. -----------------------------------------------------------
  132. ["source","sql",subs="attributes,macros"]
  133. ---------------------------------------------------------------------
  134. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgsAndGroupBy]
  135. ---------------------------------------------------------------------
  136. `FIRST_VALUE` is a name alias and can be used instead of `FIRST`, e.g.:
  137. ["source","sql",subs="attributes,macros"]
  138. --------------------------------------------------------------------------
  139. include-tagged::{sql-specs}/docs/docs.csv-spec[firstValueWithTwoArgsAndGroupBy]
  140. --------------------------------------------------------------------------
  141. [NOTE]
  142. `FIRST` cannot be used in a HAVING clause.
  143. [NOTE]
  144. `FIRST` cannot be used with columns of type <<text, `text`>> unless
  145. the field is also <<before-enabling-fielddata,saved as a keyword>>.
  146. [[sql-functions-aggs-last]]
  147. ==== `LAST/LAST_VALUE`
  148. .Synopsis:
  149. [source, sql]
  150. --------------------------------------------------
  151. LAST(
  152. field_name <1>
  153. [, ordering_field_name]) <2>
  154. --------------------------------------------------
  155. *Input*:
  156. <1> target field for the aggregation
  157. <2> optional field used for ordering
  158. *Output*: same type as the input
  159. *Description*: It's the inverse of <<sql-functions-aggs-first>>. Returns the last **non-NULL** value (if such exists) of the
  160. `field_name` input column sorted descending by the `ordering_field_name` column. If `ordering_field_name` is not
  161. provided, only the `field_name` column is used for the sorting. E.g.:
  162. [cols="<,<"]
  163. |===
  164. s| a | b
  165. | 10 | 1
  166. | 20 | 1
  167. | 1 | 2
  168. | 2 | 2
  169. | 100 | null
  170. | 200 | null
  171. | null | null
  172. |===
  173. [source, sql]
  174. ------------------------
  175. SELECT LAST(a) FROM t
  176. ------------------------
  177. will result in:
  178. [cols="<"]
  179. |===
  180. s| LAST(a)
  181. | 200
  182. |===
  183. and
  184. [source, sql]
  185. ------------------------
  186. SELECT LAST(a, b) FROM t
  187. ------------------------
  188. will result in:
  189. [cols="<"]
  190. |===
  191. s| LAST(a, b)
  192. | 2
  193. |===
  194. ["source","sql",subs="attributes,macros"]
  195. -----------------------------------------------------------
  196. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArg]
  197. -----------------------------------------------------------
  198. ["source","sql",subs="attributes,macros"]
  199. -------------------------------------------------------------------
  200. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArgAndGroupBy]
  201. -------------------------------------------------------------------
  202. ["source","sql",subs="attributes,macros"]
  203. -----------------------------------------------------------
  204. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgs]
  205. -----------------------------------------------------------
  206. ["source","sql",subs="attributes,macros"]
  207. --------------------------------------------------------------------
  208. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgsAndGroupBy]
  209. --------------------------------------------------------------------
  210. `LAST_VALUE` is a name alias and can be used instead of `LAST`, e.g.:
  211. ["source","sql",subs="attributes,macros"]
  212. -------------------------------------------------------------------------
  213. include-tagged::{sql-specs}/docs/docs.csv-spec[lastValueWithTwoArgsAndGroupBy]
  214. -------------------------------------------------------------------------
  215. [NOTE]
  216. `LAST` cannot be used in `HAVING` clause.
  217. [NOTE]
  218. `LAST` cannot be used with columns of type <<text, `text`>> unless
  219. the field is also <<before-enabling-fielddata,`saved as a keyword`>>.
  220. [[sql-functions-aggs-max]]
  221. ==== `MAX`
  222. .Synopsis:
  223. [source, sql]
  224. --------------------------------------------------
  225. MAX(field_name) <1>
  226. --------------------------------------------------
  227. *Input*:
  228. <1> a numeric field
  229. *Output*: same type as the input
  230. *Description*: Returns the maximum value across input values in the field `field_name`.
  231. ["source","sql",subs="attributes,macros"]
  232. --------------------------------------------------
  233. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMax]
  234. --------------------------------------------------
  235. [NOTE]
  236. `MAX` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  237. <<sql-functions-aggs-last>> and therefore, it cannot be used in `HAVING` clause.
  238. [[sql-functions-aggs-min]]
  239. ==== `MIN`
  240. .Synopsis:
  241. [source, sql]
  242. --------------------------------------------------
  243. MIN(field_name) <1>
  244. --------------------------------------------------
  245. *Input*:
  246. <1> a numeric field
  247. *Output*: same type as the input
  248. *Description*: Returns the minimum value across input values in the field `field_name`.
  249. ["source","sql",subs="attributes,macros"]
  250. --------------------------------------------------
  251. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMin]
  252. --------------------------------------------------
  253. [NOTE]
  254. `MIN` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  255. <<sql-functions-aggs-first>> and therefore, it cannot be used in `HAVING` clause.
  256. [[sql-functions-aggs-sum]]
  257. ==== `SUM`
  258. .Synopsis:
  259. [source, sql]
  260. --------------------------------------------------
  261. SUM(field_name) <1>
  262. --------------------------------------------------
  263. *Input*:
  264. <1> a numeric field
  265. *Output*: `bigint` for integer input, `double` for floating points
  266. *Description*: Returns the sum of input values in the field `field_name`.
  267. ["source","sql",subs="attributes,macros"]
  268. --------------------------------------------------
  269. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSum]
  270. --------------------------------------------------
  271. [[sql-functions-aggs-statistics]]
  272. [float]
  273. === Statistics
  274. [[sql-functions-aggs-kurtosis]]
  275. ==== `KURTOSIS`
  276. .Synopsis:
  277. [source, sql]
  278. --------------------------------------------------
  279. KURTOSIS(field_name) <1>
  280. --------------------------------------------------
  281. *Input*:
  282. <1> a numeric field
  283. *Output*: `double` numeric value
  284. *Description*:
  285. https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution of input values in the field `field_name`.
  286. ["source","sql",subs="attributes,macros"]
  287. --------------------------------------------------
  288. include-tagged::{sql-specs}/docs/docs.csv-spec[aggKurtosis]
  289. --------------------------------------------------
  290. [[sql-functions-aggs-mad]]
  291. ==== `MAD`
  292. .Synopsis:
  293. [source, sql]
  294. --------------------------------------------------
  295. MAD(field_name) <1>
  296. --------------------------------------------------
  297. *Input*:
  298. <1> a numeric field
  299. *Output*: `double` numeric value
  300. *Description*:
  301. https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
  302. ["source","sql",subs="attributes,macros"]
  303. --------------------------------------------------
  304. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMad]
  305. --------------------------------------------------
  306. [[sql-functions-aggs-percentile]]
  307. ==== `PERCENTILE`
  308. .Synopsis:
  309. [source, sql]
  310. --------------------------------------------------
  311. PERCENTILE(
  312. field_name, <1>
  313. numeric_exp) <2>
  314. --------------------------------------------------
  315. *Input*:
  316. <1> a numeric field
  317. <2> a numeric expression (must be a constant and not based on a field)
  318. *Output*: `double` numeric value
  319. *Description*:
  320. Returns the nth https://en.wikipedia.org/wiki/Percentile[percentile] (represented by `numeric_exp` parameter)
  321. of input values in the field `field_name`.
  322. ["source","sql",subs="attributes,macros"]
  323. --------------------------------------------------
  324. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentile]
  325. --------------------------------------------------
  326. [[sql-functions-aggs-percentile-rank]]
  327. ==== `PERCENTILE_RANK`
  328. .Synopsis:
  329. [source, sql]
  330. --------------------------------------------------
  331. PERCENTILE_RANK(
  332. field_name, <1>
  333. numeric_exp) <2>
  334. --------------------------------------------------
  335. *Input*:
  336. <1> a numeric field
  337. <2> a numeric expression (must be a constant and not based on a field)
  338. *Output*: `double` numeric value
  339. *Description*:
  340. Returns the nth https://en.wikipedia.org/wiki/Percentile_rank[percentile rank] (represented by `numeric_exp` parameter)
  341. of input values in the field `field_name`.
  342. ["source","sql",subs="attributes,macros"]
  343. --------------------------------------------------
  344. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRank]
  345. --------------------------------------------------
  346. [[sql-functions-aggs-skewness]]
  347. ==== `SKEWNESS`
  348. .Synopsis:
  349. [source, sql]
  350. --------------------------------------------------
  351. SKEWNESS(field_name) <1>
  352. --------------------------------------------------
  353. *Input*:
  354. <1> a numeric field
  355. *Output*: `double` numeric value
  356. *Description*:
  357. https://en.wikipedia.org/wiki/Skewness[Quantify] the asymmetric distribution of input values in the field `field_name`.
  358. ["source","sql",subs="attributes,macros"]
  359. --------------------------------------------------
  360. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSkewness]
  361. --------------------------------------------------
  362. [[sql-functions-aggs-stddev-pop]]
  363. ==== `STDDEV_POP`
  364. .Synopsis:
  365. [source, sql]
  366. --------------------------------------------------
  367. STDDEV_POP(field_name) <1>
  368. --------------------------------------------------
  369. *Input*:
  370. <1> a numeric field
  371. *Output*: `double` numeric value
  372. *Description*:
  373. Returns the https://en.wikipedia.org/wiki/Standard_deviations[population standard deviation] of input values in the field `field_name`.
  374. ["source","sql",subs="attributes,macros"]
  375. --------------------------------------------------
  376. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevPop]
  377. --------------------------------------------------
  378. [[sql-functions-aggs-sum-squares]]
  379. ==== `SUM_OF_SQUARES`
  380. .Synopsis:
  381. [source, sql]
  382. --------------------------------------------------
  383. SUM_OF_SQUARES(field_name) <1>
  384. --------------------------------------------------
  385. *Input*:
  386. <1> a numeric field
  387. *Output*: `double` numeric value
  388. *Description*:
  389. Returns the https://en.wikipedia.org/wiki/Total_sum_of_squares[sum of squares] of input values in the field `field_name`.
  390. ["source","sql",subs="attributes,macros"]
  391. --------------------------------------------------
  392. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSumOfSquares]
  393. --------------------------------------------------
  394. [[sql-functions-aggs-var-pop]]
  395. ==== `VAR_POP`
  396. .Synopsis:
  397. [source, sql]
  398. --------------------------------------------------
  399. VAR_POP(field_name) <1>
  400. --------------------------------------------------
  401. *Input*:
  402. <1> a numeric field
  403. *Output*: `double` numeric value
  404. *Description*:
  405. Returns the https://en.wikipedia.org/wiki/Variance[population variance] of input values in the field `field_name`.
  406. ["source","sql",subs="attributes,macros"]
  407. --------------------------------------------------
  408. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarPop]
  409. --------------------------------------------------