aggs.asciidoc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. [role="xpack"]
  2. [testenv="basic"]
  3. [[sql-functions-aggs]]
  4. === Aggregate Functions
  5. Functions for computing a _single_ result from a set of input values.
  6. {es-sql} supports aggregate functions only alongside <<sql-syntax-group-by,grouping>> (implicit or explicit).
  7. [[sql-functions-aggs-general]]
  8. [float]
  9. === General Purpose
  10. [[sql-functions-aggs-avg]]
  11. ==== `AVG`
  12. .Synopsis:
  13. [source, sql]
  14. --------------------------------------------------
  15. AVG(numeric_field<1>)
  16. --------------------------------------------------
  17. *Input*:
  18. <1> numeric field
  19. *Output*: `double` numeric value
  20. .Description:
  21. Returns the https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (arithmetic mean) of input values.
  22. ["source","sql",subs="attributes,macros"]
  23. --------------------------------------------------
  24. include-tagged::{sql-specs}/docs/docs.csv-spec[aggAvg]
  25. --------------------------------------------------
  26. [[sql-functions-aggs-count]]
  27. ==== `COUNT`
  28. .Synopsis:
  29. [source, sql]
  30. --------------------------------------------------
  31. COUNT(expression<1>)
  32. --------------------------------------------------
  33. *Input*:
  34. <1> a field name, wildcard (`*`) or any numeric value
  35. *Output*: numeric value
  36. .Description:
  37. Returns the total number (count) of input values.
  38. In case of `COUNT(*)` or `COUNT(<literal>)`, _all_ values are considered (including `null` or missing ones).
  39. In case of `COUNT(<field_name>)` `null` values are not considered.
  40. ["source","sql",subs="attributes,macros"]
  41. --------------------------------------------------
  42. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountStar]
  43. --------------------------------------------------
  44. [[sql-functions-aggs-count-all]]
  45. ==== `COUNT(ALL)`
  46. .Synopsis:
  47. [source, sql]
  48. --------------------------------------------------
  49. COUNT(ALL field_name<1>)
  50. --------------------------------------------------
  51. *Input*:
  52. <1> a field name
  53. *Output*: numeric value
  54. .Description:
  55. Returns the total number (count) of all _non-null_ input values. `COUNT(<field_name>)` and `COUNT(ALL <field_name>)` are equivalent.
  56. ["source","sql",subs="attributes,macros"]
  57. --------------------------------------------------
  58. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountAll]
  59. --------------------------------------------------
  60. [[sql-functions-aggs-count-distinct]]
  61. ==== `COUNT(DISTINCT)`
  62. .Synopsis:
  63. [source, sql]
  64. --------------------------------------------------
  65. COUNT(DISTINCT field_name<1>)
  66. --------------------------------------------------
  67. *Input*:
  68. <1> a field name
  69. *Output*: numeric value
  70. .Description:
  71. Returns the total number of _distinct non-null_ values in input values.
  72. ["source","sql",subs="attributes,macros"]
  73. --------------------------------------------------
  74. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountDistinct]
  75. --------------------------------------------------
  76. [[sql-functions-aggs-first]]
  77. ==== `FIRST/FIRST_VALUE`
  78. .Synopsis:
  79. [source, sql]
  80. ----------------------------------------------
  81. FIRST(field_name<1>[, ordering_field_name]<2>)
  82. ----------------------------------------------
  83. *Input*:
  84. <1> target field for the aggregation
  85. <2> optional field used for ordering
  86. *Output*: same type as the input
  87. .Description:
  88. Returns the first **non-NULL** value (if such exists) of the `field_name` input column sorted by
  89. the `ordering_field_name` column. If `ordering_field_name` is not provided, only the `field_name`
  90. column is used for the sorting. E.g.:
  91. [cols="<,<"]
  92. |===
  93. s| a | b
  94. | 100 | 1
  95. | 200 | 1
  96. | 1 | 2
  97. | 2 | 2
  98. | 10 | null
  99. | 20 | null
  100. | null | null
  101. |===
  102. [source, sql]
  103. ----------------------
  104. SELECT FIRST(a) FROM t
  105. ----------------------
  106. will result in:
  107. [cols="<"]
  108. |===
  109. s| FIRST(a)
  110. | 1
  111. |===
  112. and
  113. [source, sql]
  114. -------------------------
  115. SELECT FIRST(a, b) FROM t
  116. -------------------------
  117. will result in:
  118. [cols="<"]
  119. |===
  120. s| FIRST(a, b)
  121. | 100
  122. |===
  123. ["source","sql",subs="attributes,macros"]
  124. -----------------------------------------------------------
  125. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArg]
  126. -----------------------------------------------------------
  127. ["source","sql",subs="attributes,macros"]
  128. --------------------------------------------------------------------
  129. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArgAndGroupBy]
  130. --------------------------------------------------------------------
  131. ["source","sql",subs="attributes,macros"]
  132. -----------------------------------------------------------
  133. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgs]
  134. -----------------------------------------------------------
  135. ["source","sql",subs="attributes,macros"]
  136. ---------------------------------------------------------------------
  137. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgsAndGroupBy]
  138. ---------------------------------------------------------------------
  139. `FIRST_VALUE` is a name alias and can be used instead of `FIRST`, e.g.:
  140. ["source","sql",subs="attributes,macros"]
  141. --------------------------------------------------------------------------
  142. include-tagged::{sql-specs}/docs/docs.csv-spec[firstValueWithTwoArgsAndGroupBy]
  143. --------------------------------------------------------------------------
  144. [NOTE]
  145. `FIRST` cannot be used in a HAVING clause.
  146. [NOTE]
  147. `FIRST` cannot be used with columns of type <<text, `text`>> unless
  148. the field is also <<before-enabling-fielddata,saved as a keyword>>.
  149. [[sql-functions-aggs-last]]
  150. ==== `LAST/LAST_VALUE`
  151. .Synopsis:
  152. [source, sql]
  153. --------------------------------------------------
  154. LAST(field_name<1>[, ordering_field_name]<2>)
  155. --------------------------------------------------
  156. *Input*:
  157. <1> target field for the aggregation
  158. <2> optional field used for ordering
  159. *Output*: same type as the input
  160. .Description:
  161. It's the inverse of <<sql-functions-aggs-first>>. Returns the last **non-NULL** value (if such exists) of the
  162. `field_name`input column sorted descending by the `ordering_field_name` column. If `ordering_field_name` is not
  163. provided, only the `field_name` column is used for the sorting. E.g.:
  164. [cols="<,<"]
  165. |===
  166. s| a | b
  167. | 10 | 1
  168. | 20 | 1
  169. | 1 | 2
  170. | 2 | 2
  171. | 100 | null
  172. | 200 | null
  173. | null | null
  174. |===
  175. [source, sql]
  176. ------------------------
  177. SELECT LAST(a) FROM t
  178. ------------------------
  179. will result in:
  180. [cols="<"]
  181. |===
  182. s| LAST(a)
  183. | 200
  184. |===
  185. and
  186. [source, sql]
  187. ------------------------
  188. SELECT LAST(a, b) FROM t
  189. ------------------------
  190. will result in:
  191. [cols="<"]
  192. |===
  193. s| LAST(a, b)
  194. | 2
  195. |===
  196. ["source","sql",subs="attributes,macros"]
  197. -----------------------------------------------------------
  198. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArg]
  199. -----------------------------------------------------------
  200. ["source","sql",subs="attributes,macros"]
  201. -------------------------------------------------------------------
  202. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArgAndGroupBy]
  203. -------------------------------------------------------------------
  204. ["source","sql",subs="attributes,macros"]
  205. -----------------------------------------------------------
  206. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgs]
  207. -----------------------------------------------------------
  208. ["source","sql",subs="attributes,macros"]
  209. --------------------------------------------------------------------
  210. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgsAndGroupBy]
  211. --------------------------------------------------------------------
  212. `LAST_VALUE` is a name alias and can be used instead of `LAST`, e.g.:
  213. ["source","sql",subs="attributes,macros"]
  214. -------------------------------------------------------------------------
  215. include-tagged::{sql-specs}/docs/docs.csv-spec[lastValueWithTwoArgsAndGroupBy]
  216. -------------------------------------------------------------------------
  217. [NOTE]
  218. `LAST` cannot be used in `HAVING` clause.
  219. [NOTE]
  220. `LAST` cannot be used with columns of type <<text, `text`>> unless
  221. the field is also <<before-enabling-fielddata,`saved as a keyword`>>.
  222. [[sql-functions-aggs-max]]
  223. ==== `MAX`
  224. .Synopsis:
  225. [source, sql]
  226. --------------------------------------------------
  227. MAX(field_name<1>)
  228. --------------------------------------------------
  229. *Input*:
  230. <1> a numeric field
  231. *Output*: same type as the input
  232. .Description:
  233. Returns the maximum value across input values in the field `field_name`.
  234. ["source","sql",subs="attributes,macros"]
  235. --------------------------------------------------
  236. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMax]
  237. --------------------------------------------------
  238. [NOTE]
  239. `MAX` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  240. <<sql-functions-aggs-last>> and therefore, it cannot be used in `HAVING` clause.
  241. [[sql-functions-aggs-min]]
  242. ==== `MIN`
  243. .Synopsis:
  244. [source, sql]
  245. --------------------------------------------------
  246. MIN(field_name<1>)
  247. --------------------------------------------------
  248. *Input*:
  249. <1> a numeric field
  250. *Output*: same type as the input
  251. .Description:
  252. Returns the minimum value across input values in the field `field_name`.
  253. ["source","sql",subs="attributes,macros"]
  254. --------------------------------------------------
  255. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMin]
  256. --------------------------------------------------
  257. [NOTE]
  258. `MIN` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  259. <<sql-functions-aggs-first>> and therefore, it cannot be used in `HAVING` clause.
  260. [[sql-functions-aggs-sum]]
  261. ==== `SUM`
  262. .Synopsis:
  263. [source, sql]
  264. --------------------------------------------------
  265. SUM(field_name<1>)
  266. --------------------------------------------------
  267. *Input*:
  268. <1> a numeric field
  269. *Output*: `bigint` for integer input, `double` for floating points
  270. .Description:
  271. Returns the sum of input values in the field `field_name`.
  272. ["source","sql",subs="attributes,macros"]
  273. --------------------------------------------------
  274. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSum]
  275. --------------------------------------------------
  276. [[sql-functions-aggs-statistics]]
  277. [float]
  278. === Statistics
  279. [[sql-functions-aggs-kurtosis]]
  280. ==== `KURTOSIS`
  281. .Synopsis:
  282. [source, sql]
  283. --------------------------------------------------
  284. KURTOSIS(field_name<1>)
  285. --------------------------------------------------
  286. *Input*:
  287. <1> a numeric field
  288. *Output*: `double` numeric value
  289. .Description:
  290. https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution of input values in the field `field_name`.
  291. ["source","sql",subs="attributes,macros"]
  292. --------------------------------------------------
  293. include-tagged::{sql-specs}/docs/docs.csv-spec[aggKurtosis]
  294. --------------------------------------------------
  295. [[sql-functions-aggs-mad]]
  296. ==== `MAD`
  297. .Synopsis:
  298. [source, sql]
  299. --------------------------------------------------
  300. MAD(field_name<1>)
  301. --------------------------------------------------
  302. *Input*:
  303. <1> a numeric field
  304. *Output*: `double` numeric value
  305. .Description:
  306. https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
  307. ["source","sql",subs="attributes,macros"]
  308. --------------------------------------------------
  309. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMad]
  310. --------------------------------------------------
  311. [[sql-functions-aggs-percentile]]
  312. ==== `PERCENTILE`
  313. .Synopsis:
  314. [source, sql]
  315. --------------------------------------------------
  316. PERCENTILE(field_name<1>, numeric_exp<2>)
  317. --------------------------------------------------
  318. *Input*:
  319. <1> a numeric field
  320. <2> a numeric expression (must be a constant and not based on a field)
  321. *Output*: `double` numeric value
  322. .Description:
  323. Returns the nth https://en.wikipedia.org/wiki/Percentile[percentile] (represented by `numeric_exp` parameter)
  324. of input values in the field `field_name`.
  325. ["source","sql",subs="attributes,macros"]
  326. --------------------------------------------------
  327. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentile]
  328. --------------------------------------------------
  329. [[sql-functions-aggs-percentile-rank]]
  330. ==== `PERCENTILE_RANK`
  331. .Synopsis:
  332. [source, sql]
  333. --------------------------------------------------
  334. PERCENTILE_RANK(field_name<1>, numeric_exp<2>)
  335. --------------------------------------------------
  336. *Input*:
  337. <1> a numeric field
  338. <2> a numeric expression (must be a constant and not based on a field)
  339. *Output*: `double` numeric value
  340. .Description:
  341. Returns the nth https://en.wikipedia.org/wiki/Percentile_rank[percentile rank] (represented by `numeric_exp` parameter)
  342. of input values in the field `field_name`.
  343. ["source","sql",subs="attributes,macros"]
  344. --------------------------------------------------
  345. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRank]
  346. --------------------------------------------------
  347. [[sql-functions-aggs-skewness]]
  348. ==== `SKEWNESS`
  349. .Synopsis:
  350. [source, sql]
  351. --------------------------------------------------
  352. SKEWNESS(field_name<1>)
  353. --------------------------------------------------
  354. *Input*:
  355. <1> a numeric field
  356. *Output*: `double` numeric value
  357. .Description:
  358. https://en.wikipedia.org/wiki/Skewness[Quantify] the asymmetric distribution of input values in the field `field_name`.
  359. ["source","sql",subs="attributes,macros"]
  360. --------------------------------------------------
  361. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSkewness]
  362. --------------------------------------------------
  363. [[sql-functions-aggs-stddev-pop]]
  364. ==== `STDDEV_POP`
  365. .Synopsis:
  366. [source, sql]
  367. --------------------------------------------------
  368. STDDEV_POP(field_name<1>)
  369. --------------------------------------------------
  370. *Input*:
  371. <1> a numeric field
  372. *Output*: `double` numeric value
  373. .Description:
  374. Returns the https://en.wikipedia.org/wiki/Standard_deviations[population standard deviation] of input values in the field `field_name`.
  375. ["source","sql",subs="attributes,macros"]
  376. --------------------------------------------------
  377. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevPop]
  378. --------------------------------------------------
  379. [[sql-functions-aggs-sum-squares]]
  380. ==== `SUM_OF_SQUARES`
  381. .Synopsis:
  382. [source, sql]
  383. --------------------------------------------------
  384. SUM_OF_SQUARES(field_name<1>)
  385. --------------------------------------------------
  386. *Input*:
  387. <1> a numeric field
  388. *Output*: `double` numeric value
  389. .Description:
  390. Returns the https://en.wikipedia.org/wiki/Total_sum_of_squares[sum of squares] of input values in the field `field_name`.
  391. ["source","sql",subs="attributes,macros"]
  392. --------------------------------------------------
  393. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSumOfSquares]
  394. --------------------------------------------------
  395. [[sql-functions-aggs-var-pop]]
  396. ==== `VAR_POP`
  397. .Synopsis:
  398. [source, sql]
  399. --------------------------------------------------
  400. VAR_POP(field_name<1>)
  401. --------------------------------------------------
  402. *Input*:
  403. <1> a numeric field
  404. *Output*: `double` numeric value
  405. .Description:
  406. Returns the https://en.wikipedia.org/wiki/Variance[population variance] of input values in the field `field_name`.
  407. ["source","sql",subs="attributes,macros"]
  408. --------------------------------------------------
  409. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarPop]
  410. --------------------------------------------------