aggs.asciidoc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. [role="xpack"]
  2. [testenv="basic"]
  3. [[sql-functions-aggs]]
  4. === Aggregate Functions
  5. Functions for computing a _single_ result from a set of input values.
  6. {es-sql} supports aggregate functions only alongside <<sql-syntax-group-by,grouping>> (implicit or explicit).
  7. [[sql-functions-aggs-general]]
  8. [float]
  9. === General Purpose
  10. [[sql-functions-aggs-avg]]
  11. ==== `AVG`
  12. .Synopsis:
  13. [source, sql]
  14. --------------------------------------------------
  15. AVG(numeric_field) <1>
  16. --------------------------------------------------
  17. *Input*:
  18. <1> numeric field
  19. *Output*: `double` numeric value
  20. .Description:
  21. Returns the https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (arithmetic mean) of input values.
  22. ["source","sql",subs="attributes,macros"]
  23. --------------------------------------------------
  24. include-tagged::{sql-specs}/docs/docs.csv-spec[aggAvg]
  25. --------------------------------------------------
  26. [[sql-functions-aggs-count]]
  27. ==== `COUNT`
  28. .Synopsis:
  29. [source, sql]
  30. --------------------------------------------------
  31. COUNT(expression) <1>
  32. --------------------------------------------------
  33. *Input*:
  34. <1> a field name, wildcard (`*`) or any numeric value
  35. *Output*: numeric value
  36. .Description:
  37. Returns the total number (count) of input values.
  38. In case of `COUNT(*)` or `COUNT(<literal>)`, _all_ values are considered (including `null` or missing ones).
  39. In case of `COUNT(<field_name>)` `null` values are not considered.
  40. ["source","sql",subs="attributes,macros"]
  41. --------------------------------------------------
  42. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountStar]
  43. --------------------------------------------------
  44. [[sql-functions-aggs-count-all]]
  45. ==== `COUNT(ALL)`
  46. .Synopsis:
  47. [source, sql]
  48. --------------------------------------------------
  49. COUNT(ALL field_name) <1>
  50. --------------------------------------------------
  51. *Input*:
  52. <1> a field name
  53. *Output*: numeric value
  54. .Description:
  55. Returns the total number (count) of all _non-null_ input values. `COUNT(<field_name>)` and `COUNT(ALL <field_name>)` are equivalent.
  56. ["source","sql",subs="attributes,macros"]
  57. --------------------------------------------------
  58. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountAll]
  59. --------------------------------------------------
  60. [[sql-functions-aggs-count-distinct]]
  61. ==== `COUNT(DISTINCT)`
  62. .Synopsis:
  63. [source, sql]
  64. --------------------------------------------------
  65. COUNT(DISTINCT field_name) <1>
  66. --------------------------------------------------
  67. *Input*:
  68. <1> a field name
  69. *Output*: numeric value
  70. .Description:
  71. Returns the total number of _distinct non-null_ values in input values.
  72. ["source","sql",subs="attributes,macros"]
  73. --------------------------------------------------
  74. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountDistinct]
  75. --------------------------------------------------
  76. [[sql-functions-aggs-first]]
  77. ==== `FIRST/FIRST_VALUE`
  78. .Synopsis:
  79. [source, sql]
  80. ----------------------------------------------
  81. FIRST(
  82. field_name <1>
  83. [, ordering_field_name]) <2>
  84. ----------------------------------------------
  85. *Input*:
  86. <1> target field for the aggregation
  87. <2> optional field used for ordering
  88. *Output*: same type as the input
  89. .Description:
  90. Returns the first **non-NULL** value (if such exists) of the `field_name` input column sorted by
  91. the `ordering_field_name` column. If `ordering_field_name` is not provided, only the `field_name`
  92. column is used for the sorting. E.g.:
  93. [cols="<,<"]
  94. |===
  95. s| a | b
  96. | 100 | 1
  97. | 200 | 1
  98. | 1 | 2
  99. | 2 | 2
  100. | 10 | null
  101. | 20 | null
  102. | null | null
  103. |===
  104. [source, sql]
  105. ----------------------
  106. SELECT FIRST(a) FROM t
  107. ----------------------
  108. will result in:
  109. [cols="<"]
  110. |===
  111. s| FIRST(a)
  112. | 1
  113. |===
  114. and
  115. [source, sql]
  116. -------------------------
  117. SELECT FIRST(a, b) FROM t
  118. -------------------------
  119. will result in:
  120. [cols="<"]
  121. |===
  122. s| FIRST(a, b)
  123. | 100
  124. |===
  125. ["source","sql",subs="attributes,macros"]
  126. -----------------------------------------------------------
  127. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArg]
  128. -----------------------------------------------------------
  129. ["source","sql",subs="attributes,macros"]
  130. --------------------------------------------------------------------
  131. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArgAndGroupBy]
  132. --------------------------------------------------------------------
  133. ["source","sql",subs="attributes,macros"]
  134. -----------------------------------------------------------
  135. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgs]
  136. -----------------------------------------------------------
  137. ["source","sql",subs="attributes,macros"]
  138. ---------------------------------------------------------------------
  139. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgsAndGroupBy]
  140. ---------------------------------------------------------------------
  141. `FIRST_VALUE` is a name alias and can be used instead of `FIRST`, e.g.:
  142. ["source","sql",subs="attributes,macros"]
  143. --------------------------------------------------------------------------
  144. include-tagged::{sql-specs}/docs/docs.csv-spec[firstValueWithTwoArgsAndGroupBy]
  145. --------------------------------------------------------------------------
  146. [NOTE]
  147. `FIRST` cannot be used in a HAVING clause.
  148. [NOTE]
  149. `FIRST` cannot be used with columns of type <<text, `text`>> unless
  150. the field is also <<before-enabling-fielddata,saved as a keyword>>.
  151. [[sql-functions-aggs-last]]
  152. ==== `LAST/LAST_VALUE`
  153. .Synopsis:
  154. [source, sql]
  155. --------------------------------------------------
  156. LAST(
  157. field_name <1>
  158. [, ordering_field_name]) <2>
  159. --------------------------------------------------
  160. *Input*:
  161. <1> target field for the aggregation
  162. <2> optional field used for ordering
  163. *Output*: same type as the input
  164. .Description:
  165. It's the inverse of <<sql-functions-aggs-first>>. Returns the last **non-NULL** value (if such exists) of the
  166. `field_name`input column sorted descending by the `ordering_field_name` column. If `ordering_field_name` is not
  167. provided, only the `field_name` column is used for the sorting. E.g.:
  168. [cols="<,<"]
  169. |===
  170. s| a | b
  171. | 10 | 1
  172. | 20 | 1
  173. | 1 | 2
  174. | 2 | 2
  175. | 100 | null
  176. | 200 | null
  177. | null | null
  178. |===
  179. [source, sql]
  180. ------------------------
  181. SELECT LAST(a) FROM t
  182. ------------------------
  183. will result in:
  184. [cols="<"]
  185. |===
  186. s| LAST(a)
  187. | 200
  188. |===
  189. and
  190. [source, sql]
  191. ------------------------
  192. SELECT LAST(a, b) FROM t
  193. ------------------------
  194. will result in:
  195. [cols="<"]
  196. |===
  197. s| LAST(a, b)
  198. | 2
  199. |===
  200. ["source","sql",subs="attributes,macros"]
  201. -----------------------------------------------------------
  202. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArg]
  203. -----------------------------------------------------------
  204. ["source","sql",subs="attributes,macros"]
  205. -------------------------------------------------------------------
  206. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArgAndGroupBy]
  207. -------------------------------------------------------------------
  208. ["source","sql",subs="attributes,macros"]
  209. -----------------------------------------------------------
  210. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgs]
  211. -----------------------------------------------------------
  212. ["source","sql",subs="attributes,macros"]
  213. --------------------------------------------------------------------
  214. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgsAndGroupBy]
  215. --------------------------------------------------------------------
  216. `LAST_VALUE` is a name alias and can be used instead of `LAST`, e.g.:
  217. ["source","sql",subs="attributes,macros"]
  218. -------------------------------------------------------------------------
  219. include-tagged::{sql-specs}/docs/docs.csv-spec[lastValueWithTwoArgsAndGroupBy]
  220. -------------------------------------------------------------------------
  221. [NOTE]
  222. `LAST` cannot be used in `HAVING` clause.
  223. [NOTE]
  224. `LAST` cannot be used with columns of type <<text, `text`>> unless
  225. the field is also <<before-enabling-fielddata,`saved as a keyword`>>.
  226. [[sql-functions-aggs-max]]
  227. ==== `MAX`
  228. .Synopsis:
  229. [source, sql]
  230. --------------------------------------------------
  231. MAX(field_name) <1>
  232. --------------------------------------------------
  233. *Input*:
  234. <1> a numeric field
  235. *Output*: same type as the input
  236. .Description:
  237. Returns the maximum value across input values in the field `field_name`.
  238. ["source","sql",subs="attributes,macros"]
  239. --------------------------------------------------
  240. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMax]
  241. --------------------------------------------------
  242. [NOTE]
  243. `MAX` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  244. <<sql-functions-aggs-last>> and therefore, it cannot be used in `HAVING` clause.
  245. [[sql-functions-aggs-min]]
  246. ==== `MIN`
  247. .Synopsis:
  248. [source, sql]
  249. --------------------------------------------------
  250. MIN(field_name) <1>
  251. --------------------------------------------------
  252. *Input*:
  253. <1> a numeric field
  254. *Output*: same type as the input
  255. .Description:
  256. Returns the minimum value across input values in the field `field_name`.
  257. ["source","sql",subs="attributes,macros"]
  258. --------------------------------------------------
  259. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMin]
  260. --------------------------------------------------
  261. [NOTE]
  262. `MIN` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  263. <<sql-functions-aggs-first>> and therefore, it cannot be used in `HAVING` clause.
  264. [[sql-functions-aggs-sum]]
  265. ==== `SUM`
  266. .Synopsis:
  267. [source, sql]
  268. --------------------------------------------------
  269. SUM(field_name) <1>
  270. --------------------------------------------------
  271. *Input*:
  272. <1> a numeric field
  273. *Output*: `bigint` for integer input, `double` for floating points
  274. .Description:
  275. Returns the sum of input values in the field `field_name`.
  276. ["source","sql",subs="attributes,macros"]
  277. --------------------------------------------------
  278. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSum]
  279. --------------------------------------------------
  280. [[sql-functions-aggs-statistics]]
  281. [float]
  282. === Statistics
  283. [[sql-functions-aggs-kurtosis]]
  284. ==== `KURTOSIS`
  285. .Synopsis:
  286. [source, sql]
  287. --------------------------------------------------
  288. KURTOSIS(field_name) <1>
  289. --------------------------------------------------
  290. *Input*:
  291. <1> a numeric field
  292. *Output*: `double` numeric value
  293. .Description:
  294. https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution of input values in the field `field_name`.
  295. ["source","sql",subs="attributes,macros"]
  296. --------------------------------------------------
  297. include-tagged::{sql-specs}/docs/docs.csv-spec[aggKurtosis]
  298. --------------------------------------------------
  299. [[sql-functions-aggs-mad]]
  300. ==== `MAD`
  301. .Synopsis:
  302. [source, sql]
  303. --------------------------------------------------
  304. MAD(field_name) <1>
  305. --------------------------------------------------
  306. *Input*:
  307. <1> a numeric field
  308. *Output*: `double` numeric value
  309. .Description:
  310. https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
  311. ["source","sql",subs="attributes,macros"]
  312. --------------------------------------------------
  313. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMad]
  314. --------------------------------------------------
  315. [[sql-functions-aggs-percentile]]
  316. ==== `PERCENTILE`
  317. .Synopsis:
  318. [source, sql]
  319. --------------------------------------------------
  320. PERCENTILE(
  321. field_name, <1>
  322. numeric_exp) <2>
  323. --------------------------------------------------
  324. *Input*:
  325. <1> a numeric field
  326. <2> a numeric expression (must be a constant and not based on a field)
  327. *Output*: `double` numeric value
  328. .Description:
  329. Returns the nth https://en.wikipedia.org/wiki/Percentile[percentile] (represented by `numeric_exp` parameter)
  330. of input values in the field `field_name`.
  331. ["source","sql",subs="attributes,macros"]
  332. --------------------------------------------------
  333. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentile]
  334. --------------------------------------------------
  335. [[sql-functions-aggs-percentile-rank]]
  336. ==== `PERCENTILE_RANK`
  337. .Synopsis:
  338. [source, sql]
  339. --------------------------------------------------
  340. PERCENTILE_RANK(
  341. field_name, <1>
  342. numeric_exp) <2>
  343. --------------------------------------------------
  344. *Input*:
  345. <1> a numeric field
  346. <2> a numeric expression (must be a constant and not based on a field)
  347. *Output*: `double` numeric value
  348. .Description:
  349. Returns the nth https://en.wikipedia.org/wiki/Percentile_rank[percentile rank] (represented by `numeric_exp` parameter)
  350. of input values in the field `field_name`.
  351. ["source","sql",subs="attributes,macros"]
  352. --------------------------------------------------
  353. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRank]
  354. --------------------------------------------------
  355. [[sql-functions-aggs-skewness]]
  356. ==== `SKEWNESS`
  357. .Synopsis:
  358. [source, sql]
  359. --------------------------------------------------
  360. SKEWNESS(field_name) <1>
  361. --------------------------------------------------
  362. *Input*:
  363. <1> a numeric field
  364. *Output*: `double` numeric value
  365. .Description:
  366. https://en.wikipedia.org/wiki/Skewness[Quantify] the asymmetric distribution of input values in the field `field_name`.
  367. ["source","sql",subs="attributes,macros"]
  368. --------------------------------------------------
  369. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSkewness]
  370. --------------------------------------------------
  371. [[sql-functions-aggs-stddev-pop]]
  372. ==== `STDDEV_POP`
  373. .Synopsis:
  374. [source, sql]
  375. --------------------------------------------------
  376. STDDEV_POP(field_name) <1>
  377. --------------------------------------------------
  378. *Input*:
  379. <1> a numeric field
  380. *Output*: `double` numeric value
  381. .Description:
  382. Returns the https://en.wikipedia.org/wiki/Standard_deviations[population standard deviation] of input values in the field `field_name`.
  383. ["source","sql",subs="attributes,macros"]
  384. --------------------------------------------------
  385. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevPop]
  386. --------------------------------------------------
  387. [[sql-functions-aggs-sum-squares]]
  388. ==== `SUM_OF_SQUARES`
  389. .Synopsis:
  390. [source, sql]
  391. --------------------------------------------------
  392. SUM_OF_SQUARES(field_name) <1>
  393. --------------------------------------------------
  394. *Input*:
  395. <1> a numeric field
  396. *Output*: `double` numeric value
  397. .Description:
  398. Returns the https://en.wikipedia.org/wiki/Total_sum_of_squares[sum of squares] of input values in the field `field_name`.
  399. ["source","sql",subs="attributes,macros"]
  400. --------------------------------------------------
  401. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSumOfSquares]
  402. --------------------------------------------------
  403. [[sql-functions-aggs-var-pop]]
  404. ==== `VAR_POP`
  405. .Synopsis:
  406. [source, sql]
  407. --------------------------------------------------
  408. VAR_POP(field_name) <1>
  409. --------------------------------------------------
  410. *Input*:
  411. <1> a numeric field
  412. *Output*: `double` numeric value
  413. .Description:
  414. Returns the https://en.wikipedia.org/wiki/Variance[population variance] of input values in the field `field_name`.
  415. ["source","sql",subs="attributes,macros"]
  416. --------------------------------------------------
  417. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarPop]
  418. --------------------------------------------------