aggs.asciidoc 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718
  1. [role="xpack"]
  2. [testenv="basic"]
  3. [[sql-functions-aggs]]
  4. === Aggregate Functions
  5. Functions for computing a _single_ result from a set of input values.
  6. {es-sql} supports aggregate functions only alongside <<sql-syntax-group-by,grouping>> (implicit or explicit).
  7. [[sql-functions-aggs-general]]
  8. [float]
  9. === General Purpose
  10. [[sql-functions-aggs-avg]]
  11. ==== `AVG`
  12. .Synopsis:
  13. [source, sql]
  14. --------------------------------------------------
  15. AVG(numeric_field) <1>
  16. --------------------------------------------------
  17. *Input*:
  18. <1> numeric field
  19. *Output*: `double` numeric value
  20. *Description*: Returns the https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (arithmetic mean) of input values.
  21. ["source","sql",subs="attributes,macros"]
  22. --------------------------------------------------
  23. include-tagged::{sql-specs}/docs/docs.csv-spec[aggAvg]
  24. --------------------------------------------------
  25. ["source","sql",subs="attributes,macros"]
  26. --------------------------------------------------
  27. include-tagged::{sql-specs}/docs/docs.csv-spec[aggAvgScalars]
  28. --------------------------------------------------
  29. [[sql-functions-aggs-count]]
  30. ==== `COUNT`
  31. .Synopsis:
  32. [source, sql]
  33. --------------------------------------------------
  34. COUNT(expression) <1>
  35. --------------------------------------------------
  36. *Input*:
  37. <1> a field name, wildcard (`*`) or any numeric value
  38. *Output*: numeric value
  39. *Description*: Returns the total number (count) of input values.
  40. In case of `COUNT(*)` or `COUNT(<literal>)`, _all_ values are considered (including `null` or missing ones).
  41. In case of `COUNT(<field_name>)` `null` values are not considered.
  42. ["source","sql",subs="attributes,macros"]
  43. --------------------------------------------------
  44. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountStar]
  45. --------------------------------------------------
  46. [[sql-functions-aggs-count-all]]
  47. ==== `COUNT(ALL)`
  48. .Synopsis:
  49. [source, sql]
  50. --------------------------------------------------
  51. COUNT(ALL field_name) <1>
  52. --------------------------------------------------
  53. *Input*:
  54. <1> a field name
  55. *Output*: numeric value
  56. *Description*: Returns the total number (count) of all _non-null_ input values. `COUNT(<field_name>)` and `COUNT(ALL <field_name>)` are equivalent.
  57. ["source","sql",subs="attributes,macros"]
  58. --------------------------------------------------
  59. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountAll]
  60. --------------------------------------------------
  61. ["source","sql",subs="attributes,macros"]
  62. --------------------------------------------------
  63. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountAllScalars]
  64. --------------------------------------------------
  65. [[sql-functions-aggs-count-distinct]]
  66. ==== `COUNT(DISTINCT)`
  67. .Synopsis:
  68. [source, sql]
  69. --------------------------------------------------
  70. COUNT(DISTINCT field_name) <1>
  71. --------------------------------------------------
  72. *Input*:
  73. <1> a field name
  74. *Output*: numeric value
  75. *Description*: Returns the total number of _distinct non-null_ values in input values.
  76. ["source","sql",subs="attributes,macros"]
  77. --------------------------------------------------
  78. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountDistinct]
  79. --------------------------------------------------
  80. ["source","sql",subs="attributes,macros"]
  81. --------------------------------------------------
  82. include-tagged::{sql-specs}/docs/docs.csv-spec[aggCountDistinctScalars]
  83. --------------------------------------------------
  84. [[sql-functions-aggs-first]]
  85. ==== `FIRST/FIRST_VALUE`
  86. .Synopsis:
  87. [source, sql]
  88. ----------------------------------------------
  89. FIRST(
  90. field_name <1>
  91. [, ordering_field_name]) <2>
  92. ----------------------------------------------
  93. *Input*:
  94. <1> target field for the aggregation
  95. <2> optional field used for ordering
  96. *Output*: same type as the input
  97. *Description*: Returns the first **non-NULL** value (if such exists) of the `field_name` input column sorted by
  98. the `ordering_field_name` column. If `ordering_field_name` is not provided, only the `field_name`
  99. column is used for the sorting. E.g.:
  100. [cols="<,<"]
  101. |===
  102. s| a | b
  103. | 100 | 1
  104. | 200 | 1
  105. | 1 | 2
  106. | 2 | 2
  107. | 10 | null
  108. | 20 | null
  109. | null | null
  110. |===
  111. [source, sql]
  112. ----------------------
  113. SELECT FIRST(a) FROM t
  114. ----------------------
  115. will result in:
  116. [cols="<"]
  117. |===
  118. s| FIRST(a)
  119. | 1
  120. |===
  121. and
  122. [source, sql]
  123. -------------------------
  124. SELECT FIRST(a, b) FROM t
  125. -------------------------
  126. will result in:
  127. [cols="<"]
  128. |===
  129. s| FIRST(a, b)
  130. | 100
  131. |===
  132. ["source","sql",subs="attributes,macros"]
  133. -----------------------------------------------------------
  134. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArg]
  135. -----------------------------------------------------------
  136. ["source","sql",subs="attributes,macros"]
  137. --------------------------------------------------------------------
  138. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithOneArgAndGroupBy]
  139. --------------------------------------------------------------------
  140. ["source","sql",subs="attributes,macros"]
  141. -----------------------------------------------------------
  142. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgs]
  143. -----------------------------------------------------------
  144. ["source","sql",subs="attributes,macros"]
  145. ---------------------------------------------------------------------
  146. include-tagged::{sql-specs}/docs/docs.csv-spec[firstWithTwoArgsAndGroupBy]
  147. ---------------------------------------------------------------------
  148. `FIRST_VALUE` is a name alias and can be used instead of `FIRST`, e.g.:
  149. ["source","sql",subs="attributes,macros"]
  150. --------------------------------------------------------------------------
  151. include-tagged::{sql-specs}/docs/docs.csv-spec[firstValueWithTwoArgsAndGroupBy]
  152. --------------------------------------------------------------------------
  153. ["source","sql",subs="attributes,macros"]
  154. --------------------------------------------------------------------------
  155. include-tagged::{sql-specs}/docs/docs.csv-spec[firstValueWithTwoArgsAndGroupByScalars]
  156. --------------------------------------------------------------------------
  157. [NOTE]
  158. `FIRST` cannot be used in a HAVING clause.
  159. [NOTE]
  160. `FIRST` cannot be used with columns of type <<text, `text`>> unless
  161. the field is also <<before-enabling-fielddata,saved as a keyword>>.
  162. [[sql-functions-aggs-last]]
  163. ==== `LAST/LAST_VALUE`
  164. .Synopsis:
  165. [source, sql]
  166. --------------------------------------------------
  167. LAST(
  168. field_name <1>
  169. [, ordering_field_name]) <2>
  170. --------------------------------------------------
  171. *Input*:
  172. <1> target field for the aggregation
  173. <2> optional field used for ordering
  174. *Output*: same type as the input
  175. *Description*: It's the inverse of <<sql-functions-aggs-first>>. Returns the last **non-NULL** value (if such exists) of the
  176. `field_name` input column sorted descending by the `ordering_field_name` column. If `ordering_field_name` is not
  177. provided, only the `field_name` column is used for the sorting. E.g.:
  178. [cols="<,<"]
  179. |===
  180. s| a | b
  181. | 10 | 1
  182. | 20 | 1
  183. | 1 | 2
  184. | 2 | 2
  185. | 100 | null
  186. | 200 | null
  187. | null | null
  188. |===
  189. [source, sql]
  190. ------------------------
  191. SELECT LAST(a) FROM t
  192. ------------------------
  193. will result in:
  194. [cols="<"]
  195. |===
  196. s| LAST(a)
  197. | 200
  198. |===
  199. and
  200. [source, sql]
  201. ------------------------
  202. SELECT LAST(a, b) FROM t
  203. ------------------------
  204. will result in:
  205. [cols="<"]
  206. |===
  207. s| LAST(a, b)
  208. | 2
  209. |===
  210. ["source","sql",subs="attributes,macros"]
  211. -----------------------------------------------------------
  212. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArg]
  213. -----------------------------------------------------------
  214. ["source","sql",subs="attributes,macros"]
  215. -------------------------------------------------------------------
  216. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithOneArgAndGroupBy]
  217. -------------------------------------------------------------------
  218. ["source","sql",subs="attributes,macros"]
  219. -----------------------------------------------------------
  220. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgs]
  221. -----------------------------------------------------------
  222. ["source","sql",subs="attributes,macros"]
  223. --------------------------------------------------------------------
  224. include-tagged::{sql-specs}/docs/docs.csv-spec[lastWithTwoArgsAndGroupBy]
  225. --------------------------------------------------------------------
  226. `LAST_VALUE` is a name alias and can be used instead of `LAST`, e.g.:
  227. ["source","sql",subs="attributes,macros"]
  228. -------------------------------------------------------------------------
  229. include-tagged::{sql-specs}/docs/docs.csv-spec[lastValueWithTwoArgsAndGroupBy]
  230. -------------------------------------------------------------------------
  231. ["source","sql",subs="attributes,macros"]
  232. -------------------------------------------------------------------------
  233. include-tagged::{sql-specs}/docs/docs.csv-spec[lastValueWithTwoArgsAndGroupByScalars]
  234. -------------------------------------------------------------------------
  235. [NOTE]
  236. `LAST` cannot be used in `HAVING` clause.
  237. [NOTE]
  238. `LAST` cannot be used with columns of type <<text, `text`>> unless
  239. the field is also <<before-enabling-fielddata,`saved as a keyword`>>.
  240. [[sql-functions-aggs-max]]
  241. ==== `MAX`
  242. .Synopsis:
  243. [source, sql]
  244. --------------------------------------------------
  245. MAX(field_name) <1>
  246. --------------------------------------------------
  247. *Input*:
  248. <1> a numeric field
  249. *Output*: same type as the input
  250. *Description*: Returns the maximum value across input values in the field `field_name`.
  251. ["source","sql",subs="attributes,macros"]
  252. --------------------------------------------------
  253. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMax]
  254. --------------------------------------------------
  255. ["source","sql",subs="attributes,macros"]
  256. --------------------------------------------------
  257. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMaxScalars]
  258. --------------------------------------------------
  259. [NOTE]
  260. `MAX` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  261. <<sql-functions-aggs-last>> and therefore, it cannot be used in `HAVING` clause.
  262. [[sql-functions-aggs-min]]
  263. ==== `MIN`
  264. .Synopsis:
  265. [source, sql]
  266. --------------------------------------------------
  267. MIN(field_name) <1>
  268. --------------------------------------------------
  269. *Input*:
  270. <1> a numeric field
  271. *Output*: same type as the input
  272. *Description*: Returns the minimum value across input values in the field `field_name`.
  273. ["source","sql",subs="attributes,macros"]
  274. --------------------------------------------------
  275. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMin]
  276. --------------------------------------------------
  277. [NOTE]
  278. `MIN` on a field of type <<text, `text`>> or <<keyword, `keyword`>> is translated into
  279. <<sql-functions-aggs-first>> and therefore, it cannot be used in `HAVING` clause.
  280. [[sql-functions-aggs-sum]]
  281. ==== `SUM`
  282. .Synopsis:
  283. [source, sql]
  284. --------------------------------------------------
  285. SUM(field_name) <1>
  286. --------------------------------------------------
  287. *Input*:
  288. <1> a numeric field
  289. *Output*: `bigint` for integer input, `double` for floating points
  290. *Description*: Returns the sum of input values in the field `field_name`.
  291. ["source","sql",subs="attributes,macros"]
  292. --------------------------------------------------
  293. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSum]
  294. --------------------------------------------------
  295. ["source","sql",subs="attributes,macros"]
  296. --------------------------------------------------
  297. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSumScalars]
  298. --------------------------------------------------
  299. [[sql-functions-aggs-statistics]]
  300. [float]
  301. === Statistics
  302. [[sql-functions-aggs-kurtosis]]
  303. ==== `KURTOSIS`
  304. .Synopsis:
  305. [source, sql]
  306. --------------------------------------------------
  307. KURTOSIS(field_name) <1>
  308. --------------------------------------------------
  309. *Input*:
  310. <1> a numeric field
  311. *Output*: `double` numeric value
  312. *Description*:
  313. https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution of input values in the field `field_name`.
  314. ["source","sql",subs="attributes,macros"]
  315. --------------------------------------------------
  316. include-tagged::{sql-specs}/docs/docs.csv-spec[aggKurtosis]
  317. --------------------------------------------------
  318. [NOTE]
  319. ====
  320. `KURTOSIS` cannot be used on top of scalar functions or operators but only directly on a field. So, for example,
  321. the following is not allowed and an error is returned:
  322. [source, sql]
  323. ---------------------------------------
  324. SELECT KURTOSIS(salary / 12.0), gender FROM emp GROUP BY gender
  325. ---------------------------------------
  326. ====
  327. [[sql-functions-aggs-mad]]
  328. ==== `MAD`
  329. .Synopsis:
  330. [source, sql]
  331. --------------------------------------------------
  332. MAD(field_name) <1>
  333. --------------------------------------------------
  334. *Input*:
  335. <1> a numeric field
  336. *Output*: `double` numeric value
  337. *Description*:
  338. https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
  339. ["source","sql",subs="attributes,macros"]
  340. --------------------------------------------------
  341. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMad]
  342. --------------------------------------------------
  343. ["source","sql",subs="attributes,macros"]
  344. --------------------------------------------------
  345. include-tagged::{sql-specs}/docs/docs.csv-spec[aggMadScalars]
  346. --------------------------------------------------
  347. [[sql-functions-aggs-percentile]]
  348. ==== `PERCENTILE`
  349. .Synopsis:
  350. [source, sql]
  351. --------------------------------------------------
  352. PERCENTILE(
  353. field_name, <1>
  354. numeric_exp) <2>
  355. --------------------------------------------------
  356. *Input*:
  357. <1> a numeric field
  358. <2> a numeric expression (must be a constant and not based on a field)
  359. *Output*: `double` numeric value
  360. *Description*:
  361. Returns the nth https://en.wikipedia.org/wiki/Percentile[percentile] (represented by `numeric_exp` parameter)
  362. of input values in the field `field_name`.
  363. ["source","sql",subs="attributes,macros"]
  364. --------------------------------------------------
  365. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentile]
  366. --------------------------------------------------
  367. ["source","sql",subs="attributes,macros"]
  368. --------------------------------------------------
  369. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileScalars]
  370. --------------------------------------------------
  371. [[sql-functions-aggs-percentile-rank]]
  372. ==== `PERCENTILE_RANK`
  373. .Synopsis:
  374. [source, sql]
  375. --------------------------------------------------
  376. PERCENTILE_RANK(
  377. field_name, <1>
  378. numeric_exp) <2>
  379. --------------------------------------------------
  380. *Input*:
  381. <1> a numeric field
  382. <2> a numeric expression (must be a constant and not based on a field)
  383. *Output*: `double` numeric value
  384. *Description*:
  385. Returns the nth https://en.wikipedia.org/wiki/Percentile_rank[percentile rank] (represented by `numeric_exp` parameter)
  386. of input values in the field `field_name`.
  387. ["source","sql",subs="attributes,macros"]
  388. --------------------------------------------------
  389. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRank]
  390. --------------------------------------------------
  391. ["source","sql",subs="attributes,macros"]
  392. --------------------------------------------------
  393. include-tagged::{sql-specs}/docs/docs.csv-spec[aggPercentileRankScalars]
  394. --------------------------------------------------
  395. [[sql-functions-aggs-skewness]]
  396. ==== `SKEWNESS`
  397. .Synopsis:
  398. [source, sql]
  399. --------------------------------------------------
  400. SKEWNESS(field_name) <1>
  401. --------------------------------------------------
  402. *Input*:
  403. <1> a numeric field
  404. *Output*: `double` numeric value
  405. *Description*:
  406. https://en.wikipedia.org/wiki/Skewness[Quantify] the asymmetric distribution of input values in the field `field_name`.
  407. ["source","sql",subs="attributes,macros"]
  408. --------------------------------------------------
  409. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSkewness]
  410. --------------------------------------------------
  411. [NOTE]
  412. ====
  413. `SKEWNESS` cannot be used on top of scalar functions but only directly on a field. So, for example, the following is
  414. not allowed and an error is returned:
  415. [source, sql]
  416. ---------------------------------------
  417. SELECT SKEWNESS(ROUND(salary / 12.0, 2), gender FROM emp GROUP BY gender
  418. ---------------------------------------
  419. ====
  420. [[sql-functions-aggs-stddev-pop]]
  421. ==== `STDDEV_POP`
  422. .Synopsis:
  423. [source, sql]
  424. --------------------------------------------------
  425. STDDEV_POP(field_name) <1>
  426. --------------------------------------------------
  427. *Input*:
  428. <1> a numeric field
  429. *Output*: `double` numeric value
  430. *Description*:
  431. Returns the https://en.wikipedia.org/wiki/Standard_deviations[population standard deviation] of input values in the field `field_name`.
  432. ["source","sql",subs="attributes,macros"]
  433. --------------------------------------------------
  434. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevPop]
  435. --------------------------------------------------
  436. ["source","sql",subs="attributes,macros"]
  437. --------------------------------------------------
  438. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevPopScalars]
  439. --------------------------------------------------
  440. [[sql-functions-aggs-stddev-samp]]
  441. ==== `STDDEV_SAMP`
  442. .Synopsis:
  443. [source, sql]
  444. --------------------------------------------------
  445. STDDEV_SAMP(field_name) <1>
  446. --------------------------------------------------
  447. *Input*:
  448. <1> a numeric field
  449. *Output*: `double` numeric value
  450. *Description*:
  451. Returns the https://en.wikipedia.org/wiki/Standard_deviations[sample standard deviation] of input values in the field `field_name`.
  452. ["source","sql",subs="attributes,macros"]
  453. --------------------------------------------------
  454. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevSamp]
  455. --------------------------------------------------
  456. ["source","sql",subs="attributes,macros"]
  457. --------------------------------------------------
  458. include-tagged::{sql-specs}/docs/docs.csv-spec[aggStddevSampScalars]
  459. --------------------------------------------------
  460. [[sql-functions-aggs-sum-squares]]
  461. ==== `SUM_OF_SQUARES`
  462. .Synopsis:
  463. [source, sql]
  464. --------------------------------------------------
  465. SUM_OF_SQUARES(field_name) <1>
  466. --------------------------------------------------
  467. *Input*:
  468. <1> a numeric field
  469. *Output*: `double` numeric value
  470. *Description*:
  471. Returns the sum of squares of input values in the field `field_name`.
  472. ["source","sql",subs="attributes,macros"]
  473. --------------------------------------------------
  474. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSumOfSquares]
  475. --------------------------------------------------
  476. ["source","sql",subs="attributes,macros"]
  477. --------------------------------------------------
  478. include-tagged::{sql-specs}/docs/docs.csv-spec[aggSumOfSquaresScalars]
  479. --------------------------------------------------
  480. [[sql-functions-aggs-var-pop]]
  481. ==== `VAR_POP`
  482. .Synopsis:
  483. [source, sql]
  484. --------------------------------------------------
  485. VAR_POP(field_name) <1>
  486. --------------------------------------------------
  487. *Input*:
  488. <1> a numeric field
  489. *Output*: `double` numeric value
  490. *Description*:
  491. Returns the https://en.wikipedia.org/wiki/Variance[population variance] of input values in the field `field_name`.
  492. ["source","sql",subs="attributes,macros"]
  493. --------------------------------------------------
  494. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarPop]
  495. --------------------------------------------------
  496. ["source","sql",subs="attributes,macros"]
  497. --------------------------------------------------
  498. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarPopScalars]
  499. --------------------------------------------------
  500. [[sql-functions-aggs-var-samp]]
  501. ==== `VAR_SAMP`
  502. .Synopsis:
  503. [source, sql]
  504. --------------------------------------------------
  505. VAR_SAMP(field_name) <1>
  506. --------------------------------------------------
  507. *Input*:
  508. <1> a numeric field
  509. *Output*: `double` numeric value
  510. *Description*:
  511. Returns the https://en.wikipedia.org/wiki/Variance[sample variance] of input values in the field `field_name`.
  512. ["source","sql",subs="attributes,macros"]
  513. --------------------------------------------------
  514. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarSamp]
  515. --------------------------------------------------
  516. ["source","sql",subs="attributes,macros"]
  517. --------------------------------------------------
  518. include-tagged::{sql-specs}/docs/docs.csv-spec[aggVarSampScalars]
  519. --------------------------------------------------