Kaynağa Gözat

Better explain STATS on multivalued fields (#135109) (#135704)

Updates the docs for multivalued fields to make it clear that grouping
puts the entire *row* in each group.

Relates to #134792

Co-authored-by: Craig Taverner <craig@amanzi.com>
Nik Everett 1 hafta önce
ebeveyn
işleme
f158400b17

+ 12 - 6
docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/multi-mv-group.md

@@ -1,12 +1,18 @@
 % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
 
 ```esql
-ROW i=1, a=["a", "b"], b=[2, 3] | STATS MIN(i) BY a, b | SORT a ASC, b ASC
+ROW price = 10, color = ["blue", "pink", "yellow"], size = ["s", "m", "l"]
+| STATS SUM(price) BY color, size
 ```
 
-| MIN(i):integer | a:keyword | b:integer |
+| SUM(price):long | color:keyword | size:keyword |
 | --- | --- | --- |
-| 1 | a | 2 |
-| 1 | a | 3 |
-| 1 | b | 2 |
-| 1 | b | 3 |
+| 10 | blue | l |
+| 10 | blue | m |
+| 10 | blue | s |
+| 10 | pink | l |
+| 10 | pink | m |
+| 10 | pink | s |
+| 10 | yellow | l |
+| 10 | yellow | m |
+| 10 | yellow | s |

+ 13 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/mv-group-values-expand.md

@@ -0,0 +1,13 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+ROW color = ["blue", "pink", "yellow"]
+| MV_EXPAND color
+| STATS VALUES(color) BY color
+```
+
+| VALUES(color):keyword | color:keyword |
+| --- | --- |
+| blue | blue |
+| pink | pink |
+| yellow | yellow |

+ 12 - 0
docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/mv-group-values.md

@@ -0,0 +1,12 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+ROW color = ["blue", "pink", "yellow"]
+| STATS VALUES(color) BY color
+```
+
+| VALUES(color):keyword | color:keyword |
+| --- | --- |
+| [blue, pink, yellow] | blue |
+| [blue, pink, yellow] | pink |
+| [blue, pink, yellow] | yellow |

+ 6 - 4
docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/mv-group.md

@@ -1,10 +1,12 @@
 % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
 
 ```esql
-ROW i=1, a=["a", "b"] | STATS MIN(i) BY a | SORT a ASC
+ROW price = 10, color = ["blue", "pink", "yellow"]
+| STATS SUM(price) BY color
 ```
 
-| MIN(i):integer | a:keyword |
+| SUM(price):long | color:keyword |
 | --- | --- |
-| 1 | a |
-| 1 | b |
+| 10 | blue |
+| 10 | pink |
+| 10 | yellow |

+ 13 - 0
docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md

@@ -110,11 +110,24 @@ It’s also possible to group by multiple values:
 
 :::{include} ../examples/stats.csv-spec/statsGroupByMultipleValues.md
 :::
+
 If all the grouping keys are multivalued then the input row is in all groups:
 
 :::{include} ../examples/stats.csv-spec/multi-mv-group.md
 :::
 
+The entire input **row** is placed in every group: all of its values, including
+every value of the group keys themselves. That means that:
+
+:::{include} ../examples/stats.csv-spec/mv-group-values.md
+:::
+
+The `VALUES` function above sees the whole row, including every value of the group
+key. If you want the function to see only each group's own key value, use `MV_EXPAND` first:
+
+:::{include} ../examples/stats.csv-spec/mv-group-values-expand.md
+:::
+
 Both the aggregating functions and the grouping expressions accept other
 functions. This is useful for using `STATS` on multivalue columns.
 For example, to calculate the average salary change, you can use `MV_AVG` to

+ 71 - 28
x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

@@ -2276,34 +2276,6 @@ M
 null
 ;
 
-docsStatsMvGroup
-// tag::mv-group[]
-ROW i=1, a=["a", "b"] | STATS MIN(i) BY a | SORT a ASC
-// end::mv-group[]
-;
-
-// tag::mv-group-result[]
-MIN(i):integer | a:keyword
-             1 | a
-             1 | b
-// end::mv-group-result[]
-;
-
-docsStatsMultiMvGroup
-// tag::multi-mv-group[]
-ROW i=1, a=["a", "b"], b=[2, 3] | STATS MIN(i) BY a, b | SORT a ASC, b ASC
-// end::multi-mv-group[]
-;
-
-// tag::multi-mv-group-result[]
-MIN(i):integer | a:keyword | b:integer
-             1 | a         | 2
-             1 | a         | 3
-             1 | b         | 2
-             1 | b         | 3
-// end::multi-mv-group-result[]
-;
-
 statsByConstant#[skip:-8.14.1,reason:implemented in 8.14]
 from employees
 | stats m = max(salary), a = round(avg(salary)) by 0
@@ -3301,3 +3273,74 @@ FROM employees
 min1:integer | min2:integer   | max1:integer | max2:integer
 10011        | [10011, 10012] | 10079        | [10079, 10078]
 ;
+
+sumRowMany
+// tag::mv-group[]
+ROW price = 10, color = ["blue", "pink", "yellow"]
+| STATS SUM(price) BY color
+// end::mv-group[]
+| SORT color ASC
+;
+
+// tag::mv-group-result[]
+SUM(price):long | color:keyword
+             10 | blue
+             10 | pink
+             10 | yellow
+// end::mv-group-result[]
+;
+
+sumRowManyTwo
+// tag::multi-mv-group[]
+ROW price = 10, color = ["blue", "pink", "yellow"], size = ["s", "m", "l"]
+| STATS SUM(price) BY color, size
+// end::multi-mv-group[]
+| SORT color ASC, size ASC
+;
+
+// tag::multi-mv-group-result[]
+SUM(price):long | color:keyword | size:keyword
+             10 | blue          | l
+             10 | blue          | m
+             10 | blue          | s
+             10 | pink          | l
+             10 | pink          | m
+             10 | pink          | s
+             10 | yellow        | l
+             10 | yellow        | m
+             10 | yellow        | s
+// end::multi-mv-group-result[]
+;
+
+valuesRowMany
+// tag::mv-group-values[]
+ROW color = ["blue", "pink", "yellow"]
+| STATS VALUES(color) BY color
+// end::mv-group-values[]
+| SORT color ASC
+;
+
+// tag::mv-group-values-result[]
+VALUES(color):keyword | color:keyword
+ [blue, pink, yellow] | blue
+ [blue, pink, yellow] | pink
+ [blue, pink, yellow] | yellow
+// end::mv-group-values-result[]
+;
+
+valuesRowManyExpand
+// tag::mv-group-values-expand[]
+ROW color = ["blue", "pink", "yellow"]
+| MV_EXPAND color
+| STATS VALUES(color) BY color
+// end::mv-group-values-expand[]
+| SORT color ASC
+;
+
+// tag::mv-group-values-expand-result[]
+VALUES(color):keyword | color:keyword
+                 blue | blue
+                 pink | pink
+               yellow | yellow
+// end::mv-group-values-expand-result[]
+;