فهرست منبع

SQL: Verify GROUP BY ordering on grouped columns (#30585)

Due to the way composite aggregation works, ordering in GROUP BY can be
applied only through grouped columns which now the analyzer verifier
enforces.

Fix 29900
Costin Leau 7 سال پیش
والد
کامیت
09329eb84f

+ 20 - 3
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java

@@ -211,12 +211,13 @@ abstract class Verifier {
 
     /**
      * Check validity of Aggregate/GroupBy.
-     * This rule is needed for two reasons:
+     * This rule is needed for multiple reasons:
      * 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
      * 2. the order/having might contain a non-grouped attribute. This is typically
      * caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
      * (because the expression gets resolved little by little without being pushed down,
      * without the Analyzer modifying anything.
+     * 3. composite agg (used for GROUP BY) allows ordering only on the group keys
      */
     private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
             Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
@@ -225,7 +226,7 @@ abstract class Verifier {
                 && checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
     }
 
-    // check whether an orderBy failed
+    // check whether an orderBy failed or if it occurs on a non-key
     private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailures,
             Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {
         if (p instanceof OrderBy) {
@@ -234,7 +235,23 @@ abstract class Verifier {
                 Aggregate a = (Aggregate) o.child();
 
                 Map<Expression, Node<?>> missing = new LinkedHashMap<>();
-                o.order().forEach(oe -> oe.collectFirstChildren(c -> checkGroupMatch(c, oe, a.groupings(), missing, functions)));
+                o.order().forEach(oe -> {
+                    Expression e = oe.child();
+                    // cannot order by aggregates (not supported by composite)
+                    if (Functions.isAggregate(e)) {
+                        missing.put(e, oe);
+                        return;
+                    }
+
+                    // make sure to compare attributes directly
+                    if (Expressions.anyMatch(a.groupings(), 
+                            g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
+                        return;
+                    }
+
+                    // nothing matched, cannot group by it
+                    missing.put(e, oe);
+                });
 
                 if (!missing.isEmpty()) {
                     String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;

+ 17 - 2
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java

@@ -111,7 +111,7 @@ public class VerifierErrorMessagesTests extends ESTestCase {
     }
 
     public void testGroupByOrderByScalarOverNonGrouped() {
-        assertEquals("1:50: Cannot order by non-grouped column [date], expected [text]",
+        assertEquals("1:50: Cannot order by non-grouped column [YEAR(date [UTC])], expected [text]",
                 verify("SELECT MAX(int) FROM test GROUP BY text ORDER BY YEAR(date)"));
     }
 
@@ -144,4 +144,19 @@ public class VerifierErrorMessagesTests extends ESTestCase {
         assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
                 verify("SELECT unsupported FROM test"));
     }
-}
+
+    public void testGroupByOrderByNonKey() {
+        assertEquals("1:52: Cannot order by non-grouped column [a], expected [bool]",
+                verify("SELECT AVG(int) a FROM test GROUP BY bool ORDER BY a"));
+    }
+
+    public void testGroupByOrderByFunctionOverKey() {
+        assertEquals("1:44: Cannot order by non-grouped column [MAX(int)], expected [int]",
+                verify("SELECT int FROM test GROUP BY int ORDER BY MAX(int)"));
+    }
+
+    public void testGroupByOrderByScore() {
+        assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
+                verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
+    }
+}

+ 14 - 0
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/VerifierErrorMessagesTests.java

@@ -49,4 +49,18 @@ public class VerifierErrorMessagesTests extends ESTestCase {
         assertEquals("1:32: Currently, only a single expression can be used with GROUP BY; please select one of [bool, keyword]",
                 verify("SELECT bool FROM test GROUP BY bool, keyword"));
     }
+
+    //
+    // TODO potential improvements
+    //
+    // regarding resolution
+    //    public void testGroupByOrderByKeyAlias() {
+    //        assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
+    //                verify("SELECT int i FROM test GROUP BY int ORDER BY i"));
+    //    }
+    //
+    //    public void testGroupByAlias() {
+    //        assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
+    //                verify("SELECT int i FROM test GROUP BY i ORDER BY int"));
+    //    }
 }