소스 검색

ESQL: Fix missing refs due to pruning renamed grouping columns (#107328)

Sometimes, CombineProjections does not correctly update an aggregation's groupings when combining with a preceding projection.
Fix this by resolving any aliases used in the groupings and de-duplicating them.

---------

Co-authored-by: Andrei Stefan <astefan@users.noreply.github.com>
Alexander Spies 1 년 전
부모
커밋
adaa4763f3

+ 7 - 0
docs/changelog/107328.yaml

@@ -0,0 +1,7 @@
+pr: 107328
+summary: "ESQL: Fix missing refs due to pruning renamed grouping columns"
+area: ES|QL
+type: bug
+issues:
+ - 107083
+ - 107166

+ 17 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

@@ -1586,6 +1586,23 @@ c:l | k1:i | languages:i
 10  | null | null
 ;
 
+evalMultipleOverridingKeysWithAggregateExpr#[skip:-8.13.99,reason:supported in 8.14]
+FROM employees
+| EVAL k = languages, k1 = k
+| STATS c = 3*COUNT() BY languages, k, k1, languages
+| DROP k
+| SORT languages
+;
+
+c:l | k1:i | languages:i
+45  | 1    | 1
+57  | 2    | 2
+51  | 3    | 3
+54  | 4    | 4
+63  | 5    | 5
+30  | null | null
+;
+
 minWithSortExpression1#[skip:-8.13.99,reason:supported in 8.14]
 FROM employees | STATS min = min(salary) by languages | SORT min + languages;
 

+ 37 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

@@ -389,6 +389,7 @@ public class LogicalPlanOptimizer extends ParameterizedRuleExecutor<LogicalPlan,
         }
 
         @Override
+        @SuppressWarnings("unchecked")
         protected LogicalPlan rule(UnaryPlan plan) {
             LogicalPlan child = plan.child();
 
@@ -419,7 +420,22 @@ public class LogicalPlanOptimizer extends ParameterizedRuleExecutor<LogicalPlan,
             // Agg with underlying Project (group by on sub-queries)
             if (plan instanceof Aggregate a) {
                 if (child instanceof Project p) {
-                    plan = new Aggregate(a.source(), p.child(), a.groupings(), combineProjections(a.aggregates(), p.projections()));
+                    var groupings = a.groupings();
+                    List<Attribute> groupingAttrs = new ArrayList<>(a.groupings().size());
+                    for (Expression grouping : groupings) {
+                        if (grouping instanceof Attribute attribute) {
+                            groupingAttrs.add(attribute);
+                        } else {
+                            // After applying ReplaceStatsNestedExpressionWithEval, groupings can only contain attributes.
+                            throw new EsqlIllegalArgumentException("Expected an Attribute, got {}", grouping);
+                        }
+                    }
+                    plan = new Aggregate(
+                        a.source(),
+                        p.child(),
+                        combineUpperGroupingsAndLowerProjections(groupingAttrs, p.projections()),
+                        combineProjections(a.aggregates(), p.projections())
+                    );
                 }
             }
 
@@ -482,6 +498,26 @@ public class LogicalPlanOptimizer extends ParameterizedRuleExecutor<LogicalPlan,
             return replaced;
         }
 
+        private static List<Expression> combineUpperGroupingsAndLowerProjections(
+            List<? extends Attribute> upperGroupings,
+            List<? extends NamedExpression> lowerProjections
+        ) {
+            // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
+            AttributeMap<Attribute> aliases = new AttributeMap<>();
+            for (NamedExpression ne : lowerProjections) {
+                // Projections are just aliases for attributes, so casting is safe.
+                aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
+            }
+
+            // Replace any matching attribute directly with the aliased attribute from the projection.
+            AttributeSet replaced = new AttributeSet();
+            for (Attribute attr : upperGroupings) {
+                // All substitutions happen before; groupings must be attributes at this point.
+                replaced.add(aliases.resolve(attr, attr));
+            }
+            return new ArrayList<>(replaced);
+        }
+
         /**
          * Replace grouping alias previously contained in the aggregations that might have been projected away.
          */

+ 42 - 17
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

@@ -417,8 +417,8 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
     /**
      * Expects
      * Limit[1000[INTEGER]]
-     * \_Aggregate[[last_name{f}#23, first_name{f}#20, k{r}#4],[SUM(salary{f}#24) AS s, last_name{f}#23, first_name{f}#20, first_n
-     * ame{f}#20 AS k]]
+     * \_Aggregate[[last_name{f}#23, first_name{f}#20],[SUM(salary{f}#24) AS s, last_name{f}#23, first_name{f}#20, first_name{f}#2
+     * 0 AS k]]
      *   \_EsRelation[test][_meta_field{f}#25, emp_no{f}#19, first_name{f}#20, ..]
      */
     public void testCombineProjectionWithAggregationAndEval() {
@@ -432,7 +432,7 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         var limit = as(plan, Limit.class);
         var agg = as(limit.child(), Aggregate.class);
         assertThat(Expressions.names(agg.aggregates()), contains("s", "last_name", "first_name", "k"));
-        assertThat(Expressions.names(agg.groupings()), contains("last_name", "first_name", "k"));
+        assertThat(Expressions.names(agg.groupings()), contains("last_name", "first_name"));
     }
 
     /**
@@ -552,6 +552,12 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         assertThat(condition.list(), equalTo(List.of(new Literal(EMPTY, 1, INTEGER), new Literal(EMPTY, 2, INTEGER))));
     }
 
+    /**
+     * Expects
+     * Limit[1000[INTEGER]]
+     * \_Aggregate[[first_name{f}#12],[COUNT(salary{f}#16) AS count(salary), first_name{f}#12 AS x]]
+     *   \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
+     */
     public void testCombineProjectionWithPruning() {
         var plan = plan("""
             from test
@@ -563,19 +569,17 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         var limit = as(plan, Limit.class);
         var agg = as(limit.child(), Aggregate.class);
         assertThat(Expressions.names(agg.aggregates()), contains("count(salary)", "x"));
-        assertThat(Expressions.names(agg.groupings()), contains("x"));
+        assertThat(Expressions.names(agg.groupings()), contains("first_name"));
         var alias = as(agg.aggregates().get(1), Alias.class);
         var field = as(alias.child(), FieldAttribute.class);
         assertThat(field.name(), is("first_name"));
-        var group = as(agg.groupings().get(0), Attribute.class);
-        assertThat(group, is(alias.toAttribute()));
         var from = as(agg.child(), EsRelation.class);
     }
 
     /**
      * Expects
      * Limit[1000[INTEGER]]
-     * \_Aggregate[[f{r}#7],[SUM(emp_no{f}#15) AS s, COUNT(first_name{f}#16) AS c, first_name{f}#16 AS f]]
+     * \_Aggregate[[first_name{f}#16],[SUM(emp_no{f}#15) AS s, COUNT(first_name{f}#16) AS c, first_name{f}#16 AS f]]
      *   \_EsRelation[test][_meta_field{f}#21, emp_no{f}#15, first_name{f}#16, ..]
      */
     public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg() {
@@ -599,13 +603,13 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         as = as(aggs.get(2), Alias.class);
         assertThat(Expressions.name(as.child()), is("first_name"));
 
-        assertThat(Expressions.names(agg.groupings()), contains("f"));
+        assertThat(Expressions.names(agg.groupings()), contains("first_name"));
     }
 
     /**
      * Expects
      * Limit[1000[INTEGER]]
-     * \_Aggregate[[f{r}#7],[SUM(emp_no{f}#15) AS s, first_name{f}#16 AS f]]
+     * \_Aggregate[[first_name{f}#16],[SUM(emp_no{f}#15) AS s, first_name{f}#16 AS f]]
      *   \_EsRelation[test][_meta_field{f}#21, emp_no{f}#15, first_name{f}#16, ..]
      */
     public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUnused() {
@@ -625,7 +629,7 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         as = as(aggs.get(1), Alias.class);
         assertThat(Expressions.name(as.child()), is("first_name"));
 
-        assertThat(Expressions.names(agg.groupings()), contains("f"));
+        assertThat(Expressions.names(agg.groupings()), contains("first_name"));
     }
 
     /**
@@ -2786,6 +2790,27 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         var source = as(agg.child(), EsRelation.class);
     }
 
+    /**
+     * Expects
+     * Limit[1000[INTEGER]]
+     * \_Aggregate[[salary{f}#12],[salary{f}#12, salary{f}#12 AS x]]
+     *   \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..]
+     */
+    public void testEliminateDuplicateRenamedGroupings() {
+        var plan = plan("""
+            from test
+            | eval x = salary
+            | stats by salary, x
+            """);
+
+        var limit = as(plan, Limit.class);
+        var agg = as(limit.child(), Aggregate.class);
+        var relation = as(agg.child(), EsRelation.class);
+
+        assertThat(Expressions.names(agg.groupings()), contains("salary"));
+        assertThat(Expressions.names(agg.aggregates()), contains("salary", "x"));
+    }
+
     /**
      * Expected
      * Limit[2[INTEGER]]
@@ -2832,7 +2857,7 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
     /**
      * Expected
      * Limit[1000[INTEGER]]
-     * \_Aggregate[[a{r}#2, bar{r}#8],[COUNT([2a][KEYWORD]) AS baz, b{r}#4 AS bar]]
+     * \_Aggregate[[a{r}#3, b{r}#5],[COUNT([2a][KEYWORD]) AS baz, b{r}#5 AS bar]]
      *   \_Row[[1[INTEGER] AS a, 2[INTEGER] AS b]]
      */
     public void testMultipleRenameStatsDropGroup() {
@@ -2844,15 +2869,15 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
 
         var limit = as(plan, Limit.class);
         var agg = as(limit.child(), Aggregate.class);
-        assertThat(Expressions.names(agg.groupings()), contains("a", "bar"));
+        assertThat(Expressions.names(agg.groupings()), contains("a", "b"));
         var row = as(agg.child(), Row.class);
     }
 
     /**
      * Expected
      * Limit[1000[INTEGER]]
-     * \_Aggregate[[emp_no{f}#11, bar{r}#4],[MAX(salary{f}#16) AS baz, gender{f}#13 AS bar]]
-     *   \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
+     * \_Aggregate[[emp_no{f}#14, gender{f}#16],[MAX(salary{f}#19) AS baz, gender{f}#16 AS bar]]
+     *   \_EsRelation[test][_meta_field{f}#20, emp_no{f}#14, first_name{f}#15, ..]
      */
     public void testMultipleRenameStatsDropGroupMultirow() {
         LogicalPlan plan = optimizedPlan("""
@@ -2863,7 +2888,7 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
 
         var limit = as(plan, Limit.class);
         var agg = as(limit.child(), Aggregate.class);
-        assertThat(Expressions.names(agg.groupings()), contains("emp_no", "bar"));
+        assertThat(Expressions.names(agg.groupings()), contains("emp_no", "gender"));
         var row = as(agg.child(), EsRelation.class);
     }
 
@@ -2944,7 +2969,7 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
     /**
      * Expects
      * Limit[1000[INTEGER]]
-     * \_Aggregate[[x{r}#4],[SUM(salary{f}#13) AS sum(salary), salary{f}#13 AS x]]
+     * \_Aggregate[[salary{f}#13],[SUM(salary{f}#13) AS sum(salary), salary{f}#13 AS x]]
      *   \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..]
      */
     public void testIsNotNullConstraintForStatsWithAndOnGroupingAlias() {
@@ -2956,7 +2981,7 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
 
         var limit = as(plan, Limit.class);
         var agg = as(limit.child(), Aggregate.class);
-        assertThat(Expressions.names(agg.groupings()), contains("x"));
+        assertThat(Expressions.names(agg.groupings()), contains("salary"));
         assertThat(Expressions.names(agg.aggregates()), contains("sum(salary)", "x"));
         var from = as(agg.child(), EsRelation.class);
     }