Selaa lähdekoodia

SQL: fix NPE on ambiguous GROUP BY (#59370)

* fix npe on ambiguous group by

* add tests for aggregates and group by, add quotes to error message

* add more cases for Group By ambiguity test

* change error messages for field ambiguity

* change collection aliases approach

* add locations of attributes for ambiguous grouping error

* Adress review comments

- remove Comparable implementations from Attribute and Location;
- add ad-hoc comparator for sorting locations in ambiguity message;
- remove added AttributeAlias class with Touple;
- add code comment to explain issue with Location overwriting.

* Fix c&p error in location ref generation comparator

Fix copy&paste error in dedicated comparator used for sorting ambiguity
location references.
Slightly increase its readability.

Co-authored-by: Nikita Verkhovin <verkhovin13@gmail.com>
Bogdan Pintea 5 vuotta sitten
vanhempi
commit
9ba70a3483

+ 6 - 6
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/Expressions.java

@@ -5,6 +5,7 @@
  */
 package org.elasticsearch.xpack.ql.expression;
 
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
 import org.elasticsearch.xpack.ql.expression.function.Function;
 import org.elasticsearch.xpack.ql.expression.gen.pipeline.AttributeInput;
@@ -14,10 +15,8 @@ import org.elasticsearch.xpack.ql.type.DataTypes;
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import java.util.function.Predicate;
 
@@ -164,14 +163,15 @@ public final class Expressions {
         return true;
     }
 
-    public static AttributeMap<Expression> aliases(List<? extends NamedExpression> named) {
-        Map<Attribute, Expression> aliasMap = new LinkedHashMap<>();
+    public static List<Tuple<Attribute, Expression>> aliases(List<? extends NamedExpression> named) {
+        // an alias of same name and data type can be reused (by mistake): need to use a list to collect all refs (and later report them)
+        List<Tuple<Attribute, Expression>> aliases = new ArrayList<>();
         for (NamedExpression ne : named) {
             if (ne instanceof Alias) {
-                aliasMap.put(ne.toAttribute(), ((Alias) ne).child());
+                aliases.add(new Tuple<>(ne.toAttribute(), ((Alias) ne).child()));
             }
         }
-        return new AttributeMap<>(aliasMap);
+        return aliases;
     }
 
     public static boolean hasReferenceAttribute(Collection<Attribute> output) {

+ 30 - 16
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java

@@ -5,12 +5,12 @@
  */
 package org.elasticsearch.xpack.sql.analysis.analyzer;
 
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.logging.LoggerMessageFormat;
 import org.elasticsearch.xpack.ql.capabilities.Resolvables;
 import org.elasticsearch.xpack.ql.common.Failure;
 import org.elasticsearch.xpack.ql.expression.Alias;
 import org.elasticsearch.xpack.ql.expression.Attribute;
-import org.elasticsearch.xpack.ql.expression.AttributeMap;
 import org.elasticsearch.xpack.ql.expression.AttributeSet;
 import org.elasticsearch.xpack.ql.expression.Expression;
 import org.elasticsearch.xpack.ql.expression.Expressions;
@@ -181,7 +181,7 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
                              // but also if the qualifier might not be quoted and if there's any ambiguity with nested fields
                              || Objects.equals(u.name(), attribute.qualifiedName()));
                 if (match) {
-                    matches.add(attribute.withLocation(u.source()));
+                    matches.add(attribute);
                 }
             }
         }
@@ -192,16 +192,21 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
         }
 
         if (matches.size() == 1) {
-            return handleSpecialFields(u, matches.get(0), allowCompound);
+            // only add the location if the match is univocal; b/c otherwise adding the location will overwrite any preexisting one
+            return handleSpecialFields(u, matches.get(0).withLocation(u.source()), allowCompound);
         }
 
-        return u.withUnresolvedMessage("Reference [" + u.qualifiedName()
-                + "] is ambiguous (to disambiguate use quotes or qualifiers); matches any of " +
-                 matches.stream()
-                 .map(a -> "\"" + a.qualifier() + "\".\"" + a.name() + "\"")
-                 .sorted()
-                 .collect(toList())
-                );
+        List<String> refs = matches.stream()
+            .sorted((a, b) -> {
+                int lineDiff = a.sourceLocation().getLineNumber() - b.sourceLocation().getLineNumber();
+                int colDiff = a.sourceLocation().getColumnNumber() - b.sourceLocation().getColumnNumber();
+                return lineDiff != 0 ? lineDiff : (colDiff != 0 ? colDiff : a.qualifiedName().compareTo(b.qualifiedName()));
+            })
+            .map(a -> "line " + a.sourceLocation().toString().substring(1) + " [" +
+                (a.qualifier() != null ? "\"" + a.qualifier() + "\".\"" + a.name() + "\"" : a.name()) + "]")
+            .collect(toList());
+        return u.withUnresolvedMessage("Reference [" + u.qualifiedName() + "] is ambiguous (to disambiguate use quotes or qualifiers); " +
+            "matches any of " + refs);
     }
 
     private static Attribute handleSpecialFields(UnresolvedAttribute u, Attribute named, boolean allowCompound) {
@@ -327,22 +332,31 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
                     return new Aggregate(a.source(), a.child(), a.groupings(),
                             expandProjections(a.aggregates(), a.child()));
                 }
-                // if the grouping is unresolved but the aggs are, use the former to resolve the latter
-                // solves the case of queries declaring an alias in SELECT and referring to it in GROUP BY
+                // if the grouping is unresolved but the aggs are, use the latter to resolve the former.
+                // solves the case of queries declaring an alias in SELECT and referring to it in GROUP BY.
                 // e.g. SELECT x AS a ... GROUP BY a
                 if (!a.expressionsResolved() && Resolvables.resolved(a.aggregates())) {
                     List<Expression> groupings = a.groupings();
                     List<Expression> newGroupings = new ArrayList<>();
-                    AttributeMap<Expression> resolved = Expressions.aliases(a.aggregates());
+                    List<Tuple<Attribute, Expression>> resolvedAliases = Expressions.aliases(a.aggregates());
 
                     boolean changed = false;
                     for (Expression grouping : groupings) {
                         if (grouping instanceof UnresolvedAttribute) {
-                            Attribute maybeResolved = resolveAgainstList((UnresolvedAttribute) grouping, resolved.keySet());
+                            Attribute maybeResolved = resolveAgainstList((UnresolvedAttribute) grouping,
+                                resolvedAliases.stream().map(Tuple::v1).collect(toList()));
                             if (maybeResolved != null) {
                                 changed = true;
-                                // use the matched expression (not its attribute)
-                                grouping = resolved.get(maybeResolved);
+                                if (maybeResolved.resolved()) {
+                                    grouping = resolvedAliases.stream()
+                                        .filter(t -> t.v1().equals(maybeResolved))
+                                        // use the matched expression (not its attribute)
+                                        .map(Tuple::v2)
+                                        .findAny()
+                                        .get(); // there should always be exactly one match
+                                } else {
+                                    grouping = maybeResolved;
+                                }
                             }
                         }
                         newGroupings.add(grouping);

+ 78 - 2
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java

@@ -5,15 +5,19 @@
  */
 package org.elasticsearch.xpack.sql.analysis.analyzer;
 
+import java.util.stream.Collectors;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
+import org.elasticsearch.xpack.ql.expression.Alias;
 import org.elasticsearch.xpack.ql.expression.Attribute;
+import org.elasticsearch.xpack.ql.expression.Expression;
 import org.elasticsearch.xpack.ql.expression.Expressions;
 import org.elasticsearch.xpack.ql.expression.FieldAttribute;
 import org.elasticsearch.xpack.ql.expression.NamedExpression;
 import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry;
 import org.elasticsearch.xpack.ql.index.EsIndex;
 import org.elasticsearch.xpack.ql.index.IndexResolution;
+import org.elasticsearch.xpack.ql.plan.logical.Aggregate;
 import org.elasticsearch.xpack.ql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.ql.plan.logical.Project;
 import org.elasticsearch.xpack.ql.type.EsField;
@@ -31,6 +35,7 @@ import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
 import static org.elasticsearch.xpack.ql.type.DataTypes.TEXT;
 import static org.elasticsearch.xpack.sql.types.SqlTypesTests.loadMapping;
 import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.hasItem;
 import static org.hamcrest.Matchers.hasItems;
 import static org.hamcrest.Matchers.hasSize;
@@ -178,13 +183,13 @@ public class FieldAttributeTests extends ESTestCase {
         VerificationException ex = expectThrows(VerificationException.class, () -> plan("SELECT test.bar FROM test"));
         assertEquals(
                 "Found 1 problem\nline 1:8: Reference [test.bar] is ambiguous (to disambiguate use quotes or qualifiers); "
-                        + "matches any of [\"test\".\"bar\", \"test\".\"test.bar\"]",
+                        + "matches any of [line 1:22 [\"test\".\"bar\"], line 1:22 [\"test\".\"test.bar\"]]",
                 ex.getMessage());
 
         ex = expectThrows(VerificationException.class, () -> plan("SELECT test.test FROM test"));
         assertEquals(
                 "Found 1 problem\nline 1:8: Reference [test.test] is ambiguous (to disambiguate use quotes or qualifiers); "
-                        + "matches any of [\"test\".\"test\", \"test\".\"test.test\"]",
+                        + "matches any of [line 1:23 [\"test\".\"test\"], line 1:23 [\"test\".\"test.test\"]]",
                 ex.getMessage());
 
         LogicalPlan plan = plan("SELECT test.test FROM test AS x");
@@ -201,4 +206,75 @@ public class FieldAttributeTests extends ESTestCase {
         assertThat(attribute.qualifier(), is("test"));
         assertThat(attribute.name(), is("test.test"));
     }
+
+    public void testAggregations() {
+        Map<String, EsField> mapping = TypesTests.loadMapping("mapping-basic.json");
+        EsIndex index = new EsIndex("test", mapping);
+        getIndexResult = IndexResolution.valid(index);
+        analyzer = new Analyzer(SqlTestUtils.TEST_CFG, functionRegistry, getIndexResult, verifier);
+
+        LogicalPlan plan = plan("SELECT sum(salary) AS s FROM test");
+        assertThat(plan, instanceOf(Aggregate.class));
+
+        Aggregate aggregate = (Aggregate) plan;
+        assertThat(aggregate.aggregates(), hasSize(1));
+        NamedExpression attribute = aggregate.aggregates().get(0);
+        assertThat(attribute, instanceOf(Alias.class));
+        assertThat(attribute.name(), is("s"));
+        assertThat(aggregate.groupings(), hasSize(0));
+
+        plan = plan("SELECT gender AS g, sum(salary) AS s FROM test GROUP BY g");
+        assertThat(plan, instanceOf(Aggregate.class));
+
+        aggregate = (Aggregate) plan;
+        List<? extends NamedExpression> aggregates = aggregate.aggregates();
+        assertThat(aggregates, hasSize(2));
+        assertThat(aggregates.get(0), instanceOf(Alias.class));
+        assertThat(aggregates.get(1), instanceOf(Alias.class));
+        List<String> names = aggregate.aggregates().stream().map(NamedExpression::name).collect(Collectors.toList());
+        assertThat(names, contains("g", "s"));
+
+        List<Expression> groupings = aggregate.groupings();
+        assertThat(groupings, hasSize(1));
+        FieldAttribute grouping = (FieldAttribute) groupings.get(0);
+        assertThat(grouping.name(), is("gender"));
+    }
+
+    public void testGroupByAmbiguity() {
+        Map<String, EsField> mapping = TypesTests.loadMapping("mapping-basic.json");
+        EsIndex index = new EsIndex("test", mapping);
+        getIndexResult = IndexResolution.valid(index);
+        analyzer = new Analyzer(SqlTestUtils.TEST_CFG, functionRegistry, getIndexResult, verifier);
+
+        VerificationException ex = expectThrows(VerificationException.class,
+            () -> plan("SELECT gender AS g, sum(salary) AS g FROM test GROUP BY g"));
+        assertEquals(
+            "Found 1 problem\nline 1:57: Reference [g] is ambiguous (to disambiguate use quotes or qualifiers); " +
+                "matches any of [line 1:8 [g], line 1:21 [g]]",
+            ex.getMessage());
+
+        ex = expectThrows(VerificationException.class,
+            () -> plan("SELECT gender AS g, max(salary) AS g, min(salary) AS g FROM test GROUP BY g"));
+        assertEquals(
+            "Found 1 problem\nline 1:75: Reference [g] is ambiguous (to disambiguate use quotes or qualifiers); " +
+                "matches any of [line 1:8 [g], line 1:21 [g], line 1:39 [g]]",
+            ex.getMessage());
+
+        ex = expectThrows(VerificationException.class,
+            () -> plan("SELECT gender AS g, last_name AS g, sum(salary) AS s FROM test GROUP BY g"));
+        assertEquals(
+            "Found 1 problem\nline 1:73: Reference [g] is ambiguous (to disambiguate use quotes or qualifiers); " +
+                "matches any of [line 1:8 [g], line 1:21 [g]]",
+            ex.getMessage());
+
+        ex = expectThrows(VerificationException.class,
+            () -> plan("SELECT gender AS g, last_name AS g, min(salary) AS m, max(salary) as m FROM test GROUP BY g, m"));
+        assertEquals(
+            "Found 2 problems\n" +
+                "line 1:91: Reference [g] is ambiguous (to disambiguate use quotes or qualifiers); "
+                + "matches any of [line 1:8 [g], line 1:21 [g]]\n" +
+                "line 1:94: Reference [m] is ambiguous (to disambiguate use quotes or qualifiers); "
+                + "matches any of [line 1:37 [m], line 1:55 [m]]",
+            ex.getMessage());
+    }
 }