Browse Source

Propagate foldable Eval fields (#98702)

Constants declared in Eval should be propagated during planning instead
 at runtime. Consider the query
   eval a = 1
 | where a > 1

The current PR improves the optimizer to determine this scenario by
 folding a during planning and propagating its result.

Add additional block handling methods
Costin Leau 2 years ago
parent
commit
2cacd33ec5

+ 28 - 78
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java

@@ -8,7 +8,6 @@
 package org.elasticsearch.compute.data;
 
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.lucene.BytesRefs;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -16,7 +15,7 @@ import java.util.List;
 import java.util.function.Consumer;
 
 import static org.elasticsearch.common.lucene.BytesRefs.toBytesRef;
-import static org.elasticsearch.compute.data.Block.constantNullBlock;
+import static org.elasticsearch.compute.data.ElementType.fromJava;
 
 public final class BlockUtils {
 
@@ -43,6 +42,10 @@ public final class BlockUtils {
                 append.accept(o);
             };
         }
+
+        public void accept(Object object) {
+            append.accept(object);
+        }
     }
 
     public static Block[] fromArrayRow(Object... row) {
@@ -62,27 +65,15 @@ public final class BlockUtils {
         Block[] blocks = new Block[size];
         for (int i = 0; i < size; i++) {
             Object object = row.get(i);
-            if (object instanceof Integer intVal) {
-                blocks[i] = IntBlock.newConstantBlockWith(intVal, blockSize);
-            } else if (object instanceof Long longVal) {
-                blocks[i] = LongBlock.newConstantBlockWith(longVal, blockSize);
-            } else if (object instanceof Double doubleVal) {
-                blocks[i] = DoubleBlock.newConstantBlockWith(doubleVal, blockSize);
-            } else if (object instanceof BytesRef bytesRefVal) {
-                blocks[i] = BytesRefBlock.newConstantBlockWith(bytesRefVal, blockSize);
-            } else if (object instanceof Boolean booleanVal) {
-                blocks[i] = BooleanBlock.newConstantBlockWith(booleanVal, blockSize);
-            } else if (object instanceof List<?> listVal) {
-                BuilderWrapper wrapper = wrapperFor(listVal.get(0).getClass(), 1);
-                wrapper.append.accept(listVal);
+            if (object instanceof List<?> listVal) {
+                BuilderWrapper wrapper = wrapperFor(fromJava(listVal.get(0).getClass()), blockSize);
+                wrapper.accept(listVal);
                 if (isAscending(listVal)) {
                     wrapper.builder.mvOrdering(Block.MvOrdering.ASCENDING);
                 }
                 blocks[i] = wrapper.builder.build();
-            } else if (object == null) {
-                blocks[i] = constantNullBlock(blockSize);
             } else {
-                throw new UnsupportedOperationException("can't make a block out of [" + object + "/" + object.getClass() + "]");
+                blocks[i] = constantBlock(object, blockSize);
             }
         }
         return blocks;
@@ -121,7 +112,7 @@ public final class BlockUtils {
         var wrappers = new BuilderWrapper[list.get(0).size()];
 
         for (int i = 0; i < wrappers.length; i++) {
-            wrappers[i] = wrapperFor(type(list, i), size);
+            wrappers[i] = wrapperFor(fromJava(type(list, i)), size);
         }
         for (List<Object> values : list) {
             for (int j = 0, vSize = values.size(); j < vSize; j++) {
@@ -149,65 +140,9 @@ public final class BlockUtils {
         return null;
     }
 
-    public static BuilderWrapper wrapperFor(Class<?> type, int size) {
-        BuilderWrapper builder;
-        if (type == Integer.class) {
-            var b = IntBlock.newBlockBuilder(size);
-            builder = new BuilderWrapper(b, o -> b.appendInt((int) o));
-        } else if (type == Long.class) {
-            var b = LongBlock.newBlockBuilder(size);
-            builder = new BuilderWrapper(b, o -> b.appendLong((long) o));
-        } else if (type == Double.class) {
-            var b = DoubleBlock.newBlockBuilder(size);
-            builder = new BuilderWrapper(b, o -> b.appendDouble((double) o));
-        } else if (type == BytesRef.class) {
-            var b = BytesRefBlock.newBlockBuilder(size);
-            builder = new BuilderWrapper(b, o -> b.appendBytesRef(BytesRefs.toBytesRef(o)));
-        } else if (type == Boolean.class) {
-            var b = BooleanBlock.newBlockBuilder(size);
-            builder = new BuilderWrapper(b, o -> b.appendBoolean((boolean) o));
-        } else if (type == null) {
-            var b = new Block.Builder() {
-                @Override
-                public Block.Builder appendNull() {
-                    return this;
-                }
-
-                @Override
-                public Block.Builder beginPositionEntry() {
-                    return this;
-                }
-
-                @Override
-                public Block.Builder endPositionEntry() {
-                    return this;
-                }
-
-                @Override
-                public Block.Builder copyFrom(Block block, int beginInclusive, int endExclusive) {
-                    return this;
-                }
-
-                @Override
-                public Block.Builder mvOrdering(Block.MvOrdering mvOrdering) {
-                    throw new UnsupportedOperationException();
-                }
-
-                @Override
-                public Block.Builder appendAllValuesToCurrentPosition(Block block) {
-                    throw new UnsupportedOperationException();
-                }
-
-                @Override
-                public Block build() {
-                    return constantNullBlock(size);
-                }
-            };
-            builder = new BuilderWrapper(b, o -> {});
-        } else {
-            throw new UnsupportedOperationException("Unrecognized type " + type);
-        }
-        return builder;
+    public static BuilderWrapper wrapperFor(ElementType type, int size) {
+        var b = type.newBlockBuilder(size);
+        return new BuilderWrapper(b, o -> appendValue(b, o, type));
     }
 
     public static void appendValue(Block.Builder builder, Object val, ElementType type) {
@@ -225,6 +160,21 @@ public final class BlockUtils {
         }
     }
 
+    public static Block constantBlock(Object val, int size) {
+        if (val == null) {
+            return Block.constantNullBlock(size);
+        }
+        var type = fromJava(val.getClass());
+        return switch (type) {
+            case LONG -> LongBlock.newConstantBlockWith((long) val, size);
+            case INT -> IntBlock.newConstantBlockWith((int) val, size);
+            case BYTES_REF -> BytesRefBlock.newConstantBlockWith(toBytesRef(val), size);
+            case DOUBLE -> DoubleBlock.newConstantBlockWith((double) val, size);
+            case BOOLEAN -> BooleanBlock.newConstantBlockWith((boolean) val, size);
+            default -> throw new UnsupportedOperationException("unsupported element type [" + type + "]");
+        };
+    }
+
     /**
      * Returned by {@link #toJavaObject} for "doc" type blocks.
      */

+ 22 - 0
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java

@@ -7,6 +7,8 @@
 
 package org.elasticsearch.compute.data;
 
+import org.apache.lucene.util.BytesRef;
+
 import java.util.function.IntFunction;
 
 /**
@@ -46,4 +48,24 @@ public enum ElementType {
     public Block.Builder newBlockBuilder(int estimatedSize) {
         return builder.apply(estimatedSize);
     }
+
+    public static ElementType fromJava(Class<?> type) {
+        ElementType elementType;
+        if (type == Integer.class) {
+            elementType = INT;
+        } else if (type == Long.class) {
+            elementType = LONG;
+        } else if (type == Double.class) {
+            elementType = DOUBLE;
+        } else if (type == String.class || type == BytesRef.class) {
+            elementType = BYTES_REF;
+        } else if (type == Boolean.class) {
+            elementType = BOOLEAN;
+        } else if (type == null || type == Void.class) {
+            elementType = NULL;
+        } else {
+            throw new IllegalArgumentException("Unrecognized class type " + type);
+        }
+        return elementType;
+    }
 }

+ 1 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java

@@ -117,7 +117,7 @@ public final class CsvTestUtils {
                             if (type == Type.NULL) {
                                 throw new IllegalArgumentException("Null type is not allowed in the test data; found " + entries[i]);
                             }
-                            columns[i] = new CsvColumn(name, type, BlockUtils.wrapperFor(type.clazz(), 8));
+                            columns[i] = new CsvColumn(name, type, BlockUtils.wrapperFor(ElementType.fromJava(type.clazz()), 8));
                         }
                     }
                     // data rows

+ 27 - 41
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java

@@ -7,28 +7,23 @@
 
 package org.elasticsearch.xpack.esql.evaluator;
 
-import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.compute.ann.Evaluator;
 import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockUtils;
 import org.elasticsearch.compute.data.BooleanArrayVector;
 import org.elasticsearch.compute.data.BooleanBlock;
 import org.elasticsearch.compute.data.BooleanVector;
-import org.elasticsearch.compute.data.BytesRefBlock;
-import org.elasticsearch.compute.data.DoubleBlock;
-import org.elasticsearch.compute.data.IntBlock;
-import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.Page;
 import org.elasticsearch.compute.data.Vector;
 import org.elasticsearch.compute.operator.EvalOperator;
 import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
-import org.elasticsearch.xpack.esql.EsqlUnsupportedOperationException;
 import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
 import org.elasticsearch.xpack.esql.evaluator.mapper.ExpressionMapper;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.ComparisonMapper;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.InMapper;
 import org.elasticsearch.xpack.esql.evaluator.predicate.operator.regex.RegexMapper;
 import org.elasticsearch.xpack.esql.planner.Layout;
-import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner;
 import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
 import org.elasticsearch.xpack.ql.expression.Attribute;
 import org.elasticsearch.xpack.ql.expression.Expression;
@@ -181,47 +176,38 @@ public final class EvalMapper {
                     return block.apply(page.getPositionCount());
                 }
             }
-            IntFunction<Block> block = block(lit);
-            return () -> new LiteralsEvaluator(block);
+            // wrap the closure to provide a nice toString (used by tests)
+            var blockClosure = new IntFunction<Block>() {
+                @Override
+                public Block apply(int value) {
+                    return block(lit).apply(value);
+                }
+
+                @Override
+                public String toString() {
+                    return lit.toString();
+                }
+            };
+            return () -> new LiteralsEvaluator(blockClosure);
         }
 
         private IntFunction<Block> block(Literal lit) {
-            if (lit.value() == null) {
+            var value = lit.value();
+            if (value == null) {
                 return Block::constantNullBlock;
             }
-            return switch (LocalExecutionPlanner.toElementType(lit.dataType())) {
-                case BOOLEAN -> {
-                    boolean v = (boolean) lit.value();
-                    yield positions -> BooleanBlock.newConstantBlockWith(v, positions);
-                }
-                case BYTES_REF -> {
-                    BytesRef v = (BytesRef) lit.value();
-                    yield positions -> BytesRefBlock.newConstantBlockWith(v, positions);
-                }
-                case DOUBLE -> new IntFunction<>() { // TODO toString in the rest of these and tests for this
-                    private final double v = (double) lit.value();
 
-                    @Override
-                    public Block apply(int positions) {
-                        return DoubleBlock.newConstantBlockWith(v, positions);
-                    }
-
-                    @Override
-                    public String toString() {
-                        return Double.toString(v);
-                    }
-                };
-                case INT -> {
-                    int v = (int) lit.value();
-                    yield positions -> IntBlock.newConstantBlockWith(v, positions);
+            if (value instanceof List<?> multiValue) {
+                if (multiValue.isEmpty()) {
+                    return Block::constantNullBlock;
                 }
-                case LONG -> {
-                    long v = (long) lit.value();
-                    yield positions -> LongBlock.newConstantBlockWith(v, positions);
-                }
-                case NULL -> Block::constantNullBlock;
-                case DOC, UNKNOWN -> throw new EsqlUnsupportedOperationException("can't eval to doc or unknown");
-            };
+                return positions -> {
+                    var wrapper = BlockUtils.wrapperFor(ElementType.fromJava(multiValue.get(0).getClass()), positions);
+                    wrapper.accept(multiValue);
+                    return wrapper.builder().build();
+                };
+            }
+            return positions -> BlockUtils.constantBlock(value, positions);
         }
     }
 

+ 34 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

@@ -95,6 +95,7 @@ public class LogicalPlanOptimizer extends RuleExecutor<LogicalPlan> {
             new FoldNull(),
             new SplitInWithFoldableValue(),
             new ConstantFolding(),
+            new PropagateEvalFoldables(),
             // boolean
             new BooleanSimplification(),
             new LiteralsOnTheRight(),
@@ -302,6 +303,39 @@ public class LogicalPlanOptimizer extends RuleExecutor<LogicalPlan> {
         }
     }
 
+    //
+    // Replace any reference attribute with its source, if it does not affect the result.
+    // This avoids ulterior look-ups between attributes and its source across nodes.
+    //
+    static class PropagateEvalFoldables extends Rule<LogicalPlan, LogicalPlan> {
+
+        @Override
+        public LogicalPlan apply(LogicalPlan plan) {
+            var collectRefs = new AttributeMap<Expression>();
+            // collect aliases
+            plan.forEachExpressionUp(Alias.class, a -> {
+                var c = a.child();
+                if (c.foldable()) {
+                    collectRefs.put(a.toAttribute(), c);
+                }
+            });
+            if (collectRefs.isEmpty()) {
+                return plan;
+            }
+            java.util.function.Function<ReferenceAttribute, Expression> replaceReference = r -> collectRefs.resolve(r, r);
+
+            plan = plan.transformUp(p -> {
+                // Apply the replacement inside Filter and Eval (which shouldn't make a difference)
+                if (p instanceof Filter || p instanceof Eval) {
+                    p = p.transformExpressionsOnly(ReferenceAttribute.class, replaceReference);
+                }
+                return p;
+            });
+
+            return plan;
+        }
+    }
+
     static class PushDownAndCombineLimits extends OptimizerRules.OptimizerRule<Limit> {
 
         @Override

+ 3 - 1
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java

@@ -287,7 +287,9 @@ public abstract class AbstractFunctionTestCase extends ESTestCase {
     }
 
     public final void testEvaluatorSimpleToString() {
-        assertThat(evaluator(buildFieldExpression(testCase)).get().toString(), equalTo(testCase.evaluatorToString));
+        var supplier = evaluator(buildFieldExpression(testCase));
+        var ev = supplier.get();
+        assertThat(ev.toString(), equalTo(testCase.evaluatorToString));
     }
 
     public final void testSimpleConstantFolding() {

+ 60 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java

@@ -190,6 +190,66 @@ public class LocalLogicalPlanOptimizerTests extends ESTestCase {
         var source = as(limit.child(), EsRelation.class);
     }
 
+    /**
+     * Expects
+     * LocalRelation[[first_name{f}#4],EMPTY]
+     */
+    public void testMissingFieldInFilterNumericWithReference() {
+        var plan = plan("""
+              from test
+            | eval x = emp_no
+            | where x > 10
+            | keep first_name
+            """);
+
+        var testStats = statsForMissingField("emp_no");
+        var localPlan = localPlan(plan, testStats);
+
+        var local = as(localPlan, LocalRelation.class);
+        assertThat(Expressions.names(local.output()), contains("first_name"));
+    }
+
+    /**
+     * Expects
+     * LocalRelation[[first_name{f}#4],EMPTY]
+     */
+    public void testMissingFieldInFilterNumericWithReferenceToEval() {
+        var plan = plan("""
+              from test
+            | eval x = emp_no + 1
+            | where x > 10
+            | keep first_name
+            """);
+
+        var testStats = statsForMissingField("emp_no");
+        var localPlan = localPlan(plan, testStats);
+
+        var local = as(localPlan, LocalRelation.class);
+        assertThat(Expressions.names(local.output()), contains("first_name"));
+    }
+
+    /**
+     * Expects
+     * LocalRelation[[_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, gender{f}#7, languages{f}#8, last_name{f}#9, salary{f}#10, x
+     * {r}#3],EMPTY]
+     */
+    public void testMissingFieldInFilterNoProjection() {
+        var plan = plan("""
+              from test
+            | eval x = emp_no
+            | where x > 10
+            """);
+
+        var testStats = statsForMissingField("emp_no");
+        var localPlan = localPlan(plan, testStats);
+
+        var local = as(localPlan, LocalRelation.class);
+        assertThat(
+            Expressions.names(local.output()),
+            contains("_meta_field", "emp_no", "first_name", "gender", "languages", "last_name", "salary", "x")
+        );
+    }
+
     private LocalRelation asEmptyRelation(Object o) {
         var empty = as(o, LocalRelation.class);
         assertThat(empty.supplier(), is(LocalSupplier.EMPTY));

+ 50 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

@@ -38,6 +38,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Grok;
 import org.elasticsearch.xpack.esql.plan.logical.TopN;
 import org.elasticsearch.xpack.esql.plan.logical.local.EsqlProject;
 import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
+import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier;
 import org.elasticsearch.xpack.esql.stats.Metrics;
 import org.elasticsearch.xpack.ql.expression.Alias;
 import org.elasticsearch.xpack.ql.expression.Attribute;
@@ -1091,6 +1092,55 @@ public class LogicalPlanOptimizerTests extends ESTestCase {
         as(limit.child(), EsRelation.class);
     }
 
+    public void testFoldInEval() {
+        var plan = optimizedPlan("""
+            from test
+            | eval a = 1, b = a + 1, c = b + a
+            | where c > 10
+            """);
+
+        var local = as(plan, LocalRelation.class);
+        assertThat(local.supplier(), is(LocalSupplier.EMPTY));
+    }
+
+    public void testFoldFromRow() {
+        var plan = optimizedPlan("""
+              row a = 1, b = 2, c = 3
+            | where c > 10
+            """);
+
+        as(plan, LocalRelation.class);
+    }
+
+    public void testFoldFromRowInEval() {
+        var plan = optimizedPlan("""
+              row a = 1, b = 2, c = 3
+            | eval x = c
+            | where x > 10
+            """);
+
+        as(plan, LocalRelation.class);
+    }
+
+    public void testInvalidFoldDueToReplacement() {
+        var plan = optimizedPlan("""
+              from test
+            | eval x = 1
+            | eval x = emp_no
+            | where x > 10
+            | keep x
+            """);
+
+        var project = as(plan, EsqlProject.class);
+        var limit = as(project.child(), Limit.class);
+        var filter = as(limit.child(), Filter.class);
+        var eval = as(filter.child(), Eval.class);
+        assertThat(eval.fields(), hasSize(1));
+        var alias = as(eval.fields().get(0), Alias.class);
+        assertThat(Expressions.name(alias.child()), is("emp_no"));
+        var source = as(eval.child(), EsRelation.class);
+    }
+
     public void testEnrich() {
         LogicalPlan plan = optimizedPlan("""
             from test