Browse Source

ESQL: Lazy collection copying during node transform (#124424) (#124530)

* ESQL: Lazy collection copying during node transform

A set of optimization for tree traversal:
1. perform lazy copying during children transform
2. use long hashing to avoid object creation
3. perform type check first before collection checking

Relates #124395
Costin Leau 7 months ago
parent
commit
de853f473f

+ 5 - 0
docs/changelog/124424.yaml

@@ -0,0 +1,5 @@
+pr: 124424
+summary: Lazy collection copying during node transform
+area: ES|QL
+type: bug
+issues: []

+ 1 - 2
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NameId.java

@@ -12,7 +12,6 @@ import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.xpack.esql.core.util.PlanStreamInput;
 
 import java.io.IOException;
-import java.util.Objects;
 import java.util.concurrent.atomic.AtomicLong;
 
 /**
@@ -34,7 +33,7 @@ public class NameId implements Writeable {
 
     @Override
     public int hashCode() {
-        return Objects.hash(id);
+        return Long.hashCode(id);
     }
 
     @Override

+ 13 - 12
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java

@@ -110,7 +110,7 @@ public abstract class Node<T extends Node<T>> implements NamedWriteable {
     protected <E> void forEachProperty(Class<E> typeToken, Consumer<? super E> rule) {
         for (Object prop : info().properties()) {
             // skip children (only properties are interesting)
-            if (prop != children && children.contains(prop) == false && typeToken.isInstance(prop)) {
+            if (prop != children && typeToken.isInstance(prop) && children.contains(prop) == false) {
                 rule.accept((E) prop);
             }
         }
@@ -203,20 +203,21 @@ public abstract class Node<T extends Node<T>> implements NamedWriteable {
     protected <R extends Function<? super T, ? extends T>> T transformChildren(Function<T, ? extends T> traversalOperation) {
         boolean childrenChanged = false;
 
-        // stream() could be used but the code is just as complicated without any advantages
-        // further more, it would include bring in all the associated stream/collector object creation even though in
-        // most cases the immediate tree would be quite small (0,1,2 elements)
-        List<T> transformedChildren = new ArrayList<>(children().size());
+        // Avoid creating a new array of children if no change is needed.
+        // And when it happens, look at using replacement to minimize the amount of method invocations.
+        List<T> transformedChildren = null;
 
-        for (T child : children) {
+        for (int i = 0, s = children.size(); i < s; i++) {
+            T child = children.get(i);
             T next = traversalOperation.apply(child);
-            if (child.equals(next)) {
-                // use the initial value
-                next = child;
-            } else {
-                childrenChanged = true;
+            if (child.equals(next) == false) {
+                // lazy copy + replacement in place
+                if (childrenChanged == false) {
+                    childrenChanged = true;
+                    transformedChildren = new ArrayList<>(children);
+                }
+                transformedChildren.set(i, next);
             }
-            transformedChildren.add(next);
         }
 
         return (childrenChanged ? replaceChildrenSameSize(transformedChildren) : (T) this);

+ 1 - 1
x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/NodeInfo.java

@@ -52,7 +52,7 @@ public abstract class NodeInfo<T extends Node<?>> {
         List<?> children = node.children();
 
         Function<Object, Object> realRule = p -> {
-            if (p != children && false == children.contains(p) && (p == null || typeToken.isInstance(p))) {
+            if (p != children && (p == null || typeToken.isInstance(p)) && false == children.contains(p)) {
                 return rule.apply(typeToken.cast(p));
             }
             return p;

+ 13 - 11
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QueryPlan.java

@@ -131,8 +131,8 @@ public abstract class QueryPlan<PlanType extends QueryPlan<PlanType>> extends No
 
     @SuppressWarnings("unchecked")
     private static Object doTransformExpression(Object arg, Function<Expression, ? extends Expression> traversal) {
-        if (arg instanceof Expression) {
-            return traversal.apply((Expression) arg);
+        if (arg instanceof Expression exp) {
+            return traversal.apply(exp);
         }
 
         // WARNING: if the collection is typed, an incompatible function will be applied to it
@@ -141,17 +141,19 @@ public abstract class QueryPlan<PlanType extends QueryPlan<PlanType>> extends No
         // has no type info so it's difficult to have automatic checking without having base classes).
 
         if (arg instanceof Collection<?> c) {
-            List<Object> transformed = new ArrayList<>(c.size());
+            List<Object> transformed = null;
             boolean hasChanged = false;
+            int i = 0;
             for (Object e : c) {
                 Object next = doTransformExpression(e, traversal);
-                if (e.equals(next)) {
-                    // use the initial value
-                    next = e;
-                } else {
-                    hasChanged = true;
+                if (e.equals(next) == false) {
+                    if (hasChanged == false) {
+                        hasChanged = true;
+                        transformed = new ArrayList<>(c);
+                    }
+                    transformed.set(i, next);
                 }
-                transformed.add(next);
+                i++;
             }
 
             return hasChanged ? transformed : arg;
@@ -186,8 +188,8 @@ public abstract class QueryPlan<PlanType extends QueryPlan<PlanType>> extends No
 
     @SuppressWarnings("unchecked")
     private static void doForEachExpression(Object arg, Consumer<Expression> traversal) {
-        if (arg instanceof Expression) {
-            traversal.accept((Expression) arg);
+        if (arg instanceof Expression exp) {
+            traversal.accept(exp);
         } else if (arg instanceof Collection<?> c) {
             for (Object o : c) {
                 doForEachExpression(o, traversal);

+ 1 - 2
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/NameId.java

@@ -6,7 +6,6 @@
  */
 package org.elasticsearch.xpack.ql.expression;
 
-import java.util.Objects;
 import java.util.concurrent.atomic.AtomicLong;
 
 /**
@@ -28,7 +27,7 @@ public class NameId {
 
     @Override
     public int hashCode() {
-        return Objects.hash(id);
+        return Long.hashCode(id);
     }
 
     @Override

+ 13 - 11
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/plan/QueryPlan.java

@@ -109,8 +109,8 @@ public abstract class QueryPlan<PlanType extends QueryPlan<PlanType>> extends No
 
     @SuppressWarnings("unchecked")
     private static Object doTransformExpression(Object arg, Function<Expression, ? extends Expression> traversal) {
-        if (arg instanceof Expression) {
-            return traversal.apply((Expression) arg);
+        if (arg instanceof Expression exp) {
+            return traversal.apply(exp);
         }
 
         // WARNING: if the collection is typed, an incompatible function will be applied to it
@@ -119,17 +119,19 @@ public abstract class QueryPlan<PlanType extends QueryPlan<PlanType>> extends No
         // has no type info so it's difficult to have automatic checking without having base classes).
 
         if (arg instanceof Collection<?> c) {
-            List<Object> transformed = new ArrayList<>(c.size());
+            List<Object> transformed = null;
             boolean hasChanged = false;
+            int i = 0;
             for (Object e : c) {
                 Object next = doTransformExpression(e, traversal);
-                if (e.equals(next)) {
-                    // use the initial value
-                    next = e;
-                } else {
-                    hasChanged = true;
+                if (e.equals(next) == false) {
+                    if (hasChanged == false) {
+                        hasChanged = true;
+                        transformed = new ArrayList<>(c);
+                    }
+                    transformed.set(i, next);
                 }
-                transformed.add(next);
+                i++;
             }
 
             return hasChanged ? transformed : arg;
@@ -164,8 +166,8 @@ public abstract class QueryPlan<PlanType extends QueryPlan<PlanType>> extends No
 
     @SuppressWarnings("unchecked")
     private static void doForEachExpression(Object arg, Consumer<Expression> traversal) {
-        if (arg instanceof Expression) {
-            traversal.accept((Expression) arg);
+        if (arg instanceof Expression exp) {
+            traversal.accept(exp);
         } else if (arg instanceof Collection<?> c) {
             for (Object o : c) {
                 doForEachExpression(o, traversal);

+ 13 - 12
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/tree/Node.java

@@ -109,7 +109,7 @@ public abstract class Node<T extends Node<T>> {
     protected <E> void forEachProperty(Class<E> typeToken, Consumer<? super E> rule) {
         for (Object prop : info().properties()) {
             // skip children (only properties are interesting)
-            if (prop != children && children.contains(prop) == false && typeToken.isInstance(prop)) {
+            if (prop != children && typeToken.isInstance(prop) && children.contains(prop) == false) {
                 rule.accept((E) prop);
             }
         }
@@ -202,20 +202,21 @@ public abstract class Node<T extends Node<T>> {
     protected <R extends Function<? super T, ? extends T>> T transformChildren(Function<T, ? extends T> traversalOperation) {
         boolean childrenChanged = false;
 
-        // stream() could be used but the code is just as complicated without any advantages
-        // further more, it would include bring in all the associated stream/collector object creation even though in
-        // most cases the immediate tree would be quite small (0,1,2 elements)
-        List<T> transformedChildren = new ArrayList<>(children().size());
+        // Avoid creating a new array of children if no change is needed.
+        // And when it happens, look at using replacement to minimize the amount of method invocations.
+        List<T> transformedChildren = null;
 
-        for (T child : children) {
+        for (int i = 0, s = children.size(); i < s; i++) {
+            T child = children.get(i);
             T next = traversalOperation.apply(child);
-            if (child.equals(next)) {
-                // use the initial value
-                next = child;
-            } else {
-                childrenChanged = true;
+            if (child.equals(next) == false) {
+                // lazy copy + replacement in place
+                if (childrenChanged == false) {
+                    childrenChanged = true;
+                    transformedChildren = new ArrayList<>(children);
+                }
+                transformedChildren.set(i, next);
             }
-            transformedChildren.add(next);
         }
 
         return (childrenChanged ? replaceChildrenSameSize(transformedChildren) : (T) this);

+ 1 - 1
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/tree/NodeInfo.java

@@ -52,7 +52,7 @@ public abstract class NodeInfo<T extends Node<?>> {
         List<?> children = node.children();
 
         Function<Object, Object> realRule = p -> {
-            if (p != children && false == children.contains(p) && (p == null || typeToken.isInstance(p))) {
+            if (p != children && (p == null || typeToken.isInstance(p)) && false == children.contains(p)) {
                 return rule.apply(typeToken.cast(p));
             }
             return p;