Selaa lähdekoodia

Make the trim function efficient by performing trim on BytesRef instead of converting to a string and invoking trim and then converting back to BytesRef.

Martijn van Groningen 2 vuotta sitten
vanhempi
commit
c58e262c8f

+ 7 - 4
x-pack/plugin/esql/src/main/java/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/TrimEvaluator.java

@@ -18,9 +18,12 @@ import org.elasticsearch.compute.operator.EvalOperator;
  * This class is generated. Do not edit it.
  */
 public final class TrimEvaluator implements EvalOperator.ExpressionEvaluator {
+  private final BytesRef scratch;
+
   private final EvalOperator.ExpressionEvaluator val;
 
-  public TrimEvaluator(EvalOperator.ExpressionEvaluator val) {
+  public TrimEvaluator(BytesRef scratch, EvalOperator.ExpressionEvaluator val) {
+    this.scratch = scratch;
     this.val = val;
   }
 
@@ -46,7 +49,7 @@ public final class TrimEvaluator implements EvalOperator.ExpressionEvaluator {
         result.appendNull();
         continue position;
       }
-      result.appendBytesRef(Trim.process(valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch)));
+      result.appendBytesRef(Trim.process(scratch, valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch)));
     }
     return result.build();
   }
@@ -55,13 +58,13 @@ public final class TrimEvaluator implements EvalOperator.ExpressionEvaluator {
     BytesRefVector.Builder result = BytesRefVector.newVectorBuilder(positionCount);
     BytesRef valScratch = new BytesRef();
     position: for (int p = 0; p < positionCount; p++) {
-      result.appendBytesRef(Trim.process(valVector.getBytesRef(p, valScratch)));
+      result.appendBytesRef(Trim.process(scratch, valVector.getBytesRef(p, valScratch)));
     }
     return result.build();
   }
 
   @Override
   public String toString() {
-    return "TrimEvaluator[" + "val=" + val + "]";
+    return "TrimEvaluator[" + "scratch=" + scratch + ", val=" + val + "]";
   }
 }

+ 15 - 5
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Trim.java

@@ -9,6 +9,7 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.string;
 
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.compute.ann.Evaluator;
+import org.elasticsearch.compute.ann.Fixed;
 import org.elasticsearch.compute.operator.EvalOperator;
 import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction;
 import org.elasticsearch.xpack.esql.planner.Mappable;
@@ -51,7 +52,7 @@ public final class Trim extends UnaryScalarFunction implements Mappable {
         Function<Expression, Supplier<EvalOperator.ExpressionEvaluator>> toEvaluator
     ) {
         Supplier<EvalOperator.ExpressionEvaluator> field = toEvaluator.apply(field());
-        return () -> new TrimEvaluator(field.get());
+        return () -> new TrimEvaluator(new BytesRef(), field.get());
     }
 
     @Override
@@ -65,9 +66,18 @@ public final class Trim extends UnaryScalarFunction implements Mappable {
     }
 
     @Evaluator
-    static BytesRef process(BytesRef val) {
-        // TODO: optimize
-        String str = val.utf8ToString();
-        return new BytesRef(str.trim());
+    static BytesRef process(@Fixed BytesRef scratch, BytesRef val) { // trims bytes <= 0x20 from both ends of val; result is returned in scratch
+        int offset = val.offset;
+        int end = val.offset + val.length; // exclusive end index; val may be a slice starting at a non-zero offset
+        while ((offset < end) && ((val.bytes[offset] & 0xff) <= 0x20)) { // skip leading bytes <= 0x20
+            offset++;
+        }
+        while ((offset < end) && ((val.bytes[end - 1] & 0xff) <= 0x20)) { // drop trailing bytes <= 0x20
+            end--;
+        }
+        scratch.bytes = val.bytes; // no copy: scratch points at val's backing array
+        scratch.offset = offset;
+        scratch.length = end - offset;
+        return scratch;
     }
 }

+ 7 - 5
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TrimTests.java

@@ -47,7 +47,7 @@ public class TrimTests extends AbstractScalarFunctionTestCase {
 
     @Override
     protected String expectedEvaluatorSimpleToString() {
-        return "TrimEvaluator[val=Attribute[channel=0]]";
+        return "TrimEvaluator[scratch=[], val=Attribute[channel=0]]";
     }
 
     @Override
@@ -71,9 +71,11 @@ public class TrimTests extends AbstractScalarFunctionTestCase {
     }
 
     public void testTrim() {
-        String expected = randomUnicodeOfLength(8).trim();
-        BytesRef result = Trim.process(addRandomLeadingOrTrailingWhitespaces(expected));
-        assertThat(result.utf8ToString(), equalTo(expected));
+        for (int i = 0; i < 64; i++) { // repeat so that many random inputs are exercised in one run
+            String expected = randomUnicodeOfLength(8).trim();
+            BytesRef result = Trim.process(new BytesRef(), addRandomLeadingOrTrailingWhitespaces(expected)); // fresh scratch per iteration
+            assertThat(result.utf8ToString(), equalTo(expected));
+        }
     }
 
     BytesRef addRandomLeadingOrTrailingWhitespaces(String expected) {
@@ -93,7 +95,7 @@ public class TrimTests extends AbstractScalarFunctionTestCase {
 
     private static char[] randomWhiteSpace() {
         char[] randomWhitespace = new char[randomIntBetween(1, 8)];
-        Arrays.fill(randomWhitespace, randomFrom(' ', '\t', '\n'));
+        Arrays.fill(randomWhitespace, (char) randomIntBetween(0, 0x20)); // one random char code from the 0..0x20 range that Trim.process strips, repeated across the array
         return randomWhitespace;
     }