Browse Source

Add trim function

This change adds a string `trim` function.
Martijn van Groningen 2 years ago
parent
commit
3c3963cc28

+ 2 - 0
docs/reference/esql/esql-functions.asciidoc

@@ -49,6 +49,7 @@ these functions:
 * <<esql-to_string>>
 * <<esql-to_unsigned_long>>
 * <<esql-to_version>>
+* <<esql-trim>>
 
 include::functions/abs.asciidoc[]
 include::functions/auto_bucket.asciidoc[]
@@ -89,3 +90,4 @@ include::functions/to_long.asciidoc[]
 include::functions/to_string.asciidoc[]
 include::functions/to_unsigned_long.asciidoc[]
 include::functions/to_version.asciidoc[]
+include::functions/trim.asciidoc[]

+ 10 - 0
docs/reference/esql/functions/trim.asciidoc

@@ -0,0 +1,10 @@
+[[esql-trim]]
+=== `TRIM`
+Removes leading and trailing whitespace from strings.
+
+[source,esql]
+----
+FROM employees
+| KEEP first_name, last_name, height
+| EVAL trimmed_first_name = TRIM(first_name)
+----

+ 16 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

@@ -165,6 +165,22 @@ emp_no:integer | last_name:keyword | x:keyword | z:keyword
 10010 | Piveteau  | P | a
 ;
 
+trim
+from employees | sort emp_no | limit 10 | eval name = concat("      ", first_name) | eval name = trim(name) | keep emp_no, name;
+
+emp_no:integer | name:keyword
+10001 | Georgi
+10002 | Bezalel
+10003 | Parto
+10004 | Chirstian
+10005 | Kyoichi
+10006 | Anneke
+10007 | Tzvetan
+10008 | Saniya
+10009 | Sumant
+10010 | Duangkaew
+;
+
 concat
 from employees | sort emp_no | limit 10 | eval name = concat(first_name, " ", last_name) | keep emp_no, name;
 

+ 67 - 0
x-pack/plugin/esql/src/main/java/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/TrimEvaluator.java

@@ -0,0 +1,67 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import java.lang.Override;
+import java.lang.String;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.EvalOperator;
+
+/**
+ * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Trim}.
+ * This class is generated. Do not edit it.
+ */
+public final class TrimEvaluator implements EvalOperator.ExpressionEvaluator {
+  // Evaluator for the single argument; its output is fed to Trim.process.
+  private final EvalOperator.ExpressionEvaluator val;
+
+  public TrimEvaluator(EvalOperator.ExpressionEvaluator val) {
+    this.val = val;
+  }
+
+  /**
+   * Evaluates the argument for {@code page} and applies {@code Trim.process} to its values.
+   * Dispatches to the vector overload when the argument block can be viewed as a vector,
+   * and to the block overload otherwise.
+   */
+  @Override
+  public Block eval(Page page) {
+    Block valUncastBlock = val.eval(page);
+    // Short-circuit: nothing to trim when the argument reports only nulls.
+    if (valUncastBlock.areAllValuesNull()) {
+      return Block.constantNullBlock(page.getPositionCount());
+    }
+    BytesRefBlock valBlock = (BytesRefBlock) valUncastBlock;
+    BytesRefVector valVector = valBlock.asVector();
+    // A null vector view means we must go position by position through the block.
+    // NOTE(review): presumably asVector() is null when the block has nulls or multi-values; confirm.
+    if (valVector == null) {
+      return eval(page.getPositionCount(), valBlock);
+    }
+    return eval(page.getPositionCount(), valVector).asBlock();
+  }
+
+  /**
+   * Block path: appends null for every position that is null or does not hold exactly
+   * one value, and the result of {@code Trim.process} for all other positions.
+   */
+  public BytesRefBlock eval(int positionCount, BytesRefBlock valBlock) {
+    BytesRefBlock.Builder result = BytesRefBlock.newBlockBuilder(positionCount);
+    // Single scratch instance passed to every getBytesRef call in the loop.
+    BytesRef valScratch = new BytesRef();
+    position: for (int p = 0; p < positionCount; p++) {
+      if (valBlock.isNull(p) || valBlock.getValueCount(p) != 1) {
+        result.appendNull();
+        continue position;
+      }
+      result.appendBytesRef(Trim.process(valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch)));
+    }
+    return result.build();
+  }
+
+  /**
+   * Vector path: applies {@code Trim.process} to every position; no null or
+   * value-count checks are performed here.
+   */
+  public BytesRefVector eval(int positionCount, BytesRefVector valVector) {
+    BytesRefVector.Builder result = BytesRefVector.newVectorBuilder(positionCount);
+    // Single scratch instance passed to every getBytesRef call in the loop.
+    BytesRef valScratch = new BytesRef();
+    position: for (int p = 0; p < positionCount; p++) {
+      result.appendBytesRef(Trim.process(valVector.getBytesRef(p, valScratch)));
+    }
+    return result.build();
+  }
+
+  @Override
+  public String toString() {
+    return "TrimEvaluator[" + "val=" + val + "]";
+  }
+}

+ 2 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

@@ -56,6 +56,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.StartsWith;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.Trim;
 import org.elasticsearch.xpack.ql.expression.function.FunctionDefinition;
 import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry;
 import org.elasticsearch.xpack.ql.expression.predicate.nulls.IsNull;
@@ -103,6 +104,7 @@ public class EsqlFunctionRegistry extends FunctionRegistry {
                 def(Length.class, Length::new, "length"),
                 def(Substring.class, Substring::new, "substring"),
                 def(Concat.class, Concat::new, "concat"),
+                def(Trim.class, Trim::new, "trim"),
                 def(StartsWith.class, StartsWith::new, "starts_with") },
             // date
             new FunctionDefinition[] {

+ 73 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Trim.java

@@ -0,0 +1,73 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.ann.Evaluator;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction;
+import org.elasticsearch.xpack.esql.planner.Mappable;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.TypeResolutions;
+import org.elasticsearch.xpack.ql.tree.NodeInfo;
+import org.elasticsearch.xpack.ql.tree.Source;
+
+import java.util.List;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;
+
+/**
+ * Removes leading and trailing whitespace from a string.
+ * <p>
+ * "Whitespace" follows the definition used by {@link String#trim()}: every character
+ * whose code point is less than or equal to {@code U+0020} (space, tab, line feed,
+ * carriage return and the remaining ASCII control characters) is stripped from both
+ * ends of the value. Characters inside the string are never touched.
+ */
+public final class Trim extends UnaryScalarFunction implements Mappable {
+
+    /**
+     * @param source source information forwarded to the superclass
+     * @param str    expression producing the string to trim
+     */
+    public Trim(Source source, Expression str) {
+        super(source, str);
+    }
+
+    /**
+     * Accepts only string-typed arguments; resolution fails while the child is unresolved.
+     */
+    @Override
+    protected TypeResolution resolveType() {
+        if (childrenResolved() == false) {
+            return new Expression.TypeResolution("Unresolved children");
+        }
+
+        return isString(field, sourceText(), TypeResolutions.ParamOrdinal.DEFAULT);
+    }
+
+    @Override
+    public Object fold() {
+        // Two inherited fold() candidates exist; explicitly pick the Mappable one.
+        return Mappable.super.fold();
+    }
+
+    /**
+     * Builds a supplier of {@link TrimEvaluator}s wrapping the evaluator of the argument.
+     */
+    @Override
+    public Supplier<EvalOperator.ExpressionEvaluator> toEvaluator(
+        Function<Expression, Supplier<EvalOperator.ExpressionEvaluator>> toEvaluator
+    ) {
+        Supplier<EvalOperator.ExpressionEvaluator> fieldEvaluator = toEvaluator.apply(field());
+        return () -> new TrimEvaluator(fieldEvaluator.get());
+    }
+
+    @Override
+    public Expression replaceChildren(List<Expression> newChildren) {
+        return new Trim(source(), newChildren.get(0));
+    }
+
+    @Override
+    protected NodeInfo<? extends Expression> info() {
+        return NodeInfo.create(this, Trim::new, field());
+    }
+
+    /**
+     * Trims a single UTF-8 encoded value.
+     * <p>
+     * Works directly on the encoded bytes instead of decoding to a {@link String} and
+     * re-encoding. This is equivalent to {@link String#trim()} for valid UTF-8: every byte
+     * of a multi-byte sequence has its high bit set, so a byte in {@code [0x00, 0x20]} is
+     * always a complete character {@code <= U+0020}.
+     *
+     * @param val the value to trim; it is only read, never modified
+     * @return a newly allocated {@link BytesRef} holding the trimmed bytes; it never
+     *         shares its backing array with {@code val}
+     */
+    @Evaluator
+    static BytesRef process(BytesRef val) {
+        int start = val.offset;
+        int end = val.offset + val.length;
+        // Java bytes are signed: mask to 0..255 so that UTF-8 continuation/lead bytes
+        // (>= 0x80) are not mistaken for values below the space character.
+        while (start < end && (val.bytes[start] & 0xFF) <= ' ') {
+            start++;
+        }
+        while (end > start && (val.bytes[end - 1] & 0xFF) <= ' ') {
+            end--;
+        }
+        // Copy the surviving range so the result does not alias the caller's buffer,
+        // which may be a scratch instance that is overwritten by the next read.
+        byte[] trimmed = new byte[end - start];
+        System.arraycopy(val.bytes, start, trimmed, 0, trimmed.length);
+        return new BytesRef(trimmed);
+    }
+}

+ 4 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java

@@ -67,6 +67,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.StartsWith;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.Trim;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
 import org.elasticsearch.xpack.esql.plan.logical.Dissect;
 import org.elasticsearch.xpack.esql.plan.logical.Dissect.Parser;
@@ -287,6 +288,7 @@ public final class PlanNamedTypes {
             of(ESQL_UNARY_SCLR_CLS, ToString.class, PlanNamedTypes::writeESQLUnaryScalar, PlanNamedTypes::readESQLUnaryScalar),
             of(ESQL_UNARY_SCLR_CLS, ToUnsignedLong.class, PlanNamedTypes::writeESQLUnaryScalar, PlanNamedTypes::readESQLUnaryScalar),
             of(ESQL_UNARY_SCLR_CLS, ToVersion.class, PlanNamedTypes::writeESQLUnaryScalar, PlanNamedTypes::readESQLUnaryScalar),
+            of(ESQL_UNARY_SCLR_CLS, Trim.class, PlanNamedTypes::writeESQLUnaryScalar, PlanNamedTypes::readESQLUnaryScalar),
             // ScalarFunction
             of(ScalarFunction.class, AutoBucket.class, PlanNamedTypes::writeAutoBucket, PlanNamedTypes::readAutoBucket),
             of(ScalarFunction.class, Case.class, PlanNamedTypes::writeCase, PlanNamedTypes::readCase),
@@ -956,7 +958,8 @@ public final class PlanNamedTypes {
         entry(name(ToLong.class), ToLong::new),
         entry(name(ToString.class), ToString::new),
         entry(name(ToUnsignedLong.class), ToUnsignedLong::new),
-        entry(name(ToVersion.class), ToVersion::new)
+        entry(name(ToVersion.class), ToVersion::new),
+        entry(name(Trim.class), Trim::new)
     );
 
     static UnaryScalarFunction readESQLUnaryScalar(PlanStreamInput in, String name) throws IOException {

+ 100 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TrimTests.java

@@ -0,0 +1,100 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.xpack.esql.expression.function.scalar.AbstractScalarFunctionTestCase;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.Literal;
+import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataType;
+import org.hamcrest.Matcher;
+import org.junit.Before;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Unit tests for {@link Trim}: the generic scalar-function checks inherited from
+ * {@link AbstractScalarFunctionTestCase} plus direct tests of {@code Trim.process}.
+ */
+public class TrimTests extends AbstractScalarFunctionTestCase {
+
+    /**
+     * Characters the random padding is drawn from. All of them are {@code <= U+0020}
+     * and are therefore stripped by {@link String#trim()}, which the expectations rely on.
+     */
+    private static final String WHITESPACE_CHARS = " \t\n\r";
+
+    // String type used for the field / literal under test; re-drawn before every test.
+    private DataType randomType;
+
+    @Before
+    public void setup() {
+        randomType = randomFrom(strings());
+    }
+
+    @Override
+    protected List<Object> simpleData() {
+        return List.of(addRandomLeadingOrTrailingWhitespaces(randomAlphaOfLength(4)));
+    }
+
+    @Override
+    protected Expression expressionForSimpleData() {
+        return new Trim(Source.EMPTY, field(randomAlphaOfLength(4), randomType));
+    }
+
+    @Override
+    protected Matcher<Object> resultMatcher(List<Object> data, DataType dataType) {
+        return equalTo(new BytesRef(((BytesRef) data.get(0)).utf8ToString().trim()));
+    }
+
+    @Override
+    protected String expectedEvaluatorSimpleToString() {
+        return "TrimEvaluator[val=Attribute[channel=0]]";
+    }
+
+    @Override
+    protected Expression constantFoldable(List<Object> data) {
+        return new Trim(Source.EMPTY, new Literal(Source.EMPTY, data.get(0), randomType));
+    }
+
+    @Override
+    protected Expression build(Source source, List<Literal> args) {
+        return new Trim(source, args.get(0));
+    }
+
+    @Override
+    protected List<ArgumentSpec> argSpec() {
+        return List.of(required(strings()));
+    }
+
+    @Override
+    protected DataType expectedType(List<DataType> argTypes) {
+        return argTypes.get(0);
+    }
+
+    /** Padding on either or both sides is removed and the payload is returned unchanged. */
+    public void testTrim() {
+        String expected = randomAlphaOfLength(4);
+        BytesRef result = Trim.process(addRandomLeadingOrTrailingWhitespaces(expected));
+        assertThat(result.utf8ToString(), equalTo(expected));
+    }
+
+    /** A value made only of whitespace trims down to the empty string. */
+    public void testTrimOnlyWhitespace() {
+        BytesRef result = Trim.process(new BytesRef(new String(randomWhiteSpace())));
+        assertThat(result.length, equalTo(0));
+    }
+
+    /** Multi-byte UTF-8 characters at the edges of the payload must survive trimming. */
+    public void testTrimKeepsNonAsciiContent() {
+        String expected = "\u00e9t\u00e9";
+        BytesRef result = Trim.process(addRandomLeadingOrTrailingWhitespaces(expected));
+        assertThat(result.utf8ToString(), equalTo(expected));
+    }
+
+    /**
+     * Wraps {@code expected} in random padding: leading only, trailing only, or both.
+     * At least one side is always padded.
+     */
+    BytesRef addRandomLeadingOrTrailingWhitespaces(String expected) {
+        StringBuilder builder = new StringBuilder();
+        if (randomBoolean()) {
+            builder.append(randomWhiteSpace());
+            builder.append(expected);
+            if (randomBoolean()) {
+                builder.append(randomWhiteSpace());
+            }
+        } else {
+            builder.append(expected);
+            builder.append(randomWhiteSpace());
+        }
+        return new BytesRef(builder.toString());
+    }
+
+    /**
+     * Returns a run of 1 to 8 copies of one randomly chosen character from
+     * {@link #WHITESPACE_CHARS}, so tabs and line breaks are exercised as well as spaces.
+     */
+    private static char[] randomWhiteSpace() {
+        char[] randomWhitespace = new char[randomIntBetween(1, 8)];
+        Arrays.fill(randomWhitespace, WHITESPACE_CHARS.charAt(randomIntBetween(0, WHITESPACE_CHARS.length() - 1)));
+        return randomWhitespace;
+    }
+
+}