Browse Source

Implement the 'left' function in issue #98545 (#98942)

@nik9000 I re-checked out the main branch and refactored the 'left' function to
cut the prefix string in place. However, I ran into a problem: 'left' fails
the test case 'testEvaluateInManyThreads'. I found that, in the
multi-threaded case, in `  EvalOperator.ExpressionEvaluator eval =
evalSupplier.get(); for (int c = 0; c < count; c++) {      
assertThat(toJavaObject(eval.eval(page), 0), testCase.getMatcher()); } `
the toJavaObject function returns a BytesRef with length=2 whose content is
[81,89]. However, the assertThat function in JUnit 4 receives a BytesRef
parameter whose length is 10. Can you give me some clues? I can't
find which variable is shared between the threads.

Command to rerun the failed test case: `gradlew ':x-pack:plugin:esql:test'
--tests
"org.elasticsearch.xpack.esql.expression.function.scalar.string.LeftTests.testEvaluateInManyThreads
{TestCase=Left basic test}" -Dtests.seed=44459C172243712
-Dtests.locale=lv-LV -Dtests.timezone=Asia/Irkutsk -Druntime.java=20`
dreamquster 2 years ago
parent
commit
2644ccbb8a

+ 5 - 0
docs/changelog/98942.yaml

@@ -0,0 +1,5 @@
+pr: 98942
+summary: "ESQL: LEFT function"
+area: ES|QL
+type: feature
+issues: []

+ 2 - 0
docs/reference/esql/esql-functions.asciidoc

@@ -53,6 +53,7 @@ these functions:
 * <<esql-split>>
 * <<esql-starts_with>>
 * <<esql-substring>>
+* <<esql-left>>
 * <<esql-tan>>
 * <<esql-tanh>>
 * <<esql-tau>>
@@ -115,6 +116,7 @@ include::functions/split.asciidoc[]
 include::functions/sqrt.asciidoc[]
 include::functions/starts_with.asciidoc[]
 include::functions/substring.asciidoc[]
+include::functions/left.asciidoc[]
 include::functions/tan.asciidoc[]
 include::functions/tanh.asciidoc[]
 include::functions/tau.asciidoc[]

+ 14 - 0
docs/reference/esql/functions/left.asciidoc

@@ -0,0 +1,14 @@
+[[esql-left]]
+=== `LEFT`
+
+Returns the substring made of the first 'length' characters
+of the string, starting from position 0.
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=left]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/string.csv-spec[tag=left-result]
+|===

+ 1 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec

@@ -36,6 +36,7 @@ is_finite                |is_finite(arg1)
 is_infinite              |is_infinite(arg1)
 is_nan                   |is_nan(arg1)
 least                    |least(first, rest...)
+left                     |left(arg1, arg2)
 length                   |length(arg1)
 log10                    |log10(n)
 ltrim                    |ltrim(arg1)

+ 20 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

@@ -700,3 +700,23 @@ Gateway     | instances
 Gateway     | instances   
 null        | null
 ;
+
+left
+// tag::left[]
+FROM employees
+| KEEP last_name
+| EVAL left = LEFT(last_name, 3)
+| SORT last_name ASC
+| LIMIT 5
+// end::left[]
+;
+
+// tag::left-result[]
+last_name:keyword | left:keyword
+Awdeh             |Awd
+Azuma             |Azu
+Baek              |Bae
+Bamford           |Bam
+Bernatsky         |Ber
+// end::left-result[]
+;

+ 105 - 0
x-pack/plugin/esql/src/main/java/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/LeftEvaluator.java

@@ -0,0 +1,105 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import java.lang.IllegalArgumentException;
+import java.lang.Override;
+import java.lang.String;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.xpack.esql.expression.function.Warnings;
+import org.elasticsearch.xpack.ql.tree.Source;
+
+/**
+ * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Left}.
+ * This class is generated. Do not edit it.
+ * <p>
+ * Evaluates the {@code str} and {@code length} child evaluators against a page and
+ * calls {@code Left.process} once per position. A position where either input is
+ * null or does not hold exactly one value yields null. An
+ * {@link IllegalArgumentException} thrown by {@code Left.process} is registered
+ * with {@link Warnings} and also yields null for that position.
+ */
+public final class LeftEvaluator implements EvalOperator.ExpressionEvaluator {
+  private final Warnings warnings; // receives IllegalArgumentExceptions via registerException
+
+  private final BytesRef out; // passed as the first argument to every Left.process call
+
+  private final EvalOperator.ExpressionEvaluator str; // evaluator producing the string input
+
+  private final EvalOperator.ExpressionEvaluator length; // evaluator producing the length input
+
+  public LeftEvaluator(Source source, BytesRef out, EvalOperator.ExpressionEvaluator str,
+      EvalOperator.ExpressionEvaluator length) {
+    this.warnings = new Warnings(source);
+    this.out = out;
+    this.str = str;
+    this.length = length;
+  }
+
+  @Override
+  public Block eval(Page page) { // entry point: picks the block or vector implementation below
+    Block strUncastBlock = str.eval(page);
+    if (strUncastBlock.areAllValuesNull()) { // all-null input: the whole result is null
+      return Block.constantNullBlock(page.getPositionCount());
+    }
+    BytesRefBlock strBlock = (BytesRefBlock) strUncastBlock;
+    Block lengthUncastBlock = length.eval(page);
+    if (lengthUncastBlock.areAllValuesNull()) { // same short-circuit for the length input
+      return Block.constantNullBlock(page.getPositionCount());
+    }
+    IntBlock lengthBlock = (IntBlock) lengthUncastBlock;
+    BytesRefVector strVector = strBlock.asVector();
+    if (strVector == null) { // no vector view of str: use the block implementation
+      return eval(page.getPositionCount(), strBlock, lengthBlock);
+    }
+    IntVector lengthVector = lengthBlock.asVector();
+    if (lengthVector == null) { // no vector view of length: use the block implementation
+      return eval(page.getPositionCount(), strBlock, lengthBlock);
+    }
+    return eval(page.getPositionCount(), strVector, lengthVector); // both inputs have vector views
+  }
+
+  public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, IntBlock lengthBlock) { // block implementation with per-position null checks
+    BytesRefBlock.Builder result = BytesRefBlock.newBlockBuilder(positionCount);
+    BytesRef strScratch = new BytesRef(); // reused as the scratch argument of getBytesRef
+    position: for (int p = 0; p < positionCount; p++) {
+      if (strBlock.isNull(p) || strBlock.getValueCount(p) != 1) { // null or not exactly one value
+        result.appendNull();
+        continue position;
+      }
+      if (lengthBlock.isNull(p) || lengthBlock.getValueCount(p) != 1) { // null or not exactly one value
+        result.appendNull();
+        continue position;
+      }
+      try {
+        result.appendBytesRef(Left.process(out, strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), lengthBlock.getInt(lengthBlock.getFirstValueIndex(p))));
+      } catch (IllegalArgumentException e) { // record the failure as a warning and emit null
+        warnings.registerException(e);
+        result.appendNull();
+      }
+    }
+    return result.build();
+  }
+
+  public BytesRefBlock eval(int positionCount, BytesRefVector strVector, IntVector lengthVector) { // vector implementation: no per-position null checks
+    BytesRefBlock.Builder result = BytesRefBlock.newBlockBuilder(positionCount);
+    BytesRef strScratch = new BytesRef(); // reused as the scratch argument of getBytesRef
+    position: for (int p = 0; p < positionCount; p++) {
+      try {
+        result.appendBytesRef(Left.process(out, strVector.getBytesRef(p, strScratch), lengthVector.getInt(p)));
+      } catch (IllegalArgumentException e) { // record the failure as a warning and emit null
+        warnings.registerException(e);
+        result.appendNull();
+      }
+    }
+    return result.build();
+  }
+
+  @Override
+  public String toString() { // this exact format is asserted by LeftTests
+    return "LeftEvaluator[" + "out=" + out + ", str=" + str + ", length=" + length + "]";
+  }
+}

+ 2 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

@@ -71,6 +71,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum;
 import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split;
@@ -141,6 +142,7 @@ public class EsqlFunctionRegistry extends FunctionRegistry {
                 def(LTrim.class, LTrim::new, "ltrim"),
                 def(RTrim.class, RTrim::new, "rtrim"),
                 def(Trim.class, Trim::new, "trim"),
+                def(Left.class, Left::new, "left"),
                 def(StartsWith.class, StartsWith::new, "starts_with") },
             // date
             new FunctionDefinition[] {

+ 127 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Left.java

@@ -0,0 +1,127 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.elasticsearch.compute.ann.Evaluator;
+import org.elasticsearch.compute.ann.Fixed;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
+import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
+import org.elasticsearch.xpack.ql.tree.NodeInfo;
+import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataType;
+import org.elasticsearch.xpack.ql.type.DataTypes;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FIRST;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;
+
+/**
+ * {@code left(foo, len)} is an alias for {@code substring(foo, 0, len)}: it returns
+ * the prefix of {@code foo} made of at most {@code len} UTF-8 code points.
+ * <p>
+ * {@code process} does not copy bytes. The returned {@link BytesRef} is the
+ * {@code out} argument, re-pointed at the input's backing array with a shortened
+ * length. A {@code len} of zero or less produces an empty result, because the
+ * counting loop never runs.
+ */
+public class Left extends ScalarFunction implements EvaluatorMapper {
+
+    private final Source source; // handed to LeftEvaluator in toEvaluator
+
+    private final Expression str; // first argument: the string to truncate
+
+    private final Expression length; // second argument: the number of code points to keep
+
+    public Left(Source source, Expression str, Expression length) {
+        super(source, Arrays.asList(str, length));
+        this.source = source;
+        this.str = str;
+        this.length = length;
+    }
+
+    @Evaluator(warnExceptions = IllegalArgumentException.class)
+    static BytesRef process(@Fixed BytesRef out, BytesRef str, int length) { // returns out, not a new BytesRef
+        out.bytes = str.bytes; // share the input's backing array; no bytes are copied
+        out.offset = str.offset;
+        out.length = str.length;
+        int curLenStart = 0; // byte offset, relative to out.offset, of the next code point to read
+        UnicodeUtil.UTF8CodePoint cp = new UnicodeUtil.UTF8CodePoint();
+        for (int i = 0; i < length && curLenStart < out.length; i++, curLenStart += cp.numBytes) { // one code point per iteration, at most length times
+            UnicodeUtil.codePointAt(out.bytes, out.offset + curLenStart, cp); // decode so cp.numBytes can advance the offset
+        }
+        out.length = Math.min(curLenStart, out.length); // never report more bytes than the input has
+        return out;
+    }
+
+    @Override
+    public Supplier<EvalOperator.ExpressionEvaluator> toEvaluator(
+        Function<Expression, Supplier<EvalOperator.ExpressionEvaluator>> toEvaluator
+    ) {
+
+        Supplier<EvalOperator.ExpressionEvaluator> strSupplier = toEvaluator.apply(str);
+        Supplier<EvalOperator.ExpressionEvaluator> lengthSupplier = toEvaluator.apply(length);
+        return () -> {
+            BytesRef out = new BytesRef(); // a fresh out instance for each evaluator the supplier creates
+            return new LeftEvaluator(source, out, strSupplier.get(), lengthSupplier.get());
+        };
+    }
+
+    @Override
+    public Expression replaceChildren(List<Expression> newChildren) { // expects [str, length] in that order
+        return new Left(source(), newChildren.get(0), newChildren.get(1));
+    }
+
+    @Override
+    protected NodeInfo<? extends Expression> info() {
+        return NodeInfo.create(this, Left::new, str, length);
+    }
+
+    @Override
+    public DataType dataType() { // the result type is always KEYWORD
+        return DataTypes.KEYWORD;
+    }
+
+    @Override
+    protected TypeResolution resolveType() { // returns the first failing resolution, if any
+        if (childrenResolved() == false) {
+            return new TypeResolution("Unresolved children");
+        }
+
+        TypeResolution resolution = isString(str, sourceText(), FIRST); // check the first argument with isString
+        if (resolution.unresolved()) {
+            return resolution;
+        }
+
+        resolution = isInteger(length, sourceText(), SECOND); // check the second argument with isInteger
+        if (resolution.unresolved()) {
+            return resolution;
+        }
+
+        return TypeResolution.TYPE_RESOLVED;
+    }
+
+    @Override
+    public boolean foldable() { // foldable only when both arguments are foldable
+        return str.foldable() && length.foldable();
+    }
+
+    @Override
+    public Object fold() { // delegate to the EvaluatorMapper default implementation
+        return EvaluatorMapper.super.fold();
+    }
+
+    @Override
+    public ScriptTemplate asScript() { // always throws: no script form is provided
+        throw new UnsupportedOperationException();
+    }
+}

+ 13 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java

@@ -82,6 +82,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum;
 import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Split;
@@ -343,6 +344,7 @@ public final class PlanNamedTypes {
             of(ScalarFunction.class, Pow.class, PlanNamedTypes::writePow, PlanNamedTypes::readPow),
             of(ScalarFunction.class, StartsWith.class, PlanNamedTypes::writeStartsWith, PlanNamedTypes::readStartsWith),
             of(ScalarFunction.class, Substring.class, PlanNamedTypes::writeSubstring, PlanNamedTypes::readSubstring),
+            of(ScalarFunction.class, Left.class, PlanNamedTypes::writeLeft, PlanNamedTypes::readLeft),
             of(ScalarFunction.class, Split.class, PlanNamedTypes::writeSplit, PlanNamedTypes::readSplit),
             of(ScalarFunction.class, Tau.class, PlanNamedTypes::writeNoArgScalar, PlanNamedTypes::readNoArgScalar),
             // ArithmeticOperations
@@ -1273,6 +1275,17 @@ public final class PlanNamedTypes {
         out.writeOptionalWriteable(fields.size() == 3 ? o -> out.writeExpression(fields.get(2)) : null);
     }
 
+    static Left readLeft(PlanStreamInput in) throws IOException { // reads the two expressions in the order writeLeft wrote them
+        return new Left(Source.EMPTY, in.readExpression(), in.readExpression()); // no source is read from the stream; Source.EMPTY is used
+    }
+
+    static void writeLeft(PlanStreamOutput out, Left left) throws IOException { // writes both children of left, in order
+        List<Expression> fields = left.children();
+        assert fields.size() == 2; // Left is constructed with exactly two children: str and length
+        out.writeExpression(fields.get(0)); // str
+        out.writeExpression(fields.get(1)); // length
+    }
+
     static Split readSplit(PlanStreamInput in) throws IOException {
         return new Split(Source.EMPTY, in.readExpression(), in.readExpression());
     }

+ 110 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/LeftTests.java

@@ -0,0 +1,110 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.xpack.esql.expression.function.scalar.AbstractScalarFunctionTestCase;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.Literal;
+import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataType;
+import org.elasticsearch.xpack.ql.type.DataTypes;
+import org.hamcrest.Matcher;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
+import static org.hamcrest.Matchers.equalTo;
+
+public class LeftTests extends AbstractScalarFunctionTestCase { // parameterized and direct tests for Left
+    public LeftTests(@Name("TestCase") Supplier<TestCase> testCaseSupplier) {
+        this.testCase = testCaseSupplier.get();
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() { // builds the "long" and "short" randomized cases
+        List<TestCaseSupplier> suppliers = new ArrayList<>();
+        suppliers.add(new TestCaseSupplier("long", () -> { // 10-char text, length 1..10: expect a prefix
+            int length = between(1, 10);
+            String text = randomAlphaOfLength(10);
+            return new TestCase(
+                List.of(new TypedData(new BytesRef(text), DataTypes.KEYWORD, "str"), new TypedData(length, DataTypes.INTEGER, "length")),
+                "LeftEvaluator[out=[], str=Attribute[channel=0], length=Attribute[channel=1]]",
+                DataTypes.KEYWORD,
+                equalTo(new BytesRef(text.substring(0, length)))
+            );
+        }));
+        suppliers.add(new TestCaseSupplier("short", () -> { // 1-char text, length 2..10: expect the whole text
+            int length = between(2, 10);
+            String text = randomAlphaOfLength(1);
+            return new TestCase(
+                List.of(new TypedData(new BytesRef(text), DataTypes.KEYWORD, "str"), new TypedData(length, DataTypes.INTEGER, "length")),
+                "LeftEvaluator[out=[], str=Attribute[channel=0], length=Attribute[channel=1]]",
+                DataTypes.KEYWORD,
+                equalTo(new BytesRef(text))
+            );
+        }));
+        return parameterSuppliersFromTypedData(suppliers);
+    }
+
+    @Override
+    protected Expression build(Source source, List<Expression> args) { // args are [str, length]
+        return new Left(source, args.get(0), args.get(1));
+    }
+
+    @Override
+    protected List<ArgumentSpec> argSpec() { // a required string followed by a required integer
+        return List.of(required(strings()), required(integers()));
+    }
+
+    @Override
+    protected DataType expectedType(List<DataType> argTypes) { // KEYWORD regardless of argument types
+        return DataTypes.KEYWORD;
+    }
+
+    public Matcher<Object> resultsMatcher(List<TypedData> typedData) { // expected value computed with String.substring
+        String str = ((BytesRef) typedData.get(0).data()).utf8ToString();
+        int length = (Integer) typedData.get(1).data();
+        return equalTo(new BytesRef(str.substring(0, length))); // NOTE(review): substring throws if length exceeds str.length() - confirm callers never pass that
+    }
+
+    public void testReasonableLength() { // length shorter than the input
+        assertThat(process("a fox call", 5), equalTo("a fox"));
+    }
+
+    public void testMassiveLength() { // length equal to the input's 10 characters
+        assertThat(process("a fox call", 10), equalTo("a fox call"));
+    }
+
+    public void testNegativeLength() { // a negative length yields the empty string
+        assertThat(process("a fox call", -1), equalTo(""));
+    }
+
+    public void testUnicode() { // the surrogate pair counts as a single code point
+        final String s = "a\ud83c\udf09tiger";
+        assert s.codePointCount(0, s.length()) == 7;
+        assertThat(process(s, 2), equalTo("a\ud83c\udf09"));
+    }
+
+    private String process(String str, int length) { // evaluates Left over a one-row page and returns the result as a String
+        Block result = evaluator(
+            new Left(Source.EMPTY, field("str", DataTypes.KEYWORD), new Literal(Source.EMPTY, length, DataTypes.INTEGER))
+        ).get().eval(row(List.of(new BytesRef(str))));
+        if (null == result) {
+            return null;
+        }
+        BytesRef resultByteRef = ((BytesRef) toJavaObject(result, 0));
+        return resultByteRef == null ? null : resultByteRef.utf8ToString();
+    }
+}