Browse Source

Add ES|QL Locate function (#106899)

* Add ES|QL Locate function
Tommaso Teofili 1 year ago
parent
commit
54eeb622d5

+ 6 - 0
docs/changelog/106899.yaml

@@ -0,0 +1,6 @@
+pr: 106899
+summary: Add ES|QL Locate function
+area: ES|QL
+type: enhancement
+issues:
+ - 106818

+ 5 - 0
docs/reference/esql/functions/description/locate.asciidoc

@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+Returns an integer that indicates the position of a keyword substring within another string

+ 14 - 0
docs/reference/esql/functions/layout/locate.asciidoc

@@ -0,0 +1,14 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-locate]]
+=== `LOCATE`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/locate.svg[Embedded,opts=inline]
+
+include::../parameters/locate.asciidoc[]
+include::../description/locate.asciidoc[]
+include::../types/locate.asciidoc[]

+ 12 - 0
docs/reference/esql/functions/parameters/locate.asciidoc

@@ -0,0 +1,12 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`string`::
+An input string
+
+`substring`::
+A substring to locate in the input string
+
+`start`::
+The start index

+ 1 - 0
docs/reference/esql/functions/signature/locate.svg

@@ -0,0 +1 @@
+<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="600" height="46" viewbox="0 0 600 46"><defs><style type="text/css">#guide .c{fill:none;stroke:#222222;}#guide .k{fill:#000000;font-family:Roboto Mono,Sans-serif;font-size:20px;}#guide .s{fill:#e4f4ff;stroke:#222222;}#guide .syn{fill:#8D8D8D;font-family:Roboto Mono,Sans-serif;font-size:20px;}</style></defs><path class="c" d="M0 31h5m92 0h10m32 0h10m92 0h10m32 0h10m128 0h10m32 0h10m80 0h10m32 0h5"/><rect class="s" x="5" y="5" width="92" height="36"/><text class="k" x="15" y="31">LOCATE</text><rect class="s" x="107" y="5" width="32" height="36" rx="7"/><text class="syn" x="117" y="31">(</text><rect class="s" x="149" y="5" width="92" height="36" rx="7"/><text class="k" x="159" y="31">string</text><rect class="s" x="251" y="5" width="32" height="36" rx="7"/><text class="syn" x="261" y="31">,</text><rect class="s" x="293" y="5" width="128" height="36" rx="7"/><text class="k" x="303" y="31">substring</text><rect class="s" x="431" y="5" width="32" height="36" rx="7"/><text class="syn" x="441" y="31">,</text><rect class="s" x="473" y="5" width="80" height="36" rx="7"/><text class="k" x="483" y="31">start</text><rect class="s" x="563" y="5" width="32" height="36" rx="7"/><text class="syn" x="573" y="31">)</text></svg>

+ 12 - 0
docs/reference/esql/functions/types/locate.asciidoc

@@ -0,0 +1,12 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+string | substring | start | result
+keyword | keyword | integer | integer
+keyword | text | integer | integer
+text | keyword | integer | integer
+text | text | integer | integer
+|===

+ 5 - 1
x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec

@@ -30,6 +30,7 @@ double e()
 "integer|long|double|boolean|keyword|text|ip|version least(first:integer|long|double|boolean|keyword|text|ip|version, ?rest...:integer|long|double|boolean|keyword|text|ip|version)"
 "keyword left(string:keyword|text, length:integer)"
 "integer length(string:keyword|text)"
+"integer locate(string:keyword|text, substring:keyword|text, ?start:integer)"
 "double log(?base:integer|unsigned_long|long|double, number:integer|unsigned_long|long|double)"
 "double log10(number:double|integer|long|unsigned_long)"
 "keyword|text ltrim(string:keyword|text)"
@@ -138,6 +139,7 @@ greatest      |first                               |"integer|long|double|boolean
 least         |first                               |"integer|long|double|boolean|keyword|text|ip|version"                                                                             |[""]
 left          |[string, length]                    |["keyword|text", integer]                                                                                                         |[The string from which to return a substring., The number of characters to return.]
 length        |string                              |"keyword|text"                                                                                                                    |[""]
+locate        |[string, substring, start]          |["keyword|text", "keyword|text", "integer"]                                                                                       |[An input string, A substring to locate in the input string, The start index]
 log           |[base, number]                      |["integer|unsigned_long|long|double", "integer|unsigned_long|long|double"]                                                        |["Base of logarithm. If `null`\, the function returns `null`. If not provided\, this function returns the natural logarithm (base e) of a value.", "Numeric expression. If `null`\, the function returns `null`."]
 log10         |number                              |"double|integer|long|unsigned_long"                                                                                               |Numeric expression. If `null`, the function returns `null`.
 ltrim         |string                              |"keyword|text"                                                                                                                    |[""]
@@ -247,6 +249,7 @@ greatest      |Returns the maximum value from many columns.
 least         |Returns the minimum value from many columns.
 left          |Returns the substring that extracts 'length' chars from 'string' starting from the left.
 length        |Returns the character length of a string.
+locate        |Returns an integer that indicates the position of a keyword substring within another string
 log           |Returns the logarithm of a value to a base. The input can be any numeric value, the return value is always a double.  Logs of zero, negative numbers, and base of one return `null` as well as a warning.
 log10         |Returns the logarithm of a value to base 10. The input can be any numeric value, the return value is always a double.  Logs of 0 and negative numbers return `null` as well as a warning.
 ltrim         |Removes leading whitespaces from a string.
@@ -357,6 +360,7 @@ greatest      |"integer|long|double|boolean|keyword|text|ip|version"
 least         |"integer|long|double|boolean|keyword|text|ip|version"                                                                       |false                       |true            |false
 left          |keyword                                                                                                                     |[false, false]              |false           |false
 length        |integer                                                                                                                     |false                       |false           |false
+locate        |integer                                                                                                                     |[false, false, true]        |false           |false
 log           |double                                                                                                                      |[true, false]               |false           |false
 log10         |double                                                                                                                      |false                       |false           |false
 ltrim         |"keyword|text"                                                                                                              |false                       |false           |false
@@ -447,5 +451,5 @@ countFunctions#[skip:-8.13.99]
 meta functions |  stats  a = count(*), b = count(*), c = count(*) |  mv_expand c;
 
 a:long | b:long | c:long
-100    | 100    | 100
+101    | 101    | 101
 ;

+ 112 - 0
x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

@@ -1168,3 +1168,115 @@ from employees | where emp_no == 10001 | eval split = split("fooMbar", gender) |
 gender:keyword | split:keyword 
 M              | [foo, bar]
 ;
+
+locate#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "ll");
+
+a:keyword | a_ll:integer
+hello | 3
+;
+
+locateFail#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "int");
+
+a:keyword | a_ll:integer
+hello | 0
+;
+
+locateZeroStart#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "ll", 0);
+
+a:keyword | a_ll:integer
+hello | 3
+;
+
+locateExactStart#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "ll", 3);
+
+a:keyword | a_ll:integer
+hello | 3
+;
+
+locateLongerStart#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "ll", 10);
+
+a:keyword | a_ll:integer
+hello | 0
+;
+
+locateLongerSubstr#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "farewell");
+
+a:keyword | a_ll:integer
+hello | 0
+;
+
+locateSame#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(a, "hello");
+
+a:keyword | a_ll:integer
+hello | 1
+;
+
+locateWithSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
+from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = locate(last_name, f_s) | keep emp_no, last_name, f_s, f_l;
+ignoreOrder:true
+
+emp_no:integer | last_name:keyword | f_s:keyword | f_l:integer
+10001 | Facello   | acello | 2
+10002 | Simmel    | immel | 2
+10003 | Bamford   | amford | 2
+10004 | Koblick   | oblick | 2
+10005 | Maliniak  | aliniak | 2
+10006 | Preusig   | reusig | 2
+10007 | Zielinski | ielinski | 2
+10008 | Kalloufi  | alloufi | 2
+10009 | Peac      | eac | 2
+10010 | Piveteau  | iveteau | 2
+;
+
+locateUtf16Emoji#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 3) | eval f_l = locate(a, f_s);
+
+a:keyword | f_s:keyword | f_l:integer
+🐱Meow!🐶Woof! | Meow!🐶Woof! | 3
+;
+
+locateNestedSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = substring(a, locate(a, "ll"));
+
+a:keyword | a_ll:keyword
+hello | llo
+;
+
+locateNestSubstring#[skip:-8.13.99,reason:new string function added in 8.14]
+row a = "hello" | eval a_ll = locate(substring(a, 2), "ll");
+
+a:keyword | a_ll:integer
+hello | 2
+;
+
+locateStats#[skip:-8.13.99,reason:new string function added in 8.14]
+from employees | where emp_no <= 10010 | eval f_l = locate(last_name, "ll") | stats min(f_l), max(f_l) by job_positions | sort job_positions | limit 5;
+
+min(f_l):integer | max(f_l):integer | job_positions:keyword
+5               | 5               | Accountant
+0               | 0               | Architect
+0               | 0               | Head Human Resources
+0               | 3               | Internship
+3               | 3               | Junior Developer
+;
+
+locateWarnings#[skip:-8.13.99,reason:new string function added in 8.14]
+required_feature: esql.mv_warn
+
+from hosts | where host=="epsilon" | eval l1 = locate(host_group, "ate"), l2 = locate(description, "ate") | keep l1, l2;
+ignoreOrder:true
+warning:Line 1:80: evaluation of [locate(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded.
+warning:Line 1:80: java.lang.IllegalArgumentException: single-value function encountered multi-value
+
+l1:integer | l2:integer
+2          | null
+2          | null
+null       | 0
+;

+ 166 - 0
x-pack/plugin/esql/src/main/java/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/LocateEvaluator.java

@@ -0,0 +1,166 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License
+// 2.0; you may not use this file except in compliance with the Elastic License
+// 2.0.
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import java.lang.IllegalArgumentException;
+import java.lang.Override;
+import java.lang.String;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.xpack.esql.expression.function.Warnings;
+import org.elasticsearch.xpack.ql.tree.Source;
+
+/**
+ * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Locate}.
+ * This class is generated. Do not edit it.
+ * <p>
+ * Evaluates the {@code str}, {@code substr} and {@code start} child evaluators against a page and
+ * applies {@code Locate.process} once per position, producing an int result.
+ */
+public final class LocateEvaluator implements EvalOperator.ExpressionEvaluator {
+  // Collects the exceptions registered while evaluating; built from the function's source.
+  private final Warnings warnings;
+
+  // Child evaluator producing the string to search in.
+  private final EvalOperator.ExpressionEvaluator str;
+
+  // Child evaluator producing the substring to look for.
+  private final EvalOperator.ExpressionEvaluator substr;
+
+  // Child evaluator producing the start position.
+  private final EvalOperator.ExpressionEvaluator start;
+
+  // Supplies the block factory used to build result blocks and vectors.
+  private final DriverContext driverContext;
+
+  public LocateEvaluator(Source source, EvalOperator.ExpressionEvaluator str,
+      EvalOperator.ExpressionEvaluator substr, EvalOperator.ExpressionEvaluator start,
+      DriverContext driverContext) {
+    this.warnings = new Warnings(source);
+    this.str = str;
+    this.substr = substr;
+    this.start = start;
+    this.driverContext = driverContext;
+  }
+
+  /**
+   * Evaluates the three child expressions for the page and computes one result per position.
+   * The vector overload is used only when all three inputs expose a vector; as soon as
+   * {@code asVector()} returns null for any input, the block overload is used instead.
+   * The blocks produced by the children are closed when this method returns.
+   */
+  @Override
+  public Block eval(Page page) {
+    try (BytesRefBlock strBlock = (BytesRefBlock) str.eval(page)) {
+      try (BytesRefBlock substrBlock = (BytesRefBlock) substr.eval(page)) {
+        try (IntBlock startBlock = (IntBlock) start.eval(page)) {
+          BytesRefVector strVector = strBlock.asVector();
+          if (strVector == null) {
+            return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
+          }
+          BytesRefVector substrVector = substrBlock.asVector();
+          if (substrVector == null) {
+            return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
+          }
+          IntVector startVector = startBlock.asVector();
+          if (startVector == null) {
+            return eval(page.getPositionCount(), strBlock, substrBlock, startBlock);
+          }
+          return eval(page.getPositionCount(), strVector, substrVector, startVector).asBlock();
+        }
+      }
+    }
+  }
+
+  /**
+   * Block-based evaluation. For each position the result is null when any input is null or does
+   * not hold exactly one value; when an input holds more than one value a warning is registered
+   * as well. Otherwise the single values are passed to {@code Locate.process}.
+   */
+  public IntBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock substrBlock,
+      IntBlock startBlock) {
+    try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) {
+      // Scratch objects handed to getBytesRef for every position instead of allocating per row.
+      BytesRef strScratch = new BytesRef();
+      BytesRef substrScratch = new BytesRef();
+      position: for (int p = 0; p < positionCount; p++) {
+        // Inputs are checked in the order str, substr, start; the first failing check decides the outcome.
+        if (strBlock.isNull(p)) {
+          result.appendNull();
+          continue position;
+        }
+        if (strBlock.getValueCount(p) != 1) {
+          if (strBlock.getValueCount(p) > 1) {
+            warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
+          }
+          result.appendNull();
+          continue position;
+        }
+        if (substrBlock.isNull(p)) {
+          result.appendNull();
+          continue position;
+        }
+        if (substrBlock.getValueCount(p) != 1) {
+          if (substrBlock.getValueCount(p) > 1) {
+            warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
+          }
+          result.appendNull();
+          continue position;
+        }
+        if (startBlock.isNull(p)) {
+          result.appendNull();
+          continue position;
+        }
+        if (startBlock.getValueCount(p) != 1) {
+          if (startBlock.getValueCount(p) > 1) {
+            warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value"));
+          }
+          result.appendNull();
+          continue position;
+        }
+        // All three inputs hold exactly one value here: read each at its first value index.
+        result.appendInt(Locate.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), substrBlock.getBytesRef(substrBlock.getFirstValueIndex(p), substrScratch), startBlock.getInt(startBlock.getFirstValueIndex(p))));
+      }
+      return result.build();
+    }
+  }
+
+  /**
+   * Vector-based evaluation: reads one value per position from each vector and applies
+   * {@code Locate.process}, without any null or value-count checks.
+   */
+  public IntVector eval(int positionCount, BytesRefVector strVector, BytesRefVector substrVector,
+      IntVector startVector) {
+    try(IntVector.Builder result = driverContext.blockFactory().newIntVectorBuilder(positionCount)) {
+      // Scratch objects handed to getBytesRef for every position instead of allocating per row.
+      BytesRef strScratch = new BytesRef();
+      BytesRef substrScratch = new BytesRef();
+      position: for (int p = 0; p < positionCount; p++) {
+        result.appendInt(Locate.process(strVector.getBytesRef(p, strScratch), substrVector.getBytesRef(p, substrScratch), startVector.getInt(p)));
+      }
+      return result.build();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "LocateEvaluator[" + "str=" + str + ", substr=" + substr + ", start=" + start + "]";
+  }
+
+  /**
+   * Closes the three child evaluators.
+   */
+  @Override
+  public void close() {
+    Releasables.closeExpectNoException(str, substr, start);
+  }
+
+  /**
+   * Factory that holds the child evaluator factories and creates a {@link LocateEvaluator}
+   * for a given {@link DriverContext}.
+   */
+  static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
+    private final Source source;
+
+    private final EvalOperator.ExpressionEvaluator.Factory str;
+
+    private final EvalOperator.ExpressionEvaluator.Factory substr;
+
+    private final EvalOperator.ExpressionEvaluator.Factory start;
+
+    public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str,
+        EvalOperator.ExpressionEvaluator.Factory substr,
+        EvalOperator.ExpressionEvaluator.Factory start) {
+      this.source = source;
+      this.str = str;
+      this.substr = substr;
+      this.start = start;
+    }
+
+    // Instantiates each child evaluator for the context, then wraps them in a LocateEvaluator.
+    @Override
+    public LocateEvaluator get(DriverContext context) {
+      return new LocateEvaluator(source, str.get(context), substr.get(context), start.get(context), context);
+    }
+
+    @Override
+    public String toString() {
+      return "LocateEvaluator[" + "str=" + str + ", substr=" + substr + ", start=" + start + "]";
+    }
+  }
+}

+ 3 - 1
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

@@ -90,6 +90,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Right;
@@ -174,7 +175,8 @@ public final class EsqlFunctionRegistry extends FunctionRegistry {
                 def(StartsWith.class, StartsWith::new, "starts_with"),
                 def(EndsWith.class, EndsWith::new, "ends_with"),
                 def(ToLower.class, ToLower::new, "to_lower"),
-                def(ToUpper.class, ToUpper::new, "to_upper") },
+                def(ToUpper.class, ToUpper::new, "to_upper"),
+                def(Locate.class, Locate::new, "locate") },
             // date
             new FunctionDefinition[] {
                 def(DateDiff.class, DateDiff::new, "date_diff"),

+ 140 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Locate.java

@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.elasticsearch.compute.ann.Evaluator;
+import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
+import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
+import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.ql.tree.NodeInfo;
+import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataType;
+import org.elasticsearch.xpack.ql.type.DataTypes;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Function;
+
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.FIRST;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.SECOND;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.ParamOrdinal.THIRD;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isInteger;
+import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;
+
+/**
+ * Locate function: given a string {@code str} and a substring {@code substr}, returns the 1-based position of the
+ * first occurrence of {@code substr} in {@code str}, or {@code 0} when {@code substr} does not occur.
+ * <p>
+ * An optional {@code start} position restricts the search to the part of {@code str} beginning at that position.
+ * Both {@code start} and the returned position are measured in Unicode code points.
+ */
+public class Locate extends EsqlScalarFunction implements OptionalArgument {
+
+    private final Expression str;
+    private final Expression substr;
+    // null when the optional third argument was not supplied
+    private final Expression start;
+
+    @FunctionInfo(
+        returnType = "integer",
+        description = "Returns an integer that indicates the position of a keyword substring within another string"
+    )
+    public Locate(
+        Source source,
+        @Param(name = "string", type = { "keyword", "text" }, description = "An input string") Expression str,
+        @Param(
+            name = "substring",
+            type = { "keyword", "text" },
+            description = "A substring to locate in the input string"
+        ) Expression substr,
+        @Param(optional = true, name = "start", type = { "integer" }, description = "The start index") Expression start
+    ) {
+        // the optional start is only registered as a child when it is present
+        super(source, start == null ? Arrays.asList(str, substr) : Arrays.asList(str, substr, start));
+        this.str = str;
+        this.substr = substr;
+        this.start = start;
+    }
+
+    @Override
+    public DataType dataType() {
+        return DataTypes.INTEGER;
+    }
+
+    /**
+     * Requires the first two arguments to be strings and, when present, the third one to be an integer.
+     */
+    @Override
+    protected TypeResolution resolveType() {
+        if (childrenResolved() == false) {
+            return new TypeResolution("Unresolved children");
+        }
+
+        TypeResolution resolution = isString(str, sourceText(), FIRST);
+        if (resolution.unresolved()) {
+            return resolution;
+        }
+        resolution = isString(substr, sourceText(), SECOND);
+        if (resolution.unresolved()) {
+            return resolution;
+        }
+
+        return start == null ? TypeResolution.TYPE_RESOLVED : isInteger(start, sourceText(), THIRD);
+    }
+
+    @Override
+    public boolean foldable() {
+        return str.foldable() && substr.foldable() && (start == null || start.foldable());
+    }
+
+    /**
+     * Finds {@code substr} in {@code str}, searching from {@code start}.
+     *
+     * @param str    the string to search in
+     * @param substr the substring to look for
+     * @param start  1-based start position in code points; {@code 0} is treated like {@code 1} and a negative value
+     *               is relative to the end of the string
+     * @return the 1-based code point position of the first match, or {@code 0} if there is none
+     */
+    @Evaluator
+    static int process(BytesRef str, BytesRef substr, int start) {
+        // a substring with more UTF-8 bytes than the string itself can never match
+        if (str == null || substr == null || str.length < substr.length) {
+            return 0;
+        }
+        int codePointCount = UnicodeUtil.codePointCount(str);
+        int indexStart = indexStart(codePointCount, start);
+        String utf8ToString = str.utf8ToString();
+        // String#indexOf works on UTF-16 code units, so the code point based start is converted on the way in ...
+        int idx = utf8ToString.indexOf(substr.utf8ToString(), utf8ToString.offsetByCodePoints(0, indexStart));
+        if (idx == -1) {
+            return 0;
+        }
+        // ... and the match index is converted back to code points on the way out, otherwise any supplementary
+        // character (e.g. an emoji) before the match would shift the reported position
+        return 1 + utf8ToString.codePointCount(0, idx);
+    }
+
+    /**
+     * Variant used when no start position was supplied: searches from the beginning of the string.
+     */
+    @Evaluator(extraName = "NoStart")
+    static int process(BytesRef str, BytesRef substr) {
+        return process(str, substr, 0);
+    }
+
+    /**
+     * Converts the user supplied 1-based {@code start} into a 0-based code point index clamped to
+     * {@code [0, codePointCount]}.
+     */
+    private static int indexStart(int codePointCount, int start) {
+        // esql is 1-based when it comes to string manipulation. We treat start = 0 and 1 the same
+        // a negative value is relative to the end of the string
+        int indexStart;
+        if (start > 0) {
+            indexStart = start - 1;
+        } else if (start < 0) {
+            indexStart = codePointCount + start; // start is negative, so this is a subtraction
+        } else {
+            indexStart = start; // start == 0
+        }
+        return Math.min(Math.max(0, indexStart), codePointCount); // sanitise string start index
+    }
+
+    @Override
+    public Expression replaceChildren(List<Expression> newChildren) {
+        // newChildren only has a third element when this instance was built with a start expression
+        return new Locate(source(), newChildren.get(0), newChildren.get(1), start == null ? null : newChildren.get(2));
+    }
+
+    @Override
+    protected NodeInfo<? extends Expression> info() {
+        return NodeInfo.create(this, Locate::new, str, substr, start);
+    }
+
+    /**
+     * Picks the two-argument evaluator when no start expression was supplied, the three-argument one otherwise.
+     */
+    @Override
+    public ExpressionEvaluator.Factory toEvaluator(Function<Expression, ExpressionEvaluator.Factory> toEvaluator) {
+        ExpressionEvaluator.Factory strExpr = toEvaluator.apply(str);
+        ExpressionEvaluator.Factory substrExpr = toEvaluator.apply(substr);
+        if (start == null) {
+            return new LocateNoStartEvaluator.Factory(source(), strExpr, substrExpr);
+        }
+        return new LocateEvaluator.Factory(source(), strExpr, substrExpr, toEvaluator.apply(start));
+    }
+}

+ 15 - 0
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java

@@ -113,6 +113,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.RLike;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace;
@@ -400,6 +401,7 @@ public final class PlanNamedTypes {
             of(ScalarFunction.class, SpatialContains.class, PlanNamedTypes::writeSpatialRelatesFunction, PlanNamedTypes::readContains),
             of(ScalarFunction.class, SpatialWithin.class, PlanNamedTypes::writeSpatialRelatesFunction, PlanNamedTypes::readWithin),
             of(ScalarFunction.class, Substring.class, PlanNamedTypes::writeSubstring, PlanNamedTypes::readSubstring),
+            of(ScalarFunction.class, Locate.class, PlanNamedTypes::writeLocate, PlanNamedTypes::readLocate),
             of(ScalarFunction.class, Left.class, PlanNamedTypes::writeLeft, PlanNamedTypes::readLeft),
             of(ScalarFunction.class, Right.class, PlanNamedTypes::writeRight, PlanNamedTypes::readRight),
             of(ScalarFunction.class, Split.class, PlanNamedTypes::writeSplit, PlanNamedTypes::readSplit),
@@ -1592,6 +1594,19 @@ public final class PlanNamedTypes {
         out.writeOptionalWriteable(fields.size() == 3 ? o -> out.writeExpression(fields.get(2)) : null);
     }
 
+    /**
+     * Reads a {@link Locate} from the stream: the source, the string and substring expressions and
+     * finally the optional start expression.
+     */
+    static Locate readLocate(PlanStreamInput in) throws IOException {
+        Source source = in.readSource();
+        Expression str = in.readExpression();
+        Expression substr = in.readExpression();
+        Expression start = in.readOptionalNamed(Expression.class);
+        return new Locate(source, str, substr, start);
+    }
+
+    static void writeLocate(PlanStreamOutput out, Locate locate) throws IOException {
+        out.writeSource(locate.source());
+        List<Expression> fields = locate.children();
+        assert fields.size() == 2 || fields.size() == 3;
+        out.writeExpression(fields.get(0));
+        out.writeExpression(fields.get(1));
+        out.writeOptionalWriteable(fields.size() == 3 ? o -> out.writeExpression(fields.get(2)) : null);
+    }
+
     static Replace readReplace(PlanStreamInput in) throws IOException {
         return new Replace(Source.EMPTY, in.readExpression(), in.readExpression(), in.readExpression());
     }

+ 175 - 0
x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/LocateTests.java

@@ -0,0 +1,175 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase;
+import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.expression.Literal;
+import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataType;
+import org.elasticsearch.xpack.ql.type.DataTypes;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Tests for {@link Locate} function.
+ */
+public class LocateTests extends AbstractFunctionTestCase {
+    public LocateTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCaseSupplier) {
+        this.testCase = testCaseSupplier.get();
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() {
+        // one randomized case per supported (string, substring) type combination
+        List<TestCaseSupplier> suppliers = new ArrayList<>();
+        suppliers.add(randomSupplier("keywords", DataTypes.KEYWORD, DataTypes.KEYWORD));
+        suppliers.add(randomSupplier("mixed keyword, text", DataTypes.KEYWORD, DataTypes.TEXT));
+        suppliers.add(randomSupplier("texts", DataTypes.TEXT, DataTypes.TEXT));
+        suppliers.add(randomSupplier("mixed text, keyword", DataTypes.TEXT, DataTypes.KEYWORD));
+        return parameterSuppliersFromTypedData(errorsForCasesWithoutExamples(anyNullIsNull(true, suppliers)));
+    }
+
+    public void testToString() {
+        assertThat(
+            evaluator(
+                new Locate(
+                    Source.EMPTY,
+                    field("str", DataTypes.KEYWORD),
+                    field("substr", DataTypes.KEYWORD),
+                    field("start", DataTypes.INTEGER)
+                )
+            ).get(driverContext()).toString(),
+            equalTo("LocateEvaluator[str=Attribute[channel=0], substr=Attribute[channel=1], start=Attribute[channel=2]]")
+        );
+    }
+
+    @Override
+    protected Expression build(Source source, List<Expression> args) {
+        return new Locate(source, args.get(0), args.get(1), args.size() < 3 ? null : args.get(2));
+    }
+
+    public void testPrefixString() {
+        assertThat(process("a tiger", "a t", 0), equalTo(1));
+        assertThat(process("a tiger", "a", 0), equalTo(1));
+        assertThat(process("界世", "界", 0), equalTo(1));
+    }
+
+    public void testSuffixString() {
+        assertThat(process("a tiger", "er", 0), equalTo(6));
+        assertThat(process("a tiger", "r", 0), equalTo(7));
+        assertThat(process("世界", "界", 0), equalTo(2));
+    }
+
+    public void testMidString() {
+        assertThat(process("a tiger", "ti", 0), equalTo(3));
+        assertThat(process("a tiger", "ige", 0), equalTo(4));
+        assertThat(process("世界世", "界", 0), equalTo(2));
+    }
+
+    public void testOutOfRange() {
+        assertThat(process("a tiger", "tigers", 0), equalTo(0));
+        assertThat(process("a tiger", "ipa", 0), equalTo(0));
+        assertThat(process("世界世", "\uD83C\uDF0D", 0), equalTo(0));
+    }
+
+    public void testExactString() {
+        assertThat(process("a tiger", "a tiger", 0), equalTo(1));
+        assertThat(process("tigers", "tigers", 0), equalTo(1));
+        assertThat(process("界世", "界世", 0), equalTo(1));
+    }
+
+    /**
+     * Runs {@link Locate} over a single row holding {@code str} and {@code substr}, with {@code start} supplied as
+     * a literal. A null result is reported as {@code 0}.
+     */
+    private Integer process(String str, String substr, Integer start) {
+        try (
+            EvalOperator.ExpressionEvaluator eval = evaluator(
+                new Locate(
+                    Source.EMPTY,
+                    field("str", DataTypes.KEYWORD),
+                    field("substr", DataTypes.KEYWORD),
+                    new Literal(Source.EMPTY, start, DataTypes.INTEGER)
+                )
+            ).get(driverContext());
+            Block block = eval.eval(row(List.of(new BytesRef(str), new BytesRef(substr))))
+        ) {
+            return block.isNull(0) ? Integer.valueOf(0) : ((Integer) toJavaObject(block, 0));
+        }
+    }
+
+    /**
+     * Builds a supplier for the given type combination using a random 10 code point string, a random 2 code point
+     * substring and a start of {@code 0}.
+     */
+    private static TestCaseSupplier randomSupplier(String name, DataType firstType, DataType secondType) {
+        return supplier(
+            name,
+            firstType,
+            secondType,
+            () -> randomRealisticUnicodeOfCodepointLength(10),
+            () -> randomRealisticUnicodeOfCodepointLength(2),
+            () -> 0
+        );
+    }
+
+    private static TestCaseSupplier supplier(
+        String name,
+        DataType firstType,
+        DataType secondType,
+        Supplier<String> strValueSupplier,
+        Supplier<String> substrValueSupplier,
+        Supplier<Integer> startSupplier
+    ) {
+        return new TestCaseSupplier(name, List.of(firstType, secondType), () -> {
+            List<TestCaseSupplier.TypedData> values = new ArrayList<>();
+            String expectedToString = "LocateEvaluator[str=Attribute[channel=0], substr=Attribute[channel=1], start=Attribute[channel=2]]";
+
+            String value = strValueSupplier.get();
+            values.add(new TestCaseSupplier.TypedData(new BytesRef(value), firstType, "0"));
+
+            String substrValue = substrValueSupplier.get();
+            values.add(new TestCaseSupplier.TypedData(new BytesRef(substrValue), secondType, "1"));
+
+            Integer startValue = startSupplier.get();
+            values.add(new TestCaseSupplier.TypedData(startValue, DataTypes.INTEGER, "2"));
+
+            // The random inputs may contain supplementary characters, so the expected 1-based position is counted
+            // in code points rather than in UTF-16 code units. The start value is not taken into account here;
+            // every supplier registered in parameters() passes 0.
+            int index = value.indexOf(substrValue);
+            int expectedValue = index < 0 ? 0 : 1 + value.codePointCount(0, index);
+            return new TestCaseSupplier.TestCase(values, expectedToString, DataTypes.INTEGER, equalTo(expectedValue));
+        });
+    }
+}