1
0
Эх сурвалжийг харах

SQL: Implement TRIM function (#57518)

Add `TRIM` function which combines the functionality of both
`LTRIM` and `RTRIM` by stripping both leading and trailing
whitespaces.

Refers to #41195
Marios Trivyzas 5 жил өмнө
parent
commit
6c86c919e1

+ 1 - 0
docs/reference/sql/functions/index.asciidoc

@@ -129,6 +129,7 @@
 ** <<sql-functions-string-rtrim>>
 ** <<sql-functions-string-space>>
 ** <<sql-functions-string-substring>>
+** <<sql-functions-string-trim>>
 ** <<sql-functions-string-ucase>>
 * <<sql-functions-type-conversion>>
 ** <<sql-functions-type-conversion-cast>>

+ 20 - 0
docs/reference/sql/functions/string.asciidoc

@@ -477,6 +477,26 @@ SUBSTRING(
 --------------------------------------------------
 include-tagged::{sql-specs}/docs/docs.csv-spec[stringSubString]
 --------------------------------------------------
+[[sql-functions-string-trim]]
+==== `TRIM`
+
+.Synopsis:
+[source, sql]
+--------------------------------------------------
+TRIM(string_exp) <1>
+--------------------------------------------------
+*Input*:
+
+<1> string expression
+
+*Output*: string
+
+*Description*: Returns the characters of `string_exp`, with leading and trailing blanks removed.
+
+[source, sql]
+--------------------------------------------------
+include-tagged::{sql-specs}/docs/docs.csv-spec[stringTrim]
+--------------------------------------------------
 
 [[sql-functions-string-ucase]]
 ==== `UCASE`

+ 2 - 1
x-pack/plugin/sql/qa/server/src/main/resources/command.csv-spec

@@ -147,7 +147,8 @@ RIGHT            |SCALAR
 RTRIM            |SCALAR         
 SPACE            |SCALAR         
 STARTS_WITH      |SCALAR
-SUBSTRING        |SCALAR         
+SUBSTRING        |SCALAR
+TRIM             |SCALAR
 UCASE            |SCALAR
 CAST             |SCALAR
 CONVERT          |SCALAR

+ 12 - 1
x-pack/plugin/sql/qa/server/src/main/resources/docs/docs.csv-spec

@@ -343,7 +343,8 @@ RIGHT            |SCALAR
 RTRIM            |SCALAR         
 SPACE            |SCALAR         
 STARTS_WITH      |SCALAR
-SUBSTRING        |SCALAR         
+SUBSTRING        |SCALAR
+TRIM             |SCALAR
 UCASE            |SCALAR
 CAST             |SCALAR
 CONVERT          |SCALAR
@@ -1842,6 +1843,16 @@ Elastic
 // end::stringSubString
 ;
 
+stringTrim
+// tag::stringTrim
+SELECT TRIM('   Elastic   ') AS trimmed;
+
+trimmed
+--------------
+Elastic
+// end::stringTrim
+;
+
 stringUCase
 // tag::stringUCase
 SELECT UCASE('Elastic');

+ 17 - 0
x-pack/plugin/sql/qa/server/src/main/resources/string-functions.sql-spec

@@ -154,6 +154,12 @@ SELECT SUBSTRING('Elasticsearch', 1, 15) sub;
 substringInline3
 SELECT SUBSTRING('Elasticsearch', 10, 10) sub;
 
+trimFilter
+SELECT TRIM(CONCAT(CONCAT('   ', first_name), '   ')) trimmed FROM "test_emp" WHERE TRIM(CONCAT(CONCAT('   ', first_name), '   ')) = 'Bob';
+
+trimInline
+SELECT TRIM('   Elastic   ') trimmed1, TRIM('             ') trimmed2;
+
 ucaseFilter
 SELECT UCASE(gender) uppercased, COUNT(*) count FROM "test_emp" WHERE UCASE(gender) = 'F' GROUP BY UCASE(gender);
 
@@ -188,6 +194,17 @@ SELECT RTRIM(first_name) rt FROM "test_emp" GROUP BY RTRIM(first_name) HAVING CO
 ltrimGroupByAndOrderBy
 SELECT LTRIM(first_name) lt FROM "test_emp" GROUP BY LTRIM(first_name) HAVING COUNT(*)>1;
 
+trimOrderBy
+SELECT TRIM(CONCAT(CONCAT('    ', first_name), '    ')) trimmed FROM "test_emp" ORDER BY 1;
+
+trimGroupBy
+SELECT TRIM(CONCAT(CONCAT('    ', first_name), '    ')) trimmed FROM "test_emp" GROUP BY TRIM(CONCAT(CONCAT('    ', first_name), '    ')) ORDER BY 1;
+
+// Having on MAX/MIN(<string>) not supported: https://github.com/elastic/elasticsearch/issues/37938
+trimGroupByAndHaving-Ignore
+SELECT MAX(CONCAT(CONCAT('   ', first_name), '    ')) max trimmed FROM "test_emp" GROUP BY gender HAVING MAX(CONCAT(CONCAT('   ', gender), '    ')) = 'Zvonko' ORDER BY 1;
+
+
 spaceGroupByWithCharLength
 SELECT CAST(CHAR_LENGTH(SPACE(languages)) AS INT) cls FROM "test_emp" GROUP BY CHAR_LENGTH(SPACE(languages)) ORDER BY CHAR_LENGTH(SPACE(languages)) ASC;
 

+ 2 - 0
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/SqlFunctionRegistry.java

@@ -106,6 +106,7 @@ import org.elasticsearch.xpack.sql.expression.function.scalar.string.Replace;
 import org.elasticsearch.xpack.sql.expression.function.scalar.string.Right;
 import org.elasticsearch.xpack.sql.expression.function.scalar.string.Space;
 import org.elasticsearch.xpack.sql.expression.function.scalar.string.Substring;
+import org.elasticsearch.xpack.sql.expression.function.scalar.string.Trim;
 import org.elasticsearch.xpack.sql.expression.function.scalar.string.UCase;
 import org.elasticsearch.xpack.sql.expression.predicate.conditional.Case;
 import org.elasticsearch.xpack.sql.expression.predicate.conditional.Coalesce;
@@ -243,6 +244,7 @@ public class SqlFunctionRegistry extends FunctionRegistry {
                 def(Space.class, Space::new, "SPACE"),
                 def(StartsWith.class, StartsWith::new, "STARTS_WITH"),
                 def(Substring.class, Substring::new, "SUBSTRING"),
+                def(Trim.class, Trim::new, "TRIM"),
                 def(UCase.class, UCase::new, "UCASE")
             },
         // DataType conversion

+ 5 - 3
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/string/StringFunctionUtils.java

@@ -7,7 +7,9 @@ package org.elasticsearch.xpack.sql.expression.function.scalar.string;
 
 import static org.elasticsearch.common.Strings.hasLength;
 
-abstract class StringFunctionUtils {
+final class StringFunctionUtils {
+
+    private StringFunctionUtils() {}
 
     /**
      * Extract a substring from the given string, using start index and length of the extracted substring.
@@ -41,7 +43,7 @@ abstract class StringFunctionUtils {
      * @return the resulting String
      */
     static String trimTrailingWhitespaces(String s) {
-        if (!hasLength(s)) {
+        if (hasLength(s) == false) {
             return s;
         }
 
@@ -60,7 +62,7 @@ abstract class StringFunctionUtils {
      * @return the resulting String
      */
     static String trimLeadingWhitespaces(String s) {
-        if (!hasLength(s)) {
+        if (hasLength(s) == false) {
             return s;
         }
 

+ 2 - 1
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/string/StringProcessor.java

@@ -53,6 +53,7 @@ public class StringProcessor implements Processor {
         LENGTH((String s) -> StringFunctionUtils.trimTrailingWhitespaces(s).length()),
         RTRIM((String s) -> StringFunctionUtils.trimTrailingWhitespaces(s)),
         LTRIM((String s) -> StringFunctionUtils.trimLeadingWhitespaces(s)),
+        TRIM(String::trim),
         SPACE((Number n) -> {
             int i = n.intValue();
             if (i < 0) {
@@ -141,4 +142,4 @@ public class StringProcessor implements Processor {
     public String toString() {
         return processor.toString();
     }
-}
+}

+ 43 - 0
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/string/Trim.java

@@ -0,0 +1,43 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.sql.expression.function.scalar.string;
+
+import org.elasticsearch.xpack.ql.expression.Expression;
+import org.elasticsearch.xpack.ql.tree.NodeInfo;
+import org.elasticsearch.xpack.ql.tree.Source;
+import org.elasticsearch.xpack.ql.type.DataType;
+import org.elasticsearch.xpack.ql.type.DataTypes;
+import org.elasticsearch.xpack.sql.expression.function.scalar.string.StringProcessor.StringOperation;
+
+/**
+ * Trims both leading and trailing whitespaces.
+ */
+public class Trim extends UnaryStringFunction {
+
+    public Trim(Source source, Expression field) {
+        super(source, field);
+    }
+
+    @Override
+    protected NodeInfo<Trim> info() {
+        return NodeInfo.create(this, Trim::new, field());
+    }
+
+    @Override
+    protected Trim replaceChild(Expression newChild) {
+        return new Trim(source(), newChild);
+    }
+
+    @Override
+    protected StringOperation operation() {
+        return StringOperation.TRIM;
+    }
+
+    @Override
+    public DataType dataType() {
+        return DataTypes.KEYWORD;
+    }
+}

+ 4 - 0
x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/whitelist/InternalSqlScriptUtils.java

@@ -425,6 +425,10 @@ public class InternalSqlScriptUtils extends InternalQlScriptUtils {
         return (String) SubstringFunctionProcessor.doProcess(s, start, length);
     }
 
+    public static String trim(String s) {
+        return (String) StringOperation.TRIM.apply(s);
+    }
+
     public static String ucase(String s) {
         return (String) StringOperation.UCASE.apply(s);
     }

+ 1 - 0
x-pack/plugin/sql/src/main/resources/org/elasticsearch/xpack/sql/plugin/sql_whitelist.txt

@@ -164,6 +164,7 @@ class org.elasticsearch.xpack.sql.expression.function.scalar.whitelist.InternalS
   String  rtrim(String)
   String  space(Number)
   String  substring(String, Number, Number)
+  String  trim(String)
   String  ucase(String)
 
 #

+ 41 - 6
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/function/scalar/string/StringFunctionProcessorTests.java

@@ -148,9 +148,10 @@ public class StringFunctionProcessorTests extends AbstractWireSerializingTestCas
         assertNull(proc.process(null));
         assertEquals("foo bar", proc.process("foo bar"));
         assertEquals("", proc.process(""));
-        assertEquals("", proc.process("    "));
-        assertEquals("foo bar", proc.process("foo bar   "));
-        assertEquals("   foo bar", proc.process("   foo bar   "));
+        assertEquals("", proc.process(withRandomWhitespaces(" \t  \r\n \n ", true, true)));
+        assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", false, true)));
+        assertEquals("    foo   bar", proc.process(withRandomWhitespaces("    foo   bar", false, true)));
+        assertEquals(" \t \n \r\n foo \t \r\n \n bar", proc.process(withRandomWhitespaces(" \t \n \r\n foo \t \r\n \n bar", false, true)));
         assertEquals("f", proc.process('f'));
 
         stringCharInputValidation(proc);
@@ -161,9 +162,25 @@ public class StringFunctionProcessorTests extends AbstractWireSerializingTestCas
         assertNull(proc.process(null));
         assertEquals("foo bar", proc.process("foo bar"));
         assertEquals("", proc.process(""));
-        assertEquals("", proc.process("    "));
-        assertEquals("foo bar", proc.process("   foo bar"));
-        assertEquals("foo bar   ", proc.process("   foo bar   "));
+        assertEquals("", proc.process(withRandomWhitespaces(" \t  \r\n \n ", true, true)));
+        assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", true, false)));
+        assertEquals("foo   bar   ", proc.process(withRandomWhitespaces("foo   bar   ", true, false)));
+        assertEquals("foo \t \r\n \n bar \t \r\n \n ", proc.process(withRandomWhitespaces("foo \t \r\n \n bar \t \r\n \n ", true, false)));
+        assertEquals("f", proc.process('f'));
+
+        stringCharInputValidation(proc);
+    }
+
+    public void testTrim() {
+        StringProcessor proc = new StringProcessor(StringOperation.TRIM);
+        assertNull(proc.process(null));
+        assertEquals("foo bar", proc.process("foo bar"));
+        assertEquals("", proc.process(""));
+        assertEquals("", proc.process(withRandomWhitespaces(" \t  \r\n \n ", true, true)));
+        assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar", true, false)));
+        assertEquals("foo   bar", proc.process(withRandomWhitespaces("foo   bar", false, true)));
+        assertEquals("foo bar", proc.process(withRandomWhitespaces("foo bar",true, true)));
+        assertEquals("foo \t \r\n \n bar", proc.process(withRandomWhitespaces("foo \t \r\n \n bar", true, true)));
         assertEquals("f", proc.process('f'));
 
         stringCharInputValidation(proc);
@@ -215,4 +232,22 @@ public class StringFunctionProcessorTests extends AbstractWireSerializingTestCas
 
         stringCharInputValidation(proc);
     }
+
+    private static String withRandomWhitespaces(String str, boolean prefix, boolean suffix) {
+        if (prefix == false && suffix == false) {
+            return str;
+        }
+
+        StringBuilder sb = new StringBuilder(str);
+        int noWhitespaces = randomIntBetween(1, 100);
+        for (int i = 0; i < noWhitespaces; i++) {
+            if (prefix) {
+                sb.insert(0, randomFrom(" ", "\t", "\n", "\r", "\r\n"));
+            }
+            if (suffix) {
+                sb.append(randomFrom(" ", "\t", "\n", "\r", "\r\n"));
+            }
+        }
+        return sb.toString();
+    }
 }

+ 50 - 0
x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java

@@ -56,6 +56,10 @@ import org.elasticsearch.xpack.sql.expression.function.scalar.Cast;
 import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeProcessor.DateTimeExtractor;
 import org.elasticsearch.xpack.sql.expression.function.scalar.math.MathProcessor.MathOperation;
 import org.elasticsearch.xpack.sql.expression.function.scalar.math.Round;
+import org.elasticsearch.xpack.sql.expression.function.scalar.string.LTrim;
+import org.elasticsearch.xpack.sql.expression.function.scalar.string.RTrim;
+import org.elasticsearch.xpack.sql.expression.function.scalar.string.Trim;
+import org.elasticsearch.xpack.sql.expression.function.scalar.string.UnaryStringFunction;
 import org.elasticsearch.xpack.sql.optimizer.Optimizer;
 import org.elasticsearch.xpack.sql.parser.SqlParser;
 import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec;
@@ -741,6 +745,52 @@ public class QueryTranslatorTests extends ESTestCase {
         assertEquals("[{v=keyword}, {v=xyz}, {v=true}]", sq.script().params().toString());
     }
 
+    @SuppressWarnings("unchecked")
+    public void testTrim_WhereClause_Painless() {
+        Class<? extends UnaryStringFunction> trimFunction = randomFrom(Trim.class, LTrim.class, RTrim.class);
+        String trimFunctionName = trimFunction.getSimpleName().toUpperCase(Locale.ROOT);
+        LogicalPlan p = plan("SELECT " + trimFunctionName + "(keyword) trimmed FROM test WHERE " + trimFunctionName + "(keyword) = 'foo'");
+
+        assertTrue(p instanceof Project);
+        p = ((Project) p).child();
+        assertTrue(p instanceof Filter);
+        Expression condition = ((Filter) p).condition();
+        QueryTranslation qt = translate(condition);
+        assertTrue(qt.query instanceof ScriptQuery);
+        ScriptQuery sc = (ScriptQuery) qt.query;
+        assertEquals("InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.eq(InternalSqlScriptUtils." +
+                trimFunctionName.toLowerCase(Locale.ROOT) + "(InternalQlScriptUtils.docValue(doc,params.v0)),params.v1))",
+            sc.script().toString());
+        assertEquals("[{v=keyword}, {v=foo}]", sc.script().params().toString());
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testTrim_GroupBy_Painless() {
+        Class<? extends UnaryStringFunction> trimFunction = randomFrom(Trim.class, LTrim.class, RTrim.class);
+        String trimFunctionName = trimFunction.getSimpleName().toUpperCase(Locale.ROOT);
+        LogicalPlan p = plan("SELECT " + trimFunctionName + "(keyword) trimmed, count(*) FROM test GROUP BY " +
+            trimFunctionName + "(keyword)");
+
+        assertEquals(Aggregate.class, p.getClass());
+        Aggregate agg = (Aggregate) p;
+        assertEquals(1, agg.groupings().size());
+        assertEquals(2, agg.aggregates().size());
+        assertEquals(trimFunction, agg.groupings().get(0).getClass());
+        assertEquals(trimFunction, ((Alias) agg.aggregates().get(0)).child().getClass());
+        assertEquals(Count.class,((Alias) agg.aggregates().get(1)).child().getClass());
+
+        UnaryStringFunction trim = (UnaryStringFunction) agg.groupings().get(0);
+        assertEquals(1, trim.children().size());
+
+        GroupingContext groupingContext = QueryFolder.FoldAggregate.groupBy(agg.groupings());
+        assertNotNull(groupingContext);
+        ScriptTemplate scriptTemplate = groupingContext.tail.script();
+        assertEquals("InternalSqlScriptUtils." + trimFunctionName.toLowerCase(Locale.ROOT) +
+                "(InternalQlScriptUtils.docValue(doc,params.v0))",
+            scriptTemplate.toString());
+        assertEquals("[{v=keyword}]", scriptTemplate.params().toString());
+    }
+
     public void testTranslateNotExpression_WhereClause_Painless() {
         LogicalPlan p = plan("SELECT * FROM test WHERE NOT(POSITION('x', keyword) = 0)");
         assertTrue(p instanceof Project);