Browse Source

Add support for multi-value input to DISSECT command (ESQL-1484)

Luigi Dell'Aquila 2 years ago
parent
commit
1e3f92b7c9

+ 46 - 9
x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/StringExtractOperator.java

@@ -72,18 +72,55 @@ public class StringExtractOperator extends AbstractPageMappingOperator {
                 continue;
             }
 
-            // For now more than a single input value will just read the first one
             int position = input.getFirstValueIndex(row);
-            Map<String, String> items = parser.apply(input.getBytesRef(position, spare).utf8ToString());
-            if (items == null) {
+            int valueCount = input.getValueCount(row);
+            if (valueCount == 1) {
+                Map<String, String> items = parser.apply(input.getBytesRef(position, spare).utf8ToString());
+                if (items == null) {
+                    for (int i = 0; i < fieldNames.length; i++) {
+                        blockBuilders[i].appendNull();
+                    }
+                    continue;
+                }
                 for (int i = 0; i < fieldNames.length; i++) {
-                    blockBuilders[i].appendNull();
+                    String val = items.get(fieldNames[i]);
+                    BlockUtils.appendValue(blockBuilders[i], val, ElementType.BYTES_REF);
+                }
+            } else {
+                // multi-valued input
+                String[] firstValues = new String[fieldNames.length];
+                boolean[] positionEntryOpen = new boolean[fieldNames.length];
+                for (int c = 0; c < valueCount; c++) {
+                    Map<String, String> items = parser.apply(input.getBytesRef(position + c, spare).utf8ToString());
+                    if (items == null) {
+                        continue;
+                    }
+                    for (int i = 0; i < fieldNames.length; i++) {
+                        String val = items.get(fieldNames[i]);
+                        if (val == null) {
+                            continue;
+                        }
+                        if (firstValues[i] == null) {
+                            firstValues[i] = val;
+                        } else {
+                            if (positionEntryOpen[i] == false) {
+                                positionEntryOpen[i] = true;
+                                blockBuilders[i].beginPositionEntry();
+                                BlockUtils.appendValue(blockBuilders[i], firstValues[i], ElementType.BYTES_REF);
+                            }
+                            BlockUtils.appendValue(blockBuilders[i], val, ElementType.BYTES_REF);
+                        }
+                    }
+                }
+                for (int i = 0; i < fieldNames.length; i++) {
+                    if (positionEntryOpen[i]) {
+                        blockBuilders[i].endPositionEntry();
+                    } else if (firstValues[i] == null) {
+                        blockBuilders[i].appendNull();
+                    } else {
+                        BlockUtils.appendValue(blockBuilders[i], firstValues[i], ElementType.BYTES_REF);
+                    }
                 }
-                continue;
-            }
-            for (int i = 0; i < fieldNames.length; i++) {
-                String val = items.get(fieldNames[i]);
-                BlockUtils.appendValue(blockBuilders[i], val, ElementType.BYTES_REF);
             }
         }
 

+ 39 - 0
x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/StringExtractOperatorTests.java

@@ -10,6 +10,7 @@ package org.elasticsearch.compute.operator;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BytesRefBlock;
 import org.elasticsearch.compute.data.Page;
 
@@ -20,6 +21,8 @@ import java.util.function.Supplier;
 import java.util.stream.Collectors;
 import java.util.stream.LongStream;
 
+import static org.hamcrest.Matchers.equalTo;
+
 public class StringExtractOperatorTests extends OperatorTestCase {
     @Override
     protected SourceOperator simpleInput(int end) {
@@ -71,4 +74,40 @@ public class StringExtractOperatorTests extends OperatorTestCase {
         assumeTrue("doesn't use big arrays so can't break", false);
         return null;
     }
+
+    public void testMultivalueDissectInput() {
+
+        StringExtractOperator operator = new StringExtractOperator(
+            new String[] { "test" },
+            (page) -> page.getBlock(0),
+            new FirstWord("test")
+        );
+
+        BytesRefBlock.Builder builder = BytesRefBlock.newBlockBuilder(1);
+        builder.beginPositionEntry();
+        builder.appendBytesRef(new BytesRef("foo1 bar1"));
+        builder.appendBytesRef(new BytesRef("foo2 bar2"));
+        builder.endPositionEntry();
+        builder.beginPositionEntry();
+        builder.appendBytesRef(new BytesRef("foo3 bar3"));
+        builder.appendBytesRef(new BytesRef("foo4 bar4"));
+        builder.appendBytesRef(new BytesRef("foo5 bar5"));
+        builder.endPositionEntry();
+        Page page = new Page(builder.build());
+
+        Page result = operator.process(page);
+        Block resultBlock = result.getBlock(1);
+        assertThat(resultBlock.getPositionCount(), equalTo(2));
+        assertThat(resultBlock.getValueCount(0), equalTo(2));
+        assertThat(resultBlock.getValueCount(1), equalTo(3));
+        BytesRefBlock brb = (BytesRefBlock) resultBlock;
+        BytesRef spare = new BytesRef("");
+        int idx = brb.getFirstValueIndex(0);
+        assertThat(brb.getBytesRef(idx, spare).utf8ToString(), equalTo("foo1"));
+        assertThat(brb.getBytesRef(idx + 1, spare).utf8ToString(), equalTo("foo2"));
+        idx = brb.getFirstValueIndex(1);
+        assertThat(brb.getBytesRef(idx, spare).utf8ToString(), equalTo("foo3"));
+        assertThat(brb.getBytesRef(idx + 1, spare).utf8ToString(), equalTo("foo4"));
+        assertThat(brb.getBytesRef(idx + 2, spare).utf8ToString(), equalTo("foo5"));
+    }
 }

+ 7 - 7
x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec

@@ -146,11 +146,11 @@ Bezalel Simmel    | Bezalel        | Simmel
 multivalueInput
 from employees | where emp_no <= 10006 | dissect job_positions "%{a} %{b} %{c}" | sort emp_no | keep emp_no, a, b, c;
 
-emp_no:integer | a:keyword | b:keyword | c:keyword
-10001          | null      | null      | null
-10002          | Senior    | Team      | Lead
-10003          | null      | null      | null
-10004          | Head      | Human     | Resources
-10005          | null      | null      | null 
-10006          | Principal | Support   | Engineer
+emp_no:integer | a:keyword            | b:keyword         | c:keyword
+10001          | Senior               | Python            | Developer
+10002          | Senior               | Team              | Lead
+10003          | null                 | null              | null
+10004          | Head                 | Human             | Resources
+10005          | null                 | null              | null 
+10006          | [Principal, Senior]  | [Support, Team]   | [Engineer, Lead]
 ;