Selaa lähdekoodia

Add option to split processor for preserving trailing empty fields (#48664)

Dan Hermann 6 vuotta sitten
vanhempi
commit
fcc18dc19b

+ 18 - 0
docs/reference/ingest/processors/split.asciidoc

@@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f
 | `separator`       | yes       | -        | A regex which matches the separator, eg `,` or `\s+`
 | `target_field`    | no        | `field`  | The field to assign the split value to, by default `field` is updated in-place
 | `ignore_missing`  | no        | `false`  | If `true` and `field` does not exist, the processor quietly exits without modifying the document
+| `preserve_trailing` | no       | `false`  | If `true`, trailing empty fields in the input are preserved instead of being discarded
 include::common-options.asciidoc[]
 |======
 
@@ -25,3 +26,20 @@ include::common-options.asciidoc[]
 --------------------------------------------------
 // NOTCONSOLE
 <1> Treat all consecutive whitespace characters as a single separator
+
+If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example,
+in the configuration below, a value of `A,,B,,` in the `my_field` field will be split into an array of five elements
+`["A", "", "B", "", ""]` with two empty trailing fields. If the `preserve_trailing` option were not enabled, the two
+empty trailing fields would be discarded, resulting in the three-element array `["A", "", "B"]`.
+
+[source,js]
+--------------------------------------------------
+{
+  "split": {
+    "field": "my_field",
+    "separator": ",",
+    "preserve_trailing": true
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE

+ 9 - 4
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/SplitProcessor.java

@@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor {
     private final String field;
     private final String separator;
     private final boolean ignoreMissing;
+    private final boolean preserveTrailing;
     private final String targetField;
 
-    SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, String targetField) {
+    SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, boolean preserveTrailing, String targetField) {
         super(tag);
         this.field = field;
         this.separator = separator;
         this.ignoreMissing = ignoreMissing;
+        this.preserveTrailing = preserveTrailing;
         this.targetField = targetField;
     }
 
@@ -63,6 +65,8 @@ public final class SplitProcessor extends AbstractProcessor {
         return ignoreMissing;
     }
 
+    boolean isPreserveTrailing() { return preserveTrailing; }
+
     String getTargetField() {
         return targetField;
     }
@@ -77,7 +81,7 @@ public final class SplitProcessor extends AbstractProcessor {
             throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
         }
 
-        String[] strings = oldVal.split(separator);
+        String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0);
         List<String> splitList = new ArrayList<>(strings.length);
         Collections.addAll(splitList, strings);
         document.setFieldValue(targetField, splitList);
@@ -95,9 +99,10 @@ public final class SplitProcessor extends AbstractProcessor {
                                      Map<String, Object> config) throws Exception {
             String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
             boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
+            boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false);
             String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
-            return new SplitProcessor(processorTag, field,
-                ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField);
+            String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator");
+            return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField);
         }
     }
 }

+ 20 - 0
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorFactoryTests.java

@@ -79,6 +79,26 @@ public class SplitProcessorFactoryTests extends ESTestCase {
         assertThat(splitProcessor.getField(), equalTo("field1"));
         assertThat(splitProcessor.getSeparator(), equalTo("\\."));
         assertFalse(splitProcessor.isIgnoreMissing());
+        assertFalse(splitProcessor.isPreserveTrailing());
         assertThat(splitProcessor.getTargetField(), equalTo("target"));
     }
+
+    public void testCreateWithPreserveTrailing() throws Exception {
+        SplitProcessor.Factory factory = new SplitProcessor.Factory();
+        Map<String, Object> config = new HashMap<>();
+        config.put("field", "field1");
+        config.put("separator", "\\.");
+        config.put("target_field", "target");
+        config.put("preserve_trailing", true);
+        String processorTag = randomAlphaOfLength(10);
+        SplitProcessor splitProcessor = factory.create(null, processorTag, config);
+        assertThat(splitProcessor.getTag(), equalTo(processorTag));
+        assertThat(splitProcessor.getField(), equalTo("field1"));
+        assertThat(splitProcessor.getSeparator(), equalTo("\\."));
+        assertFalse(splitProcessor.isIgnoreMissing());
+        // The option under test: the factory must carry "preserve_trailing" from the config into the processor.
+        assertTrue(splitProcessor.isPreserveTrailing());
+        assertThat(splitProcessor.getTargetField(), equalTo("target"));
+    }
+
 }

+ 23 - 7
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorTests.java

@@ -39,7 +39,7 @@ public class SplitProcessorTests extends ESTestCase {
     public void testSplit() throws Exception {
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
         String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
         processor.execute(ingestDocument);
         assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
     }
@@ -47,7 +47,7 @@ public class SplitProcessorTests extends ESTestCase {
     public void testSplitFieldNotFound() throws Exception {
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
         String fieldName = RandomDocumentPicks.randomFieldName(random());
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
         try {
             processor.execute(ingestDocument);
             fail("split processor should have failed");
@@ -59,7 +59,7 @@ public class SplitProcessorTests extends ESTestCase {
     public void testSplitNullValue() throws Exception {
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
             Collections.singletonMap("field", null));
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field");
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field");
         try {
             processor.execute(ingestDocument);
             fail("split processor should have failed");
@@ -73,7 +73,7 @@ public class SplitProcessorTests extends ESTestCase {
         IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
             Collections.singletonMap(fieldName, null));
         IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName);
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName);
         processor.execute(ingestDocument);
         assertIngestDocument(originalIngestDocument, ingestDocument);
     }
@@ -81,7 +81,7 @@ public class SplitProcessorTests extends ESTestCase {
     public void testSplitNonExistentWithIgnoreMissing() throws Exception {
         IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
         IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, "field");
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field");
         processor.execute(ingestDocument);
         assertIngestDocument(originalIngestDocument, ingestDocument);
     }
@@ -90,7 +90,7 @@ public class SplitProcessorTests extends ESTestCase {
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
         String fieldName = RandomDocumentPicks.randomFieldName(random());
         ingestDocument.setFieldValue(fieldName, randomInt());
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
         try {
             processor.execute(ingestDocument);
             fail("split processor should have failed");
@@ -121,8 +121,24 @@ public class SplitProcessorTests extends ESTestCase {
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
         String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
         String targetFieldName = fieldName + randomAlphaOfLength(5);
-        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName);
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName);
         processor.execute(ingestDocument);
         assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
     }
+
+    public void testSplitWithPreserveTrailing() throws Exception {
+        doTestSplitWithPreserveTrailing(true, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz", "", ""));
+    }
+
+    public void testSplitWithoutPreserveTrailing() throws Exception {
+        doTestSplitWithPreserveTrailing(false, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz"));
+    }
+
+    private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List<String> expected) throws Exception {
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
+        String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValue);
+        Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\|", false, preserveTrailing, fieldName);
+        processor.execute(ingestDocument);
+        assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(expected));
+    }
 }