Browse Source

Support array for all string ingest processors

bellengao 5 years ago
parent
commit
8ffe5d1f94

+ 1 - 1
docs/reference/ingest/processors/bytes.asciidoc

@@ -1,6 +1,6 @@
 [[bytes-processor]]
 === Bytes Processor
-Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024).
+Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024). If the field is an array of strings, all members of the array will be converted.
 
 Supported human readable units are "b", "kb", "mb", "gb", "tb", "pb" case insensitive. An error will occur if
 the field is not a supported format or resultant value exceeds 2^63.

+ 1 - 1
docs/reference/ingest/processors/gsub.asciidoc

@@ -1,7 +1,7 @@
 [[gsub-processor]]
 === Gsub Processor
 Converts a string field by applying a regular expression and a replacement.
-If the field is not a string, the processor will throw an exception.
+If the field is an array of strings, all members of the array will be converted. If any non-string values are encountered, the processor will throw an exception.
 
 [[gsub-options]]
 .Gsub Options

+ 1 - 1
docs/reference/ingest/processors/html_strip.asciidoc

@@ -1,6 +1,6 @@
 [[htmlstrip-processor]]
 === HTML Strip Processor
-Removes HTML from field.
+Removes HTML tags from the field. If the field is an array of strings, HTML tags will be removed from all members of the array.
 
 NOTE: Each HTML tag is replaced with a `\n` character.
 

+ 1 - 1
docs/reference/ingest/processors/lowercase.asciidoc

@@ -1,6 +1,6 @@
 [[lowercase-processor]]
 === Lowercase Processor
-Converts a string to its lowercase equivalent.
+Converts a string to its lowercase equivalent. If the field is an array of strings, all members of the array will be converted.
 
 [[lowercase-options]]
 .Lowercase Options

+ 1 - 1
docs/reference/ingest/processors/trim.asciidoc

@@ -1,6 +1,6 @@
 [[trim-processor]]
 === Trim Processor
-Trims whitespace from field.
+Trims whitespace from the field. If the field is an array of strings, all members of the array will be trimmed.
 
 NOTE: This only works on leading and trailing whitespace.
 

+ 1 - 1
docs/reference/ingest/processors/uppercase.asciidoc

@@ -1,6 +1,6 @@
 [[uppercase-processor]]
 === Uppercase Processor
-Converts a string to its uppercase equivalent.
+Converts a string to its uppercase equivalent. If the field is an array of strings, all members of the array will be converted.
 
 [[uppercase-options]]
 .Uppercase Options

+ 1 - 1
docs/reference/ingest/processors/url-decode.asciidoc

@@ -1,6 +1,6 @@
 [[urldecode-processor]]
 === URL Decode Processor
-URL-decodes a string
+URL-decodes a string. If the field is an array of strings, all members of the array will be decoded.
 
 [[urldecode-options]]
 .URL Decode Options

+ 27 - 2
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java

@@ -24,6 +24,8 @@ import org.elasticsearch.ingest.ConfigurationUtils;
 import org.elasticsearch.ingest.IngestDocument;
 import org.elasticsearch.ingest.Processor;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -58,7 +60,8 @@ abstract class AbstractStringProcessor<T> extends AbstractProcessor {
 
     @Override
     public final IngestDocument execute(IngestDocument document) {
-        String val = document.getFieldValue(field, String.class, ignoreMissing);
+        Object val = document.getFieldValue(field, Object.class, ignoreMissing);
+        Object newValue;
 
         if (val == null && ignoreMissing) {
             return document;
@@ -66,7 +69,29 @@ abstract class AbstractStringProcessor<T> extends AbstractProcessor {
             throw new IllegalArgumentException("field [" + field + "] is null, cannot process it.");
         }
 
-        document.setFieldValue(targetField, process(val));
+        if (val instanceof List) {
+            List<?> list = (List<?>) val;
+            List<Object> newList = new ArrayList<>(list.size());
+            for (Object value : list) {
+                if (value instanceof String) {
+                    newList.add(process((String) value));
+                } else {
+                    throw new IllegalArgumentException("value [" + value + "] of type [" + value.getClass().getName() +
+                        "] in list field [" + field + "] cannot be cast to [" + String.class.getName() + "]");
+                }
+            }
+            newValue = newList;
+        } else {
+            if (val instanceof String) {
+                newValue = process((String) val);
+            } else {
+                throw new IllegalArgumentException("field [" + field + "] of type [" + val.getClass().getName() + "] cannot be cast to [" +
+                    String.class.getName() + "]");
+            }
+
+        }
+
+        document.setFieldValue(targetField, newValue);
         return document;
     }
 

+ 32 - 1
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java

@@ -24,8 +24,10 @@ import org.elasticsearch.ingest.Processor;
 import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.test.ESTestCase;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 
 import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
 import static org.hamcrest.Matchers.containsString;
@@ -41,7 +43,7 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
 
     protected abstract T expectedResult(String input);
 
-    protected Class<?> expectedResultType(){
+    protected Class<?> expectedResultType() {
         return String.class;  // most results types are Strings
     }
 
@@ -52,6 +54,19 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
         Processor processor = newProcessor(fieldName, randomBoolean(), fieldName);
         processor.execute(ingestDocument);
         assertThat(ingestDocument.getFieldValue(fieldName, expectedResultType()), equalTo(expectedResult(fieldValue)));
+
+        int numItems = randomIntBetween(1, 10);
+        List<String> fieldValueList = new ArrayList<>();
+        List<T> expectedList = new ArrayList<>();
+        for (int i = 0; i < numItems; i++) {
+            String randomString = RandomDocumentPicks.randomString(random());
+            fieldValueList.add(modifyInput(randomString));
+            expectedList.add(expectedResult(randomString));
+        }
+        String multiValueFieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValueList);
+        Processor multiValueProcessor = newProcessor(multiValueFieldName, randomBoolean(), multiValueFieldName);
+        multiValueProcessor.execute(ingestDocument);
+        assertThat(ingestDocument.getFieldValue(multiValueFieldName, List.class), equalTo(expectedList));
     }
 
     public void testFieldNotFound() throws Exception {
@@ -94,6 +109,14 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
         Exception e = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
         assertThat(e.getMessage(), equalTo("field [" + fieldName +
             "] of type [java.lang.Integer] cannot be cast to [java.lang.String]"));
+
+        List<Integer> fieldValueList = new ArrayList<>();
+        int randomValue = randomInt();
+        fieldValueList.add(randomValue);
+        ingestDocument.setFieldValue(fieldName, fieldValueList);
+        Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("value [" + randomValue + "] of type [java.lang.Integer] in list field [" + fieldName +
+            "] cannot be cast to [java.lang.String]"));
     }
 
     public void testNonStringValueWithIgnoreMissing() throws Exception {
@@ -104,6 +127,14 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
         Exception e = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
         assertThat(e.getMessage(), equalTo("field [" + fieldName +
             "] of type [java.lang.Integer] cannot be cast to [java.lang.String]"));
+
+        List<Integer> fieldValueList = new ArrayList<>();
+        int randomValue = randomInt();
+        fieldValueList.add(randomValue);
+        ingestDocument.setFieldValue(fieldName, fieldValueList);
+        Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("value [" + randomValue + "] of type [java.lang.Integer] in list field [" + fieldName +
+            "] cannot be cast to [java.lang.String]"));
     }
 
     public void testTargetField() throws Exception {