Sfoglia il codice sorgente

Json processor: allow duplicate keys (#74956)

Felix Barnsteiner 4 anni fa
parent
commit
67fbc337ea

+ 6 - 4
docs/reference/ingest/processors/json.asciidoc

@@ -10,10 +10,12 @@ Converts a JSON string into a structured JSON object.
 .Json Options
 [options="header"]
 |======
-| Name           | Required  | Default  | Description
-| `field`        | yes       | -        | The field to be parsed.
-| `target_field` | no        | `field`  | The field that the converted structured object will be written into. Any existing content in this field will be overwritten.
-| `add_to_root`  | no        | false    | Flag that forces the serialized json to be injected into the top level of the document. `target_field` must not be set when this option is chosen.
+| Name                         | Required  | Default  | Description
+| `field`                      | yes       | -        | The field to be parsed.
+| `target_field`               | no        | `field`  | The field that the converted structured object will be written into. Any existing content in this field will be overwritten.
+| `add_to_root`                | no        | false    | Flag that forces the serialized json to be injected into the top level of the document. `target_field` must not be set when this option is chosen.
+| `allow_duplicate_keys`       | no        | false    | When set to `true`, the JSON parser will not fail if the JSON contains duplicate keys.
+                                                        Instead, the latest value wins. Allowing duplicate keys also improves execution time.
 include::common-options.asciidoc[]
 |======
 

+ 5 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/FilterXContentParser.java

@@ -33,6 +33,11 @@ public abstract class FilterXContentParser implements XContentParser {
         return in.contentType();
     }
 
+    @Override
+    public void allowDuplicateKeys(boolean allowDuplicateKeys) {
+        in.allowDuplicateKeys(allowDuplicateKeys);
+    }
+
     @Override
     public Token nextToken() throws IOException {
         return in.nextToken();

+ 2 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java

@@ -112,6 +112,8 @@ public interface XContentParser extends Closeable {
 
     XContentType contentType();
 
+    void allowDuplicateKeys(boolean allowDuplicateKeys);
+
     Token nextToken() throws IOException;
 
     void skipChildren() throws IOException;

+ 5 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentSubParser.java

@@ -43,6 +43,11 @@ public class XContentSubParser implements XContentParser {
         return parser.contentType();
     }
 
+    @Override
+    public void allowDuplicateKeys(boolean allowDuplicateKeys) {
+        parser.allowDuplicateKeys(allowDuplicateKeys);
+    }
+
     @Override
     public Token nextToken() throws IOException {
         if (level > 0) {

+ 5 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContentParser.java

@@ -32,4 +32,9 @@ public class CborXContentParser extends JsonXContentParser {
     public XContentType contentType() {
         return XContentType.CBOR;
     }
+
+    @Override
+    public void allowDuplicateKeys(boolean allowDuplicateKeys) {
+        throw new UnsupportedOperationException("Allowing duplicate keys after the parser has been created is not possible for CBOR");
+    }
 }

+ 5 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java

@@ -44,6 +44,11 @@ public class JsonXContentParser extends AbstractXContentParser {
         return XContentType.JSON;
     }
 
+    @Override
+    public void allowDuplicateKeys(boolean allowDuplicateKeys) {
+        parser.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, allowDuplicateKeys == false);
+    }
+
     @Override
     public Token nextToken() throws IOException {
         return convertToken(parser.nextToken());

+ 5 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContentParser.java

@@ -32,4 +32,9 @@ public class SmileXContentParser extends JsonXContentParser {
     public XContentType contentType() {
         return XContentType.SMILE;
     }
+
+    @Override
+    public void allowDuplicateKeys(boolean allowDuplicateKeys) {
+        throw new UnsupportedOperationException("Allowing duplicate keys after the parser has been created is not possible for Smile");
+    }
 }

+ 5 - 0
libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/MapXContentParser.java

@@ -90,6 +90,11 @@ public class MapXContentParser extends AbstractXContentParser {
         return xContentType;
     }
 
+    @Override
+    public void allowDuplicateKeys(boolean allowDuplicateKeys) {
+        throw new UnsupportedOperationException("Allowing duplicate keys is not possible for maps");
+    }
+
     @Override
     public Token nextToken() throws IOException {
         if (iterator == null) {

+ 12 - 8
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/JsonProcessor.java

@@ -36,12 +36,14 @@ public final class JsonProcessor extends AbstractProcessor {
     private final String field;
     private final String targetField;
     private final boolean addToRoot;
+    private final boolean allowDuplicateKeys;
 
-    JsonProcessor(String tag, String description, String field, String targetField, boolean addToRoot) {
+    JsonProcessor(String tag, String description, String field, String targetField, boolean addToRoot, boolean allowDuplicateKeys) {
         super(tag, description);
         this.field = field;
         this.targetField = targetField;
         this.addToRoot = addToRoot;
+        this.allowDuplicateKeys = allowDuplicateKeys;
     }
 
     public String getField() {
@@ -56,11 +58,12 @@ public final class JsonProcessor extends AbstractProcessor {
         return addToRoot;
     }
 
-    public static Object apply(Object fieldValue) {
+    public static Object apply(Object fieldValue, boolean allowDuplicateKeys) {
         BytesReference bytesRef = fieldValue == null ? new BytesArray("null") : new BytesArray(fieldValue.toString());
         try (InputStream stream = bytesRef.streamInput();
              XContentParser parser = JsonXContent.jsonXContent
                  .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, stream)) {
+            parser.allowDuplicateKeys(allowDuplicateKeys);
             XContentParser.Token token = parser.nextToken();
             Object value = null;
             if (token == XContentParser.Token.VALUE_NULL) {
@@ -84,12 +87,12 @@ public final class JsonProcessor extends AbstractProcessor {
         }
     }
 
-    public static void apply(Map<String, Object> ctx, String fieldName) {
-        Object value = apply(ctx.get(fieldName));
+    public static void apply(Map<String, Object> ctx, String fieldName, boolean allowDuplicateKeys) {
+        Object value = apply(ctx.get(fieldName), allowDuplicateKeys);
         if (value instanceof Map) {
             @SuppressWarnings("unchecked")
                 Map<String, Object> map = (Map<String, Object>) value;
-                ctx.putAll(map);
+            ctx.putAll(map);
         } else {
             throw new IllegalArgumentException("cannot add non-map fields to root of document");
         }
@@ -98,9 +101,9 @@ public final class JsonProcessor extends AbstractProcessor {
     @Override
     public IngestDocument execute(IngestDocument document) throws Exception {
         if (addToRoot) {
-           apply(document.getSourceAndMetadata(), field);
+            apply(document.getSourceAndMetadata(), field, allowDuplicateKeys);
         } else {
-            document.setFieldValue(targetField, apply(document.getFieldValue(field, Object.class)));
+            document.setFieldValue(targetField, apply(document.getFieldValue(field, Object.class), allowDuplicateKeys));
         }
         return document;
     }
@@ -117,6 +120,7 @@ public final class JsonProcessor extends AbstractProcessor {
             String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
             String targetField = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "target_field");
             boolean addToRoot = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "add_to_root", false);
+            boolean allowDuplicateKeys = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "allow_duplicate_keys", false);
 
             if (addToRoot && targetField != null) {
                 throw newConfigurationException(TYPE, processorTag, "target_field",
@@ -127,7 +131,7 @@ public final class JsonProcessor extends AbstractProcessor {
                 targetField = field;
             }
 
-            return new JsonProcessor(processorTag, description, field, targetField, addToRoot);
+            return new JsonProcessor(processorTag, description, field, targetField, addToRoot, allowDuplicateKeys);
         }
     }
 }

+ 2 - 2
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Processors.java

@@ -59,7 +59,7 @@ public final class Processors {
      * @return structured JSON object
      */
     public static Object json(Object fieldValue) {
-        return JsonProcessor.apply(fieldValue);
+        return JsonProcessor.apply(fieldValue, false);
     }
 
     /**
@@ -72,7 +72,7 @@ public final class Processors {
      *             contains the JSON string
      */
     public static void json(Map<String, Object> map, String field) {
-        JsonProcessor.apply(map, field);
+        JsonProcessor.apply(map, field, false);
     }
 
     /**

+ 34 - 12
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/JsonProcessorTests.java

@@ -32,7 +32,7 @@ public class JsonProcessorTests extends ESTestCase {
         String processorTag = randomAlphaOfLength(3);
         String randomField = randomAlphaOfLength(3);
         String randomTargetField = randomAlphaOfLength(2);
-        JsonProcessor jsonProcessor = new JsonProcessor(processorTag, null, randomField, randomTargetField, false);
+        JsonProcessor jsonProcessor = new JsonProcessor(processorTag, null, randomField, randomTargetField, false, false);
         Map<String, Object> document = new HashMap<>();
 
         Map<String, Object> randomJsonMap = RandomDocumentPicks.randomSource(random());
@@ -47,7 +47,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testInvalidValue() {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         document.put("field", "blah blah");
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
@@ -58,7 +58,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testByteArray() {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         document.put("field", new byte[] { 0, 1 });
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
@@ -73,7 +73,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testNull() throws Exception {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         document.put("field", null);
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
@@ -82,7 +82,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testBoolean() throws Exception {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         boolean value = true;
         document.put("field", value);
@@ -92,7 +92,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testInteger() throws Exception {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         int value = 3;
         document.put("field", value);
@@ -102,7 +102,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testDouble() throws Exception {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         double value = 3.0;
         document.put("field", value);
@@ -112,7 +112,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testString() throws Exception {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         String value = "hello world";
         document.put("field", "\"" + value + "\"");
@@ -122,7 +122,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testArray() throws Exception {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         List<Boolean> value = Arrays.asList(true, true, false);
         document.put("field", value.toString());
@@ -132,7 +132,7 @@ public class JsonProcessorTests extends ESTestCase {
     }
 
     public void testFieldMissing() {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", false, false);
         Map<String, Object> document = new HashMap<>();
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
 
@@ -143,7 +143,7 @@ public class JsonProcessorTests extends ESTestCase {
     public void testAddToRoot() throws Exception {
         String processorTag = randomAlphaOfLength(3);
         String randomTargetField = randomAlphaOfLength(2);
-        JsonProcessor jsonProcessor = new JsonProcessor(processorTag, null, "a", randomTargetField, true);
+        JsonProcessor jsonProcessor = new JsonProcessor(processorTag, null, "a", randomTargetField, true, false);
         Map<String, Object> document = new HashMap<>();
 
         String json = "{\"a\": 1, \"b\": 2}";
@@ -159,8 +159,30 @@ public class JsonProcessorTests extends ESTestCase {
         assertEquals("see", sourceAndMetadata.get("c"));
     }
 
+    public void testDuplicateKeys() throws Exception {
+        String processorTag = randomAlphaOfLength(3);
+        JsonProcessor lenientJsonProcessor = new JsonProcessor(processorTag, null, "a", null, true, true);
+
+        Map<String, Object> document = new HashMap<>();
+        String json = "{\"a\": 1, \"a\": 2}";
+        document.put("a", json);
+        document.put("c", "see");
+
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
+        lenientJsonProcessor.execute(ingestDocument);
+
+        Map<String, Object> sourceAndMetadata = ingestDocument.getSourceAndMetadata();
+        assertEquals(2, sourceAndMetadata.get("a"));
+        assertEquals("see", sourceAndMetadata.get("c"));
+
+        JsonProcessor strictJsonProcessor = new JsonProcessor(processorTag, null, "a", null, true, false);
+        Exception exception = expectThrows(IllegalArgumentException.class, () ->
+            strictJsonProcessor.execute(RandomDocumentPicks.randomIngestDocument(random(), document)));
+        assertThat(exception.getMessage(), containsString("Duplicate field 'a'"));
+    }
+
     public void testAddBoolToRoot() {
-        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", true);
+        JsonProcessor jsonProcessor = new JsonProcessor("tag", null, "field", "target_field", true, false);
         Map<String, Object> document = new HashMap<>();
         document.put("field", true);
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);

+ 37 - 0
modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/140_json.yml

@@ -4,6 +4,10 @@ teardown:
       ingest.delete_pipeline:
         id: "1"
         ignore: 404
+  - do:
+      ingest.delete_pipeline:
+        id: "2"
+        ignore: 404
 
 ---
 "Test JSON Processor":
@@ -71,3 +75,36 @@ teardown:
   - match: { _source.foo_number: 3 }
   - is_true:  _source.foo_boolean
   - is_false: _source.foo_null
+
+---
+"Test JSON Processor duplicate keys":
+  - do:
+      ingest.put_pipeline:
+        id: "2"
+        body: {
+            "processors": [
+              {
+                "json" : {
+                  "field" : "json",
+                  "add_to_root": true,
+                  "allow_duplicate_keys": true
+                }
+              }
+            ]
+          }
+  - match: { acknowledged: true }
+
+  - do:
+      index:
+        index: test
+        id: 2
+        pipeline: "2"
+        body: {
+          json: "{\"dupe\": 1, \"dupe\": 2}",
+        }
+
+  - do:
+      get:
+        index: test
+        id: 2
+  - match: { _source.dupe: 2 }

+ 13 - 1
server/src/test/java/org/elasticsearch/common/xcontent/BaseXContentTestCase.java

@@ -80,7 +80,7 @@ public abstract class BaseXContentTestCase extends ESTestCase {
 
     protected abstract XContentType xcontentType();
 
-    private XContentBuilder builder() throws IOException {
+    protected XContentBuilder builder() throws IOException {
         return XContentBuilder.builder(xcontentType().xContent());
     }
 
@@ -1149,6 +1149,18 @@ public abstract class BaseXContentTestCase extends ESTestCase {
         }
     }
 
+    public void testAllowsDuplicates() throws Exception {
+        XContentBuilder builder = builder()
+                .startObject()
+                    .field("key", 1)
+                    .field("key", 2)
+                .endObject();
+        try (XContentParser xParser = createParser(builder)) {
+            xParser.allowDuplicateKeys(true);
+            assertThat(xParser.map(), equalTo(Map.of("key", 2)));
+        }
+    }
+
     public void testNamedObject() throws IOException {
         Object test1 = new Object();
         Object test2 = new Object();

+ 7 - 0
server/src/test/java/org/elasticsearch/common/xcontent/cbor/CborXContentTests.java

@@ -12,6 +12,7 @@ import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
 
 import org.elasticsearch.common.xcontent.BaseXContentTestCase;
+import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
 
 import java.io.ByteArrayOutputStream;
@@ -28,4 +29,10 @@ public class CborXContentTests extends BaseXContentTestCase {
         JsonGenerator generator = new CBORFactory().createGenerator(os);
         doTestBigInteger(generator, os);
     }
+
+    public void testAllowsDuplicates() throws Exception {
+        try (XContentParser xParser = createParser(builder().startObject().endObject())) {
+            expectThrows(UnsupportedOperationException.class, () -> xParser.allowDuplicateKeys(true));
+        }
+    }
 }

+ 7 - 0
server/src/test/java/org/elasticsearch/common/xcontent/smile/SmileXContentTests.java

@@ -12,6 +12,7 @@ import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.dataformat.smile.SmileFactory;
 
 import org.elasticsearch.common.xcontent.BaseXContentTestCase;
+import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
 
 import java.io.ByteArrayOutputStream;
@@ -28,4 +29,10 @@ public class SmileXContentTests extends BaseXContentTestCase {
         JsonGenerator generator = new SmileFactory().createGenerator(os);
         doTestBigInteger(generator, os);
     }
+
+    public void testAllowsDuplicates() throws Exception {
+        try (XContentParser xParser = createParser(builder().startObject().endObject())) {
+            expectThrows(UnsupportedOperationException.class, () -> xParser.allowDuplicateKeys(true));
+        }
+    }
 }