Browse Source

add URL-Decode Processor to Ingest (#26045)

closes #25837

Adds a URL Decoder Processor to Ingest

this will decode urls like:

https%3a%2f%2felastic.co%2f to https://elastic.co/
Tal Levy 8 years ago
parent
commit
872526cad3

+ 25 - 1
docs/reference/ingest/ingest-node.asciidoc

@@ -2005,7 +2005,7 @@ into:
 --------------------------------------------------
 {
   "foo" : {
-     "bar" : "value"
+    "bar" : "value"
   }
 }
 --------------------------------------------------
@@ -2081,3 +2081,27 @@ pipeline should be used:
 
 The reason for this is that Ingest doesn't know how to automatically cast
 a scalar field to an object field.
+
+[[urldecode-processor]]
+=== URL Decode Processor
+URL-decodes a string.
+
+[[urldecode-options]]
+.URL Decode Options
+[options="header"]
+|======
+| Name             | Required  | Default  | Description
+| `field`          | yes       | -        | The field to decode
+| `target_field`   | no        | `field`  | The field to assign the decoded value to. By default, `field` is updated in-place
+| `ignore_missing` | no        | `false`  | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
+|======
+
+[source,js]
+--------------------------------------------------
+{
+  "urldecode": {
+    "field": "my_url_to_decode"
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE

+ 1 - 0
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java

@@ -91,6 +91,7 @@ public class IngestCommonPlugin extends Plugin implements ActionPlugin, IngestPl
         processors.put(DotExpanderProcessor.TYPE, new DotExpanderProcessor.Factory());
         processors.put(JsonProcessor.TYPE, new JsonProcessor.Factory());
         processors.put(KeyValueProcessor.TYPE, new KeyValueProcessor.Factory());
+        processors.put(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory());
         return Collections.unmodifiableMap(processors);
     }
 

+ 65 - 0
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/URLDecodeProcessor.java

@@ -0,0 +1,65 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.ingest.common;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Ingest processor that URL-decodes a string field.
+ *
+ * <p>Decoding is delegated to {@link URLDecoder} with the UTF-8 charset, so for example
+ * {@code https%3a%2f%2felastic.co%2f} becomes {@code https://elastic.co/}.
+ */
+public final class URLDecodeProcessor extends AbstractStringProcessor {
+
+    /** Processor type name; returned by {@link #getType()} and passed to the base factory by {@link Factory}. */
+    public static final String TYPE = "urldecode";
+
+    /**
+     * Creates the processor; every argument is forwarded unchanged to {@link AbstractStringProcessor}.
+     *
+     * @param processorTag tag of this processor instance
+     * @param field name of the field to decode
+     * @param ignoreMissing the {@code ignore_missing} setting, forwarded to the base class
+     * @param targetField name of the field the decoded value is assigned to
+     */
+    URLDecodeProcessor(String processorTag, String field, boolean ignoreMissing, String targetField) {
+        super(processorTag, field, ignoreMissing, targetField);
+    }
+
+    /**
+     * URL-decodes {@code value} as UTF-8.
+     *
+     * @param value the string to decode
+     * @return the decoded string
+     * @throws IllegalArgumentException if {@code value} cannot be decoded, e.g. because it contains a
+     *         malformed percent-escape such as {@code %zz}; the message names the configured field
+     */
+    @Override
+    protected String process(String value) {
+        try {
+            return URLDecoder.decode(value, "UTF-8");
+        } catch (UnsupportedEncodingException | IllegalArgumentException e) {
+            // URLDecoder signals malformed escapes with a bare IllegalArgumentException that does not say
+            // which field was being processed, so both failure modes are rethrown with the field name
+            // attached and the original exception kept as the cause.
+            throw new IllegalArgumentException("could not URL-decode field[" + getField() + "]", e);
+        }
+    }
+
+    /**
+     * @return {@link #TYPE}
+     */
+    @Override
+    public String getType() {
+        return TYPE;
+    }
+
+    /**
+     * Factory that builds {@link URLDecodeProcessor} instances.
+     */
+    public static final class Factory extends AbstractStringProcessor.Factory {
+
+        /** Creates the factory, handing {@link #TYPE} to the base factory. */
+        public Factory() {
+            super(TYPE);
+        }
+
+        /**
+         * Builds a processor from values already extracted by the caller.
+         *
+         * @param tag tag of the processor instance
+         * @param config processor configuration map; not read here
+         * @param field name of the field to decode
+         * @param ignoreMissing the {@code ignore_missing} setting
+         * @param targetField name of the field the decoded value is assigned to
+         * @return a new {@link URLDecodeProcessor}
+         */
+        @Override
+        protected URLDecodeProcessor newProcessor(String tag, Map<String, Object> config, String field,
+                                                  boolean ignoreMissing, String targetField) {
+            return new URLDecodeProcessor(tag, field, ignoreMissing, targetField);
+        }
+    }
+}

+ 27 - 0
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/URLDecodeProcessorFactoryTests.java

@@ -0,0 +1,27 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.ingest.common;
+
+/**
+ * Factory tests for {@link URLDecodeProcessor.Factory}. This class declares no test methods of its own;
+ * it only supplies the factory under test to {@link AbstractStringProcessorFactoryTestCase}.
+ */
+public class URLDecodeProcessorFactoryTests extends AbstractStringProcessorFactoryTestCase {
+
+    /**
+     * @return a fresh {@link URLDecodeProcessor.Factory}
+     */
+    @Override
+    protected AbstractStringProcessor.Factory newFactory() {
+        final URLDecodeProcessor.Factory factoryUnderTest = new URLDecodeProcessor.Factory();
+        return factoryUnderTest;
+    }
+}

+ 44 - 0
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/URLDecodeProcessorTests.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.ingest.common;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+
+/**
+ * Tests for {@link URLDecodeProcessor}, built on {@link AbstractStringProcessorTestCase}.
+ */
+public class URLDecodeProcessorTests extends AbstractStringProcessorTestCase {
+
+    /**
+     * Prefixes the input with a fixed percent-encoded sample containing an encoded space ({@code %20})
+     * and a two-byte UTF-8 sequence ({@code %C3%BC}).
+     *
+     * @param input the original input string
+     * @return the encoded sample followed by {@code input}
+     */
+    @Override
+    protected String modifyInput(String input) {
+        return "Hello%20G%C3%BCnter" + input;
+    }
+
+    /**
+     * @return a {@link URLDecodeProcessor} with a random tag and the given settings
+     */
+    @Override
+    protected AbstractStringProcessor newProcessor(String field, boolean ignoreMissing, String targetField) {
+        return new URLDecodeProcessor(randomAlphaOfLength(10), field, ignoreMissing, targetField);
+    }
+
+    /**
+     * Computes the value expected after processing: the decoded form of the fixed sample followed by
+     * the URL-decoded {@code input}.
+     *
+     * @param input the original (unprefixed) input string
+     * @return the expected decoded string
+     */
+    @Override
+    protected String expectedResult(String input) {
+        try {
+            return "Hello Günter" + URLDecoder.decode(input, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            // Keep the original exception as the cause so a failure here can be diagnosed.
+            throw new IllegalArgumentException("invalid", e);
+        }
+    }
+}

+ 40 - 0
modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/160_urldecode.yml

@@ -0,0 +1,40 @@
+---
+teardown:
+  - do:
+      ingest.delete_pipeline:
+        id: "1"
+        ignore: 404
+
+---
+# Creates a pipeline containing only a urldecode processor, indexes a document with a
+# percent-encoded URL through it, and checks that the stored field holds the decoded URL.
+"Test URL Decode Processor":
+  - do:
+      ingest.put_pipeline:
+        id: "1"
+        body:  >
+          {
+            "processors": [
+              {
+                "urldecode" : {
+                  "field" : "my_url"
+                }
+              }
+            ]
+          }
+  - match: { acknowledged: true }
+
+  - do:
+      index:
+        index: test
+        type: test
+        id: 1
+        pipeline: "1"
+        body: {
+          my_url: "https%3a%2f%2felastic.co%2f"
+        }
+
+  - do:
+      get:
+        index: test
+        type: test
+        id: 1
+  - match: { _source.my_url: "https://elastic.co/" }