Browse Source

add `ignore_missing` flag to ingest plugins (#22273)

added `ignore_missing` flag to:

- Attachment Processor
- GeoIP Processor
- User-Agent Processor
Tal Levy 8 years ago
parent
commit
5a90d9d7e6

+ 18 - 2
core/src/main/java/org/elasticsearch/ingest/IngestDocument.java

@@ -145,8 +145,24 @@ public final class IngestDocument {
      * or if the field that is found at the provided path is not of the expected type.
      */
     public byte[] getFieldValueAsBytes(String path) {
-        Object object = getFieldValue(path, Object.class);
-        if (object instanceof byte[]) {
+        return getFieldValueAsBytes(path, false);
+    }
+
+    /**
+     * Returns the value contained in the document for the provided path as a byte array.
+     * If the path value is a string, it is base64-decoded.
+     * If the path value is a byte array, it is returned as is.
+     * @param path The path within the document in dot-notation
+     * @param ignoreMissing If {@code true}, return {@code null} instead of throwing when {@code path} is not found in the document.
+     * @return the byte array for the provided path, or {@code null} if the value is null or the field is missing and ignored
+     * @throws IllegalArgumentException if the path is null, empty or invalid, if the field doesn't exist and
+     * {@code ignoreMissing} is false, or if the field found at the provided path is not of the expected type.
+     */
+    public byte[] getFieldValueAsBytes(String path, boolean ignoreMissing) {
+        Object object = getFieldValue(path, Object.class, ignoreMissing);
+        if (object == null) {
+            return null;
+        } else if (object instanceof byte[]) {
             return (byte[]) object;
         } else if (object instanceof String) {
             return Base64.getDecoder().decode(object.toString());

+ 1 - 0
docs/plugins/ingest-attachment.asciidoc

@@ -53,6 +53,7 @@ The node must be stopped before removing the plugin.
 | `target_field`         | no        | attachment       | The field that will hold the attachment information
 | `indexed_chars`        | no        | 100000           | The number of chars being used for extraction to prevent huge fields. Use `-1` for no limit.
 | `properties`           | no        | all              | Properties to select to be stored. Can be `content`, `title`, `name`, `author`, `keywords`, `date`, `content_type`, `content_length`, `language`
+| `ignore_missing`       | no        | `false`          | If `true` and `field` does not exist, the processor quietly exits without modifying the document
 |======
 
 For example, this:

+ 1 - 0
docs/plugins/ingest-geoip.asciidoc

@@ -54,6 +54,7 @@ The node must be stopped before removing the plugin.
 | `target_field`         | no        | geoip                                                                              | The field that will hold the geographical information looked up from the Maxmind database.
 | `database_file`        | no        | GeoLite2-City.mmdb                                                                 | The database filename in the geoip config directory. The ingest-geoip plugin ships with the GeoLite2-City.mmdb.gz and GeoLite2-Country.mmdb.gz files.
 | `properties`           | no        | [`continent_name`, `country_iso_code`, `region_name`, `city_name`, `location`] *   | Controls what properties are added to the `target_field` based on the geoip lookup.
+| `ignore_missing`       | no        | `false`                                                                            | If `true` and `field` does not exist, the processor quietly exits without modifying the document
 |======
 
 *Depends on what is available in `database_file`:

+ 5 - 4
docs/plugins/ingest-user-agent.asciidoc

@@ -43,11 +43,12 @@ The node must be stopped before removing the plugin.
 .User-agent options
 [options="header"]
 |======
-| Name                   | Required  | Default                                                                            | Description
-| `field`                | yes       | -                                                                                  | The field containing the user agent string.
-| `target_field`         | no        | user_agent                                                                          | The field that will be filled with the user agent details.
-| `regex_file`           | no        | -                                                                                  | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
+| Name                   | Required  | Default                                                                                         | Description
+| `field`                | yes       | -                                                                                               | The field containing the user agent string.
+| `target_field`         | no        | user_agent                                                                                      | The field that will be filled with the user agent details.
+| `regex_file`           | no        | -                                                                                               | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
 | `properties`           | no        | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
+| `ignore_missing`       | no        | `false`                                                                                         | If `true` and `field` does not exist, the processor quietly exits without modifying the document
 |======
 
 Here is an example that adds the user agent details to the `user_agent` field based on the `agent` field:

+ 18 - 3
plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java

@@ -38,6 +38,7 @@ import java.util.Map;
 import java.util.Set;
 
 import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
+import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
 import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
@@ -52,23 +53,36 @@ public final class AttachmentProcessor extends AbstractProcessor {
     private final String targetField;
     private final Set<Property> properties;
     private final int indexedChars;
+    private final boolean ignoreMissing;
 
     AttachmentProcessor(String tag, String field, String targetField, Set<Property> properties,
-                        int indexedChars) throws IOException {
+                        int indexedChars, boolean ignoreMissing) throws IOException {
         super(tag);
         this.field = field;
         this.targetField = targetField;
         this.properties = properties;
         this.indexedChars = indexedChars;
+        this.ignoreMissing = ignoreMissing;
+    }
+
+    boolean isIgnoreMissing() {
+        return ignoreMissing;
     }
 
     @Override
     public void execute(IngestDocument ingestDocument) {
         Map<String, Object> additionalFields = new HashMap<>();
 
+        byte[] input = ingestDocument.getFieldValueAsBytes(field, ignoreMissing);
+
+        if (input == null && ignoreMissing) {
+            return;
+        } else if (input == null) {
+            throw new IllegalArgumentException("field [" + field + "] is null, cannot parse.");
+        }
+
         try {
             Metadata metadata = new Metadata();
-            byte[] input = ingestDocument.getFieldValueAsBytes(field);
             String parsedContent = TikaImpl.parse(input, metadata, indexedChars);
 
             if (properties.contains(Property.CONTENT) && Strings.hasLength(parsedContent)) {
@@ -166,6 +180,7 @@ public final class AttachmentProcessor extends AbstractProcessor {
             String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "attachment");
             List<String> properyNames = readOptionalList(TYPE, processorTag, config, "properties");
             int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED);
+            boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
 
             final Set<Property> properties;
             if (properyNames != null) {
@@ -182,7 +197,7 @@ public final class AttachmentProcessor extends AbstractProcessor {
                 properties = DEFAULT_PROPERTIES;
             }
 
-            return new AttachmentProcessor(processorTag, field, targetField, properties, indexedChars);
+            return new AttachmentProcessor(processorTag, field, targetField, properties, indexedChars, ignoreMissing);
         }
     }
 

+ 19 - 0
plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java

@@ -52,6 +52,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getTargetField(), equalTo("attachment"));
         assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testConfigureIndexedChars() throws Exception {
@@ -64,6 +65,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         AttachmentProcessor processor = factory.create(null, processorTag, config);
         assertThat(processor.getTag(), equalTo(processorTag));
         assertThat(processor.getIndexedChars(), is(indexedChars));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildTargetField() throws Exception {
@@ -73,6 +75,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         AttachmentProcessor processor = factory.create(null, null, config);
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getTargetField(), equalTo("_field"));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildFields() throws Exception {
@@ -90,6 +93,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         AttachmentProcessor processor = factory.create(null, null, config);
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getProperties(), equalTo(properties));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildIllegalFieldOption() throws Exception {
@@ -117,4 +121,19 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
             assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
         }
     }
+
+    public void testIgnoreMissing() throws Exception {
+        Map<String, Object> config = new HashMap<>();
+        config.put("field", "_field");
+        config.put("ignore_missing", true);
+
+        String processorTag = randomAsciiOfLength(10);
+
+        AttachmentProcessor processor = factory.create(null, processorTag, config);
+        assertThat(processor.getTag(), equalTo(processorTag));
+        assertThat(processor.getField(), equalTo("_field"));
+        assertThat(processor.getTargetField(), equalTo("attachment"));
+        assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
+        assertTrue(processor.isIgnoreMissing());
+    }
 }

+ 40 - 2
plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java

@@ -22,6 +22,7 @@ package org.elasticsearch.ingest.attachment;
 import org.apache.commons.io.IOUtils;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.ingest.IngestDocument;
+import org.elasticsearch.ingest.Processor;
 import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.test.ESTestCase;
 import org.junit.Before;
@@ -30,14 +31,17 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Base64;
+import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
+import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.is;
@@ -52,7 +56,7 @@ public class AttachmentProcessorTests extends ESTestCase {
     @Before
     public void createStandardProcessor() throws IOException {
         processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field",
-            "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000);
+            "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false);
     }
 
     public void testEnglishTextDocument() throws Exception {
@@ -85,7 +89,7 @@ public class AttachmentProcessorTests extends ESTestCase {
             selectedProperties.add(AttachmentProcessor.Property.DATE);
         }
         processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field",
-            "target_field", selectedProperties, 10000);
+            "target_field", selectedProperties, 10000, false);
 
         Map<String, Object> attachmentData = parseDocument("htmlWithEmptyDateMeta.html", processor);
         assertThat(attachmentData.keySet(), hasSize(selectedFieldNames.length));
@@ -199,6 +203,40 @@ public class AttachmentProcessorTests extends ESTestCase {
         assertThat(attachmentData.get("content_length"), is(notNullValue()));
     }
 
+    public void testNullValueWithIgnoreMissing() throws Exception {
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
+            Collections.singletonMap("source_field", null));
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, true);
+        processor.execute(ingestDocument);
+        assertIngestDocument(originalIngestDocument, ingestDocument);
+    }
+
+    public void testNonExistentWithIgnoreMissing() throws Exception {
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, true);
+        processor.execute(ingestDocument);
+        assertIngestDocument(originalIngestDocument, ingestDocument);
+    }
+
+    public void testNullWithoutIgnoreMissing() throws Exception {
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
+            Collections.singletonMap("source_field", null));
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, false);
+        Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot parse."));
+    }
+
+    public void testNonExistentWithoutIgnoreMissing() throws Exception {
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, false);
+        Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]"));
+    }
+
     private Map<String, Object> parseDocument(String file, AttachmentProcessor processor) throws Exception {
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", getAsBase64(file));

+ 19 - 3
plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java

@@ -50,6 +50,7 @@ import org.elasticsearch.ingest.IngestDocument;
 import org.elasticsearch.ingest.Processor;
 
 import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
+import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
 import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
 
@@ -63,18 +64,32 @@ public final class GeoIpProcessor extends AbstractProcessor {
     private final String targetField;
     private final DatabaseReader dbReader;
     private final Set<Property> properties;
+    private final boolean ignoreMissing;
 
-    GeoIpProcessor(String tag, String field, DatabaseReader dbReader, String targetField, Set<Property> properties) throws IOException {
+    GeoIpProcessor(String tag, String field, DatabaseReader dbReader, String targetField, Set<Property> properties,
+                   boolean ignoreMissing) throws IOException {
         super(tag);
         this.field = field;
         this.targetField = targetField;
         this.dbReader = dbReader;
         this.properties = properties;
+        this.ignoreMissing = ignoreMissing;
+    }
+
+    boolean isIgnoreMissing() {
+        return ignoreMissing;
     }
 
     @Override
     public void execute(IngestDocument ingestDocument) {
-        String ip = ingestDocument.getFieldValue(field, String.class);
+        String ip = ingestDocument.getFieldValue(field, String.class, ignoreMissing);
+
+        if (ip == null && ignoreMissing) {
+            return;
+        } else if (ip == null) {
+            throw new IllegalArgumentException("field [" + field + "] is null, cannot extract geoip information.");
+        }
+
         final InetAddress ipAddress = InetAddresses.forString(ip);
 
         Map<String, Object> geoData;
@@ -268,6 +283,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
             String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "geoip");
             String databaseFile = readStringProperty(TYPE, processorTag, config, "database_file", "GeoLite2-City.mmdb.gz");
             List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
+            boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
 
             DatabaseReader databaseReader = databaseReaders.get(databaseFile);
             if (databaseReader == null) {
@@ -298,7 +314,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
                 }
             }
 
-            return new GeoIpProcessor(processorTag, ipField, databaseReader, targetField, properties);
+            return new GeoIpProcessor(processorTag, ipField, databaseReader, targetField, properties, ignoreMissing);
         }
     }
 

+ 22 - 0
plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java

@@ -85,6 +85,24 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getTargetField(), equalTo("geoip"));
         assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-City"));
         assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_CITY_PROPERTIES));
+        assertFalse(processor.isIgnoreMissing());
+    }
+
+    public void testSetIgnoreMissing() throws Exception {
+        GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
+
+        Map<String, Object> config = new HashMap<>();
+        config.put("field", "_field");
+        config.put("ignore_missing", true);
+        String processorTag = randomAsciiOfLength(10);
+
+        GeoIpProcessor processor = factory.create(null, processorTag, config);
+        assertThat(processor.getTag(), equalTo(processorTag));
+        assertThat(processor.getField(), equalTo("_field"));
+        assertThat(processor.getTargetField(), equalTo("geoip"));
+        assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-City"));
+        assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_CITY_PROPERTIES));
+        assertTrue(processor.isIgnoreMissing());
     }
 
     public void testCountryBuildDefaults() throws Exception {
@@ -102,6 +120,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getTargetField(), equalTo("geoip"));
         assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-Country"));
         assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_COUNTRY_PROPERTIES));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildTargetField() throws Exception {
@@ -112,6 +131,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
         GeoIpProcessor processor = factory.create(null, null, config);
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getTargetField(), equalTo("_field"));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildDbFile() throws Exception {
@@ -124,6 +144,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getTargetField(), equalTo("geoip"));
         assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-Country"));
         assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_COUNTRY_PROPERTIES));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildWithCountryDbAndCityFields() throws Exception {
@@ -174,6 +195,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
         GeoIpProcessor processor = factory.create(null, null, config);
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getProperties(), equalTo(properties));
+        assertFalse(processor.isIgnoreMissing());
     }
 
     public void testBuildIllegalFieldOption() throws Exception {

+ 52 - 7
plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java

@@ -20,17 +20,20 @@
 package org.elasticsearch.ingest.geoip;
 
 import com.maxmind.geoip2.DatabaseReader;
+import org.elasticsearch.ingest.Processor;
 import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.ingest.IngestDocument;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.zip.GZIPInputStream;
 
+import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
@@ -40,7 +43,7 @@ public class GeoIpProcessorTests extends ESTestCase {
     public void testCity() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", "8.8.8.8");
@@ -64,10 +67,52 @@ public class GeoIpProcessorTests extends ESTestCase {
         assertThat(geoData.get("location"), equalTo(location));
     }
 
+    public void testNullValueWithIgnoreMissing() throws Exception {
+        InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
+        GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
+            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), true);
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
+            Collections.singletonMap("source_field", null));
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        processor.execute(ingestDocument);
+        assertIngestDocument(originalIngestDocument, ingestDocument);
+    }
+
+    public void testNonExistentWithIgnoreMissing() throws Exception {
+        InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
+        GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
+            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), true);
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        processor.execute(ingestDocument);
+        assertIngestDocument(originalIngestDocument, ingestDocument);
+    }
+
+    public void testNullWithoutIgnoreMissing() throws Exception {
+        InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
+        GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
+            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
+            Collections.singletonMap("source_field", null));
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot extract geoip information."));
+    }
+
+    public void testNonExistentWithoutIgnoreMissing() throws Exception {
+        InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
+        GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
+            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]"));
+    }
+
     public void testCity_withIpV6() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         String address = "2602:306:33d3:8000::3257:9652";
         Map<String, Object> document = new HashMap<>();
@@ -95,7 +140,7 @@ public class GeoIpProcessorTests extends ESTestCase {
     public void testCityWithMissingLocation() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", "93.114.45.13");
@@ -112,7 +157,7 @@ public class GeoIpProcessorTests extends ESTestCase {
     public void testCountry() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", "82.170.213.79");
@@ -132,7 +177,7 @@ public class GeoIpProcessorTests extends ESTestCase {
     public void testCountryWithMissingLocation() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+            new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", "93.114.45.13");
@@ -149,7 +194,7 @@ public class GeoIpProcessorTests extends ESTestCase {
     public void testAddressIsNotInTheDatabase() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", "127.0.0.1");
@@ -162,7 +207,7 @@ public class GeoIpProcessorTests extends ESTestCase {
     public void testInvalid() throws Exception {
         InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
         GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
-                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
+                new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
 
         Map<String, Object> document = new HashMap<>();
         document.put("source_field", "www.google.com");

+ 23 - 9
plugins/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java

@@ -34,6 +34,7 @@ import java.util.Map;
 import java.util.Set;
 
 import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
+import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
 import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
 
@@ -44,20 +45,32 @@ public class UserAgentProcessor extends AbstractProcessor {
     private final String field;
     private final String targetField;
     private final Set<Property> properties;
-    
     private final UserAgentParser parser;
+    private final boolean ignoreMissing;
 
-    public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties) {
+    /**
+     * Creates a processor that parses a user-agent string found in an ingest document.
+     *
+     * @param tag           processor tag, handed to the {@link AbstractProcessor} constructor
+     * @param field         name of the document field holding the raw user-agent string
+     * @param targetField   name kept as this processor's target field
+     * @param parser        parser used to turn the raw string into user-agent details
+     * @param properties    the set of properties kept for this processor
+     * @param ignoreMissing when {@code true}, a missing or {@code null} source field makes
+     *                      {@code execute} return without touching the document; when {@code false},
+     *                      a {@code null} value is rejected with an {@link IllegalArgumentException}
+     */
+    public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties,
+                              boolean ignoreMissing) {
         super(tag);
         this.field = field;
         this.targetField = targetField;
         this.parser = parser;
         this.properties = properties;
+        this.ignoreMissing = ignoreMissing;
+    }
+
+    /** Returns the {@code ignoreMissing} flag this processor was constructed with. */
+    boolean isIgnoreMissing() {
+        return ignoreMissing;
     }
 
     @Override
     public void execute(IngestDocument ingestDocument) throws Exception {
-        String userAgent = ingestDocument.getFieldValue(field, String.class);
+        String userAgent = ingestDocument.getFieldValue(field, String.class, ignoreMissing);
+
+        if (userAgent == null && ignoreMissing) {
+            return;
+        } else if (userAgent == null) {
+            throw new IllegalArgumentException("field [" + field + "] is null, cannot parse user-agent.");
+        }
 
         Details uaClient = parser.parse(userAgent);
 
@@ -99,7 +112,7 @@ public class UserAgentProcessor extends AbstractProcessor {
                     else {
                         uaDetails.put("os", "Other");
                     }
-                    
+
                     break;
                 case OS_NAME:
                     if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
@@ -168,7 +181,7 @@ public class UserAgentProcessor extends AbstractProcessor {
     public String getType() {
         return TYPE;
     }
-    
+
+    /** Returns the name of the document field the user-agent string is read from. */
     String getField() {
         return field;
     }
@@ -180,7 +193,7 @@ public class UserAgentProcessor extends AbstractProcessor {
+    /** Returns the set of properties this processor was constructed with. */
     Set<Property> getProperties() {
         return properties;
     }
-    
+
+    /** Returns the parser this processor was constructed with. */
     UserAgentParser getUaParser() {
         return parser;
     }
@@ -188,7 +201,7 @@ public class UserAgentProcessor extends AbstractProcessor {
     public static final class Factory implements Processor.Factory {
 
         private final Map<String, UserAgentParser> userAgentParsers;
-        
+
+        /**
+         * Creates a factory backed by the given parsers.
+         *
+         * @param userAgentParsers available parsers; {@code create} looks one up by the configured {@code regex_file} name
+         */
         public Factory(Map<String, UserAgentParser> userAgentParsers) {
             this.userAgentParsers = userAgentParsers;
         }
@@ -200,13 +213,14 @@ public class UserAgentProcessor extends AbstractProcessor {
             String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "user_agent");
             String regexFilename = readStringProperty(TYPE, processorTag, config, "regex_file", IngestUserAgentPlugin.DEFAULT_PARSER_NAME);
             List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
+            boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
 
             UserAgentParser parser = userAgentParsers.get(regexFilename);
             if (parser == null) {
                 throw newConfigurationException(TYPE, processorTag,
                         "regex_file", "regex file [" + regexFilename + "] doesn't exist (has to exist at node startup)");
             }
-            
+
             final Set<Property> properties;
             if (propertyNames != null) {
                 properties = EnumSet.noneOf(Property.class);
@@ -221,7 +235,7 @@ public class UserAgentProcessor extends AbstractProcessor {
                 properties = EnumSet.allOf(Property.class);
             }
 
-            return new UserAgentProcessor(processorTag, field, targetField, parser, properties);
+            return new UserAgentProcessor(processorTag, field, targetField, parser, properties, ignoreMissing);
         }
     }
 

+ 21 - 0
plugins/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorFactoryTests.java

@@ -89,6 +89,27 @@ public class UserAgentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getUaParser().getOsPatterns().size(), greaterThan(0));
         assertThat(processor.getUaParser().getDevicePatterns().size(), greaterThan(0));
         assertThat(processor.getProperties(), equalTo(EnumSet.allOf(UserAgentProcessor.Property.class)));
+        assertFalse(processor.isIgnoreMissing());
+    }
+
+    /**
+     * Builds a processor from a configuration that sets {@code ignore_missing} to {@code true}
+     * and checks that the flag reaches the processor alongside the other asserted settings.
+     */
+    public void testBuildWithIgnoreMissing() throws Exception {
+        String tag = randomAsciiOfLength(10);
+
+        Map<String, Object> settings = new HashMap<>();
+        settings.put("ignore_missing", true);
+        settings.put("field", "_field");
+
+        UserAgentProcessor built = new UserAgentProcessor.Factory(userAgentParsers).create(null, tag, settings);
+
+        // identity and field wiring
+        assertThat(built.getTag(), equalTo(tag));
+        assertThat(built.getField(), equalTo("_field"));
+        assertThat(built.getTargetField(), equalTo("user_agent"));
+
+        // the parser must carry patterns of every kind
+        assertThat(built.getUaParser().getUaPatterns().size(), greaterThan(0));
+        assertThat(built.getUaParser().getOsPatterns().size(), greaterThan(0));
+        assertThat(built.getUaParser().getDevicePatterns().size(), greaterThan(0));
+
+        assertThat(built.getProperties(), equalTo(EnumSet.allOf(UserAgentProcessor.Property.class)));
+        assertTrue(built.isIgnoreMissing());
     }
 
     public void testBuildTargetField() throws Exception {

+ 65 - 24
plugins/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java

@@ -27,55 +27,96 @@ import org.junit.BeforeClass;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.Map;
 
+import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.hasKey;
 import static org.hamcrest.Matchers.is;
 
 public class UserAgentProcessorTests extends ESTestCase {
 
     private static UserAgentProcessor processor;
-    
+
+    /**
+     * Builds the shared processor once for the whole class from the bundled {@code /regexes.yaml} resource.
+     * The shared instance is created with {@code ignoreMissing} set to {@code false}.
+     */
     @BeforeClass
     public static void setupProcessor() throws IOException {
         InputStream regexStream = UserAgentProcessor.class.getResourceAsStream("/regexes.yaml");
         assertNotNull(regexStream);
-        
+
+        // NOTE(review): regexStream is never closed here; presumably the parser consumes it in its
+        // constructor, in which case a try-with-resources would be safe -- confirm against UserAgentParser.
         UserAgentParser parser = new UserAgentParser(randomAsciiOfLength(10), regexStream, new UserAgentCache(1000));
-        
+
         processor = new UserAgentProcessor(randomAsciiOfLength(10), "source_field", "target_field", parser,
-                EnumSet.allOf(UserAgentProcessor.Property.class));
+                EnumSet.allOf(UserAgentProcessor.Property.class), false);
+    }
+
+    /**
+     * With {@code ignoreMissing} enabled, a source field that is present but {@code null}
+     * must leave the document exactly as it was.
+     */
+    public void testNullValueWithIgnoreMissing() throws Exception {
+        String tag = randomAsciiOfLength(10);
+        EnumSet<UserAgentProcessor.Property> allProperties = EnumSet.allOf(UserAgentProcessor.Property.class);
+        // no parser is supplied: the processor is expected to return before it would need one
+        UserAgentProcessor lenient = new UserAgentProcessor(tag, "source_field", "target_field", null, allProperties, true);
+
+        Map<String, Object> source = Collections.singletonMap("source_field", null);
+        IngestDocument before = RandomDocumentPicks.randomIngestDocument(random(), source);
+        IngestDocument after = new IngestDocument(before);
+
+        lenient.execute(after);
+
+        assertIngestDocument(before, after);
+    }
+
+    /**
+     * With {@code ignoreMissing} enabled, a document that lacks the source field entirely
+     * must leave the document exactly as it was.
+     */
+    public void testNonExistentWithIgnoreMissing() throws Exception {
+        String tag = randomAsciiOfLength(10);
+        EnumSet<UserAgentProcessor.Property> allProperties = EnumSet.allOf(UserAgentProcessor.Property.class);
+        // no parser is supplied: the processor is expected to return before it would need one
+        UserAgentProcessor lenient = new UserAgentProcessor(tag, "source_field", "target_field", null, allProperties, true);
+
+        IngestDocument before = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
+        IngestDocument after = new IngestDocument(before);
+
+        lenient.execute(after);
+
+        assertIngestDocument(before, after);
     }
-    
+
+    /**
+     * With {@code ignoreMissing} disabled, a source field that is present but {@code null}
+     * must be rejected with an {@link IllegalArgumentException} naming the field.
+     */
+    public void testNullWithoutIgnoreMissing() throws Exception {
+        UserAgentProcessor processor = new UserAgentProcessor(randomAsciiOfLength(10), "source_field", "target_field", null,
+            EnumSet.allOf(UserAgentProcessor.Property.class), false);
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
+            Collections.singletonMap("source_field", null));
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        // Pin the concrete exception type the processor throws rather than accepting any Exception,
+        // so an unrelated failure (e.g. from the null parser) cannot satisfy this test.
+        IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot parse user-agent."));
+    }
+
+    /**
+     * With {@code ignoreMissing} disabled, a document that lacks the source field entirely
+     * must fail with the "not present as part of path" error raised while reading the field.
+     */
+    public void testNonExistentWithoutIgnoreMissing() throws Exception {
+        UserAgentProcessor processor = new UserAgentProcessor(randomAsciiOfLength(10), "source_field", "target_field", null,
+            EnumSet.allOf(UserAgentProcessor.Property.class), false);
+        IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
+        IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
+        // IngestDocument documents IllegalArgumentException for a field that doesn't exist; pin that type
+        // instead of accepting any Exception.
+        IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
+        assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]"));
+    }
+
+    /**
+     * Runs the shared processor on a desktop Chrome / Mac OS X user-agent string and checks
+     * every browser, OS and device value written under {@code target_field}.
+     */
     @SuppressWarnings("unchecked")
     public void testCommonBrowser() throws Exception {
         Map<String, Object> document = new HashMap<>();
         document.put("source_field",
             "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36");
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
-        
+
         processor.execute(ingestDocument);
         Map<String, Object> data = ingestDocument.getSourceAndMetadata();
 
         assertThat(data, hasKey("target_field"));
         Map<String, Object> target = (Map<String, Object>) data.get("target_field");
-        
+
+        // browser name and version parts
         assertThat(target.get("name"), is("Chrome"));
         assertThat(target.get("major"), is("33"));
         assertThat(target.get("minor"), is("0"));
         assertThat(target.get("patch"), is("1750"));
         assertNull(target.get("build"));
-        
+
+        // operating system
         assertThat(target.get("os"), is("Mac OS X 10.9.2"));
         assertThat(target.get("os_name"), is("Mac OS X"));
         assertThat(target.get("os_major"), is("10"));
         assertThat(target.get("os_minor"), is("9"));
-        
+
+        // device
         assertThat(target.get("device"), is("Other"));
     }
-    
+
     @SuppressWarnings("unchecked")
     public void testUncommonDevice() throws Exception {
         Map<String, Object> document = new HashMap<>();
@@ -83,78 +124,78 @@ public class UserAgentProcessorTests extends ESTestCase {
                 "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10+ "
                 + "(KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2");
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
-        
+
         processor.execute(ingestDocument);
         Map<String, Object> data = ingestDocument.getSourceAndMetadata();
 
         assertThat(data, hasKey("target_field"));
         Map<String, Object> target = (Map<String, Object>) data.get("target_field");
-        
+
         assertThat(target.get("name"), is("Android"));
         assertThat(target.get("major"), is("3"));
         assertThat(target.get("minor"), is("0"));
         assertNull(target.get("patch"));
         assertNull(target.get("build"));
-        
+
         assertThat(target.get("os"), is("Android 3.0"));
         assertThat(target.get("os_name"), is("Android"));
         assertThat(target.get("os_major"), is("3"));
         assertThat(target.get("os_minor"), is("0"));
-        
+
         assertThat(target.get("device"), is("Motorola Xoom"));
     }
-    
+
+    /**
+     * Runs the shared processor on a crawler user-agent string: the name is extracted,
+     * version parts are absent, the OS is "Other" and the device is "Spider".
+     */
     @SuppressWarnings("unchecked")
     public void testSpider() throws Exception {
         Map<String, Object> document = new HashMap<>();
         document.put("source_field",
             "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)");
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
-        
+
         processor.execute(ingestDocument);
         Map<String, Object> data = ingestDocument.getSourceAndMetadata();
 
         assertThat(data, hasKey("target_field"));
         Map<String, Object> target = (Map<String, Object>) data.get("target_field");
-        
+
+        // name only; no version parts are expected
         assertThat(target.get("name"), is("EasouSpider"));
         assertNull(target.get("major"));
         assertNull(target.get("minor"));
         assertNull(target.get("patch"));
         assertNull(target.get("build"));
-        
+
+        // operating system values
         assertThat(target.get("os"), is("Other"));
         assertThat(target.get("os_name"), is("Other"));
         assertNull(target.get("os_major"));
         assertNull(target.get("os_minor"));
-        
+
         assertThat(target.get("device"), is("Spider"));
     }
-    
+
+    /**
+     * Runs the shared processor on a made-up user-agent string: name, OS and device
+     * are all "Other" and no version parts are set.
+     */
     @SuppressWarnings("unchecked")
     public void testUnknown() throws Exception {
         Map<String, Object> document = new HashMap<>();
         document.put("source_field",
             "Something I made up v42.0.1");
         IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
-        
+
         processor.execute(ingestDocument);
         Map<String, Object> data = ingestDocument.getSourceAndMetadata();
 
         assertThat(data, hasKey("target_field"));
         Map<String, Object> target = (Map<String, Object>) data.get("target_field");
-        
+
+        // the target map is still written, with "Other" as the name
         assertThat(target.get("name"), is("Other"));
         assertNull(target.get("major"));
         assertNull(target.get("minor"));
         assertNull(target.get("patch"));
         assertNull(target.get("build"));
-        
+
         assertThat(target.get("os"), is("Other"));
         assertThat(target.get("os_name"), is("Other"));
         assertNull(target.get("os_major"));
         assertNull(target.get("os_minor"));
-        
+
         assertThat(target.get("device"), is("Other"));
     }
 }