Browse Source

Deprecate 'remove_binary' default of false for ingest attachment processor (#90460)

This commit adds deprecation warning for when the `remove_binary`
setting is unset. In the future we want to change the default to `true`
(it is currently `false`), so this will let a user know they should be
explicit about setting this to ensure the behavior does not change in a
future (breaking) release.

Relates to #86014
Lee Hinman 3 years ago
parent
commit
4fe9fc488c

+ 10 - 0
docs/changelog/90460.yaml

@@ -0,0 +1,10 @@
+pr: 90460
+summary: Deprecate 'remove_binary' default of false for ingest attachment processor
+area: Ingest Node
+type: deprecation
+issues: []
+deprecation:
+  title: Deprecate 'remove_binary' default of false for ingest attachment processor
+  area: CRUD
+  details: The default "remove_binary" option for the attachment processor will be changed from false to true in a later Elasticsearch release. This means that the binary file sent to Elasticsearch will not be retained.
+  impact: Users should update the "remove_binary" option to be explicitly true or false, instead of relying on the default value, so that no default value changes will affect Elasticsearch.

+ 12 - 6
docs/reference/ingest/processors/attachment.asciidoc

@@ -57,7 +57,8 @@ PUT _ingest/pipeline/attachment
   "processors" : [
     {
       "attachment" : {
-        "field" : "data"
+        "field" : "data",
+        "remove_binary": false
       }
     }
   ]
@@ -141,7 +142,8 @@ PUT _ingest/pipeline/attachment
     {
       "attachment" : {
         "field" : "data",
-        "properties": [ "content", "title" ]
+        "properties": [ "content", "title" ],
+        "remove_binary": false
       }
     }
   ]
@@ -167,7 +169,8 @@ PUT _ingest/pipeline/cbor-attachment
   "processors" : [
     {
       "attachment" : {
-        "field" : "data"
+        "field" : "data",
+        "remove_binary": false
       }
     }
   ]
@@ -222,7 +225,8 @@ PUT _ingest/pipeline/attachment
       "attachment" : {
         "field" : "data",
         "indexed_chars" : 11,
-        "indexed_chars_field" : "max_size"
+        "indexed_chars_field" : "max_size",
+        "remove_binary": false
       }
     }
   ]
@@ -269,7 +273,8 @@ PUT _ingest/pipeline/attachment
       "attachment" : {
         "field" : "data",
         "indexed_chars" : 11,
-        "indexed_chars_field" : "max_size"
+        "indexed_chars_field" : "max_size",
+        "remove_binary": false
       }
     }
   ]
@@ -352,7 +357,8 @@ PUT _ingest/pipeline/attachment
         "processor": {
           "attachment": {
             "target_field": "_ingest._value.attachment",
-            "field": "_ingest._value.data"
+            "field": "_ingest._value.data",
+            "remove_binary": false
           }
         }
       }

+ 27 - 3
modules/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java

@@ -14,7 +14,10 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.logging.DeprecationCategory;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.ingest.AbstractProcessor;
 import org.elasticsearch.ingest.IngestDocument;
 import org.elasticsearch.ingest.Processor;
@@ -30,16 +33,18 @@ import java.util.Set;
 import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
 import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty;
+import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalBooleanProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
 import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty;
 import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
 
 public final class AttachmentProcessor extends AbstractProcessor {
 
-    public static final String TYPE = "attachment";
-
+    private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(AttachmentProcessor.class);
     private static final int NUMBER_OF_CHARS_INDEXED = 100000;
 
+    public static final String TYPE = "attachment";
+
     private final String field;
     private final String targetField;
     private final Set<Property> properties;
@@ -221,6 +226,15 @@ public final class AttachmentProcessor extends AbstractProcessor {
 
         static final Set<Property> DEFAULT_PROPERTIES = EnumSet.allOf(Property.class);
 
+        static {
+            if (Version.CURRENT.major >= 9) {
+                throw new IllegalStateException(
+                    "[poison pill] update the [remove_binary] default to be 'true' assuming "
+                        + "enough time has passed. Deprecated in September 2022."
+                );
+            }
+        }
+
         @Override
         public AttachmentProcessor create(
             Map<String, Processor.Factory> registry,
@@ -235,7 +249,17 @@ public final class AttachmentProcessor extends AbstractProcessor {
             int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED);
             boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
             String indexedCharsField = readOptionalStringProperty(TYPE, processorTag, config, "indexed_chars_field");
-            boolean removeBinary = readBooleanProperty(TYPE, processorTag, config, "remove_binary", false);
+            Boolean removeBinary = readOptionalBooleanProperty(TYPE, processorTag, config, "remove_binary");
+            if (removeBinary == null) {
+                DEPRECATION_LOGGER.warn(
+                    DeprecationCategory.PARSING,
+                    "attachment-remove-binary",
+                    "The default [remove_binary] value of 'false' is deprecated and will be "
+                        + "set to 'true' in a future release. Set [remove_binary] explicitly to "
+                        + "'true' or 'false' to ensure no behavior change."
+                );
+                removeBinary = false;
+            }
 
             final Set<Property> properties;
             if (propertyNames != null) {

+ 36 - 0
modules/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java

@@ -41,6 +41,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getTargetField(), equalTo("attachment"));
         assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
         assertFalse(processor.isIgnoreMissing());
+
+        assertWarnings(
+            "The default [remove_binary] value of 'false' is deprecated "
+                + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+                + " or 'false' to ensure no behavior change."
+        );
     }
 
     public void testConfigureIndexedChars() throws Exception {
@@ -54,6 +60,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getTag(), equalTo(processorTag));
         assertThat(processor.getIndexedChars(), is(indexedChars));
         assertFalse(processor.isIgnoreMissing());
+
+        assertWarnings(
+            "The default [remove_binary] value of 'false' is deprecated "
+                + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+                + " or 'false' to ensure no behavior change."
+        );
     }
 
     public void testBuildTargetField() throws Exception {
@@ -64,6 +76,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getTargetField(), equalTo("_field"));
         assertFalse(processor.isIgnoreMissing());
+
+        assertWarnings(
+            "The default [remove_binary] value of 'false' is deprecated "
+                + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+                + " or 'false' to ensure no behavior change."
+        );
     }
 
     public void testBuildFields() throws Exception {
@@ -82,6 +100,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getField(), equalTo("_field"));
         assertThat(processor.getProperties(), equalTo(properties));
         assertFalse(processor.isIgnoreMissing());
+
+        assertWarnings(
+            "The default [remove_binary] value of 'false' is deprecated "
+                + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+                + " or 'false' to ensure no behavior change."
+        );
     }
 
     public void testBuildIllegalFieldOption() throws Exception {
@@ -108,6 +132,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         } catch (ElasticsearchParseException e) {
             assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
         }
+
+        assertWarnings(
+            "The default [remove_binary] value of 'false' is deprecated "
+                + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+                + " or 'false' to ensure no behavior change."
+        );
     }
 
     public void testIgnoreMissing() throws Exception {
@@ -123,6 +153,12 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
         assertThat(processor.getTargetField(), equalTo("attachment"));
         assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
         assertTrue(processor.isIgnoreMissing());
+
+        assertWarnings(
+            "The default [remove_binary] value of 'false' is deprecated "
+                + "and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true'"
+                + " or 'false' to ensure no behavior change."
+        );
     }
 
     public void testRemoveBinary() throws Exception {

+ 8 - 4
modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/20_attachment_processor.yml

@@ -9,7 +9,8 @@
             "processors": [
               {
                 "attachment" : {
-                  "field" : "field1"
+                  "field" : "field1",
+                  "remove_binary": false
                 }
               }
             ]
@@ -50,7 +51,8 @@
               {
                 "attachment" : {
                   "field" : "field1",
-                  "properties" : ["language"]
+                  "properties" : ["language"],
+                  "remove_binary": false
                 }
               }
             ]
@@ -84,7 +86,8 @@
               {
                 "attachment" : {
                   "field" : "field1",
-                  "indexed_chars": 30
+                  "indexed_chars": 30,
+                  "remove_binary": true
                 }
               }
             ]
@@ -120,7 +123,8 @@
                 "attachment" : {
                   "field" : "field1",
                   "indexed_chars": 30,
-                  "indexed_chars_field": "max_size"
+                  "indexed_chars_field": "max_size",
+                  "remove_binary": true
                 }
               }
             ]

+ 4 - 2
modules/ingest-attachment/src/yamlRestTest/resources/rest-api-spec/test/ingest_attachment/30_files_supported.yml

@@ -12,7 +12,8 @@
             "processors": [
               {
                 "attachment" : {
-                  "field" : "field1"
+                  "field" : "field1",
+                  "remove_binary": true
                 }
               }
             ]
@@ -55,7 +56,8 @@
             "processors": [
               {
                 "attachment" : {
-                  "field" : "field1"
+                  "field" : "field1",
+                  "remove_binary": true
                 }
               }
             ]

+ 12 - 0
server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java

@@ -14,6 +14,7 @@ import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.util.Maps;
 import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
+import org.elasticsearch.core.Nullable;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptService;
 import org.elasticsearch.script.ScriptType;
@@ -216,6 +217,17 @@ public final class ConfigurationUtils {
         }
     }
 
+    @Nullable
+    public static Boolean readOptionalBooleanProperty(
+        String processorType,
+        String processorTag,
+        Map<String, Object> configuration,
+        String propertyName
+    ) {
+        Object value = configuration.remove(propertyName);
+        return readBoolean(processorType, processorTag, propertyName, value);
+    }
+
     private static Boolean readBoolean(String processorType, String processorTag, String propertyName, Object value) {
         if (value == null) {
             return null;