Browse Source

Catching StackOverflowErrors from bad regexes in GsubProcessor and rethrowing as an Exception (#106851)

Keith Massey 1 year ago
parent
commit
ef16be9303

+ 5 - 0
docs/changelog/106851.yaml

@@ -0,0 +1,5 @@
+pr: 106851
+summary: Catching `StackOverflowErrors` from bad regexes in `GsubProcessor`
+area: Ingest Node
+type: bug
+issues: []

+ 17 - 1
modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GsubProcessor.java

@@ -8,6 +8,9 @@
 
 package org.elasticsearch.ingest.common;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
 import java.util.Map;
 import java.util.regex.Pattern;
 
@@ -21,6 +24,7 @@ import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
 public final class GsubProcessor extends AbstractStringProcessor<String> {
 
     public static final String TYPE = "gsub";
+    private static final Logger logger = LogManager.getLogger(GsubProcessor.class);
 
     private final Pattern pattern;
     private final String replacement;
@@ -49,7 +53,19 @@ public final class GsubProcessor extends AbstractStringProcessor<String> {
 
     @Override
     protected String process(String value) {
-        return pattern.matcher(value).replaceAll(replacement);
+        try {
+            return pattern.matcher(value).replaceAll(replacement);
+        } catch (StackOverflowError e) {
+            /*
+             * A bad regex on problematic data can trigger a StackOverflowError. In this case we can safely recover from the
+             * StackOverflowError, so we rethrow it as an Exception instead. This way the document fails this processor, but processing
+             * can carry on. The value would be useful to log here, but we do not do so for because we do not want to write potentially
+             * sensitive data to the logs.
+             */
+            String message = "Caught a StackOverflowError while processing gsub pattern: [" + pattern + "]";
+            logger.trace(message, e);
+            throw new IllegalArgumentException(message);
+        }
     }
 
     @Override

+ 23 - 0
modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GsubProcessorTests.java

@@ -8,6 +8,10 @@
 
 package org.elasticsearch.ingest.common;
 
+import org.elasticsearch.ingest.IngestDocument;
+import org.elasticsearch.ingest.RandomDocumentPicks;
+
+import java.util.Map;
 import java.util.regex.Pattern;
 
 public class GsubProcessorTests extends AbstractStringProcessorTestCase<String> {
@@ -26,4 +30,23 @@ public class GsubProcessorTests extends AbstractStringProcessorTestCase<String>
     protected String expectedResult(String input) {
         return "127-0-0-1";
     }
+
+    public void testStackOverflow() {
+        // This tests that we rethrow StackOverflowErrors as ElasticsearchExceptions so that we don't take down the node
+        String badRegex = "( (?=(?:[^'\"]|'[^']*'|\"[^\"]*\")*$))";
+        GsubProcessor processor = new GsubProcessor(
+            randomAlphaOfLength(10),
+            null,
+            "field",
+            Pattern.compile(badRegex),
+            "-",
+            false,
+            "targetField"
+        );
+        StringBuilder badSourceBuilder = new StringBuilder("key1=x key2=");
+        badSourceBuilder.append("x".repeat(3000));
+        Map<String, Object> source = Map.of("field", badSourceBuilder.toString());
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), source);
+        IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
+    }
 }