Browse Source

Safeguard RegExp use against StackOverflowError (#84624)

Closes #82923
Yannick Welsch 3 years ago
parent
commit
a00a85cd8f

+ 3 - 0
build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt

@@ -57,6 +57,9 @@ org.apache.lucene.index.NoMergePolicy#INSTANCE @ explicit use of NoMergePolicy r
 org.apache.lucene.search.TimeLimitingCollector#getGlobalTimerThread()
 org.apache.lucene.search.TimeLimitingCollector#getGlobalCounter()
 
+@defaultMessage use @org.elasticsearch.common.lucene.RegExp instead to avoid StackOverflowError
+org.apache.lucene.util.automaton.RegExp
+
 @defaultMessage Don't interrupt threads use FutureUtils#cancel(Future<T>) instead
 java.util.concurrent.Future#cancel(boolean)
 

+ 6 - 0
docs/changelog/84624.yaml

@@ -0,0 +1,6 @@
+pr: 84624
+summary: Safeguard `RegExp` use against `StackOverflowError`
+area: Search
+type: bug
+issues:
+ - 82923

+ 1 - 1
modules/apm/src/main/java/org/elasticsearch/tracing/apm/APMTracer.java

@@ -24,9 +24,9 @@ import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.component.AbstractLifecycleComponent;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.Maps;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;

+ 135 - 0
server/src/main/java/org/elasticsearch/common/lucene/RegExp.java

@@ -0,0 +1,135 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.common.lucene;
+
+import org.apache.lucene.util.automaton.Automaton;
+import org.elasticsearch.core.SuppressForbidden;
+
+/**
+ * Simple wrapper for {@link org.apache.lucene.util.automaton.RegExp} that
+ * avoids throwing {@link StackOverflowError}s when working with regular
+ * expressions as this will crash an Elasticsearch node. Instead, these
+ * {@linkplain StackOverflowError}s are turned into
+ * {@link IllegalArgumentException}s which elasticsearch returns as
+ * a http 400.
+ */
+public class RegExp {
+
+    @SuppressForbidden(reason = "this class is the trusted wrapper")
+    private final org.apache.lucene.util.automaton.RegExp wrapped;
+
+    @SuppressForbidden(reason = "catches StackOverflowError")
+    public RegExp(String s) {
+        try {
+            wrapped = new org.apache.lucene.util.automaton.RegExp(s);
+        } catch (StackOverflowError e) {
+            throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + s);
+        }
+    }
+
+    @SuppressForbidden(reason = "catches StackOverflowError")
+    public RegExp(String s, int syntax_flags) {
+        try {
+            wrapped = new org.apache.lucene.util.automaton.RegExp(s, syntax_flags);
+        } catch (StackOverflowError e) {
+            throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + s);
+        }
+    }
+
+    @SuppressForbidden(reason = "catches StackOverflowError")
+    public RegExp(String s, int syntax_flags, int match_flags) {
+        try {
+            wrapped = new org.apache.lucene.util.automaton.RegExp(s, syntax_flags, match_flags);
+        } catch (StackOverflowError e) {
+            throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + s);
+        }
+    }
+
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    private RegExp(org.apache.lucene.util.automaton.RegExp wrapped) {
+        this.wrapped = wrapped;
+    }
+
+    @SuppressForbidden(reason = "catches StackOverflowError")
+    public Automaton toAutomaton() {
+        try {
+            return wrapped.toAutomaton();
+        } catch (StackOverflowError e) {
+            throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + this);
+        }
+    }
+
+    @SuppressForbidden(reason = "catches StackOverflowError")
+    public Automaton toAutomaton(int determinizeWorkLimit) {
+        try {
+            return wrapped.toAutomaton(determinizeWorkLimit);
+        } catch (StackOverflowError e) {
+            throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + this);
+        }
+    }
+
+    @SuppressForbidden(reason = "this class is the trusted wrapper")
+    public String getOriginalString() {
+        return wrapped.getOriginalString();
+    }
+
+    @Override
+    public String toString() {
+        // don't call wrapped.toString() to avoid StackOverflowError
+        return getOriginalString();
+    }
+
+    /**
+     * The type of expression.
+     */
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    public org.apache.lucene.util.automaton.RegExp.Kind kind() {
+        return wrapped.kind;
+    }
+
+    /**
+     * Child expressions held by a container type expression.
+     */
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    public RegExp exp1() {
+        return new RegExp(wrapped.exp1);
+    }
+
+    /**
+     * Child expressions held by a container type expression.
+     */
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    public RegExp exp2() {
+        return new RegExp(wrapped.exp2);
+    }
+
+    /**
+     * Limits for repeatable type expressions.
+     */
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    public int min() {
+        return wrapped.min;
+    }
+
+    /**
+     * String expression.
+     */
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    public String s() {
+        return wrapped.s;
+    }
+
+    /**
+     * Character expression.
+     */
+    @SuppressForbidden(reason = "we are the trusted wrapper")
+    public int c() {
+        return wrapped.c;
+    }
+}

+ 1 - 1
server/src/main/java/org/elasticsearch/indices/AssociatedIndexDescriptor.java

@@ -10,8 +10,8 @@ package org.elasticsearch.indices;
 
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.cluster.metadata.Metadata;
+import org.elasticsearch.common.lucene.RegExp;
 
 import java.util.List;
 import java.util.Objects;

+ 1 - 1
server/src/main/java/org/elasticsearch/indices/SystemIndexDescriptor.java

@@ -11,11 +11,11 @@ package org.elasticsearch.indices;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.metadata.Metadata;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.common.xcontent.XContentHelper;

+ 1 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java

@@ -19,13 +19,13 @@ import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
 import org.apache.lucene.util.hppc.BitMixer;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.DocValueFormat;

+ 1 - 1
server/src/main/java/org/elasticsearch/search/runtime/StringScriptFieldRegexpQuery.java

@@ -9,7 +9,7 @@
 package org.elasticsearch.search.runtime;
 
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
-import org.apache.lucene.util.automaton.RegExp;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.StringFieldScript;
 

+ 10 - 0
server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java

@@ -8,6 +8,8 @@
 
 package org.elasticsearch.search.aggregations.support;
 
+import joptsimple.internal.Strings;
+
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
@@ -30,6 +32,8 @@ import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import static org.hamcrest.Matchers.equalTo;
+
 public class IncludeExcludeTests extends ESTestCase {
     public void testEmptyTermsWithOrds() throws IOException {
         IncludeExclude inexcl = new IncludeExclude(null, null, new TreeSet<>(Set.of(new BytesRef("foo"))), null);
@@ -356,4 +360,10 @@ public class IncludeExcludeTests extends ESTestCase {
         expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(regex, null, values, null));
         expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(null, regex, null, values));
     }
+
+    public void testLongIncludeExclude() {
+        String longString = Strings.repeat('a', 100000);
+        IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(longString, null, null, null));
+        assertThat(iae.getMessage(), equalTo("failed to parse regexp due to stack overflow: " + longString));
+    }
 }

+ 1 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java

@@ -11,9 +11,9 @@ import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.common.cache.Cache;
 import org.elasticsearch.common.cache.CacheBuilder;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.set.Sets;

+ 1 - 1
x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java

@@ -18,9 +18,9 @@ import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
-import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.common.geo.ShapeRelation;
 import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.time.DateMathParser;
 import org.elasticsearch.common.unit.Fuzziness;

+ 1 - 1
x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/predicate/regex/RLikePattern.java

@@ -7,7 +7,7 @@
 package org.elasticsearch.xpack.ql.expression.predicate.regex;
 
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.RegExp;
+import org.elasticsearch.common.lucene.RegExp;
 
 public class RLikePattern extends AbstractStringPattern {
 

+ 14 - 15
x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java

@@ -38,13 +38,12 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
-import org.apache.lucene.util.automaton.RegExp.Kind;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.geo.ShapeRelation;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.lucene.RegExp;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
 import org.elasticsearch.common.time.DateMathParser;
 import org.elasticsearch.common.unit.Fuzziness;
@@ -392,7 +391,7 @@ public class WildcardFieldMapper extends FieldMapper {
         // * Anything else is a concrete query that should be run on the ngram index.
         public static Query toApproximationQuery(RegExp r) throws IllegalArgumentException {
             Query result = null;
-            switch (r.kind) {
+            switch (r.kind()) {
                 case REGEXP_UNION:
                     result = createUnionQuery(r);
                     break;
@@ -400,11 +399,11 @@ public class WildcardFieldMapper extends FieldMapper {
                     result = createConcatenationQuery(r);
                     break;
                 case REGEXP_STRING:
-                    String normalizedString = toLowerCase(r.s);
+                    String normalizedString = toLowerCase(r.s());
                     result = new TermQuery(new Term("", normalizedString));
                     break;
                 case REGEXP_CHAR:
-                    String cs = Character.toString(r.c);
+                    String cs = Character.toString(r.c());
                     String normalizedChar = toLowerCase(cs);
                     result = new TermQuery(new Term("", normalizedChar));
                     break;
@@ -415,8 +414,8 @@ public class WildcardFieldMapper extends FieldMapper {
 
                 case REGEXP_REPEAT_MIN:
                 case REGEXP_REPEAT_MINMAX:
-                    if (r.min > 0) {
-                        result = toApproximationQuery(r.exp1);
+                    if (r.min() > 0) {
+                        result = toApproximationQuery(r.exp1());
                         if (result instanceof TermQuery) {
                             // Wrap the repeating expression so that it is not concatenated by a parent which concatenates
                             // plain TermQuery objects together. Boolean queries are interpreted as a black box and not
@@ -454,8 +453,8 @@ public class WildcardFieldMapper extends FieldMapper {
         private static Query createConcatenationQuery(RegExp r) {
             // Create ANDs of expressions plus collapse consecutive TermQuerys into single longer ones
             ArrayList<Query> queries = new ArrayList<>();
-            findLeaves(r.exp1, Kind.REGEXP_CONCATENATION, queries);
-            findLeaves(r.exp2, Kind.REGEXP_CONCATENATION, queries);
+            findLeaves(r.exp1(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_CONCATENATION, queries);
+            findLeaves(r.exp2(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_CONCATENATION, queries);
             BooleanQuery.Builder bAnd = new BooleanQuery.Builder();
             StringBuilder sequence = new StringBuilder();
             for (Query query : queries) {
@@ -484,8 +483,8 @@ public class WildcardFieldMapper extends FieldMapper {
         private static Query createUnionQuery(RegExp r) {
             // Create an OR of clauses
             ArrayList<Query> queries = new ArrayList<>();
-            findLeaves(r.exp1, Kind.REGEXP_UNION, queries);
-            findLeaves(r.exp2, Kind.REGEXP_UNION, queries);
+            findLeaves(r.exp1(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries);
+            findLeaves(r.exp2(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries);
             BooleanQuery.Builder bOr = new BooleanQuery.Builder();
             HashSet<Query> uniqueClauses = new HashSet<>();
             for (Query query : queries) {
@@ -508,10 +507,10 @@ public class WildcardFieldMapper extends FieldMapper {
             return new MatchAllDocsQuery();
         }
 
-        private static void findLeaves(RegExp exp, Kind kind, List<Query> queries) {
-            if (exp.kind == kind) {
-                findLeaves(exp.exp1, kind, queries);
-                findLeaves(exp.exp2, kind, queries);
+        private static void findLeaves(RegExp exp, org.apache.lucene.util.automaton.RegExp.Kind kind, List<Query> queries) {
+            if (exp.kind() == kind) {
+                findLeaves(exp.exp1(), kind, queries);
+                findLeaves(exp.exp2(), kind, queries);
             } else {
                 queries.add(toApproximationQuery(exp));
             }