Browse Source

Reject regex search if regex string is too long (#28542)

* Reject regex search if regex string is too long (#28344)

* Add docs

* Introduce index level setting `index.max_regex_length`
 to control the maximum length of the regular expression

Closes #28344
Ke Li 7 years ago
parent
commit
a77273fc01

+ 4 - 0
docs/reference/index-modules.asciidoc

@@ -209,6 +209,10 @@ specific index module:
     The maximum number of terms that can be used in Terms Query.
     Defaults to `65536`.
 
+ `index.max_regex_length`::
+
+    The maximum length of regex that can be used in Regexp Query.
+    Defaults to `1000`.
 
 [float]
 === Settings in other index modules

+ 8 - 0
docs/reference/migration/migrate_7_0/search.asciidoc

@@ -58,3 +58,11 @@ as each additional term demands extra processing and memory.
 To safeguard against this, the maximum number of terms that can be used in a
 Terms Query request has been limited to 65536. This default maximum can be changed
 for a particular index with the index setting `index.max_terms_count`.
+
+
+==== Limiting the length of regex that can be used in a Regexp Query request
+
+Executing a Regexp Query with a long regex string may degrade search performance.
+To safeguard against this, the maximum length of regex that can be used in a
+Regexp Query request has been limited to 1000. This default maximum can be changed
+for a particular index with the index setting `index.max_regex_length`.

+ 4 - 0
docs/reference/query-dsl/regexp-query.asciidoc

@@ -91,4 +91,8 @@ GET /_search
 --------------------------------------------------
 // CONSOLE
 
+NOTE: By default the maximum length of regex string allowed in a Regexp Query 
+is limited to 1000. You can update the `index.max_regex_length` index setting 
+to bypass this limit.
+
 include::regexp-syntax.asciidoc[]

+ 28 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search/30_limits.yml

@@ -85,3 +85,31 @@ setup:
             "test1" : { "script" : { "lang": "painless", "source": "1;" }}
             "test2" : { "script" : { "lang": "painless", "source": "1;" }}
             "test3" : { "script" : { "lang": "painless", "source": "1;" }}
+
+---
+"Regexp length limit":
+  - skip:
+      version: " - 6.99.99"
+      reason: "The regex length limit was introduced in 7.0.0"
+
+  - do:
+      catch: /The length of regex \[1110\] used in the Regexp Query request has exceeded the allowed maximum of \[1000\]\. This maximum can be set by changing the \[index.max_regex_length\] index level setting\./
+      search:
+        index: test_1
+        body:
+          query:
+            regexp: 
+              foo: "^\\[\\]\\r\\\\]|\\\\.)*\\](?:(?:\\r\\n)?[\\t])*)(?:\\.(?:(?:\\r\\n)?[\\t])*(?:[^()<>@,;:\\\\\" |
+                   .\\[\\]\\000-\\031]+(?:(?:(?:\\r\\n)?[\\t])+|\\Z|(?=[\\[\"()<>@,;:\\\\\".\\[\\]]))|\\[([^\\[\\ |
+                   ]\\r\\\\]|\\\\.)*\\](?:(?:\\r\\n)?[\\t])*))*(?:,@(?:(?:\\r\\n)?[ \\t])*(?:[^()<>@,;:\\\\\".\\  |
+                   [\\]\\000-\\031]+(?:(?:(?:\\r\\n)?[\\t])+|\\Z|(?=[\\[\"()<>@,;:\\\\\".\\[\\]]))|\\[([^\\[\\]\\ |
+                   r\\\\]|\\\\.)*\\](?:(?:\\r\\n)?[\\t])*)(?:\\.(?:(?:\\r\\n)?[\\t])*(?:[^()<>@,;:\\\\\".\\[\\]   |
+                   \\000-\\031]+(?:(?:(?:\\r\\n)?[\\t])+|\\Z|(?=[\\[\"()<>@,;:\\\\\".\\[\\]]))|\\[([^\\[\\]\\r\\\\] |
+                   |\\\\.)*\\](?:(?:\\r\\n)?[\\t])*))*)*:(?:(?:\\r\\n)?[\\t])*)?(?:[^()<>@,;:\\\\\".\\[\\] \\0 |
+                   00-\\031]+(?:(?:(?:\\r\\n)?[\\t])+|\\Z|(?=[\\[\"()<>@,;:\\\\\".\\[\\]]))|\"(?:[^\\\"\\r\\\\]|\\\\ |
+                   .|(?:(?:\\r\\n)?[\\t]))*\"(?:(?:\\r\\n)?[\\t])*)(?:\\.(?:(?:\\r\\n)?[ \\t])*(?:[^()<>@, |
+                   ;:\\\\\".\\[\\]\\000-\\031]+(?:(?:(?:\\r\\n)?[\\t])+|\\Z|(?=[\\[\"()<>@,;:\\\\\".\\[\\]]))|\"(? |
+                   :[^\\\"\\r\\\\]|\\\\.|(?:(?:\\r\\n)?[\\t]))*\"(?:(?:\\r\\n)?[\\t])*))*@(?:(?:\\r\\n)?[ \\t])* |
+                   ]))|\\[([^\\[\\]\\r\\\\]|\\\\.)*\\](?:(?:\\r\\n)?[\\t])*))*\\>(?:(?:\\r\\n)?[ \\t])*)(?:,\\s*( |
+                   \".\\[\\]]))|\"(?:[^\\\"\\r\\\\]|\\\\.|(?:(?:\\r\\n)?[\\t]))*\"(?:(?:\\r\\n)?[ \\t])*)(?:\\.(?:( |
+                   \\[\"()<>@,;:\\\\\".\\[\\]]))|\"(?:[^\\\"\\r\\\\]|\\\\.|(?:(?:\\r\\n)?[\\t]))*\"(?:(?:\\r\\n)?[\\t"

+ 1 - 0
server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

@@ -127,6 +127,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
         IndexSettings.INDEX_CHECK_ON_STARTUP,
         IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD,
         IndexSettings.MAX_SLICES_PER_SCROLL,
+        IndexSettings.MAX_REGEX_LENGTH_SETTING,
         ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING,
         IndexSettings.INDEX_GC_DELETES_SETTING,
         IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING,

+ 25 - 0
server/src/main/java/org/elasticsearch/index/IndexSettings.java

@@ -250,6 +250,12 @@ public final class IndexSettings {
     public static final Setting<Integer> MAX_SLICES_PER_SCROLL = Setting.intSetting("index.max_slices_per_scroll",
         1024, 1, Property.Dynamic, Property.IndexScope);
 
+    /**
+     * The maximum length of regex string allowed in a regexp query.
+     */
+    public static final Setting<Integer> MAX_REGEX_LENGTH_SETTING = Setting.intSetting("index.max_regex_length",
+        1000, 1, Property.Dynamic, Property.IndexScope);
+
     public static final String INDEX_MAPPING_SINGLE_TYPE_SETTING_KEY = "index.mapping.single_type";
     private static final Setting<Boolean> INDEX_MAPPING_SINGLE_TYPE_SETTING; // private - should not be registered
     static {
@@ -313,6 +319,12 @@ public final class IndexSettings {
      * The maximum number of slices allowed in a scroll request.
      */
     private volatile int maxSlicesPerScroll;
+
+    /**
+     * The maximum length of regex string allowed in a regexp query.
+     */
+    private volatile int maxRegexLength;
+
     /**
      * Whether the index is required to have at most one type.
      */
@@ -415,6 +427,7 @@ public final class IndexSettings {
         maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
         maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
         maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
+        maxRegexLength = scopedSettings.get(MAX_REGEX_LENGTH_SETTING);
         this.mergePolicyConfig = new MergePolicyConfig(logger, this);
         this.indexSortConfig = new IndexSortConfig(this);
         searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
@@ -462,6 +475,7 @@ public final class IndexSettings {
         scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
         scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
         scopedSettings.addSettingsUpdateConsumer(INDEX_SEARCH_IDLE_AFTER, this::setSearchIdleAfter);
+        scopedSettings.addSettingsUpdateConsumer(MAX_REGEX_LENGTH_SETTING, this::setMaxRegexLength);
     }
 
     private void setSearchIdleAfter(TimeValue searchIdleAfter) { this.searchIdleAfter = searchIdleAfter; }
@@ -823,6 +837,17 @@ public final class IndexSettings {
         this.maxSlicesPerScroll = value;
     }
 
+    /**
+     * The maximum length of regex string allowed in a regexp query.
+     */
+    public int getMaxRegexLength() {
+        return maxRegexLength;
+    }
+
+    private void setMaxRegexLength(int maxRegexLength) {
+        this.maxRegexLength = maxRegexLength;
+    }
+
     /**
      * Returns the index sort config that should be used for this index.
      */

+ 9 - 0
server/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java

@@ -33,6 +33,7 @@ import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.support.QueryParsers;
 
@@ -239,6 +240,14 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
 
     @Override
     protected Query doToQuery(QueryShardContext context) throws QueryShardException, IOException {
+        final int maxAllowedRegexLength = context.getIndexSettings().getMaxRegexLength();
+        if (value.length() > maxAllowedRegexLength) {
+            throw new IllegalArgumentException(
+                "The length of regex ["  + value.length() +  "] used in the Regexp Query request has exceeded " +
+                    "the allowed maximum of [" + maxAllowedRegexLength + "]. " +
+                    "This maximum can be set by changing the [" +
+                    IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey() + "] index level setting.");
+        }
         MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewrite, null, LoggingDeprecationHandler.INSTANCE);
 
         Query query = null;

+ 20 - 0
server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java

@@ -366,6 +366,26 @@ public class IndexSettingsTests extends ESTestCase {
                 settings.getMaxAdjacencyMatrixFilters());
     }
 
+    public void testMaxRegexLengthSetting() {
+        IndexMetaData metaData = newIndexMeta("index", Settings.builder()
+            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put(IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey(), 99)
+            .build());
+        IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY);
+        assertEquals(99, settings.getMaxRegexLength());
+        settings.updateIndexMetaData(newIndexMeta("index",
+            Settings.builder().put(IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey(), 101).build()));
+        assertEquals(101, settings.getMaxRegexLength());
+        settings.updateIndexMetaData(newIndexMeta("index", Settings.EMPTY));
+        assertEquals(IndexSettings.MAX_REGEX_LENGTH_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxRegexLength());
+
+        metaData = newIndexMeta("index", Settings.builder()
+            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+            .build());
+        settings = new IndexSettings(metaData, Settings.EMPTY);
+        assertEquals(IndexSettings.MAX_REGEX_LENGTH_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxRegexLength());
+    }
+
     public void testGCDeletesSetting() {
         TimeValue gcDeleteSetting = new TimeValue(Math.abs(randomInt()), TimeUnit.MILLISECONDS);
         IndexMetaData metaData = newIndexMeta("index", Settings.builder()

+ 18 - 5
server/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java

@@ -19,7 +19,6 @@
 
 package org.elasticsearch.search.simple;
 
-import org.apache.lucene.util.Constants;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
 import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -30,7 +29,6 @@ import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.rest.RestStatus;
-import org.elasticsearch.search.SearchContextException;
 import org.elasticsearch.search.rescore.QueryRescorerBuilder;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -39,11 +37,9 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.ExecutionException;
 
-import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsBoolean;
 import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
 import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
@@ -54,7 +50,6 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitC
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.lessThanOrEqualTo;
 
 public class SimpleSearchIT extends ESIntegTestCase {
 
@@ -426,6 +421,24 @@ public class SimpleSearchIT extends ESIntegTestCase {
         }
     }
 
+    public void testTooLongRegexInRegexpQuery() throws Exception {
+        createIndex("idx");
+        indexRandom(true, client().prepareIndex("idx", "type").setSource("{}", XContentType.JSON));
+
+        int defaultMaxRegexLength = IndexSettings.MAX_REGEX_LENGTH_SETTING.get(Settings.EMPTY);
+        StringBuilder regexp = new StringBuilder(defaultMaxRegexLength);
+        while (regexp.length() <= defaultMaxRegexLength) {
+            regexp.append("]\\r\\\\]|\\\\.)*\\](?:(?:\\r\\n)?[\\t])*))*(?:,@(?:(?:\\r\\n)?[ \\t])*(?:[^()<>@,;:\\\\\".\\");
+        }
+        SearchPhaseExecutionException e = expectThrows(SearchPhaseExecutionException.class,
+            () -> client().prepareSearch("idx").setQuery(QueryBuilders.regexpQuery("num", regexp.toString())).get());
+        assertThat(e.getRootCause().getMessage(), containsString("The length of regex [" +
+            regexp.length() + "] used in the Regexp Query request has exceeded " +
+            "the allowed maximum of [" + defaultMaxRegexLength + "]. " +
+            "This maximum can be set by changing the [" + IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey() +
+            "] index level setting."));
+    }
+
     private void assertWindowFails(SearchRequestBuilder search) {
         SearchPhaseExecutionException e = expectThrows(SearchPhaseExecutionException.class, () -> search.get());
         assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to: ["