Browse Source

Introduce limit to the number of terms in Terms Query (#27968)

- Introduce index level settings to control the maximum number of terms
    that can be used in a Terms Query
- Throw an error if a request exceeds this max number

Closes #18829
Mayya Sharipova 7 years ago
parent
commit
dcde895f49

+ 1 - 0
core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

@@ -119,6 +119,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
         IndexSettings.MAX_RESCORE_WINDOW_SETTING,
         IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
         IndexSettings.MAX_ANALYZED_OFFSET_SETTING,
+        IndexSettings.MAX_TERMS_COUNT_SETTING,
         IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
         IndexSettings.DEFAULT_FIELD_SETTING,
         IndexSettings.QUERY_STRING_LENIENT_SETTING,

+ 19 - 0
core/src/main/java/org/elasticsearch/index/IndexSettings.java

@@ -129,6 +129,15 @@ public final class IndexSettings {
     public static final Setting<Integer> MAX_ANALYZED_OFFSET_SETTING =
         Setting.intSetting("index.highlight.max_analyzed_offset", 10000, 1, Property.Dynamic, Property.IndexScope);
 
+
+    /**
+     * Index setting describing the maximum number of terms that can be used in a Terms Query.
+     * The default maximum of 65536 terms is defensive, as extra processing and memory are involved
+     * for each additional term, and a large number of terms degrades the cluster performance.
+     */
+    public static final Setting<Integer> MAX_TERMS_COUNT_SETTING =
+        Setting.intSetting("index.max_terms_count", 65536, 1, Property.Dynamic, Property.IndexScope);
+
     /**
      * Index setting describing for NGramTokenizer and NGramTokenFilter
      * the maximum difference between
@@ -287,6 +296,7 @@ public final class IndexSettings {
     private volatile boolean TTLPurgeDisabled;
     private volatile TimeValue searchIdleAfter;
     private volatile int maxAnalyzedOffset;
+    private volatile int maxTermsCount;
 
     /**
      * The maximum number of refresh listeners allows on this shard.
@@ -397,6 +407,7 @@ public final class IndexSettings {
         maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
         maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
         maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
+        maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
         this.mergePolicyConfig = new MergePolicyConfig(logger, this);
         this.indexSortConfig = new IndexSortConfig(this);
         searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
@@ -440,6 +451,7 @@ public final class IndexSettings {
         scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval);
         scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners);
         scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset);
+        scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount);
         scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
         scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
         scopedSettings.addSettingsUpdateConsumer(INDEX_SEARCH_IDLE_AFTER, this::setSearchIdleAfter);
@@ -734,6 +746,13 @@ public final class IndexSettings {
 
     private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) { this.maxAnalyzedOffset = maxAnalyzedOffset; }
 
+    /**
+     * Returns the maximum number of terms that can be used in a Terms Query request, as set by {@link #MAX_TERMS_COUNT_SETTING}
+     */
+    public int getMaxTermsCount() { return this.maxTermsCount; }
+
+    private void setMaxTermsCount(int maxTermsCount) { this.maxTermsCount = maxTermsCount; } // update consumer for MAX_TERMS_COUNT_SETTING
+
     /**
      * Returns the maximum number of allowed script_fields to retrieve in a search request
      */

+ 8 - 0
core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java

@@ -39,6 +39,7 @@ import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.indices.TermsLookup;
 
@@ -416,6 +417,13 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
         if (values == null || values.isEmpty()) {
             return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
         }
+        int maxTermsCount = context.getIndexSettings().getMaxTermsCount(); // per-index cap on the number of terms
+        if (values.size() > maxTermsCount) {
+            throw new IllegalArgumentException(
+                "The number of terms ["  + values.size() +  "] used in the Terms Query request has exceeded " +
+                    "the allowed maximum of [" + maxTermsCount + "]. " + "This maximum can be set by changing the [" +
+                    IndexSettings.MAX_TERMS_COUNT_SETTING.getKey() + "] index level setting.");
+        }
         MappedFieldType fieldType = context.fieldMapper(fieldName);
 
         if (fieldType != null) {

+ 5 - 0
docs/reference/index-modules.asciidoc

@@ -204,6 +204,11 @@ specific index module:
      This setting is only applicable when highlighting is requested on a text that was indexed without offsets or term vectors.
      Defaults to `10000`.
 
+ `index.max_terms_count`::
+
+    The maximum number of terms that can be used in a Terms Query.
+    Defaults to `65536`.
+
 
 [float]
 === Settings in other index modules

+ 9 - 0
docs/reference/migration/migrate_7_0/search.asciidoc

@@ -49,3 +49,12 @@ removed.
 
 * 	`levenstein` - replaced by `levenshtein`
 * 	`jarowinkler` - replaced by `jaro_winkler`
+
+
+==== Limiting the number of terms that can be used in a Terms Query request
+
+Executing a Terms Query with a lot of terms may degrade the cluster performance,
+as each additional term demands extra processing and memory.
+To safeguard against this, the maximum number of terms that can be used in a
+Terms Query request has been limited to 65536. This default maximum can be changed
+for a particular index with the index setting `index.max_terms_count`.

+ 8 - 0
docs/reference/query-dsl/terms-query.asciidoc

@@ -57,6 +57,14 @@ across all nodes if the "reference" terms data is not large. The lookup
 terms filter will prefer to execute the get request on a local node if
 possible, reducing the need for networking.
 
+[WARNING]
+Executing a Terms Query request with a lot of terms can be quite slow,
+as each additional term demands extra processing and memory.
+To safeguard against this, the maximum number of terms that can be used
+in a Terms Query, either directly or through lookup, has been limited to `65536`.
+This default maximum can be changed for a particular index with the index setting
+ `index.max_terms_count`.
+
 [float]
 ===== Terms lookup twitter example
 At first we index the information for user with id 2, specifically, its

+ 55 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search/170_terms_query.yml

@@ -0,0 +1,55 @@
+---
+"Terms Query with No.of terms exceeding index.max_terms_count should FAIL":
+  - skip:
+      version: " - 6.99.99"
+      reason: index.max_terms_count setting has been added in 7.0.0
+  - do:
+      indices.create:
+          index: test_index
+          body:
+              settings:
+                  number_of_shards: 1
+                  index.max_terms_count: 2
+              mappings:
+                  test_type:
+                      properties:
+                          user:
+                              type: keyword
+                          followers:
+                              type: keyword
+  - do:
+      bulk:
+          refresh: true
+          body:
+              - '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u1"}}'
+              - '{"user": "u1", "followers": ["u2", "u3"]}'
+              - '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u2"}}'
+              - '{"user": "u2", "followers": ["u1", "u3", "u4"]}'
+              - '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u3"}}'
+              - '{"user": "u3", "followers": ["u1"]}'
+              - '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u4"}}'
+              - '{"user": "u4", "followers": ["u3"]}'
+
+  - do:
+      search:
+          index: test_index
+          body: {"query" : {"terms" : {"user" : ["u1", "u2"]}}}
+  - match: { hits.total: 2 }
+
+  - do:
+      catch: bad_request
+      search:
+          index: test_index
+          body: {"query" : {"terms" : {"user" : ["u1", "u2", "u3"]}}}
+
+  - do:
+      search:
+          index: test_index
+          body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u1", "path" : "followers"}}}}
+  - match: { hits.total: 2 }
+
+  - do:
+      catch: bad_request
+      search:
+          index: test_index
+          body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u2", "path" : "followers"}}}}