Browse Source

[search] Limit the size of the result window

Requesting a million hits, or page 100,000 is always a bad idea, but users
may not be aware of this. This adds a per-index limit on the maximum size +
from that can be requested which defaults to 10,000.

This should not interfere with deep-scrolling.

Closes #9311
Nik Everett 10 years ago
parent
commit
e4981968ad

+ 2 - 0
core/src/main/java/org/elasticsearch/cluster/ClusterModule.java

@@ -90,6 +90,7 @@ import org.elasticsearch.indices.recovery.RecoverySettings;
 import org.elasticsearch.indices.store.IndicesStore;
 import org.elasticsearch.indices.ttl.IndicesTTLService;
 import org.elasticsearch.search.SearchService;
+import org.elasticsearch.search.internal.DefaultSearchContext;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 
@@ -275,6 +276,7 @@ public class ClusterModule extends AbstractModule {
         registerIndexDynamicSetting(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED, Validator.BOOLEAN);
         registerIndexDynamicSetting(IndicesRequestCache.DEPRECATED_INDEX_CACHE_REQUEST_ENABLED, Validator.BOOLEAN);
         registerIndexDynamicSetting(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, Validator.TIME);
+        registerIndexDynamicSetting(DefaultSearchContext.MAX_RESULT_WINDOW, Validator.POSITIVE_INTEGER);
     }
 
     public void registerIndexDynamicSetting(String setting, Validator validator) {

+ 28 - 6
core/src/main/java/org/elasticsearch/search/internal/DefaultSearchContext.java

@@ -77,6 +77,20 @@ import java.util.Map;
  *
  */
 public class DefaultSearchContext extends SearchContext {
+    /**
+     * Index setting describing the maximum value of from + size on a query.
+     */
+    public static final String MAX_RESULT_WINDOW = "index.max_result_window";
+    public static class Defaults {
+        /**
+         * Default maximum value of from + size on a query. 10,000 was chosen as
+         * a conservative default as it is sure to not cause trouble. Users can
+         * certainly profile their cluster and decide to set it to 100,000
+         * safely. 1,000,000 is probably way to high for any cluster to set
+         * safely.
+         */
+        public static final int MAX_RESULT_WINDOW = 10000;
+    }
 
     private final long id;
     private final ShardSearchRequest request;
@@ -168,12 +182,20 @@ public class DefaultSearchContext extends SearchContext {
      */
     @Override
     public void preProcess() {
-        if (!(from() == -1 && size() == -1)) {
-            // from and size have been set.
-            int numHits = from() + size();
-            if (numHits < 0) {
-                String msg = "Result window is too large, from + size must be less than or equal to: [" + Integer.MAX_VALUE + "] but was [" + (((long) from()) + ((long) size())) + "]";
-                throw new QueryPhaseExecutionException(this, msg);
+        if (scrollContext == null) {
+            long from = from() == -1 ? 0 : from();
+            long size = size() == -1 ? 10 : size();
+            long resultWindow = from + size;
+            // We need settingsService's view of the settings because its dynamic.
+            // indexService's isn't.
+            int maxResultWindow = indexService.settingsService().getSettings().getAsInt(MAX_RESULT_WINDOW, Defaults.MAX_RESULT_WINDOW);
+
+            if (resultWindow > maxResultWindow) {
+                throw new QueryPhaseExecutionException(this,
+                        "Result window is too large, from + size must be less than or equal to: [" + maxResultWindow + "] but was ["
+                                + resultWindow + "]. See the scroll api for a more efficient way to request large data sets. "
+                                + "This limit can be set by changing the [" + DefaultSearchContext.MAX_RESULT_WINDOW
+                                + "] index level parameter.");
             }
         }
 

+ 17 - 22
core/src/test/java/org/elasticsearch/search/scroll/SearchScrollIT.java

@@ -74,27 +74,27 @@ public class SearchScrollIT extends ESIntegTestCase {
                 .execute().actionGet();
         try {
             long counter = 0;
-    
+
             assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
             assertThat(searchResponse.getHits().hits().length, equalTo(35));
             for (SearchHit hit : searchResponse.getHits()) {
                 assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
             }
-    
+
             searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
                     .setScroll(TimeValue.timeValueMinutes(2))
                     .execute().actionGet();
-    
+
             assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
             assertThat(searchResponse.getHits().hits().length, equalTo(35));
             for (SearchHit hit : searchResponse.getHits()) {
                 assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
             }
-    
+
             searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
                     .setScroll(TimeValue.timeValueMinutes(2))
                     .execute().actionGet();
-    
+
             assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
             assertThat(searchResponse.getHits().hits().length, equalTo(30));
             for (SearchHit hit : searchResponse.getHits()) {
@@ -133,47 +133,47 @@ public class SearchScrollIT extends ESIntegTestCase {
                 .execute().actionGet();
         try {
             long counter = 0;
-    
+
             assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
             assertThat(searchResponse.getHits().hits().length, equalTo(3));
             for (SearchHit hit : searchResponse.getHits()) {
                 assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
             }
-    
+
             for (int i = 0; i < 32; i++) {
                 searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
                         .setScroll(TimeValue.timeValueMinutes(2))
                         .execute().actionGet();
-    
+
                 assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
                 assertThat(searchResponse.getHits().hits().length, equalTo(3));
                 for (SearchHit hit : searchResponse.getHits()) {
                     assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
                 }
             }
-    
+
             // and now, the last one is one
             searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
                     .setScroll(TimeValue.timeValueMinutes(2))
                     .execute().actionGet();
-    
+
             assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
             assertThat(searchResponse.getHits().hits().length, equalTo(1));
             for (SearchHit hit : searchResponse.getHits()) {
                 assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
             }
-    
+
             // a the last is zero
             searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
                     .setScroll(TimeValue.timeValueMinutes(2))
                     .execute().actionGet();
-    
+
             assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
             assertThat(searchResponse.getHits().hits().length, equalTo(0));
             for (SearchHit hit : searchResponse.getHits()) {
                 assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
             }
-            
+
         } finally {
             clearScroll(searchResponse.getScrollId());
         }
@@ -212,7 +212,7 @@ public class SearchScrollIT extends ESIntegTestCase {
                 }
                 searchResponse = client().prepareSearchScroll(searchResponse.getScrollId()).setScroll(TimeValue.timeValueMinutes(2)).execute().actionGet();
             } while (searchResponse.getHits().hits().length > 0);
-    
+
             client().admin().indices().prepareRefresh().execute().actionGet();
             assertThat(client().prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(), equalTo(500l));
             assertThat(client().prepareCount().setQuery(termQuery("message", "test")).execute().actionGet().getCount(), equalTo(0l));
@@ -410,9 +410,7 @@ public class SearchScrollIT extends ESIntegTestCase {
         assertThrows(internalCluster().transportClient().prepareSearchScroll(searchResponse2.getScrollId()).setScroll(TimeValue.timeValueMinutes(2)), RestStatus.NOT_FOUND);
     }
 
-    @Test
-    // https://github.com/elasticsearch/elasticsearch/issues/4156
-    public void testDeepPaginationWithOneDocIndexAndDoNotBlowUp() throws Exception {
+    public void testDeepScrollingDoesNotBlowUp() throws Exception {
         client().prepareIndex("index", "type", "1")
                 .setSource("field", "value")
                 .setRefresh(true)
@@ -422,11 +420,8 @@ public class SearchScrollIT extends ESIntegTestCase {
             SearchRequestBuilder builder = client().prepareSearch("index")
                     .setSearchType(searchType)
                     .setQuery(QueryBuilders.matchAllQuery())
-                    .setSize(Integer.MAX_VALUE);
-
-            if (randomBoolean()) {
-                builder.setScroll("1m");
-            }
+                    .setSize(Integer.MAX_VALUE)
+                    .setScroll("1m");
 
             SearchResponse response = builder.execute().actionGet();
             try {

+ 76 - 20
core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java

@@ -22,11 +22,13 @@ package org.elasticsearch.search.simple;
 import org.apache.lucene.util.Constants;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
+import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.internal.DefaultSearchContext;
 import org.elasticsearch.test.ESIntegTestCase;
-import org.junit.Test;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -38,12 +40,12 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
 import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
 import static org.hamcrest.Matchers.containsString;
 
 public class SimpleSearchIT extends ESIntegTestCase {
-
-    @Test
     public void testSearchNullIndex() {
         try {
             client().prepareSearch((String) null).setQuery(QueryBuilders.termQuery("_id", "XXX1")).execute().actionGet();
@@ -60,7 +62,6 @@ public class SimpleSearchIT extends ESIntegTestCase {
         }
     }
 
-    @Test
     public void testSearchRandomPreference() throws InterruptedException, ExecutionException {
         createIndex("test");
         indexRandom(true, client().prepareIndex("test", "type", "1").setSource("field", "value"),
@@ -84,8 +85,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
         }
     }
 
-    @Test
-    public void simpleIpTests() throws Exception {
+    public void testSimpleIp() throws Exception {
         createIndex("test");
 
         client().admin().indices().preparePutMapping("test").setType("type1")
@@ -104,8 +104,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
         assertHitCount(search, 1l);
     }
 
-    @Test
-    public void simpleIdTests() {
+    public void testSimpleId() {
         createIndex("test");
 
         client().prepareIndex("test", "type", "XXX1").setSource("field", "value").setRefresh(true).execute().actionGet();
@@ -124,8 +123,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
         assertHitCount(searchResponse, 1l);
     }
 
-    @Test
-    public void simpleDateRangeTests() throws Exception {
+    public void testSimpleDateRange() throws Exception {
         createIndex("test");
         client().prepareIndex("test", "type1", "1").setSource("field", "2010-01-05T02:00").execute().actionGet();
         client().prepareIndex("test", "type1", "2").setSource("field", "2010-01-06T02:00").execute().actionGet();
@@ -150,9 +148,8 @@ public class SimpleSearchIT extends ESIntegTestCase {
         searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryStringQuery("field:[2010-01-03||+2d TO 2010-01-04||+2d/d]")).execute().actionGet();
         assertHitCount(searchResponse, 2l);
     }
-    
-    @Test
-    public void localeDependentDateTests() throws Exception {
+
+    public void testLocaleDependentDate() throws Exception {
         assumeFalse("Locals are buggy on JDK9EA", Constants.JRE_IS_MINIMUM_JAVA9 && systemPropertyAsBoolean("tests.security.manager", false));
         assertAcked(prepareCreate("test")
                 .addMapping("type1",
@@ -189,8 +186,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
         }
     }
 
-    @Test
-    public void simpleTerminateAfterCountTests() throws Exception {
+    public void testSimpleTerminateAfterCount() throws Exception {
         prepareCreate("test").setSettings(
                 SETTING_NUMBER_OF_SHARDS, 1,
                 SETTING_NUMBER_OF_REPLICAS, 0).get();
@@ -225,16 +221,76 @@ public class SimpleSearchIT extends ESIntegTestCase {
         assertFalse(searchResponse.isTerminatedEarly());
     }
 
-    @Test
-    public void testInsaneFrom() throws Exception {
+    public void testInsaneFromAndSize() throws Exception {
+        createIndex("idx");
+        indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
+
+        assertWindowFails(client().prepareSearch("idx").setFrom(Integer.MAX_VALUE));
+        assertWindowFails(client().prepareSearch("idx").setSize(Integer.MAX_VALUE));
+    }
+
+    public void testTooLargeFromAndSize() throws Exception {
         createIndex("idx");
         indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
 
+        assertWindowFails(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
+        assertWindowFails(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1));
+        assertWindowFails(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
+                .setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
+    }
+
+    public void testLargeFromAndSizeSucceeds() throws Exception {
+        createIndex("idx");
+        indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
+
+        assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW - 10).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW / 2)
+                .setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW / 2 - 1).get(), 1);
+    }
+
+    public void testTooLargeFromAndSizeOkBySetting() throws Exception {
+        prepareCreate("idx").setSettings(DefaultSearchContext.MAX_RESULT_WINDOW, DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 2).get();
+        indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
+
+        assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
+                .setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
+    }
+
+    public void testTooLargeFromAndSizeOkByDynamicSetting() throws Exception {
+        createIndex("idx");
+        assertAcked(client().admin().indices().prepareUpdateSettings("idx")
+                .setSettings(
+                        Settings.builder().put(DefaultSearchContext.MAX_RESULT_WINDOW, DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 2))
+                .get());
+        indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
+
+        assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
+                .setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
+    }
+
+    public void testTooLargeFromAndSizeBackwardsCompatibilityRecommendation() throws Exception {
+        prepareCreate("idx").setSettings(DefaultSearchContext.MAX_RESULT_WINDOW, Integer.MAX_VALUE).get();
+        indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
+
+        assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
+        assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10)
+                .setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
+    }
+
+    private void assertWindowFails(SearchRequestBuilder search) {
         try {
-            client().prepareSearch("idx").setFrom(Integer.MAX_VALUE).get();
+            search.get();
             fail();
         } catch (SearchPhaseExecutionException e) {
-            assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to:"));
+            assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to: ["
+                    + DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
+            assertThat(e.toString(), containsString("See the scroll api for a more efficient way to request large data sets"));
         }
     }
 }

+ 8 - 2
docs/reference/index-modules.asciidoc

@@ -98,6 +98,14 @@ specific index module:
     index visible to search.  Defaults to `1s`.  Can be set to `-1` to disable
     refresh.
 
+`index.max_result_window`::
+
+    The maximum value of `from + size` for searches to this index. Defaults to
+    `10000`. Search requests take heap memory and time proportional to
+    `from + size` and this limits that memory. See
+    {ref}/search-request-scroll.html[Scroll] for a more efficient alternative
+    to raising this.
+
 `index.blocks.read_only`::
 
     Set to `true` to make the index and index metadata read only, `false` to
@@ -184,5 +192,3 @@ include::index-modules/slowlog.asciidoc[]
 include::index-modules/store.asciidoc[]
 
 include::index-modules/translog.asciidoc[]
-
-

+ 10 - 0
docs/reference/migration/migrate_2_1.asciidoc

@@ -26,6 +26,16 @@ Scroll requests sorted by `_doc` have been optimized to more efficiently resume
 from where the previous request stopped, so this will have the same performance
 characteristics as the former `scan` search type.
 
+==== from + size limits
+
+Elasticsearch will now return an error message if a query's `from` + `size` is
+more than the `index.max_result_window` parameter. This parameter defaults to
+10,000 which is safe for almost all clusters. Values higher than can consume
+significant chunks of heap memory per search and per shard executing the
+search. It's safest to leave this value as it is an use the scroll api for any
+deep scrolling but this setting is dynamic so it can raised or lowered as
+needed.
+
 === Update changes
 
 ==== Updates now `detect_noop` by default

+ 5 - 0
docs/reference/search/request/from-size.asciidoc

@@ -19,3 +19,8 @@ defaults to `10`.
     }
 }
 --------------------------------------------------
+
+Note that `from` + `size` can not be more than the `index.max_result_window`
+index setting which defaults to 10,000. See the
+{ref}/search-request-scroll.html[Scroll] api for more efficient ways to do deep
+scrolling.