Browse Source

Add setting to disable aggs optimization (#73620)

Sometimes our fancy "run this agg as a Query" optimizations end up
slower than running the aggregation in the old way. We know that and use
heuristics to dissable the optimization in that case. But it turns out
that the process of running the heuristics itself can be slow, depending
on the query. Worse, changing the heuristics requires an upgrade, which
means waiting. If the heurisics make a terrible choice folks need a
quick way out. This adds such a way: a cluster level setting that
contains a list of queries that are considered "too expensive" to try
and optimize. If the top level query contains any of those queries we'll
disable the "run as Query" optimization.

The default for this settings is wildcard and term-in-set queries, which
is fairly conservative. There are certainly wildcard and term-in-set
queries that the optimization works well with, but there are other queries
of that type that it works very badly with. So we're being careful.

Better, you can modify this setting in a running cluster to disable the
optimization if we find a new type of query that doesn't work well.

Closes #73426
Nik Everett 4 years ago
parent
commit
4b5aebe8b0

+ 5 - 0
benchmarks/src/main/java/org/elasticsearch/benchmark/search/aggregations/AggConstructionContentionBenchmark.java

@@ -332,6 +332,11 @@ public class AggConstructionContentionBenchmark {
             throw new UnsupportedOperationException();
         }
 
+        @Override
+        public boolean enableRewriteToFilterByFilter() {
+            return true;
+        }
+
         @Override
         public void close() {
             List<Releasable> releaseMe = new ArrayList<>(this.releaseMe);

+ 49 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/22_terms_disable_opt.yml

@@ -0,0 +1,49 @@
+setup:
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            search.aggs.rewrite_to_filter_by_filter: false
+
+---
+teardown:
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            search.aggs.rewrite_to_filter_by_filter: null
+
+---
+does not use optimization:
+  - skip:
+      version: " - 7.99.99"
+      reason: setting to disable optimization added in 8.0.0 to be backported to 7.13.2
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body: |
+          { "index": {} }
+          { "str": "sheep" }
+          { "index": {} }
+          { "str": "sheep" }
+          { "index": {} }
+          { "str": "cow" }
+          { "index": {} }
+          { "str": "pig" }
+
+  - do:
+      search:
+        index: test
+        body:
+          profile: true
+          size: 0
+          aggs:
+            str_terms:
+              terms:
+                field: str.keyword
+  - match: { aggregations.str_terms.buckets.0.key: sheep }
+  - match: { aggregations.str_terms.buckets.1.key: cow }
+  - match: { aggregations.str_terms.buckets.2.key: pig }
+  - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.description: str_terms }

+ 89 - 1
server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

@@ -10,6 +10,8 @@ package org.elasticsearch.search.profile.aggregation;
 
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.search.SearchService;
 import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
 import org.elasticsearch.search.aggregations.BucketOrder;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
@@ -30,6 +32,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import static io.github.nik9000.mapmatcher.MapMatcher.assertMap;
+import static io.github.nik9000.mapmatcher.MapMatcher.matchesMap;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.avg;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.diversifiedSampler;
@@ -598,7 +602,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
         assertSearchResponse(response);
         Map<String, ProfileShardResult> profileResults = response.getProfileResults();
         assertThat(profileResults, notNullValue());
-        assertThat(profileResults.size(), equalTo(getNumShards("idx").numPrimaries));
+        assertThat(profileResults.size(), equalTo(getNumShards("dateidx").numPrimaries));
         for (ProfileShardResult profileShardResult : profileResults.values()) {
             assertThat(profileShardResult, notNullValue());
             AggregationProfileShardResult aggProfileResults = profileShardResult.getAggregationProfileResults();
@@ -645,4 +649,88 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(queryDebug, hasEntry("query", "DocValuesFieldExistsQuery [field=date]"));
         }
     }
+
+    public void testDateHistogramFilterByFilterDisabled() throws InterruptedException, IOException {
+        assertAcked(
+            client().admin()
+                .cluster()
+                .prepareUpdateSettings()
+                .setPersistentSettings(Settings.builder().put(SearchService.ENABLE_REWRITE_AGGS_TO_FILTER_BY_FILTER.getKey(), false))
+        );
+        try {
+            assertAcked(
+                client().admin()
+                    .indices()
+                    .prepareCreate("date_filter_by_filter_disabled")
+                    .setSettings(Map.of("number_of_shards", 1, "number_of_replicas", 0))
+                    .setMapping("date", "type=date", "keyword", "type=keyword")
+                    .get()
+            );
+            List<IndexRequestBuilder> builders = new ArrayList<>();
+            for (int i = 0; i < RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2; i++) {
+                String date = Instant.ofEpochSecond(i).toString();
+                builders.add(
+                    client().prepareIndex("date_filter_by_filter_disabled")
+                        .setSource(
+                            jsonBuilder().startObject()
+                                .field("date", date)
+                                .endObject()
+                        )
+                );
+            }
+            indexRandom(true, false, builders);
+
+            SearchResponse response = client().prepareSearch("date_filter_by_filter_disabled")
+                .setProfile(true)
+                .addAggregation(new DateHistogramAggregationBuilder("histo").field("date").calendarInterval(DateHistogramInterval.MONTH))
+                .get();
+            assertSearchResponse(response);
+            Map<String, ProfileShardResult> profileResults = response.getProfileResults();
+            assertThat(profileResults, notNullValue());
+            assertThat(profileResults.size(), equalTo(getNumShards("date_filter_by_filter_disabled").numPrimaries));
+            for (ProfileShardResult profileShardResult : profileResults.values()) {
+                assertThat(profileShardResult, notNullValue());
+                AggregationProfileShardResult aggProfileResults = profileShardResult.getAggregationProfileResults();
+                assertThat(aggProfileResults, notNullValue());
+                List<ProfileResult> aggProfileResultsList = aggProfileResults.getProfileResults();
+                assertThat(aggProfileResultsList, notNullValue());
+                assertThat(aggProfileResultsList.size(), equalTo(1));
+                ProfileResult histoAggResult = aggProfileResultsList.get(0);
+                assertThat(histoAggResult, notNullValue());
+                assertThat(histoAggResult.getQueryName(), equalTo("DateHistogramAggregator.FromDateRange"));
+                assertThat(histoAggResult.getLuceneDescription(), equalTo("histo"));
+                assertThat(histoAggResult.getProfiledChildren().size(), equalTo(0));
+                assertThat(histoAggResult.getTime(), greaterThan(0L));
+                Map<String, Long> breakdown = histoAggResult.getTimeBreakdown();
+                assertMap(
+                    breakdown,
+                    matchesMap().entry(INITIALIZE, greaterThan(0L))
+                        .entry(INITIALIZE + "_count", greaterThan(0L))
+                        .entry(BUILD_LEAF_COLLECTOR, greaterThan(0L))
+                        .entry(BUILD_LEAF_COLLECTOR + "_count", greaterThan(0L))
+                        .entry(COLLECT, greaterThan(0L))
+                        .entry(COLLECT + "_count", greaterThan(0L))
+                        .entry(POST_COLLECTION, greaterThan(0L))
+                        .entry(POST_COLLECTION + "_count", 1L)
+                        .entry(BUILD_AGGREGATION, greaterThan(0L))
+                        .entry(BUILD_AGGREGATION + "_count", greaterThan(0L))
+                        .entry(REDUCE, 0L)
+                        .entry(REDUCE + "_count", 0L)
+                );
+                Map<String, Object> debug = histoAggResult.getDebugInfo();
+                assertMap(
+                    debug,
+                    matchesMap().entry("delegate", "RangeAggregator.NoOverlap")
+                        .entry("delegate_debug", matchesMap().entry("ranges", 1).entry("average_docs_per_range", 10000.0))
+                );
+            }
+        } finally {
+            assertAcked(
+                client().admin()
+                    .cluster()
+                    .prepareUpdateSettings()
+                    .setPersistentSettings(Settings.builder().putNull(SearchService.ENABLE_REWRITE_AGGS_TO_FILTER_BY_FILTER.getKey()))
+            );
+        }
+    }
 }

+ 1 - 0
server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

@@ -397,6 +397,7 @@ public final class ClusterSettings extends AbstractScopedSettings {
             MultiBucketConsumerService.MAX_BUCKET_SETTING,
             SearchService.LOW_LEVEL_CANCELLATION_SETTING,
             SearchService.MAX_OPEN_SCROLL_CONTEXT,
+            SearchService.ENABLE_REWRITE_AGGS_TO_FILTER_BY_FILTER,
             Node.WRITE_PORTS_FILE_SETTING,
             Node.NODE_NAME_SETTING,
             Node.NODE_ATTRIBUTES,

+ 19 - 1
server/src/main/java/org/elasticsearch/search/SearchService.java

@@ -164,6 +164,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
     public static final Setting<Integer> MAX_OPEN_SCROLL_CONTEXT =
         Setting.intSetting("search.max_open_scroll_context", 500, 0, Property.Dynamic, Property.NodeScope);
 
+    public static final Setting<Boolean> ENABLE_REWRITE_AGGS_TO_FILTER_BY_FILTER = Setting.boolSetting(
+        "search.aggs.rewrite_to_filter_by_filter",
+        true,
+        Property.Dynamic,
+        Property.NodeScope
+    );
+
     public static final int DEFAULT_SIZE = 10;
     public static final int DEFAULT_FROM = 0;
 
@@ -197,6 +204,8 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
 
     private volatile int maxOpenScrollContext;
 
+    private volatile boolean enableRewriteAggsToFilterByFilter;
+
     private final Cancellable keepAliveReaper;
 
     private final AtomicLong idGenerator = new AtomicLong();
@@ -243,6 +252,10 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
 
         lowLevelCancellation = LOW_LEVEL_CANCELLATION_SETTING.get(settings);
         clusterService.getClusterSettings().addSettingsUpdateConsumer(LOW_LEVEL_CANCELLATION_SETTING, this::setLowLevelCancellation);
+
+        enableRewriteAggsToFilterByFilter = ENABLE_REWRITE_AGGS_TO_FILTER_BY_FILTER.get(settings);
+        clusterService.getClusterSettings()
+            .addSettingsUpdateConsumer(ENABLE_REWRITE_AGGS_TO_FILTER_BY_FILTER, this::setEnableRewriteAggsToFilterByFilter);
     }
 
     private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) {
@@ -279,6 +292,10 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
         this.lowLevelCancellation = lowLevelCancellation;
     }
 
+    private void setEnableRewriteAggsToFilterByFilter(boolean enableRewriteAggsToFilterByFilter) {
+        this.enableRewriteAggsToFilterByFilter = enableRewriteAggsToFilterByFilter;
+    }
+
     @Override
     public void afterIndexRemoved(Index index, IndexSettings indexSettings, IndexRemovalReason reason) {
         // once an index is removed due to deletion or closing, we can just clean up all the pending search context information
@@ -968,7 +985,8 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
                 context.indexShard().shardId().hashCode(),
                 context::getRelativeTimeInMillis,
                 context::isCancelled,
-                context::buildFilteredQuery
+                context::buildFilteredQuery,
+                enableRewriteAggsToFilterByFilter
             );
             context.addReleasable(aggContext);
             try {

+ 3 - 0
server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java

@@ -353,6 +353,9 @@ public abstract class RangeAggregator extends BucketsAggregator {
         if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) {
             return null;
         }
+        if (false == context.enableRewriteToFilterByFilter()) {
+            return null;
+        }
         boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint();
         List<QueryToFilterAdapter<?>> filters = new ArrayList<>(ranges.length);
         for (int i = 0; i < ranges.length; i++) {

+ 1 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

@@ -284,7 +284,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
                 order,
                 format,
                 bucketCountThresholds,
-                l -> true,
+                ALWAYS_TRUE,
                 context,
                 parent,
                 remapGlobalOrds,

+ 1 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

@@ -360,7 +360,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
                     .getValuesSource();
                 SortedSetDocValues values = globalOrdsValues(context, ordinalsValuesSource);
                 long maxOrd = values.getValueCount();
-                if (maxOrd > 0 && maxOrd <= MAX_ORDS_TO_TRY_FILTERS) {
+                if (maxOrd > 0 && maxOrd <= MAX_ORDS_TO_TRY_FILTERS && context.enableRewriteToFilterByFilter()) {
                     StringTermsAggregatorFromFilters adapted = StringTermsAggregatorFromFilters.adaptIntoFiltersOrNull(
                         name,
                         factories,

+ 21 - 2
server/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java

@@ -24,13 +24,14 @@ import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.ObjectMapper;
 import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.query.Rewriteable;
+import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.query.support.NestedScope;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptContext;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer;
+import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.FilterByFilter;
 import org.elasticsearch.search.internal.SubSearchContext;
 import org.elasticsearch.search.lookup.SearchLookup;
 import org.elasticsearch.search.profile.aggregation.AggregationProfiler;
@@ -244,6 +245,16 @@ public abstract class AggregationContext implements Releasable {
      */
     public abstract boolean isCacheable();
 
+    /**
+     * Are aggregations allowed to try to rewrite themselves into
+     * {@link FilterByFilter} aggregations? <strong>Often</strong>
+     * {@linkplain FilterByFilter} is faster to execute, but it isn't
+     * always. For now this just hooks into a cluster level setting
+     * so users can disable the behavior when the existing heuristics
+     * don't detect cases where its slower.
+     */
+    public abstract boolean enableRewriteToFilterByFilter();
+
     /**
      * Implementation of {@linkplain AggregationContext} for production usage
      * that wraps our ubiquitous {@link SearchExecutionContext} and anything else
@@ -264,6 +275,7 @@ public abstract class AggregationContext implements Releasable {
         private final LongSupplier relativeTimeInMillis;
         private final Supplier<Boolean> isCancelled;
         private final Function<Query, Query> filterQuery;
+        private final boolean enableRewriteToFilterByFilter;
 
         private final List<Aggregator> releaseMe = new ArrayList<>();
 
@@ -279,7 +291,8 @@ public abstract class AggregationContext implements Releasable {
             int randomSeed,
             LongSupplier relativeTimeInMillis,
             Supplier<Boolean> isCancelled,
-            Function<Query, Query> filterQuery
+            Function<Query, Query> filterQuery,
+            boolean enableRewriteToFilterByFilter
         ) {
             this.context = context;
             if (bytesToPreallocate == 0) {
@@ -310,6 +323,7 @@ public abstract class AggregationContext implements Releasable {
             this.relativeTimeInMillis = relativeTimeInMillis;
             this.isCancelled = isCancelled;
             this.filterQuery = filterQuery;
+            this.enableRewriteToFilterByFilter = enableRewriteToFilterByFilter;
         }
 
         @Override
@@ -466,6 +480,11 @@ public abstract class AggregationContext implements Releasable {
             return context.isCacheable();
         }
 
+        @Override
+        public boolean enableRewriteToFilterByFilter() {
+            return enableRewriteToFilterByFilter;
+        }
+
         @Override
         public void close() {
             /*

+ 10 - 4
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java

@@ -22,11 +22,14 @@ import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -835,7 +838,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                             .size(numTerms)
                             .collectMode(randomFrom(Aggregator.SubAggCollectionMode.values()))
                             .field("field"));
-                        context = createAggregationContext(indexSearcher, null, fieldType, filterFieldType);
+                        context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType, filterFieldType);
                         aggregator = createAggregator(aggregationBuilder, context);
                         aggregator.preCollection();
                         indexSearcher.search(new MatchAllDocsQuery(), aggregator);
@@ -1020,7 +1023,7 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                         TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name")
                             .userValueTypeHint(valueTypes[i])
                             .field(fieldNames[i]).missing(missingValues[i]);
-                        AggregationContext context = createAggregationContext(indexSearcher, null, fieldType1);
+                        AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType1);
                         Aggregator aggregator = createAggregator(aggregationBuilder, context);
                         aggregator.preCollection();
                         indexSearcher.search(new MatchAllDocsQuery(), aggregator);
@@ -1764,7 +1767,10 @@ public class TermsAggregatorTests extends AggregatorTestCase {
          * would trigger that bug.
          */
         builder.size(2).order(BucketOrder.key(true));
-        Query topLevel = new TermInSetQuery("k", new BytesRef[] {new BytesRef("b"), new BytesRef("c")});
+        Query topLevel = new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("k", "b")), Occur.SHOULD)
+            .add(new TermQuery(new Term("k", "c")), Occur.SHOULD)
+            .build();
         testCase(builder, topLevel, buildIndex, (StringTerms terms) -> {
             assertThat(terms.getBuckets().stream().map(StringTerms.Bucket::getKey).collect(toList()), equalTo(List.of("b", "c")));
         }, kft);

+ 5 - 0
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java

@@ -476,6 +476,11 @@ public abstract class MapperServiceTestCase extends ESTestCase {
                 throw new UnsupportedOperationException();
             }
 
+            @Override
+            public boolean enableRewriteToFilterByFilter() {
+                throw new UnsupportedOperationException();
+            }
+
             @Override
             public void close() {
                 throw new UnsupportedOperationException();

+ 2 - 1
test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java

@@ -292,7 +292,8 @@ public abstract class AggregatorTestCase extends ESTestCase {
             randomInt(),
             () -> 0L,
             () -> false,
-            q -> q
+            q -> q,
+            true
         );
         releasables.add(context);
         return context;

+ 1 - 0
x-pack/qa/runtime-fields/build.gradle

@@ -77,6 +77,7 @@ subprojects {
           'search.aggregation/20_terms/string profiler via global ordinals filters implementation',
           'search.aggregation/20_terms/string profiler via global ordinals native implementation',
           'search.aggregation/20_terms/Global ordinals are loaded with the global_ordinals execution hint',
+          'search.aggregation/22_terms_disable_opt/does not use optimization',
           'search.aggregation/170_cardinality_metric/profiler string',
           'search.aggregation/235_composite_sorted/*',
           /////// NOT SUPPORTED ///////