9 years ago · ad232aebbe
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java
@@ -166,16 +166,16 @@ public class BestBucketsDeferringCollector extends DeferringBucketCollector {
 
				             int doc = 0;
			
 
				             for (long i = 0, end = entry.docDeltas.size(); i < end; ++i) {
			
 
				                 doc += docDeltaIterator.next();
			
 
				-                if (needsScores) {
			
 
				-                    if (docIt.docID() < doc) {
			
 
				-                        docIt.advance(doc);
			
 
				-                    }
			
 
				-                    // aggregations should only be replayed on matching documents
			
 
				-                    assert docIt.docID() == doc;
			
 
				-                }
			
 
				                 final long bucket = buckets.next();
			
 
				                 final long rebasedBucket = hash.find(bucket);
			
 
				                 if (rebasedBucket != -1) {
			
 
				+                    if (needsScores) {
			
 
				+                        if (docIt.docID() < doc) {
			
 
				+                            docIt.advance(doc);
			
 
				+                        }
			
 
				+                        // aggregations should only be replayed on matching documents
			
 
				+                        assert docIt.docID() == doc;
			
 
				+                    }
			
 
				                     leafCollector.collect(doc, rebasedBucket);
			
 
				                 }
			
 
				             }
			
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregationBuilder.java
@@ -56,7 +56,7 @@ public class TermsAggregationBuilder extends ValuesSourceAggregationBuilder<Valu
 
				     private Terms.Order order = Terms.Order.compound(Terms.Order.count(false), Terms.Order.term(true));
			
 
				     private IncludeExclude includeExclude = null;
			
 
				     private String executionHint = null;
			
 
				-    private SubAggCollectionMode collectMode = SubAggCollectionMode.DEPTH_FIRST;
			
 
				+    private SubAggCollectionMode collectMode = null;
			
 
				     private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(
			
 
				             DEFAULT_BUCKET_COUNT_THRESHOLDS);
			
 
				     private boolean showTermDocCountError = false;
			
@@ -71,7 +71,7 @@ public class TermsAggregationBuilder extends ValuesSourceAggregationBuilder<Valu
 
				     public TermsAggregationBuilder(StreamInput in) throws IOException {
			
 
				         super(in, StringTerms.TYPE, ValuesSourceType.ANY);
			
 
				         bucketCountThresholds = new BucketCountThresholds(in);
			
 
				-        collectMode = SubAggCollectionMode.readFromStream(in);
			
 
				+        collectMode = in.readOptionalWriteable(SubAggCollectionMode::readFromStream);
			
 
				         executionHint = in.readOptionalString();
			
 
				         includeExclude = in.readOptionalWriteable(IncludeExclude::new);
			
 
				         order = InternalOrder.Streams.readOrder(in);
			
@@ -86,7 +86,7 @@ public class TermsAggregationBuilder extends ValuesSourceAggregationBuilder<Valu
 
				     @Override
			
 
				     protected void innerWriteTo(StreamOutput out) throws IOException {
			
 
				         bucketCountThresholds.writeTo(out);
			
 
				-        collectMode.writeTo(out);
			
 
				+        out.writeOptionalWriteable(collectMode);
			
 
				         out.writeOptionalString(executionHint);
			
 
				         out.writeOptionalWriteable(includeExclude);
			
 
				         InternalOrder.Streams.writeOrder(order, out);
			
@@ -266,7 +266,9 @@ public class TermsAggregationBuilder extends ValuesSourceAggregationBuilder<Valu
 
				         }
			
 
				         builder.field(ORDER_FIELD.getPreferredName());
			
 
				         order.toXContent(builder, params);
			
 
				-        builder.field(SubAggCollectionMode.KEY.getPreferredName(), collectMode.parseField().getPreferredName());
			
 
				+        if (collectMode != null) {
			
 
				+            builder.field(SubAggCollectionMode.KEY.getPreferredName(), collectMode.parseField().getPreferredName());
			
 
				+        }
			
 
				         if (includeExclude != null) {
			
 
				             includeExclude.toXContent(builder, params);
			
 
				         }
			
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
@@ -150,14 +150,22 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
 
				                     }
			
 
				                 }
			
 
				             }
			
 
				+            SubAggCollectionMode cm = collectMode;
			
 
				+            if (cm == null) {
			
 
				+                cm = SubAggCollectionMode.DEPTH_FIRST;
			
 
				+                if (factories != AggregatorFactories.EMPTY) {
			
 
				+                    cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd);
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				             DocValueFormat format = config.format();
			
 
				             if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
			
 
				                 throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
			
 
				                         + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
			
 
				             }
			
 
				 
			
 
				-            return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, 
			
 
				-                    collectMode, showTermDocCountError, pipelineAggregators, metaData);
			
 
				+            return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent,
			
 
				+                    cm, showTermDocCountError, pipelineAggregators, metaData);
			
 
				         }
			
 
				 
			
 
				         if ((includeExclude != null) && (includeExclude.isRegexBased())) {
			
@@ -167,19 +175,27 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
 
				 
			
 
				         if (valuesSource instanceof ValuesSource.Numeric) {
			
 
				             IncludeExclude.LongFilter longFilter = null;
			
 
				+            SubAggCollectionMode cm = collectMode;
			
 
				+            if (cm == null) {
			
 
				+                if (factories != AggregatorFactories.EMPTY) {
			
 
				+                    cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1);
			
 
				+                } else {
			
 
				+                    cm = SubAggCollectionMode.DEPTH_FIRST;
			
 
				+                }
			
 
				+            }
			
 
				             if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
			
 
				                 if (includeExclude != null) {
			
 
				                     longFilter = includeExclude.convertToDoubleFilter();
			
 
				                 }
			
 
				                 return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
			
 
				-                        bucketCountThresholds, context, parent, collectMode, showTermDocCountError, longFilter,
			
 
				+                        bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter,
			
 
				                         pipelineAggregators, metaData);
			
 
				             }
			
 
				             if (includeExclude != null) {
			
 
				                 longFilter = includeExclude.convertToLongFilter(config.format());
			
 
				             }
			
 
				             return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
			
 
				-                    bucketCountThresholds, context, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators,
			
 
				+                    bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter, pipelineAggregators,
			
 
				                     metaData);
			
 
				         }
			
 
				 
			
@@ -187,6 +203,20 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
 
				                 + "]. It can only be applied to numeric or string fields.");
			
 
				     }
			
 
				 
			
 
				+    // return the SubAggCollectionMode that this aggregation should use based on the expected size
			
 
				+    // and the cardinality of the field
			
 
				+    static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
			
 
				+        if (expectedSize == Integer.MAX_VALUE) {
			
 
				+            // return all buckets
			
 
				+            return SubAggCollectionMode.DEPTH_FIRST;
			
 
				+        }
			
 
				+        if (maxOrd == -1 || maxOrd > expectedSize) {
			
 
				+            // use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
			
 
				+            return SubAggCollectionMode.BREADTH_FIRST;
			
 
				+        }
			
 
				+        return SubAggCollectionMode.DEPTH_FIRST;
			
 
				+    }
			
 
				+
			
 
				     public enum ExecutionMode {
			
 
				 
			
 
				         MAP(new ParseField("map")) {
			
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java
@@ -0,0 +1,44 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.search.aggregations.bucket.terms;
			
 
				+
			
 
				+import org.elasticsearch.search.aggregations.Aggregator;
			
 
				+import org.elasticsearch.test.ESSingleNodeTestCase;
			
 
				+
			
 
				+import static org.hamcrest.Matchers.equalTo;
			
 
				+
			
 
				+public class TermsAggregatorFactoryTests extends ESSingleNodeTestCase {
			
 
				+    public void testSubAggCollectMode() throws Exception {
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(10, -1),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 5),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 10),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 100),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(1, 2),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
			
 
				+        assertThat(TermsAggregatorFactory.subAggCollectionMode(1, 100),
			
 
				+            equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
			
 
				+    }
			
 
				+}
			
--- a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/terms-aggregation.asciidoc
@@ -577,7 +577,8 @@ Deferring calculation of child aggregations
 
				 
			
 
				 For fields with many unique terms and a small number of required results it can be more efficient to delay the calculation
			
 
				 of child aggregations until the top parent-level aggs have been pruned. Ordinarily, all branches of the aggregation tree
			
 
				-are expanded in one depth-first pass and only then any pruning occurs. In some rare scenarios this can be very wasteful and can hit memory constraints.
			
 
				+are expanded in one depth-first pass and only then any pruning occurs.
			
 
				+In some scenarios this can be very wasteful and can hit memory constraints.
			
 
				 An example problem scenario is querying a movie database for the 10 most popular actors and their 5 most common co-stars:
			
 
				 
			
 
				 [source,js]
			
@@ -602,10 +603,13 @@ An example problem scenario is querying a movie database for the 10 most popular
 
				 }
			
 
				 --------------------------------------------------
			
 
				 
			
 
				-Even though the number of movies may be comparatively small and we want only 50 result buckets there is a combinatorial explosion of buckets
			
 
				-during calculation - a single movie will produce n² buckets where n is the number of actors. The sane option would be to first determine
			
 
				+Even though the number of actors may be comparatively small and we want only 50 result buckets there is a combinatorial explosion of buckets
			
 
				+during calculation - a single actor can produce n² buckets where n is the number of actors. The sane option would be to first determine
			
 
				 the 10 most popular actors and only then examine the top co-stars for these 10 actors. This alternative strategy is what we call the `breadth_first` collection
			
 
				-mode as opposed to the default `depth_first` mode:
			
 
				+mode as opposed to the `depth_first` mode.
			
 
				+
			
 
				+NOTE: The `breadth_first` is the default mode for fields with a cardinality bigger than the requested size or when the cardinality is unknown (numeric fields or scripts for instance).
			
 
				+It is possible to override the default heuristic and to provide a collect mode directly in the request:
			
 
				 
			
 
				 [source,js]
			
 
				 --------------------------------------------------
			
@@ -615,7 +619,7 @@ mode as opposed to the default `depth_first` mode:
 
				              "terms" : {
			
 
				                  "field" : "actors",
			
 
				                  "size" : 10,
			
 
				-                 "collect_mode" : "breadth_first"
			
 
				+                 "collect_mode" : "breadth_first" <1>
			
 
				              },
			
 
				             "aggs" : {
			
 
				                 "costars" : {
			
@@ -630,12 +634,10 @@ mode as opposed to the default `depth_first` mode:
 
				 }
			
 
				 --------------------------------------------------
			
 
				 
			
 
				+<1> the possible values are `breadth_first` and `depth_first`
			
 
				 
			
 
				 When using `breadth_first` mode the set of documents that fall into the uppermost buckets are
			
 
				 cached for subsequent replay so there is a memory overhead in doing this which is linear with the number of matching documents.
			
 
				-In most requests the volume of buckets generated is smaller than the number of documents that fall into them so the default `depth_first`
			
 
				-collection mode is normally the best bet but occasionally the `breadth_first` strategy can be significantly more efficient. Currently
			
 
				-elasticsearch will always use the `depth_first` collect_mode unless explicitly instructed to use `breadth_first` as in the above example.
			
 
				 Note that the `order` parameter can still be used to refer to data from a child aggregation when using the `breadth_first` setting - the parent
			
 
				 aggregation understands that this child aggregation will need to be called first before any of the other child aggregations.