
Add new option `min_doc_count` to terms and histogram aggregations.

`min_doc_count` is the minimum number of hits that a term or histogram key
should match in order to appear in the response.

`min_doc_count=0` replaces `compute_empty_buckets` for histograms and will
behave exactly like facets' `all_terms=true` for terms aggregations.

Close #4662
Adrien Grand · 11 years ago
commit 5c237fe834
42 changed files with 859 additions and 136 deletions
  1. +57  −0   docs/reference/search/aggregations/bucket/histogram-aggregation.asciidoc
  2. +28  −0   docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc
  3. +64  −0   src/main/java/org/elasticsearch/common/collect/Iterators2.java
  4. +33  −21  src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramBase.java
  5. +11  −7   src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramParser.java
  6. +12  −12  src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java
  7. +5   −5   src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramBuilder.java
  8. +6   −6   src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramParser.java
  9. +4   −4   src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java
  10. +4   −4   src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java
  11. +10  −5   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java
  12. +21  −3   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java
  13. +17  −17  src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java
  14. +8   −3   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java
  15. +21  −3   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java
  16. +4   −2   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java
  17. +127 −10  src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java
  18. +8   −6   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
  19. +12  −0   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java
  20. +6   −3   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java
  21. +4   −2   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java
  22. +5   −3   src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTermsAggregator.java
  23. +50  −0   src/test/java/org/elasticsearch/common/collect/Iterators2Tests.java
  24. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramTests.java
  25. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/DateRangeTests.java
  26. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java
  27. +2   −2   src/test/java/org/elasticsearch/search/aggregations/bucket/FilterTests.java
  28. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/GeoDistanceTests.java
  29. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/HistogramTests.java
  30. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/IPv4RangeTests.java
  31. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java
  32. +322 −0   src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountTests.java
  33. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/MissingTests.java
  34. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java
  35. +1   −1   src/test/java/org/elasticsearch/search/aggregations/bucket/RangeTests.java
  36. +2   −2   src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java
  37. +1   −1   src/test/java/org/elasticsearch/search/aggregations/metrics/AvgTests.java
  38. +1   −1   src/test/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsTests.java
  39. +1   −1   src/test/java/org/elasticsearch/search/aggregations/metrics/MaxTests.java
  40. +1   −1   src/test/java/org/elasticsearch/search/aggregations/metrics/MinTests.java
  41. +1   −1   src/test/java/org/elasticsearch/search/aggregations/metrics/StatsTests.java
  42. +1   −1   src/test/java/org/elasticsearch/search/aggregations/metrics/SumTests.java

+ 57 - 0
docs/reference/search/aggregations/bucket/histogram-aggregation.asciidoc

@@ -159,6 +159,63 @@ If the histogram aggregation has a direct metrics sub-aggregation, the latter ca
 
 <2> There is no need to configure the `price` field for the `price_stats` aggregation as it will inherit it by default from its parent histogram aggregation.
 
+==== Minimum document count
+
+It is possible to only return buckets with a document count greater than or equal to a configured limit through the `min_doc_count` option.
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs" : {
+        "prices" : {
+            "histogram" : { 
+                "field" : "price",
+                "interval" : 50,
+                "min_doc_count": 10
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+The above aggregation would only return buckets that contain 10 or more documents. The default value is `1`.
+
+NOTE: The special value `0` can be used to add empty buckets to the response between the minimum and the maximum buckets. Here is an example of what the response could look like:
+
+[source,js]
+--------------------------------------------------
+{
+    "aggregations": {
+        "prices": {
+            "0": {
+                "key": 0,
+                "doc_count": 2
+            },
+            "50": {
+                "key": 50,
+                "doc_count": 0
+            },
+            "100": {
+                "key": 100,
+                "doc_count": 0
+            },
+            "150": {
+                "key": 150,
+                "doc_count": 3
+            },
+            "200": {
+                "key": 200,
+                "doc_count": 0
+            },
+            "250": {
+                "key": 250,
+                "doc_count": 0
+            },
+            "300": {
+                "key": 300,
+                "doc_count": 1
+            }
+        }
+    }
+}
+--------------------------------------------------
+
 ==== Response Format
 
 By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash keyed by the bucket keys instead:
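
The commit also exposes this option through the Java API (see the `HistogramBuilder` change below). A minimal sketch of the equivalent request; the index name "sales" and the surrounding `Client` setup are illustrative assumptions, not part of this commit:

[source,java]
--------------------------------------------------
// Hedged sketch of the Java API equivalent of the request above.
SearchResponse response = client.prepareSearch("sales")
        .addAggregation(AggregationBuilders.histogram("prices")
                .field("price")
                .interval(50)
                .minDocCount(10)) // only keep buckets with at least 10 documents
        .execute().actionGet();
--------------------------------------------------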

+ 28 - 0
docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc

@@ -142,6 +142,34 @@ Ordering the buckets by multi value metrics sub-aggregation (identified by the a
 }
 --------------------------------------------------
 
+==== Minimum document count
+
+It is possible to only return terms that match at least a configured number of hits using the `min_doc_count` option:
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs" : {
+        "tags" : {
+            "terms" : { 
+                "field" : "tag",
+                "min_doc_count": 10
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+The above aggregation would only return tags found in 10 or more hits. The default value is `1`.
+
+NOTE:    Setting `min_doc_count` to `0` will also return buckets for terms that didn't match any hit. However, some of
+         the returned terms which have a document count of zero might only belong to deleted documents, so there is
+         no guarantee that a `match_all` query would find a positive document count for those terms.
+
+WARNING: When NOT sorting on `doc_count` descending, high values of `min_doc_count` may return a number of buckets
+         which is less than `size` because not enough data was gathered from the shards. Missing buckets can be
+         brought back by increasing `shard_size`.
+
 ==== Script
 
 Generating the terms using a script:
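
The Java API gains a matching option as well (`TermsBuilder` gains 12 lines in this commit). A hedged sketch of the equivalent request; the `TermsBuilder.minDocCount` setter name is assumed from the parser change, and the index name "products" is illustrative:

[source,java]
--------------------------------------------------
// Hedged sketch; "products" and the Client setup are assumptions.
SearchResponse response = client.prepareSearch("products")
        .addAggregation(AggregationBuilders.terms("tags")
                .field("tag")
                .minDocCount(10)) // drop terms that match fewer than 10 hits
        .execute().actionGet();
--------------------------------------------------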

+ 64 - 0
src/main/java/org/elasticsearch/common/collect/Iterators2.java

@@ -0,0 +1,64 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.collect;
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.PeekingIterator;
+import com.google.common.collect.UnmodifiableIterator;
+
+import java.util.Comparator;
+import java.util.Iterator;
+
+public enum Iterators2 {
+    ;
+
+    /** Remove duplicated elements from an iterator over sorted content. */
+    public static <T> Iterator<T> deduplicateSorted(Iterator<? extends T> iterator, final Comparator<? super T> comparator) {
+        final PeekingIterator<T> it = Iterators.peekingIterator(iterator);
+        return new UnmodifiableIterator<T>() {
+
+            @Override
+            public boolean hasNext() {
+                return it.hasNext();
+            }
+
+            @Override
+            public T next() {
+                final T ret = it.next();
+                while (it.hasNext() && comparator.compare(ret, it.peek()) == 0) {
+                    it.next();
+                }
+                assert !it.hasNext() || comparator.compare(ret, it.peek()) < 0 : "iterator is not sorted: " + ret + " > " + it.peek();
+                return ret;
+            }
+
+        };
+    }
+
+    /** Return a merged view over several iterators, optionally deduplicating equivalent entries. */
+    public static <T> Iterator<T> mergeSorted(Iterable<Iterator<? extends T>> iterators, Comparator<? super T> comparator, boolean deduplicate) {
+        Iterator<T> it = Iterators.mergeSorted(iterators, comparator);
+        if (deduplicate) {
+            it = deduplicateSorted(it, comparator);
+        }
+        return it;
+    }
+
+}
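
To make the new utility concrete, here is a small self-contained usage sketch (not part of the commit): merging two sorted iterators with deduplication enabled yields each distinct element once, in order.

[source,java]
--------------------------------------------------
import com.google.common.collect.Ordering;
import org.elasticsearch.common.collect.Iterators2;

import java.util.Arrays;
import java.util.Iterator;

public class Iterators2Example {
    public static void main(String[] args) {
        Iterator<Integer> a = Arrays.asList(1, 3, 5).iterator();
        Iterator<Integer> b = Arrays.asList(1, 2, 5, 7).iterator();
        // Merge the two sorted iterators, removing duplicated entries.
        Iterator<Integer> merged = Iterators2.mergeSorted(
                Arrays.<Iterator<? extends Integer>>asList(a, b),
                Ordering.<Integer>natural(), true);
        while (merged.hasNext()) {
            System.out.print(merged.next() + " "); // prints: 1 2 3 5 7
        }
    }
}
--------------------------------------------------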

+ 33 - 21
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramBase.java

@@ -117,7 +117,7 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
 
         String type();
 
-        AbstractHistogramBase create(String name, List<B> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed);
+        AbstractHistogramBase create(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed);
 
         Bucket createBucket(long key, long docCount, InternalAggregations aggregations);
 
@@ -128,14 +128,17 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
     private InternalOrder order;
     private ValueFormatter formatter;
     private boolean keyed;
+    private long minDocCount;
     private EmptyBucketInfo emptyBucketInfo;
 
     protected AbstractHistogramBase() {} // for serialization
 
-    protected AbstractHistogramBase(String name, List<B> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
+    protected AbstractHistogramBase(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
         super(name);
         this.buckets = buckets;
         this.order = order;
+        assert (minDocCount == 0) == (emptyBucketInfo != null);
+        this.minDocCount = minDocCount;
         this.emptyBucketInfo = emptyBucketInfo;
         this.formatter = formatter;
         this.keyed = keyed;
@@ -169,28 +172,36 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
         List<InternalAggregation> aggregations = reduceContext.aggregations();
         if (aggregations.size() == 1) {
 
-            if (emptyBucketInfo == null) {
+            if (minDocCount == 1) {
                 return aggregations.get(0);
             }
 
-            // we need to fill the gaps with empty buckets
             AbstractHistogramBase histo = (AbstractHistogramBase) aggregations.get(0);
             CollectionUtil.introSort(histo.buckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
             List<HistogramBase.Bucket> list = order.asc ? histo.buckets : Lists.reverse(histo.buckets);
             HistogramBase.Bucket prevBucket = null;
             ListIterator<HistogramBase.Bucket> iter = list.listIterator();
-            while (iter.hasNext()) {
-                // look ahead on the next bucket without advancing the iter
-                // so we'll be able to insert elements at the right position
-                HistogramBase.Bucket nextBucket = list.get(iter.nextIndex());
-                if (prevBucket != null) {
-                    long key = emptyBucketInfo.rounding.nextRoundingValue(prevBucket.getKey());
-                    while (key != nextBucket.getKey()) {
-                        iter.add(createBucket(key, 0, emptyBucketInfo.subAggregations));
-                        key = emptyBucketInfo.rounding.nextRoundingValue(key);
+            if (minDocCount == 0) {
+                // we need to fill the gaps with empty buckets
+                while (iter.hasNext()) {
+                    // look ahead on the next bucket without advancing the iter
+                    // so we'll be able to insert elements at the right position
+                    HistogramBase.Bucket nextBucket = list.get(iter.nextIndex());
+                    if (prevBucket != null) {
+                        long key = emptyBucketInfo.rounding.nextRoundingValue(prevBucket.getKey());
+                        while (key != nextBucket.getKey()) {
+                            iter.add(createBucket(key, 0, emptyBucketInfo.subAggregations));
+                            key = emptyBucketInfo.rounding.nextRoundingValue(key);
+                        }
+                    }
+                    prevBucket = iter.next();
+                }
+            } else {
+                while (iter.hasNext()) {
+                    if (iter.next().getDocCount() < minDocCount) {
+                        iter.remove();
                     }
                 }
-                prevBucket = iter.next();
             }
 
             if (order != InternalOrder.KEY_ASC && order != InternalOrder.KEY_DESC) {
@@ -222,7 +233,9 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
         for (int i = 0; i < allocated.length; i++) {
             if (allocated[i]) {
                 Bucket bucket = ((List<Bucket>) buckets[i]).get(0).reduce(((List<Bucket>) buckets[i]), reduceContext.cacheRecycler());
-                reducedBuckets.add(bucket);
+                if (bucket.getDocCount() >= minDocCount) {
+                    reducedBuckets.add(bucket);
+                }
             }
         }
         bucketsByKey.release();
@@ -230,7 +243,7 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
 
 
         // adding empty buckets if needed
-        if (emptyBucketInfo != null) {
+        if (minDocCount == 0) {
             CollectionUtil.introSort(reducedBuckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
             List<HistogramBase.Bucket> list = order.asc ? reducedBuckets : Lists.reverse(reducedBuckets);
             HistogramBase.Bucket prevBucket = null;
@@ -268,7 +281,8 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
     public void readFrom(StreamInput in) throws IOException {
         name = in.readString();
         order = InternalOrder.Streams.readOrder(in);
-        if (in.readBoolean()) {
+        minDocCount = in.readVLong();
+        if (minDocCount == 0) {
             emptyBucketInfo = EmptyBucketInfo.readFrom(in);
         }
         formatter = ValueFormatterStreams.readOptional(in);
@@ -286,10 +300,8 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
-        if (emptyBucketInfo == null) {
-            out.writeBoolean(false);
-        } else {
-            out.writeBoolean(true);
+        out.writeVLong(minDocCount);
+        if (minDocCount == 0) {
             EmptyBucketInfo.writeTo(emptyBucketInfo, out);
         }
         ValueFormatterStreams.writeOptional(formatter, out);

+ 11 - 7
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramParser.java

@@ -86,7 +86,7 @@ public class DateHistogramParser implements Aggregator.Parser {
         String scriptLang = null;
         Map<String, Object> scriptParams = null;
         boolean keyed = false;
-        boolean computeEmptyBuckets = false;
+        long minDocCount = 1;
         InternalOrder order = (InternalOrder) Histogram.Order.KEY_ASC;
         String interval = null;
         boolean preZoneAdjustLargeInterval = false;
@@ -131,13 +131,17 @@ public class DateHistogramParser implements Aggregator.Parser {
             } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
                 if ("keyed".equals(currentFieldName)) {
                     keyed = parser.booleanValue();
-                } else if ("compute_empty_buckets".equals(currentFieldName) || "computeEmptyBuckets".equals(currentFieldName)) {
-                    computeEmptyBuckets = parser.booleanValue();
                 } else if ("script_values_sorted".equals(currentFieldName)) {
                     assumeSorted = parser.booleanValue();
                 } else {
                     throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
                 }
+            } else if (token == XContentParser.Token.VALUE_NUMBER) {
+                if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
+                    minDocCount = parser.longValue();
+                } else {
+                    throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
+                }
             } else if (token == XContentParser.Token.START_OBJECT) {
                 if ("params".equals(currentFieldName)) {
                     scriptParams = parser.map();
@@ -199,17 +203,17 @@ public class DateHistogramParser implements Aggregator.Parser {
             if (searchScript != null) {
                 ValueParser valueParser = new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER, DateFieldMapper.Defaults.TIME_UNIT));
                 config.parser(valueParser);
-                return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
+                return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
             }
 
             // falling back on the get field data context
-            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
+            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
         }
 
         FieldMapper<?> mapper = context.smartNameFieldMapper(field);
         if (mapper == null) {
             config.unmapped(true);
-            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
+            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
         }
 
         if (!(mapper instanceof DateFieldMapper)) {
@@ -218,7 +222,7 @@ public class DateHistogramParser implements Aggregator.Parser {
 
         IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
         config.fieldContext(new FieldContext(field, indexFieldData));
-        return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
+        return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
     }
 
     private static InternalOrder resolveOrder(String key, boolean asc) {

+ 12 - 12
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java

@@ -45,7 +45,7 @@ public class HistogramAggregator extends BucketsAggregator {
     private final Rounding rounding;
     private final InternalOrder order;
     private final boolean keyed;
-    private final boolean computeEmptyBuckets;
+    private final long minDocCount;
     private final AbstractHistogramBase.Factory histogramFactory;
 
     private final LongHash bucketOrds;
@@ -55,7 +55,7 @@ public class HistogramAggregator extends BucketsAggregator {
                                Rounding rounding,
                                InternalOrder order,
                                boolean keyed,
-                               boolean computeEmptyBuckets,
+                               long minDocCount,
                                @Nullable NumericValuesSource valuesSource,
                                long initialCapacity,
                                AbstractHistogramBase.Factory<?> histogramFactory,
@@ -67,7 +67,7 @@ public class HistogramAggregator extends BucketsAggregator {
         this.rounding = rounding;
         this.order = order;
         this.keyed = keyed;
-        this.computeEmptyBuckets = computeEmptyBuckets;
+        this.minDocCount = minDocCount;
         this.histogramFactory = histogramFactory;
 
         bucketOrds = new LongHash(initialCapacity, aggregationContext.pageCacheRecycler());
@@ -118,15 +118,15 @@ public class HistogramAggregator extends BucketsAggregator {
 
         // value source will be null for unmapped fields
         ValueFormatter formatter = valuesSource != null ? valuesSource.formatter() : null;
-        AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = computeEmptyBuckets ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
-        return histogramFactory.create(name, buckets, order, emptyBucketInfo, formatter, keyed);
+        AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
+        return histogramFactory.create(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
     }
 
     @Override
     public InternalAggregation buildEmptyAggregation() {
         ValueFormatter formatter = valuesSource != null ? valuesSource.formatter() : null;
-        AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = computeEmptyBuckets ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
-        return histogramFactory.create(name, (List<HistogramBase.Bucket>) Collections.EMPTY_LIST, order, emptyBucketInfo, formatter, keyed);
+        AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
+        return histogramFactory.create(name, Collections.emptyList(), order, minDocCount, emptyBucketInfo, formatter, keyed);
     }
 
     @Override
@@ -139,28 +139,28 @@ public class HistogramAggregator extends BucketsAggregator {
         private final Rounding rounding;
         private final InternalOrder order;
         private final boolean keyed;
-        private final boolean computeEmptyBuckets;
+        private final long minDocCount;
         private final AbstractHistogramBase.Factory<?> histogramFactory;
 
         public Factory(String name, ValuesSourceConfig<NumericValuesSource> valueSourceConfig,
-                       Rounding rounding, InternalOrder order, boolean keyed, boolean computeEmptyBuckets, AbstractHistogramBase.Factory<?> histogramFactory) {
+                       Rounding rounding, InternalOrder order, boolean keyed, long minDocCount, AbstractHistogramBase.Factory<?> histogramFactory) {
             super(name, histogramFactory.type(), valueSourceConfig);
             this.rounding = rounding;
             this.order = order;
             this.keyed = keyed;
-            this.computeEmptyBuckets = computeEmptyBuckets;
+            this.minDocCount = minDocCount;
             this.histogramFactory = histogramFactory;
         }
 
         @Override
         protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
-            return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, null, 0, histogramFactory, aggregationContext, parent);
+            return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, null, 0, histogramFactory, aggregationContext, parent);
         }
 
         @Override
         protected Aggregator create(NumericValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
             // todo if we'll keep track of min/max values in IndexFieldData, we could use the max here to come up with a better estimation for the buckets count
-            return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, valuesSource, 50, histogramFactory, aggregationContext, parent);
+            return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, valuesSource, 50, histogramFactory, aggregationContext, parent);
         }
 
     }

+ 5 - 5
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramBuilder.java

@@ -32,7 +32,7 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
 
     private Long interval;
     private HistogramBase.Order order;
-    private Boolean computeEmptyBuckets;
+    private Long minDocCount;
 
     public HistogramBuilder(String name) {
         super(name, InternalHistogram.TYPE.name());
@@ -48,8 +48,8 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
         return this;
     }
 
-    public HistogramBuilder emptyBuckets(boolean computeEmptyBuckets) {
-        this.computeEmptyBuckets = computeEmptyBuckets;
+    public HistogramBuilder minDocCount(long minDocCount) {
+        this.minDocCount = minDocCount;
         return this;
     }
 
@@ -65,8 +65,8 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
             order.toXContent(builder, params);
         }
 
-        if (computeEmptyBuckets != null) {
-            builder.field("empty_buckets", computeEmptyBuckets);
+        if (minDocCount != null) {
+            builder.field("min_doc_count", minDocCount);
         }
 
         return builder;
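
For callers of the Java builder API, the removed `emptyBuckets(boolean)` setter maps directly onto the new option. A hedged migration sketch; the surrounding builder chain, including `interval`, is illustrative:

[source,java]
--------------------------------------------------
// Before this commit (API removed above):
//     new HistogramBuilder("prices").field("price").interval(50).emptyBuckets(true);
// After this commit, minDocCount(0) requests the same gap-filling behavior:
HistogramBuilder prices = new HistogramBuilder("prices")
        .field("price")
        .interval(50)
        .minDocCount(0); // 0 means "also emit empty buckets"
--------------------------------------------------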

+ 6 - 6
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramParser.java

@@ -54,7 +54,7 @@ public class HistogramParser implements Aggregator.Parser {
         String scriptLang = null;
         Map<String, Object> scriptParams = null;
         boolean keyed = false;
-        boolean emptyBuckets = false;
+        long minDocCount = 1;
         InternalOrder order = (InternalOrder) InternalOrder.KEY_ASC;
         long interval = -1;
         boolean assumeSorted = false;
@@ -80,14 +80,14 @@ public class HistogramParser implements Aggregator.Parser {
             } else if (token == XContentParser.Token.VALUE_NUMBER) {
                 if ("interval".equals(currentFieldName)) {
                     interval = parser.longValue();
+                } else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
+                    minDocCount = parser.longValue();
                 } else {
                     throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
                 }
             } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
                 if ("keyed".equals(currentFieldName)) {
                     keyed = parser.booleanValue();
-                } else if ("empty_buckets".equals(currentFieldName) || "emptyBuckets".equals(currentFieldName)) {
-                    emptyBuckets = parser.booleanValue();
                 } else if ("script_values_sorted".equals(currentFieldName)) {
                     assumeSorted = parser.booleanValue();
                 } else {
@@ -130,13 +130,13 @@ public class HistogramParser implements Aggregator.Parser {
         }
 
         if (field == null) {
-            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
+            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
         }
 
         FieldMapper<?> mapper = context.smartNameFieldMapper(field);
         if (mapper == null) {
             config.unmapped(true);
-            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
+            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
         }
 
         IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
@@ -146,7 +146,7 @@ public class HistogramParser implements Aggregator.Parser {
             config.formatter(new ValueFormatter.Number.Pattern(format));
         }
 
-        return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
+        return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
 
     }
 

+ 4 - 4
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java

@@ -76,8 +76,8 @@ public class InternalDateHistogram extends AbstractHistogramBase<DateHistogram.B
         }
 
         @Override
-        public AbstractHistogramBase<?> create(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
-            return new InternalDateHistogram(name, buckets, order, emptyBucketInfo, formatter, keyed);
+        public AbstractHistogramBase<?> create(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
+            return new InternalDateHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
         }
 
         @Override
@@ -88,8 +88,8 @@ public class InternalDateHistogram extends AbstractHistogramBase<DateHistogram.B
 
     InternalDateHistogram() {} // for serialization
 
-    InternalDateHistogram(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
-        super(name, buckets, order, emptyBucketInfo, formatter, keyed);
+    InternalDateHistogram(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
+        super(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
     }
 
     @Override

+ 4 - 4
src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java

@@ -65,8 +65,8 @@ public class InternalHistogram extends AbstractHistogramBase<Histogram.Bucket> i
             return TYPE.name();
         }
 
-        public AbstractHistogramBase<?> create(String name, List<Histogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
-            return new InternalHistogram(name, buckets, order, emptyBucketInfo, formatter, keyed);
+        public AbstractHistogramBase<?> create(String name, List<Histogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
+            return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
         }
 
         public Bucket createBucket(long key, long docCount, InternalAggregations aggregations) {
@@ -77,8 +77,8 @@ public class InternalHistogram extends AbstractHistogramBase<Histogram.Bucket> i
 
     public InternalHistogram() {} // for serialization
 
-    public InternalHistogram(String name, List<Histogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
-        super(name, buckets, order, emptyBucketInfo, formatter, keyed);
+    public InternalHistogram(String name, List<Histogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
+        super(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
     }
 
     @Override

+ 10 - 5
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java

@@ -88,12 +88,12 @@ public class DoubleTerms extends InternalTerms {
 
     DoubleTerms() {} // for serialization
 
-    public DoubleTerms(String name, InternalOrder order, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
-        this(name, order, null, requiredSize, buckets);
+    public DoubleTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
+        this(name, order, null, requiredSize, minDocCount, buckets);
     }
 
-    public DoubleTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
-        super(name, order, requiredSize, buckets);
+    public DoubleTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
+        super(name, order, requiredSize, minDocCount, buckets);
         this.valueFormatter = valueFormatter;
     }
 
@@ -147,7 +147,10 @@ public class DoubleTerms extends InternalTerms {
         for (int i = 0; i < states.length; i++) {
             if (states[i]) {
                 List<DoubleTerms.Bucket> sameTermBuckets = (List<DoubleTerms.Bucket>) internalBuckets[i];
-                ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
+                final InternalTerms.Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
+                if (b.getDocCount() >= minDocCount) {
+                    ordered.insertWithOverflow(b);
+                }
             }
         }
         buckets.release();
@@ -165,6 +168,7 @@ public class DoubleTerms extends InternalTerms {
         this.order = InternalOrder.Streams.readOrder(in);
         this.valueFormatter = ValueFormatterStreams.readOptional(in);
         this.requiredSize = in.readVInt();
+        this.minDocCount = in.readVLong();
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
         for (int i = 0; i < size; i++) {
@@ -180,6 +184,7 @@ public class DoubleTerms extends InternalTerms {
         InternalOrder.Streams.writeOrder(order, out);
         ValueFormatterStreams.writeOptional(valueFormatter, out);
         out.writeVInt(requiredSize);
+        out.writeVLong(minDocCount);
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
             out.writeDouble(((Bucket) bucket).term);

+ 21 - 3
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java

@@ -18,6 +18,7 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import org.apache.lucene.index.AtomicReaderContext;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.index.fielddata.DoubleValues;
 import org.elasticsearch.search.aggregations.Aggregator;
@@ -40,16 +41,18 @@ public class DoubleTermsAggregator extends BucketsAggregator {
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
+    private final long minDocCount;
     private final NumericValuesSource valuesSource;
     private final LongHash bucketOrds;
 
     public DoubleTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
-                               InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
+                               InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
         super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
         this.valuesSource = valuesSource;
         this.order = InternalOrder.validate(order, this);
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
+        this.minDocCount = minDocCount;
         bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
     }
 
@@ -78,6 +81,21 @@ public class DoubleTermsAggregator extends BucketsAggregator {
     @Override
     public DoubleTerms buildAggregation(long owningBucketOrdinal) {
         assert owningBucketOrdinal == 0;
+
+        if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
+            // we need to fill-in the blanks
+            for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
+                context.setNextReader(ctx);
+                final DoubleValues values = valuesSource.doubleValues();
+                for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
+                    final int valueCount = values.setDocument(docId);
+                    for (int i = 0; i < valueCount; ++i) {
+                        bucketOrds.add(Double.doubleToLongBits(values.nextValue()));
+                    }
+                }
+            }
+        }
+
         final int size = (int) Math.min(bucketOrds.size(), shardSize);
 
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
@@ -104,12 +122,12 @@ public class DoubleTermsAggregator extends BucketsAggregator {
             bucket.aggregations = bucketAggregations(bucket.bucketOrd);
             list[i] = bucket;
         }
-        return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, Arrays.asList(list));
+        return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Arrays.asList(list));
     }
 
     @Override
     public DoubleTerms buildEmptyAggregation() {
-        return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, Collections.<InternalTerms.Bucket>emptyList());
+        return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
     }
 
     @Override

+ 17 - 17
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java

@@ -18,6 +18,7 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.elasticsearch.cache.recycler.CacheRecycler;
 import org.elasticsearch.common.io.stream.Streamable;
@@ -78,15 +79,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
 
     protected InternalOrder order;
     protected int requiredSize;
+    protected long minDocCount;
     protected Collection<Bucket> buckets;
     protected Map<String, Bucket> bucketMap;
 
     protected InternalTerms() {} // for serialization
 
-    protected InternalTerms(String name, InternalOrder order, int requiredSize, Collection<Bucket> buckets) {
+    protected InternalTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<Bucket> buckets) {
         super(name);
         this.order = order;
         this.requiredSize = requiredSize;
+        this.minDocCount = minDocCount;
         this.buckets = buckets;
     }
 
@@ -156,7 +159,10 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(null));
         for (Map.Entry<Text, List<Bucket>> entry : buckets.entrySet()) {
             List<Bucket> sameTermBuckets = entry.getValue();
-            ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
+            final Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
+            if (b.docCount >= minDocCount) {
+                ordered.insertWithOverflow(b);
+            }
         }
         Bucket[] list = new Bucket[ordered.size()];
         for (int i = ordered.size() - 1; i >= 0; i--) {
@@ -166,23 +172,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         return reduced;
     }
 
-    protected void trimExcessEntries() {
-        if (requiredSize >= buckets.size()) {
-            return;
-        }
-
-        if (buckets instanceof List) {
-            buckets = ((List) buckets).subList(0, requiredSize);
-            return;
-        }
-
-        int i = 0;
-        for (Iterator<Bucket> iter  = buckets.iterator(); iter.hasNext();) {
-            iter.next();
-            if (i++ >= requiredSize) {
-                iter.remove();
+    final void trimExcessEntries() {
+        final List<Bucket> newBuckets = Lists.newArrayList();
+        for (Bucket b : buckets) {
+            if (newBuckets.size() >= requiredSize) {
+                break;
+            }
+            if (b.docCount >= minDocCount) {
+                newBuckets.add(b);
             }
         }
+        buckets = newBuckets;
     }
 
 }
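
The effect of the rewritten `trimExcessEntries` is easiest to see on concrete numbers. A standalone sketch (not the ES class) of the same rule: keep at most `requiredSize` buckets, skipping any bucket below `minDocCount`.

[source,java]
--------------------------------------------------
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class TrimExample {
    public static void main(String[] args) {
        // Doc counts of buckets, already sorted by the aggregation order.
        List<Long> docCounts = Arrays.asList(5L, 3L, 1L, 0L, 0L);
        int requiredSize = 3;
        long minDocCount = 2;
        List<Long> kept = new ArrayList<Long>();
        for (long count : docCounts) {
            if (kept.size() >= requiredSize) {
                break; // enough buckets collected
            }
            if (count >= minDocCount) {
                kept.add(count); // low-count buckets are skipped, not counted
            }
        }
        System.out.println(kept); // prints: [5, 3]
    }
}
--------------------------------------------------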

+ 8 - 3
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java

@@ -89,8 +89,8 @@ public class LongTerms extends InternalTerms {
 
     LongTerms() {} // for serialization
 
-    public LongTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
-        super(name, order, requiredSize, buckets);
+    public LongTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
+        super(name, order, requiredSize, minDocCount, buckets);
         this.valueFormatter = valueFormatter;
     }
 
@@ -144,7 +144,10 @@ public class LongTerms extends InternalTerms {
         for (int i = 0; i < states.length; i++) {
             if (states[i]) {
                 List<LongTerms.Bucket> sameTermBuckets = (List<LongTerms.Bucket>) internalBuckets[i];
-                ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
+                final InternalTerms.Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
+                if (b.getDocCount() >= minDocCount) {
+                    ordered.insertWithOverflow(b);
+                }
             }
         }
         buckets.release();
@@ -162,6 +165,7 @@ public class LongTerms extends InternalTerms {
         this.order = InternalOrder.Streams.readOrder(in);
         this.valueFormatter = ValueFormatterStreams.readOptional(in);
         this.requiredSize = in.readVInt();
+        this.minDocCount = in.readVLong();
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
         for (int i = 0; i < size; i++) {
@@ -177,6 +181,7 @@ public class LongTerms extends InternalTerms {
         InternalOrder.Streams.writeOrder(order, out);
         ValueFormatterStreams.writeOptional(valueFormatter, out);
         out.writeVInt(requiredSize);
+        out.writeVLong(minDocCount);
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
             out.writeLong(((Bucket) bucket).term);

+ 21 - 3
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java

@@ -18,6 +18,7 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import org.apache.lucene.index.AtomicReaderContext;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.index.fielddata.LongValues;
 import org.elasticsearch.search.aggregations.Aggregator;
@@ -40,16 +41,18 @@ public class LongTermsAggregator extends BucketsAggregator {
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
+    private final long minDocCount;
     private final NumericValuesSource valuesSource;
     private final LongHash bucketOrds;
 
     public LongTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
-                               InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
+                               InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
         super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
         this.valuesSource = valuesSource;
         this.order = InternalOrder.validate(order, this);
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
+        this.minDocCount = minDocCount;
         bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
     }
 
@@ -77,6 +80,21 @@ public class LongTermsAggregator extends BucketsAggregator {
     @Override
     public LongTerms buildAggregation(long owningBucketOrdinal) {
         assert owningBucketOrdinal == 0;
+
+        if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
+            // we need to fill-in the blanks
+            for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
+                context.setNextReader(ctx);
+                final LongValues values = valuesSource.longValues();
+                for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
+                    final int valueCount = values.setDocument(docId);
+                    for (int i = 0; i < valueCount; ++i) {
+                        bucketOrds.add(values.nextValue());
+                    }
+                }
+            }
+        }
+
         final int size = (int) Math.min(bucketOrds.size(), shardSize);
 
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
@@ -103,12 +121,12 @@ public class LongTermsAggregator extends BucketsAggregator {
             bucket.aggregations = bucketAggregations(bucket.bucketOrd);
             list[i] = bucket;
         }
-        return new LongTerms(name, order, valuesSource.formatter(), requiredSize, Arrays.asList(list));
+        return new LongTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Arrays.asList(list));
     }
 
     @Override
     public LongTerms buildEmptyAggregation() {
-        return new LongTerms(name, order, valuesSource.formatter(), requiredSize, Collections.<InternalTerms.Bucket>emptyList());
+        return new LongTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
     }
 
     @Override

+ 4 - 2
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java

@@ -83,8 +83,8 @@ public class StringTerms extends InternalTerms {
 
     StringTerms() {} // for serialization
 
-    public StringTerms(String name, InternalOrder order, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
-        super(name, order, requiredSize, buckets);
+    public StringTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
+        super(name, order, requiredSize, minDocCount, buckets);
     }
 
     @Override
@@ -97,6 +97,7 @@ public class StringTerms extends InternalTerms {
         this.name = in.readString();
         this.order = InternalOrder.Streams.readOrder(in);
         this.requiredSize = in.readVInt();
+        this.minDocCount = in.readVLong();
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
         for (int i = 0; i < size; i++) {
@@ -111,6 +112,7 @@ public class StringTerms extends InternalTerms {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
         out.writeVInt(requiredSize);
+        out.writeVLong(minDocCount);
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
             out.writeBytesRef(((Bucket) bucket).termBytes);

+ 127 - 10
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java

@@ -18,30 +18,31 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms;
 
-import com.sun.corba.se.impl.naming.cosnaming.InterOperableNamingImpl;
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.UnmodifiableIterator;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.collect.Iterators2;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.LongArray;
 import org.elasticsearch.index.fielddata.BytesValues;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.search.aggregations.AggregationExecutionException;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
 import org.elasticsearch.search.aggregations.bucket.BytesRefHash;
 import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
-import org.elasticsearch.search.aggregations.metrics.MetricsAggregator;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
 import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
 
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
+import java.util.*;
 
 /**
  * An aggregator of string values.
@@ -52,11 +53,12 @@ public class StringTermsAggregator extends BucketsAggregator {
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
+    private final long minDocCount;
     protected final BytesRefHash bucketOrds;
     private final IncludeExclude includeExclude;
 
     public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
-                                 InternalOrder order, int requiredSize, int shardSize,
+                                 InternalOrder order, int requiredSize, int shardSize, long minDocCount,
                                  IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
 
         super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
@@ -64,6 +66,7 @@ public class StringTermsAggregator extends BucketsAggregator {
         this.order = InternalOrder.validate(order, this);
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
+        this.minDocCount = minDocCount;
         this.includeExclude = includeExclude;
         bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
     }
@@ -94,9 +97,122 @@ public class StringTermsAggregator extends BucketsAggregator {
         }
     }
 
+    /** Returns an iterator over the field data terms. */
+    private static Iterator<BytesRef> terms(final BytesValues.WithOrdinals bytesValues, boolean reverse) {
+        final Ordinals.Docs ordinals = bytesValues.ordinals();
+        if (reverse) {
+            return new UnmodifiableIterator<BytesRef>() {
+
+                long i = ordinals.getMaxOrd() - 1;
+
+                @Override
+                public boolean hasNext() {
+                    return i >= Ordinals.MIN_ORDINAL;
+                }
+
+                @Override
+                public BytesRef next() {
+                    bytesValues.getValueByOrd(i--);
+                    return bytesValues.copyShared();
+                }
+
+            };
+        } else {
+            return new UnmodifiableIterator<BytesRef>() {
+
+                long i = Ordinals.MIN_ORDINAL;
+
+                @Override
+                public boolean hasNext() {
+                    return i < ordinals.getMaxOrd();
+                }
+
+                @Override
+                public BytesRef next() {
+                    bytesValues.getValueByOrd(i++);
+                    return bytesValues.copyShared();
+                }
+
+            };
+        }
+    }
+
     @Override
     public StringTerms buildAggregation(long owningBucketOrdinal) {
         assert owningBucketOrdinal == 0;
+
+        if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
+            // we need to fill-in the blanks
+            List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
+            for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
+                context.setNextReader(ctx);
+                final BytesValues values = valuesSource.bytesValues();
+                if (values instanceof BytesValues.WithOrdinals) {
+                    valuesWithOrdinals.add((BytesValues.WithOrdinals) values);
+                } else {
+                    // brute force
+                    for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
+                        final int valueCount = values.setDocument(docId);
+                        for (int i = 0; i < valueCount; ++i) {
+                            final BytesRef term = values.nextValue();
+                            if (includeExclude == null || includeExclude.accept(term)) {
+                                bucketOrds.add(term, values.currentValueHash());
+                            }
+                        }
+                    }
+                }
+            }
+
+            // With ordinals we can be smarter and add just as many terms as necessary to the hash table
+            // For instance, if sorting by term asc, we only need to get the first `requiredSize` terms as other terms would
+            // either be excluded by the priority queue or at reduce time.
+            if (valuesWithOrdinals.size() > 0) {
+                final boolean reverse = order == InternalOrder.TERM_DESC;
+                Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+                if (reverse) {
+                    comparator = Collections.reverseOrder(comparator);
+                }
+                Iterator<? extends BytesRef>[] iterators = new Iterator[valuesWithOrdinals.size()];
+                for (int i = 0; i < valuesWithOrdinals.size(); ++i) {
+                    iterators[i] = terms(valuesWithOrdinals.get(i), reverse);
+                }
+                Iterator<BytesRef> terms = Iterators2.mergeSorted(Arrays.asList(iterators), comparator, true);
+                if (includeExclude != null) {
+                    terms = Iterators.filter(terms, new Predicate<BytesRef>() {
+                        @Override
+                        public boolean apply(BytesRef input) {
+                            return includeExclude.accept(input);
+                        }
+                    });
+                }
+                if (order == InternalOrder.COUNT_ASC) {
+                    // try to find `shardSize` terms that matched no hit
+                    // we need shardSize rather than requiredSize here because, even though these terms have
+                    // a count of 0 on this shard, they might have higher counts on other shards
+                    for (int added = 0; added < shardSize && terms.hasNext(); ) {
+                        if (bucketOrds.add(terms.next()) >= 0) {
+                            ++added;
+                        }
+                    }
+                } else if (order == InternalOrder.COUNT_DESC) {
+                    // add terms until there are enough buckets
+                    while (bucketOrds.size() < requiredSize && terms.hasNext()) {
+                        bucketOrds.add(terms.next());
+                    }
+                } else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
+                    // add the first `requiredSize` terms in iteration order (the iterator is already reversed for term desc)
+                    for (int i = 0; i < requiredSize && terms.hasNext(); ++i) {
+                        bucketOrds.add(terms.next());
+                    }
+                } else {
+                    // other orders (e.g. ordering by a sub-aggregation) cannot be optimized: add all terms
+                    while (terms.hasNext()) {
+                        bucketOrds.add(terms.next());
+                    }
+                }
+            }
+        }
+
         final int size = (int) Math.min(bucketOrds.size(), shardSize);
 
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
@@ -117,12 +233,13 @@ public class StringTermsAggregator extends BucketsAggregator {
             bucket.aggregations = bucketAggregations(bucket.bucketOrd);
             list[i] = bucket;
         }
-        return new StringTerms(name, order, requiredSize, Arrays.asList(list));
+
+        return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
     }
 
     @Override
     public StringTerms buildEmptyAggregation() {
-        return new StringTerms(name, order, requiredSize, Collections.<InternalTerms.Bucket>emptyList());
+        return new StringTerms(name, order, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
     }
 
     @Override
@@ -141,8 +258,8 @@ public class StringTermsAggregator extends BucketsAggregator {
         private LongArray ordinalToBucket;
 
         public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, long estimatedBucketCount,
-                InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
-            super(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, null, aggregationContext, parent);
+                InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
+            super(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent);
             this.valuesSource = valuesSource;
         }
 

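Note: the fill-in logic above is the heart of `min_doc_count=0` for string terms. When field data exposes ordinals, each segment can stream its terms in sorted order, so the aggregator merges per-segment iterators (via Iterators2.mergeSorted with de-duplication) instead of re-scanning every document. A self-contained sketch of that merge-then-deduplicate pattern, using plain String terms instead of BytesRef and only Guava; class and method names here are illustrative, not the actual Elasticsearch sources:

    import com.google.common.collect.Iterators;
    import com.google.common.collect.Ordering;
    import com.google.common.collect.PeekingIterator;

    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.Iterator;
    import java.util.List;

    public class MergeSortedTermsSketch {

        /** Merges sorted per-segment term iterators into one sorted, de-duplicated stream. */
        static <T> Iterator<T> mergeSorted(List<Iterator<T>> segments, final Comparator<? super T> cmp) {
            // Guava's mergeSorted preserves global order but keeps duplicates, and the same
            // term typically appears in several segments, so duplicates are skipped manually.
            final PeekingIterator<T> merged = Iterators.peekingIterator(Iterators.mergeSorted(segments, cmp));
            return new Iterator<T>() {
                @Override
                public boolean hasNext() {
                    return merged.hasNext();
                }

                @Override
                public T next() {
                    final T term = merged.next();
                    while (merged.hasNext() && cmp.compare(merged.peek(), term) == 0) {
                        merged.next(); // same term, seen in another segment
                    }
                    return term;
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }

        public static void main(String[] args) {
            Iterator<String> terms = mergeSorted(
                    Arrays.asList(
                            Arrays.asList("a", "c", "d").iterator(),
                            Arrays.asList("b", "c").iterator()),
                    Ordering.<String>natural());
            while (terms.hasNext()) {
                System.out.print(terms.next() + " "); // prints: a b c d
            }
        }
    }

Because the merged stream is already in the order the user asked for, the count-desc branch above can stop pulling terms as soon as the hash table holds `requiredSize` buckets.
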
+ 8 - 6
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

@@ -41,21 +41,23 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
+    private final long minDocCount;
     private final IncludeExclude includeExclude;
     private final String executionHint;
 
-    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, String executionHint) {
+    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, String executionHint) {
         super(name, StringTerms.TYPE.name(), valueSourceConfig);
         this.order = order;
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
+        this.minDocCount = minDocCount;
         this.includeExclude = includeExclude;
         this.executionHint = executionHint;
     }
 
     @Override
     protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
-        return new UnmappedTermsAggregator(name, order, requiredSize, aggregationContext, parent);
+        return new UnmappedTermsAggregator(name, order, requiredSize, minDocCount, aggregationContext, parent);
     }
 
     private static boolean hasParentBucketAggregator(Aggregator parent) {
@@ -107,11 +109,11 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
             if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
                 assert includeExclude == null;
                 final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name,
-                        factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
+                        factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
                 aggregationContext.registerReaderContextAware(aggregator);
                 return aggregator;
             } else {
-                return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
+                return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
             }
         }
 
@@ -122,9 +124,9 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
 
         if (valuesSource instanceof NumericValuesSource) {
             if (((NumericValuesSource) valuesSource).isFloatingPoint()) {
-                return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
+                return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
             }
-            return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
+            return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
         }
 
         throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + valuesSourceConfig.fieldContext().field() +

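Note: the factory now threads minDocCount through every execution path (unmapped, ordinals-based, map-based, double and long terms). On the request side these knobs all live on the builder; a hedged sketch of the Java API this commit extends (client, index and field names are hypothetical):

    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.client.Client;
    import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;

    import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

    public class MinDocCountUsageSketch {
        static SearchResponse brandCounts(Client client) {
            return client.prepareSearch("products")     // hypothetical index
                    .addAggregation(terms("brands")
                            .field("brand")             // hypothetical field
                            .executionHint(TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS)
                            .size(10)
                            .minDocCount(0))            // also return terms matching no document
                    .execute().actionGet();
        }
    }

Note the `assert includeExclude == null` above: the ordinals execution path is never combined with include/exclude filtering, which only the map-based StringTermsAggregator handles.
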
+ 12 - 0
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java

@@ -33,6 +33,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
 
     private int size = -1;
     private int shardSize = -1;
+    private long minDocCount = -1;
     private Terms.ValueType valueType;
     private Terms.Order order;
     private String includePattern;
@@ -62,6 +63,14 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
         return this;
     }
 
+    /**
+     * Set the minimum document count terms should have in order to appear in the response.
+     */
+    public TermsBuilder minDocCount(long minDocCount) {
+        this.minDocCount = minDocCount;
+        return this;
+    }
+
     /**
      * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
      * on the {@link java.util.regex.Pattern} class.
@@ -135,6 +144,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
         if (shardSize >= 0) {
             builder.field("shard_size", shardSize);
         }
+        if (minDocCount >= 0) {
+            builder.field("min_doc_count", minDocCount);
+        }
         if (valueType != null) {
             builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT));
         }

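Note: the builder uses -1 as an "unset" sentinel so that min_doc_count (like size and shard_size) is only serialized when the caller set it explicitly, leaving the default to the parser. A minimal sketch of the resulting fluent usage (field name hypothetical):

    import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;

    public class TermsBuilderSketch {
        // serializes to {"terms":{"field":"genre","min_doc_count":0}};
        // dropping minDocCount(0) would omit the min_doc_count key entirely
        static TermsBuilder allGenres() {
            return new TermsBuilder("genres")
                    .field("genre")
                    .minDocCount(0);
        }
    }
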
+ 6 - 3
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java

@@ -72,6 +72,7 @@ public class TermsParser implements Aggregator.Parser {
         String exclude = null;
         int excludeFlags = 0; // 0 means no flags
         String executionHint = null;
+        long minDocCount = 1;
 
 
         XContentParser.Token token;
@@ -110,6 +111,8 @@ public class TermsParser implements Aggregator.Parser {
                     requiredSize = parser.intValue();
                 } else if ("shard_size".equals(currentFieldName) || "shardSize".equals(currentFieldName)) {
                     shardSize = parser.intValue();
+                } else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
+                    minDocCount = parser.intValue();
                 } else {
                     throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
                 }
@@ -206,14 +209,14 @@ public class TermsParser implements Aggregator.Parser {
             if (!assumeUnique) {
                 config.ensureUnique(true);
             }
-            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
+            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
         }
 
         FieldMapper<?> mapper = context.smartNameFieldMapper(field);
         if (mapper == null) {
             ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
             config.unmapped(true);
-            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
+            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
         }
         IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
 
@@ -255,7 +258,7 @@ public class TermsParser implements Aggregator.Parser {
             config.ensureUnique(true);
         }
 
-        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
+        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
     }
 
     static InternalOrder resolveOrder(String key, boolean asc) {

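Note: minDocCount is initialized to 1, so requests that omit the key keep the previous behavior of only returning terms with at least one matching hit, and both the min_doc_count and minDocCount spellings are accepted (the value is read with parser.intValue() even though it is carried around as a long). A sketch of building the request body the parser consumes, using the same XContent helpers as the tests below (field name hypothetical):

    import org.elasticsearch.common.xcontent.XContentBuilder;

    import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

    public class TermsRequestSketch {
        // produces the aggregation body {"terms":{"field":"genre","min_doc_count":0}}
        static XContentBuilder zeroFreqTerms() throws Exception {
            return jsonBuilder().startObject()
                    .startObject("terms")
                        .field("field", "genre")
                        .field("min_doc_count", 0)
                    .endObject()
                    .endObject();
        }
    }
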
+ 4 - 2
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java

@@ -53,8 +53,8 @@ public class UnmappedTerms extends InternalTerms {
 
     UnmappedTerms() {} // for serialization
 
-    public UnmappedTerms(String name, InternalOrder order, int requiredSize) {
-        super(name, order, requiredSize, BUCKETS);
+    public UnmappedTerms(String name, InternalOrder order, int requiredSize, long minDocCount) {
+        super(name, order, requiredSize, minDocCount, BUCKETS);
     }
 
     @Override
@@ -67,6 +67,7 @@ public class UnmappedTerms extends InternalTerms {
         this.name = in.readString();
         this.order = InternalOrder.Streams.readOrder(in);
         this.requiredSize = in.readVInt();
+        this.minDocCount = in.readVLong();
         this.buckets = BUCKETS;
         this.bucketMap = BUCKETS_MAP;
     }
@@ -76,6 +77,7 @@ public class UnmappedTerms extends InternalTerms {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
         out.writeVInt(requiredSize);
+        out.writeVLong(minDocCount);
     }
 
     @Override

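Note: minDocCount is streamed with writeVLong/readVLong, a good fit since the value is non-negative and usually tiny (0 or 1), so it typically costs a single byte on the wire. An illustrative sketch of the variable-length scheme, not the actual StreamOutput source:

    import java.io.IOException;
    import java.io.OutputStream;

    public class VLongSketch {
        /** Writes a non-negative long in as few bytes as possible, 7 bits at a time. */
        static void writeVLong(OutputStream out, long i) throws IOException {
            assert i >= 0;
            while ((i & ~0x7FL) != 0) {
                out.write((int) ((i & 0x7F) | 0x80)); // low 7 bits, continuation bit set
                i >>>= 7;
            }
            out.write((int) i); // last byte, continuation bit clear
        }
    }
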
+ 5 - 3
src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTermsAggregator.java

@@ -32,11 +32,13 @@ public class UnmappedTermsAggregator extends Aggregator {
 
     private final InternalOrder order;
     private final int requiredSize;
+    private final long minDocCount;
 
-    public UnmappedTermsAggregator(String name, InternalOrder order, int requiredSize, AggregationContext aggregationContext, Aggregator parent) {
+    public UnmappedTermsAggregator(String name, InternalOrder order, int requiredSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
         super(name, BucketAggregationMode.PER_BUCKET, AggregatorFactories.EMPTY, 0, aggregationContext, parent);
         this.order = order;
         this.requiredSize = requiredSize;
+        this.minDocCount = minDocCount;
     }
 
     @Override
@@ -51,11 +53,11 @@ public class UnmappedTermsAggregator extends Aggregator {
     @Override
     public InternalAggregation buildAggregation(long owningBucketOrdinal) {
         assert owningBucketOrdinal == 0;
-        return new UnmappedTerms(name, order, requiredSize);
+        return new UnmappedTerms(name, order, requiredSize, minDocCount);
     }
 
     @Override
     public InternalAggregation buildEmptyAggregation() {
-        return new UnmappedTerms(name, order, requiredSize);
+        return new UnmappedTerms(name, order, requiredSize, minDocCount);
     }
 }

+ 50 - 0
src/test/java/org/elasticsearch/common/collect/Iterators2Tests.java

@@ -0,0 +1,50 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.collect;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Ordering;
+import com.google.common.collect.Sets;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.test.ElasticsearchTestCase;
+
+import java.util.Iterator;
+import java.util.List;
+
+public class Iterators2Tests extends ElasticsearchTestCase {
+
+    public void testDeduplicateSorted() {
+        final List<String> list = Lists.newArrayList();
+        for (int i = randomInt(100); i >= 0; --i) {
+            final int frequency = randomIntBetween(1, 10);
+            final String s = randomAsciiOfLength(randomIntBetween(2, 20));
+            for (int j = 0; j < frequency; ++j) {
+                list.add(s);
+            }
+        }
+        CollectionUtil.introSort(list);
+        final List<String> deduplicated = Lists.newArrayList();
+        for (Iterator<String> it = Iterators2.deduplicateSorted(list.iterator(), Ordering.natural()); it.hasNext(); ) {
+            deduplicated.add(it.next());
+        }
+        assertEquals(Lists.newArrayList(Sets.newTreeSet(list)), deduplicated);
+    }
+
+}

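Note: the test checks deduplicateSorted against TreeSet semantics: on sorted input, the helper must yield exactly the sorted unique elements. A minimal usage sketch, assuming only the signature exercised above (input must already be sorted so that duplicates are adjacent):

    import com.google.common.collect.Ordering;
    import org.elasticsearch.common.collect.Iterators2;

    import java.util.Arrays;
    import java.util.Iterator;

    public class DeduplicateSortedSketch {
        public static void main(String[] args) {
            Iterator<String> it = Iterators2.deduplicateSorted(
                    Arrays.asList("a", "a", "b", "c", "c", "c").iterator(),
                    Ordering.<String>natural());
            while (it.hasNext()) {
                System.out.print(it.next() + " "); // prints: a b c
            }
        }
    }
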
+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramTests.java

@@ -888,7 +888,7 @@ public class DateHistogramTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(dateHistogram("date_histo").interval(1)))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(dateHistogram("date_histo").interval(1)))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/DateRangeTests.java

@@ -994,7 +994,7 @@ public class DateRangeTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(dateRange("date_range").addRange("0-1", 0, 1)))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(dateRange("date_range").addRange("0-1", 0, 1)))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java

@@ -600,7 +600,7 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(terms("terms")))
                 .execute().actionGet();
 

+ 2 - 2
src/test/java/org/elasticsearch/search/aggregations/bucket/FilterTests.java

@@ -23,8 +23,8 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.filter.Filter;
+import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.metrics.avg.Avg;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.hamcrest.Matchers;
@@ -159,7 +159,7 @@ public class FilterTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(filter("filter").filter(matchAllFilter())))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/GeoDistanceTests.java

@@ -371,7 +371,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(geoDistance("geo_dist").field("location").point("52.3760, 4.894").addRange("0-100", 0.0, 100.0)))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/HistogramTests.java

@@ -756,7 +756,7 @@ public class HistogramTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(histogram("sub_histo").interval(1l)))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/IPv4RangeTests.java

@@ -842,7 +842,7 @@ public class IPv4RangeTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(ipRange("ip_range").field("ip").addRange("r1", "10.0.0.1", "10.0.0.10")))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java

@@ -594,7 +594,7 @@ public class LongTermsTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(terms("terms")))
                 .execute().actionGet();
 

+ 322 - 0
src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountTests.java

@@ -0,0 +1,322 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket;
+
+import com.carrotsearch.hppc.LongOpenHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.randomizedtesting.generators.RandomStrings;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.SearchType;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Before;
+
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
+import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
+
+public class MinDocCountTests extends ElasticsearchIntegrationTest {
+
+    private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);
+
+    @Override
+    public Settings indexSettings() {
+        return ImmutableSettings.builder()
+                .put("index.number_of_shards", between(1, 5))
+                .put("index.number_of_replicas", between(0, 1))
+                .build();
+    }
+
+    private int cardinality;
+
+    @Before
+    public void indexData() throws Exception {
+        createIndex("idx");
+
+        cardinality = randomIntBetween(8, 30);
+        final List<IndexRequestBuilder> indexRequests = new ArrayList<IndexRequestBuilder>();
+        final Set<String> stringTerms = new HashSet<String>();
+        final LongSet longTerms = new LongOpenHashSet();
+        for (int i = 0; i < cardinality; ++i) {
+            String stringTerm;
+            do {
+                stringTerm = RandomStrings.randomAsciiOfLength(getRandom(), 8);
+            } while (!stringTerms.add(stringTerm));
+            long longTerm;
+            do {
+                longTerm = randomInt(cardinality * 2);
+            } while (!longTerms.add(longTerm));
+            double doubleTerm = longTerm * Math.PI;
+            final int frequency = randomBoolean() ? 1 : randomIntBetween(2, 20);
+            for (int j = 0; j < frequency; ++j) {
+                indexRequests.add(client().prepareIndex("idx", "type").setSource(jsonBuilder().startObject().field("s", stringTerm).field("l", longTerm).field("d", doubleTerm).field("match", randomBoolean()).endObject()));
+            }
+        }
+        cardinality = stringTerms.size();
+
+        indexRandom(true, indexRequests);
+        ensureSearchable();
+    }
+
+    private enum Script {
+        NO {
+            @Override
+            TermsBuilder apply(TermsBuilder builder, String field) {
+                return builder.field(field);
+            }
+        },
+        YES {
+            @Override
+            TermsBuilder apply(TermsBuilder builder, String field) {
+                return builder.script("doc['" + field + "'].values");
+            }
+        };
+        abstract TermsBuilder apply(TermsBuilder builder, String field);
+    }
+
+    // check that terms2 is a subset of terms1
+    private void assertSubset(Terms terms1, Terms terms2, long minDocCount, int size, String include) {
+        final Matcher matcher = include == null ? null : Pattern.compile(include).matcher("");
+        final Iterator<Terms.Bucket> it1 = terms1.iterator();
+        final Iterator<Terms.Bucket> it2 = terms2.iterator();
+        int size2 = 0;
+        while (it1.hasNext()) {
+            final Terms.Bucket bucket1 = it1.next();
+            if (bucket1.getDocCount() >= minDocCount && (matcher == null || matcher.reset(bucket1.getKey().string()).matches())) {
+                if (size2++ == size) {
+                    break;
+                }
+                assertTrue(it2.hasNext());
+                final Terms.Bucket bucket2 = it2.next();
+                assertEquals(bucket1.getKey(), bucket2.getKey());
+                assertEquals(bucket1.getDocCount(), bucket2.getDocCount());
+            }
+        }
+        assertFalse(it2.hasNext());
+    }
+
+    private void assertSubset(Histogram histo1, Histogram histo2, long minDocCount) {
+        final Iterator<Histogram.Bucket> it2 = histo2.iterator();
+        for (Histogram.Bucket b1 : histo1) {
+            if (b1.getDocCount() >= minDocCount) {
+                final Histogram.Bucket b2 = it2.next();
+                assertEquals(b1.getKey(), b2.getKey());
+                assertEquals(b1.getDocCount(), b2.getDocCount());
+            }
+        }
+    }
+
+    public void testStringTermAsc() throws Exception {
+        testMinDocCountOnTerms("s", Script.NO, Terms.Order.term(true));
+    }
+
+    public void testStringScriptTermAsc() throws Exception {
+        testMinDocCountOnTerms("s", Script.YES, Terms.Order.term(true));
+    }
+
+    public void testStringTermDesc() throws Exception {
+        testMinDocCountOnTerms("s", Script.NO, Terms.Order.term(false));
+    }
+
+    public void testStringScriptTermDesc() throws Exception {
+        testMinDocCountOnTerms("s", Script.YES, Terms.Order.term(false));
+    }
+
+    public void testStringCountAsc() throws Exception {
+        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(true));
+    }
+
+    public void testStringScriptCountAsc() throws Exception {
+        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(true));
+    }
+
+    public void testStringCountDesc() throws Exception {
+        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(false));
+    }
+
+    public void testStringScriptCountDesc() throws Exception {
+        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(false));
+    }
+
+    public void testStringCountAscWithInclude() throws Exception {
+        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(true), ".*a.*");
+    }
+
+    public void testStringScriptCountAscWithInclude() throws Exception {
+        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(true), ".*a.*");
+    }
+
+    public void testStringCountDescWithInclude() throws Exception {
+        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(false), ".*a.*");
+    }
+
+    public void testStringScriptCountDescWithInclude() throws Exception {
+        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(false), ".*a.*");
+    }
+
+    public void testLongTermAsc() throws Exception {
+        testMinDocCountOnTerms("l", Script.NO, Terms.Order.term(true));
+    }
+
+    public void testLongScriptTermAsc() throws Exception {
+        testMinDocCountOnTerms("l", Script.YES, Terms.Order.term(true));
+    }
+
+    public void testLongTermDesc() throws Exception {
+        testMinDocCountOnTerms("l", Script.NO, Terms.Order.term(false));
+    }
+
+    public void testLongScriptTermDesc() throws Exception {
+        testMinDocCountOnTerms("l", Script.YES, Terms.Order.term(false));
+    }
+
+    public void testLongCountAsc() throws Exception {
+        testMinDocCountOnTerms("l", Script.NO, Terms.Order.count(true));
+    }
+
+    public void testLongScriptCountAsc() throws Exception {
+        testMinDocCountOnTerms("l", Script.YES, Terms.Order.count(true));
+    }
+
+    public void testLongCountDesc() throws Exception {
+        testMinDocCountOnTerms("l", Script.NO, Terms.Order.count(false));
+    }
+
+    public void testLongScriptCountDesc() throws Exception {
+        testMinDocCountOnTerms("l", Script.YES, Terms.Order.count(false));
+    }
+
+    public void testDoubleTermAsc() throws Exception {
+        testMinDocCountOnTerms("d", Script.NO, Terms.Order.term(true));
+    }
+
+    public void testDoubleScriptTermAsc() throws Exception {
+        testMinDocCountOnTerms("d", Script.YES, Terms.Order.term(true));
+    }
+
+    public void testDoubleTermDesc() throws Exception {
+        testMinDocCountOnTerms("d", Script.NO, Terms.Order.term(false));
+    }
+
+    public void testDoubleScriptTermDesc() throws Exception {
+        testMinDocCountOnTerms("d", Script.YES, Terms.Order.term(false));
+    }
+
+    public void testDoubleCountAsc() throws Exception {
+        testMinDocCountOnTerms("d", Script.NO, Terms.Order.count(true));
+    }
+
+    public void testDoubleScriptCountAsc() throws Exception {
+        testMinDocCountOnTerms("d", Script.YES, Terms.Order.count(true));
+    }
+
+    public void testDoubleCountDesc() throws Exception {
+        testMinDocCountOnTerms("d", Script.NO, Terms.Order.count(false));
+    }
+
+    public void testDoubleScriptCountDesc() throws Exception {
+        testMinDocCountOnTerms("d", Script.YES, Terms.Order.count(false));
+    }
+
+    private void testMinDocCountOnTerms(String field, Script script, Terms.Order order) throws Exception {
+        testMinDocCountOnTerms(field, script, order, null);
+    }
+
+    private void testMinDocCountOnTerms(String field, Script script, Terms.Order order, String include) throws Exception {
+        // all terms
+        final SearchResponse allTermsResponse = client().prepareSearch("idx").setTypes("type")
+                .setSearchType(SearchType.COUNT)
+                .setQuery(QUERY)
+                .addAggregation(script.apply(terms("terms"), field)
+                        .executionHint(StringTermsTests.randomExecutionHint())
+                        .order(order)
+                        .size(cardinality + randomInt(10))
+                        .minDocCount(0))
+                .execute().actionGet();
+        final Terms allTerms = allTermsResponse.getAggregations().get("terms");
+        assertEquals(cardinality, allTerms.buckets().size());
+
+        for (long minDocCount = 0; minDocCount < 20; ++minDocCount) {
+            final int size = randomIntBetween(1, cardinality + 2);
+            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                    .setSearchType(SearchType.COUNT)
+                    .setQuery(QUERY)
+                    .addAggregation(script.apply(terms("terms"), field)
+                            .executionHint(StringTermsTests.randomExecutionHint())
+                            .order(order)
+                            .size(size)
+                            .include(include)
+                            .shardSize(cardinality + randomInt(10))
+                            .minDocCount(minDocCount))
+                    .execute().actionGet();
+            assertSubset(allTerms, (Terms) response.getAggregations().get("terms"), minDocCount, size, include);
+        }
+
+    }
+
+    public void testHistogramCountAsc() throws Exception {
+        testMinDocCountOnHistogram(Histogram.Order.COUNT_ASC);
+    }
+
+    public void testHistogramCountDesc() throws Exception {
+        testMinDocCountOnHistogram(Histogram.Order.COUNT_DESC);
+    }
+
+    public void testHistogramKeyAsc() throws Exception {
+        testMinDocCountOnHistogram(Histogram.Order.KEY_ASC);
+    }
+
+    public void testHistogramKeyDesc() throws Exception {
+        testMinDocCountOnHistogram(Histogram.Order.KEY_DESC);
+    }
+
+    private void testMinDocCountOnHistogram(Histogram.Order order) throws Exception {
+        final int interval = randomIntBetween(1, 3);
+        final SearchResponse allResponse = client().prepareSearch("idx").setTypes("type")
+                .setSearchType(SearchType.COUNT)
+                .setQuery(QUERY)
+                .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(0))
+                .execute().actionGet();
+
+        final Histogram allHisto = allResponse.getAggregations().get("histo");
+
+        for (long minDocCount = 0; minDocCount < 50; ++minDocCount) {
+            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                    .setSearchType(SearchType.COUNT)
+                    .setQuery(QUERY)
+                    .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(minDocCount))
+                    .execute().actionGet();
+            assertSubset(allHisto, (Histogram) response.getAggregations().get("histo"), minDocCount);
+        }
+
+    }
+
+}

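Note: the invariant exercised throughout this test is that, for any minDocCount and size, the response must equal the min_doc_count=0 response with low-frequency buckets filtered out and the remainder truncated to size. A standalone illustration of that contract, with hypothetical bucket counts:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class SubsetContractSketch {
        /** Expected bucket counts: filter the min_doc_count=0 response, then truncate to size. */
        static List<Long> expected(List<Long> allCounts, long minDocCount, int size) {
            final List<Long> kept = new ArrayList<Long>();
            for (Long count : allCounts) {
                if (count >= minDocCount) {
                    kept.add(count);
                    if (kept.size() == size) {
                        break;
                    }
                }
            }
            return kept;
        }

        public static void main(String[] args) {
            // counts in count-desc response order
            List<Long> all = Arrays.asList(5L, 2L, 0L);
            System.out.println(expected(all, 1L, 2)); // [5, 2]: the zero-count bucket is dropped
        }
    }
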
+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/MissingTests.java

@@ -202,7 +202,7 @@ public class MissingTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(missing("missing")))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java

@@ -251,7 +251,7 @@ public class NestedTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(nested("nested").path("nested")))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/bucket/RangeTests.java

@@ -921,7 +921,7 @@ public class RangeTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(range("range").addRange("0-2", 0.0, 2.0)))
                 .execute().actionGet();
 

+ 2 - 2
src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsTests.java

@@ -66,7 +66,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
                 .build();
     }
 
-    private String randomExecutionHint() {
+    public static String randomExecutionHint() {
         return randomFrom(Arrays.asList(null, TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP, TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS));
     }
 
@@ -758,7 +758,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(terms("terms")))
                 .execute().actionGet();
 

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/metrics/AvgTests.java

@@ -39,7 +39,7 @@ public class AvgTests extends AbstractNumericTests {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(avg("avg")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(avg("avg")))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsTests.java

@@ -53,7 +53,7 @@ public class ExtendedStatsTests extends AbstractNumericTests {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(extendedStats("stats")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(extendedStats("stats")))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/metrics/MaxTests.java

@@ -39,7 +39,7 @@ public class MaxTests extends AbstractNumericTests {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(max("max")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(max("max")))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/metrics/MinTests.java

@@ -39,7 +39,7 @@ public class MinTests extends AbstractNumericTests {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(min("min")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(min("min")))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/metrics/StatsTests.java

@@ -40,7 +40,7 @@ public class StatsTests extends AbstractNumericTests {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(stats("stats")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(stats("stats")))
                 .execute().actionGet();
 
         assertShardExecutionState(searchResponse, 0);

+ 1 - 1
src/test/java/org/elasticsearch/search/aggregations/metrics/SumTests.java

@@ -39,7 +39,7 @@ public class SumTests extends AbstractNumericTests {
 
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(sum("sum")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(sum("sum")))
                 .execute().actionGet();
 
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));