8 years ago · 528bd25fa7
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java
@@ -128,6 +128,7 @@ public abstract class InternalMappedSignificantTerms<
 
				     @Override
			
 
				     public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
			
 
				         builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
			
 
				+        builder.field(BG_COUNT, supersetSize);
			
 
				         builder.startArray(CommonFields.BUCKETS.getPreferredName());
			
 
				         for (Bucket bucket : buckets) {
			
 
				             //There is a condition (presumably when only one shard has a bucket?) where reduce is not called
			
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantLongTerms.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantLongTerms.java
@@ -39,9 +39,7 @@ public class ParsedSignificantLongTerms extends ParsedSignificantTerms {
 
				     }
			
 
				 
			
 
				     public static ParsedSignificantLongTerms fromXContent(XContentParser parser, String name) throws IOException {
			
 
				-        ParsedSignificantLongTerms aggregation = PARSER.parse(parser, null);
			
 
				-        aggregation.setName(name);
			
 
				-        return aggregation;
			
 
				+        return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
			
 
				     }
			
 
				 
			
 
				     public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {
			
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantStringTerms.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantStringTerms.java
@@ -40,9 +40,7 @@ public class ParsedSignificantStringTerms extends ParsedSignificantTerms {
 
				     }
			
 
				 
			
 
				     public static ParsedSignificantStringTerms fromXContent(XContentParser parser, String name) throws IOException {
			
 
				-        ParsedSignificantStringTerms aggregation = PARSER.parse(parser, null);
			
 
				-        aggregation.setName(name);
			
 
				-        return aggregation;
			
 
				+        return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
			
 
				     }
			
 
				 
			
 
				     public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {
			
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantTerms.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantTerms.java
@@ -21,6 +21,8 @@ package org.elasticsearch.search.aggregations.bucket.significant;
 
				 
			
 
				 import org.elasticsearch.common.CheckedBiConsumer;
			
 
				 import org.elasticsearch.common.CheckedFunction;
			
 
				+import org.elasticsearch.common.CheckedSupplier;
			
 
				+import org.elasticsearch.common.ParseField;
			
 
				 import org.elasticsearch.common.xcontent.ObjectParser;
			
 
				 import org.elasticsearch.common.xcontent.XContentBuilder;
			
 
				 import org.elasticsearch.common.xcontent.XContentParser;
			
@@ -42,11 +44,16 @@ public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregatio
 
				 
			
 
				     private Map<String, ParsedBucket> bucketMap;
			
 
				     protected long subsetSize;
			
 
				+    protected long supersetSize;
			
 
				 
			
 
				     protected long getSubsetSize() {
			
 
				         return subsetSize;
			
 
				     }
			
 
				 
			
 
				+    protected long getSupersetSize() {
			
 
				+        return supersetSize;
			
 
				+    }
			
 
				+
			
 
				     @Override
			
 
				     public List<? extends SignificantTerms.Bucket> getBuckets() {
			
 
				         return buckets;
			
@@ -68,6 +75,7 @@ public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregatio
 
				     @Override
			
 
				     protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
			
 
				         builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
			
 
				+        builder.field(InternalMappedSignificantTerms.BG_COUNT, supersetSize);
			
 
				         builder.startArray(CommonFields.BUCKETS.getPreferredName());
			
 
				         for (SignificantTerms.Bucket bucket : buckets) {
			
 
				             bucket.toXContent(builder, params);
			
@@ -76,16 +84,31 @@ public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregatio
 
				         return builder;
			
 
				     }
			
 
				 
			
 
				+    static <T extends ParsedSignificantTerms> T parseSignificantTermsXContent(final CheckedSupplier<T, IOException> aggregationSupplier,
			
 
				+                                                                              final String name) throws IOException {
			
 
				+        T aggregation = aggregationSupplier.get();
			
 
				+        aggregation.setName(name);
			
 
				+        for (ParsedBucket bucket : aggregation.buckets) {
			
 
				+            bucket.subsetSize = aggregation.subsetSize;
			
 
				+            bucket.supersetSize = aggregation.supersetSize;
			
 
				+        }
			
 
				+        return aggregation;
			
 
				+    }
			
 
				+
			
 
				     static void declareParsedSignificantTermsFields(final ObjectParser<? extends ParsedSignificantTerms, Void> objectParser,
			
 
				                                      final CheckedFunction<XContentParser, ParsedSignificantTerms.ParsedBucket, IOException> bucketParser) {
			
 
				         declareMultiBucketAggregationFields(objectParser, bucketParser::apply, bucketParser::apply);
			
 
				         objectParser.declareLong((parsedTerms, value) -> parsedTerms.subsetSize = value , CommonFields.DOC_COUNT);
			
 
				+        objectParser.declareLong((parsedTerms, value) -> parsedTerms.supersetSize = value ,
			
 
				+                new ParseField(InternalMappedSignificantTerms.BG_COUNT));
			
 
				     }
			
 
				 
			
 
				     public abstract static class ParsedBucket extends ParsedMultiBucketAggregation.ParsedBucket implements SignificantTerms.Bucket {
			
 
				 
			
 
				         protected long subsetDf;
			
 
				+        protected long subsetSize;
			
 
				         protected long supersetDf;
			
 
				+        protected long supersetSize;
			
 
				         protected double score;
			
 
				 
			
 
				         @Override
			
@@ -110,12 +133,12 @@ public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregatio
 
				 
			
 
				         @Override
			
 
				         public long getSupersetSize() {
			
 
				-            throw new UnsupportedOperationException();
			
 
				+            return supersetSize;
			
 
				         }
			
 
				 
			
 
				         @Override
			
 
				         public long getSubsetSize() {
			
 
				-            throw new UnsupportedOperationException();
			
 
				+            return subsetSize;
			
 
				         }
			
 
				 
			
 
				         @Override
			
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTerms.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTerms.java
@@ -29,17 +29,39 @@ public interface SignificantTerms extends MultiBucketsAggregation, Iterable<Sign
 
				 
			
 
				     interface Bucket extends MultiBucketsAggregation.Bucket {
			
 
				 
			
 
				+        /**
			
 
				+         * @return The significant score for the subset
			
 
				+         */
			
 
				         double getSignificanceScore();
			
 
				 
			
 
				-        Number getKeyAsNumber();
			
 
				-
			
 
				+        /**
			
 
				+         * @return The number of docs in the subset containing a particular term.
			
 
				+         * This number is equal to the document count of the bucket.
			
 
				+         */
			
 
				         long getSubsetDf();
			
 
				 
			
 
				+        /**
			
 
				+         * @return The numbers of docs in the subset (also known as "foreground set").
			
 
				+         * This number is equal to the document count of the containing aggregation.
			
 
				+         */
			
 
				+        long getSubsetSize();
			
 
				+
			
 
				+        /**
			
 
				+         * @return The number of docs in the superset containing a particular term (also
			
 
				+         * known as the "background count" of the bucket)
			
 
				+         */
			
 
				         long getSupersetDf();
			
 
				 
			
 
				+        /**
			
 
				+         * @return The numbers of docs in the superset (ordinarily the background count
			
 
				+         * of the containing aggregation).
			
 
				+         */
			
 
				         long getSupersetSize();
			
 
				 
			
 
				-        long getSubsetSize();
			
 
				+        /**
			
 
				+         * @return The key, expressed as a number
			
 
				+         */
			
 
				+        Number getKeyAsNumber();
			
 
				     }
			
 
				 
			
 
				     @Override
			
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java
@@ -310,6 +310,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
 
				                 + "\"doc_count\":4,"
			
 
				                 + "\"sig_terms\":{"
			
 
				                 + "\"doc_count\":4,"
			
 
				+                + "\"bg_count\":7,"
			
 
				                 + "\"buckets\":["
			
 
				                 + "{"
			
 
				                 + "\"key\":" + (type.equals("long") ? "0," : "\"0\",")
			
@@ -325,6 +326,7 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
 
				                 + "\"doc_count\":3,"
			
 
				                 + "\"sig_terms\":{"
			
 
				                 + "\"doc_count\":3,"
			
 
				+                + "\"bg_count\":7,"
			
 
				                 + "\"buckets\":["
			
 
				                 + "{"
			
 
				                 + "\"key\":" + (type.equals("long") ? "1," : "\"1\",")
			
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
@@ -19,8 +19,14 @@
 
				 
			
 
				 package org.elasticsearch.search.aggregations.bucket.significant;
			
 
				 
			
 
				+import org.elasticsearch.search.aggregations.InternalAggregations;
			
 
				 import org.elasticsearch.search.aggregations.InternalMultiBucketAggregationTestCase;
			
 
				 import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
			
 
				+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
			
 
				+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
			
 
				+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
			
 
				+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
			
 
				+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
			
 
				 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
			
 
				 
			
 
				 import java.util.Arrays;
			
@@ -33,6 +39,51 @@ import java.util.stream.Stream;
 
				 
			
 
				 public abstract class InternalSignificantTermsTestCase extends InternalMultiBucketAggregationTestCase<InternalSignificantTerms<?, ?>> {
			
 
				 
			
 
				+    private SignificanceHeuristic significanceHeuristic;
			
 
				+
			
 
				+    @Override
			
 
				+    public void setUp() throws Exception {
			
 
				+        super.setUp();
			
 
				+        significanceHeuristic = randomSignificanceHeuristic();
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    protected final InternalSignificantTerms createTestInstance(String name,
			
 
				+                                                          List<PipelineAggregator> pipelineAggregators,
			
 
				+                                                          Map<String, Object> metaData,
			
 
				+                                                          InternalAggregations aggregations) {
			
 
				+        final int requiredSize = randomIntBetween(1, 5);
			
 
				+        final int numBuckets = randomInt(requiredSize + 2);
			
 
				+
			
 
				+        long subsetSize = 0;
			
 
				+        long supersetSize = 0;
			
 
				+
			
 
				+        int[] subsetDfs = new int[numBuckets];
			
 
				+        int[] supersetDfs = new int[numBuckets];
			
 
				+
			
 
				+        for (int i = 0; i < numBuckets; ++i) {
			
 
				+            int subsetDf = randomIntBetween(1, 10);
			
 
				+            subsetDfs[i] = subsetDf;
			
 
				+
			
 
				+            int supersetDf = randomIntBetween(subsetDf, 20);
			
 
				+            supersetDfs[i] = supersetDf;
			
 
				+
			
 
				+            subsetSize += subsetDf;
			
 
				+            supersetSize += supersetDf;
			
 
				+        }
			
 
				+        return createTestInstance(name, pipelineAggregators, metaData, aggregations, requiredSize, numBuckets, subsetSize, subsetDfs,
			
 
				+                supersetSize, supersetDfs, significanceHeuristic);
			
 
				+    }
			
 
				+
			
 
				+    protected abstract InternalSignificantTerms createTestInstance(String name,
			
 
				+                                                                   List<PipelineAggregator> pipelineAggregators,
			
 
				+                                                                   Map<String, Object> metaData,
			
 
				+                                                                   InternalAggregations aggregations,
			
 
				+                                                                   int requiredSize, int numBuckets,
			
 
				+                                                                   long subsetSize, int[] subsetDfs,
			
 
				+                                                                   long supersetSize, int[] supersetDfs,
			
 
				+                                                                   SignificanceHeuristic significanceHeuristic);
			
 
				+
			
 
				     @Override
			
 
				     protected InternalSignificantTerms createUnmappedInstance(String name,
			
 
				                                                               List<PipelineAggregator> pipelineAggregators,
			
@@ -72,6 +123,7 @@ public abstract class InternalSignificantTermsTestCase extends InternalMultiBuck
 
				         InternalSignificantTerms expectedSigTerms = (InternalSignificantTerms) expected;
			
 
				         ParsedSignificantTerms actualSigTerms = (ParsedSignificantTerms) actual;
			
 
				         assertEquals(expectedSigTerms.getSubsetSize(), actualSigTerms.getSubsetSize());
			
 
				+        assertEquals(expectedSigTerms.getSupersetSize(), actualSigTerms.getSupersetSize());
			
 
				 
			
 
				         for (SignificantTerms.Bucket bucket : (SignificantTerms) expected) {
			
 
				             String key = bucket.getKeyAsString();
			
@@ -91,14 +143,22 @@ public abstract class InternalSignificantTermsTestCase extends InternalMultiBuck
 
				 
			
 
				         assertEquals(expectedSigTerm.getSignificanceScore(), actualSigTerm.getSignificanceScore(), 0.0);
			
 
				         assertEquals(expectedSigTerm.getSubsetDf(), actualSigTerm.getSubsetDf());
			
 
				+        assertEquals(expectedSigTerm.getDocCount(), actualSigTerm.getSubsetDf());
			
 
				         assertEquals(expectedSigTerm.getSupersetDf(), actualSigTerm.getSupersetDf());
			
 
				-
			
 
				-        expectThrows(UnsupportedOperationException.class, actualSigTerm::getSubsetSize);
			
 
				-        expectThrows(UnsupportedOperationException.class, actualSigTerm::getSupersetSize);
			
 
				+        assertEquals(expectedSigTerm.getSubsetSize(), actualSigTerm.getSubsetSize());
			
 
				+        assertEquals(expectedSigTerm.getSupersetSize(), actualSigTerm.getSupersetSize());
			
 
				     }
			
 
				 
			
 
				     private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
			
 
				                                               Function<SignificantTerms.Bucket, Long> fn) {
			
 
				         return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
			
 
				     }
			
 
				+
			
 
				+    private static SignificanceHeuristic randomSignificanceHeuristic() {
			
 
				+        return randomFrom(
			
 
				+                new JLHScore(),
			
 
				+                new MutualInformation(randomBoolean(), randomBoolean()),
			
 
				+                new GND(randomBoolean()),
			
 
				+                new ChiSquare(randomBoolean(), randomBoolean()));
			
 
				+    }
			
 
				 }
			
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
@@ -23,10 +23,6 @@ import org.elasticsearch.common.io.stream.Writeable;
 
				 import org.elasticsearch.search.DocValueFormat;
			
 
				 import org.elasticsearch.search.aggregations.InternalAggregations;
			
 
				 import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
			
 
				 import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
			
 
				 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
			
 
				 
			
@@ -38,13 +34,11 @@ import java.util.Set;
 
				 
			
 
				 public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
			
 
				 
			
 
				-    private SignificanceHeuristic significanceHeuristic;
			
 
				     private DocValueFormat format;
			
 
				 
			
 
				     @Override
			
 
				     public void setUp() throws Exception {
			
 
				         super.setUp();
			
 
				-        significanceHeuristic = randomSignificanceHeuristic();
			
 
				         format = randomNumericDocValueFormat();
			
 
				     }
			
 
				 
			
@@ -52,30 +46,20 @@ public class SignificantLongTermsTests extends InternalSignificantTermsTestCase
 
				     protected InternalSignificantTerms createTestInstance(String name,
			
 
				                                                           List<PipelineAggregator> pipelineAggregators,
			
 
				                                                           Map<String, Object> metaData,
			
 
				-                                                          InternalAggregations aggregations) {
			
 
				-        int requiredSize = randomIntBetween(1, 5);
			
 
				-        int shardSize = requiredSize + 2;
			
 
				-        final int numBuckets = randomInt(shardSize);
			
 
				-
			
 
				-        long globalSubsetSize = 0;
			
 
				-        long globalSupersetSize = 0;
			
 
				+                                                          InternalAggregations aggs,
			
 
				+                                                          int requiredSize, int numBuckets,
			
 
				+                                                          long subsetSize, int[] subsetDfs,
			
 
				+                                                          long supersetSize, int[] supersetDfs,
			
 
				+                                                          SignificanceHeuristic significanceHeuristic) {
			
 
				 
			
 
				         List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
			
 
				         Set<Long> terms = new HashSet<>();
			
 
				         for (int i = 0; i < numBuckets; ++i) {
			
 
				             long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
			
 
				-
			
 
				-            int subsetDf = randomIntBetween(1, 10);
			
 
				-            int supersetDf = randomIntBetween(subsetDf, 20);
			
 
				-            int supersetSize = randomIntBetween(supersetDf, 30);
			
 
				-
			
 
				-            globalSubsetSize += subsetDf;
			
 
				-            globalSupersetSize += supersetSize;
			
 
				-
			
 
				-            buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, aggregations, format));
			
 
				+            buckets.add(new SignificantLongTerms.Bucket(subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, term, aggs, format));
			
 
				         }
			
 
				-        return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
			
 
				-                globalSupersetSize, significanceHeuristic, buckets);
			
 
				+        return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize,
			
 
				+                supersetSize, significanceHeuristic, buckets);
			
 
				     }
			
 
				 
			
 
				     @Override
			
@@ -87,12 +71,4 @@ public class SignificantLongTermsTests extends InternalSignificantTermsTestCase
 
				     protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
			
 
				         return ParsedSignificantLongTerms.class;
			
 
				     }
			
 
				-
			
 
				-    private static SignificanceHeuristic randomSignificanceHeuristic() {
			
 
				-        return randomFrom(
			
 
				-                new JLHScore(),
			
 
				-                new MutualInformation(randomBoolean(), randomBoolean()),
			
 
				-                new GND(randomBoolean()),
			
 
				-                new ChiSquare(randomBoolean(), randomBoolean()));
			
 
				-    }
			
 
				 }
			
--- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
@@ -24,10 +24,6 @@ import org.elasticsearch.common.io.stream.Writeable;
 
				 import org.elasticsearch.search.DocValueFormat;
			
 
				 import org.elasticsearch.search.aggregations.InternalAggregations;
			
 
				 import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
			
 
				-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
			
 
				 import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
			
 
				 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
			
 
				 
			
@@ -39,43 +35,24 @@ import java.util.Set;
 
				 
			
 
				 public class SignificantStringTermsTests extends InternalSignificantTermsTestCase {
			
 
				 
			
 
				-    private SignificanceHeuristic significanceHeuristic;
			
 
				-
			
 
				-    @Override
			
 
				-    public void setUp() throws Exception {
			
 
				-        super.setUp();
			
 
				-        significanceHeuristic = randomSignificanceHeuristic();
			
 
				-    }
			
 
				-
			
 
				     @Override
			
 
				     protected InternalSignificantTerms createTestInstance(String name,
			
 
				                                                           List<PipelineAggregator> pipelineAggregators,
			
 
				                                                           Map<String, Object> metaData,
			
 
				-                                                          InternalAggregations aggregations) {
			
 
				+                                                          InternalAggregations aggs,
			
 
				+                                                          int requiredSize, int numBuckets,
			
 
				+                                                          long subsetSize, int[] subsetDfs,
			
 
				+                                                          long supersetSize, int[] supersetDfs,
			
 
				+                                                          SignificanceHeuristic significanceHeuristic) {
			
 
				         DocValueFormat format = DocValueFormat.RAW;
			
 
				-        int requiredSize = randomIntBetween(1, 5);
			
 
				-        int shardSize = requiredSize + 2;
			
 
				-        final int numBuckets = randomInt(shardSize);
			
 
				-
			
 
				-        long globalSubsetSize = 0;
			
 
				-        long globalSupersetSize = 0;
			
 
				-
			
 
				         List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets);
			
 
				         Set<BytesRef> terms = new HashSet<>();
			
 
				         for (int i = 0; i < numBuckets; ++i) {
			
 
				             BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10)));
			
 
				-
			
 
				-            int subsetDf = randomIntBetween(1, 10);
			
 
				-            int supersetDf = randomIntBetween(subsetDf, 20);
			
 
				-            int supersetSize = randomIntBetween(supersetDf, 30);
			
 
				-
			
 
				-            globalSubsetSize += subsetDf;
			
 
				-            globalSupersetSize += supersetSize;
			
 
				-
			
 
				-            buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, aggregations, format));
			
 
				+            buckets.add(new SignificantStringTerms.Bucket(term, subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, aggs, format));
			
 
				         }
			
 
				-        return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
			
 
				-                globalSupersetSize, significanceHeuristic, buckets);
			
 
				+        return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize,
			
 
				+                supersetSize, significanceHeuristic, buckets);
			
 
				     }
			
 
				 
			
 
				     @Override
			
@@ -87,12 +64,4 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas
 
				     protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
			
 
				         return ParsedSignificantStringTerms.class;
			
 
				     }
			
 
				-
			
 
				-    private static SignificanceHeuristic randomSignificanceHeuristic() {
			
 
				-        return randomFrom(
			
 
				-                new JLHScore(),
			
 
				-                new MutualInformation(randomBoolean(), randomBoolean()),
			
 
				-                new GND(randomBoolean()),
			
 
				-                new ChiSquare(randomBoolean(), randomBoolean()));
			
 
				-    }
			
 
				 }
			
--- a/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc
@@ -66,9 +66,10 @@ Response:
 
				     ...
			
 
				     "aggregations": {
			
 
				         "my_unbiased_sample": {
			
 
				-            "doc_count": 1000,<1>
			
 
				+            "doc_count": 151,<1>
			
 
				             "keywords": {<2>
			
 
				-                "doc_count": 1000,
			
 
				+                "doc_count": 151,
			
 
				+                "bg_count": 650,
			
 
				                 "buckets": [
			
 
				                     {
			
 
				                         "key": "kibana",
			
@@ -83,10 +84,9 @@ Response:
 
				 }
			
 
				 --------------------------------------------------
			
 
				 // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
			
 
				-// TESTRESPONSE[s/1000/151/]
			
 
				 // TESTRESPONSE[s/2.213/$body.aggregations.my_unbiased_sample.keywords.buckets.0.score/]
			
 
				 
			
 
				-<1> 1000 documents were sampled in total because we asked for a maximum of 200 from an index with 5 shards. The cost of performing the nested significant_terms aggregation was therefore limited rather than unbounded.
			
 
				+<1> 151 documents were sampled in total.
			
 
				 <2> The results of the significant_terms aggregation are not skewed by any single author's quirks because we asked for a maximum of one post from any one author in our sample.
			
 
				 
			
 
				 ==== Scripted example:
			
@@ -136,9 +136,10 @@ Response:
 
				     ...
			
 
				     "aggregations": {
			
 
				         "my_unbiased_sample": {
			
 
				-            "doc_count": 1000,
			
 
				+            "doc_count": 6,
			
 
				             "keywords": {
			
 
				-                "doc_count": 1000,
			
 
				+                "doc_count": 6,
			
 
				+                "bg_count": 650,
			
 
				                 "buckets": [
			
 
				                     {
			
 
				                         "key": "logstash",
			
@@ -159,7 +160,6 @@ Response:
 
				 }
			
 
				 --------------------------------------------------
			
 
				 // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
			
 
				-// TESTRESPONSE[s/1000/6/]
			
 
				 // TESTRESPONSE[s/2.213/$body.aggregations.my_unbiased_sample.keywords.buckets.0.score/]
			
 
				 // TESTRESPONSE[s/1.34/$body.aggregations.my_unbiased_sample.keywords.buckets.1.score/]
			
 
				 
			
--- a/docs/reference/aggregations/bucket/sampler-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/sampler-aggregation.asciidoc
@@ -54,9 +54,10 @@ Response:
 
				     ...
			
 
				     "aggregations": {
			
 
				         "sample": {
			
 
				-            "doc_count": 1000,<1>
			
 
				+            "doc_count": 200,<1>
			
 
				             "keywords": {
			
 
				-                "doc_count": 1000,
			
 
				+                "doc_count": 200,
			
 
				+                "bg_count": 650,
			
 
				                 "buckets": [
			
 
				                     {
			
 
				                         "key": "elasticsearch",
			
@@ -77,9 +78,9 @@ Response:
 
				 }
			
 
				 --------------------------------------------------
			
 
				 // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
			
 
				-// TESTRESPONSE[s/1000/200/]
			
 
				 
			
 
				-<1> 1000 documents were sampled in total because we asked for a maximum of 200 from an index with 5 shards. The cost of performing the nested significant_terms aggregation was therefore limited rather than unbounded.
			
 
				+<1> 200 documents were sampled in total. The cost of performing the nested significant_terms aggregation was
			
 
				+therefore limited rather than unbounded.
			
 
				 
			
 
				 
			
 
				 Without the `sampler` aggregation the request query considers the full "long tail" of low-quality matches and therefore identifies
			
@@ -117,13 +118,14 @@ Response:
 
				     ...
			
 
				     "aggregations": {
			
 
				         "low_quality_keywords": {
			
 
				-            "doc_count": 1000,
			
 
				+            "doc_count": 600,
			
 
				+            "bg_count": 650,
			
 
				             "buckets": [
			
 
				                 {
			
 
				                     "key": "angular",
			
 
				                     "doc_count": 200,
			
 
				                     "score": 0.02777,
			
 
				-                   "bg_count": 200
			
 
				+                    "bg_count": 200
			
 
				                 },
			
 
				                 {
			
 
				                     "key": "jquery",
			
@@ -143,7 +145,6 @@ Response:
 
				 }
			
 
				 --------------------------------------------------
			
 
				 // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
			
 
				-// TESTRESPONSE[s/1000/600/]
			
 
				 // TESTRESPONSE[s/0.02777/$body.aggregations.low_quality_keywords.buckets.0.score/]
			
 
				 // TESTRESPONSE[s/0.0069/$body.aggregations.low_quality_keywords.buckets.2.score/]
			
 
				 
			
--- a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc
+++ b/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc
@@ -48,6 +48,7 @@ Response:
 
				     "aggregations" : {
			
 
				         "significantCrimeTypes" : {
			
 
				             "doc_count": 47347,
			
 
				+            "bg_count": 5064554,
			
 
				             "buckets" : [
			
 
				                 {
			
 
				                     "key": "Bicycle theft",
			
@@ -111,6 +112,7 @@ Response:
 
				                 "doc_count": 894038,
			
 
				                 "significantCrimeTypes": {
			
 
				                     "doc_count": 894038,
			
 
				+                    "bg_count": 5064554,
			
 
				                     "buckets": [
			
 
				                         {
			
 
				                             "key": "Robbery",
			
@@ -127,6 +129,7 @@ Response:
 
				                 "doc_count": 47347,
			
 
				                 "significantCrimeTypes": {
			
 
				                     "doc_count": 47347,
			
 
				+                    "bg_count": 5064554,
			
 
				                     "buckets": [
			
 
				                         {
			
 
				                             "key": "Bicycle theft",