Bladeren bron

Aggregations enhancement - remove pointless term frequency lookups.
If the user has set shard_min_doc_count, then avoid looking up background frequencies for any term whose foreground document count fails to meet that threshold on a shard.

Closes #11093

markharwood 10 jaren geleden
bovenliggende
commit
89b95dccc8

+ 5 - 3
src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java

@@ -99,6 +99,10 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
             if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
                 continue;
             }
+            if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
+                continue;
+            }
+
             if (spare == null) {
                 spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
             }
@@ -113,9 +117,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
             // Back at the central reducer these properties will be updated with
             // global stats
             spare.updateScore(termsAggFactory.getSignificanceHeuristic());
-            if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
-                spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
-            }
+            spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
         }
 
         final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];

+ 7 - 5
src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java

@@ -24,8 +24,8 @@ import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
-import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
 import org.elasticsearch.search.aggregations.LeafBucketCollector;
+import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
 import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator;
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.reducers.Reducer;
@@ -82,11 +82,15 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
         BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
         SignificantLongTerms.Bucket spare = null;
         for (long i = 0; i < bucketOrds.size(); i++) {
+            final int docCount = bucketDocCount(i);
+            if (docCount < bucketCountThresholds.getShardMinDocCount()) {
+                continue;
+            }
             if (spare == null) {
                 spare = new SignificantLongTerms.Bucket(0, 0, 0, 0, 0, null, formatter);
             }
             spare.term = bucketOrds.get(i);
-            spare.subsetDf = bucketDocCount(i);
+            spare.subsetDf = docCount;
             spare.subsetSize = subsetSize;
             spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.term);
             spare.supersetSize = supersetSize;
@@ -95,9 +99,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
             spare.updateScore(termsAggFactory.getSignificanceHeuristic());
 
             spare.bucketOrd = i;
-            if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
-                spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
-            }
+            spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
         }
 
         final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];

+ 8 - 5
src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java

@@ -24,8 +24,8 @@ import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
-import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
 import org.elasticsearch.search.aggregations.LeafBucketCollector;
+import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
 import org.elasticsearch.search.aggregations.bucket.terms.StringTermsAggregator;
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.reducers.Reducer;
@@ -81,12 +81,17 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
         BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
         SignificantStringTerms.Bucket spare = null;
         for (int i = 0; i < bucketOrds.size(); i++) {
+            final int docCount = bucketDocCount(i);
+            if (docCount < bucketCountThresholds.getShardMinDocCount()) {
+                continue;
+            }
+
             if (spare == null) {
                 spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
             }
 
             bucketOrds.get(i, spare.termBytes);
-            spare.subsetDf = bucketDocCount(i);
+            spare.subsetDf = docCount;
             spare.subsetSize = subsetSize;
             spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
             spare.supersetSize = supersetSize;
@@ -97,9 +102,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
             spare.updateScore(termsAggFactory.getSignificanceHeuristic());
 
             spare.bucketOrd = i;
-            if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
-                spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
-            }
+            spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
         }
 
         final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];