Browse Source

Fix bug with significant terms background count (#76730)

When building empty responses for shards that don't have the term field in question,
significant terms ignored the background filter.

This commit fixes this bug by respecting the background filter count, even
when building empty results.

closes #76729
Benjamin Trent 4 years ago
parent
commit
55edc62b0d

+ 82 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yml

@@ -73,6 +73,88 @@
   - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"}
 
 ---
+"Test background filter count ":
+  - skip:
+      # TODO Fix after backport
+      version: " - 7.99.99"
+      reason: introduced in 7.15.0
+
+  - do:
+      indices.create:
+        index:  goodbad
+        body:
+          settings:
+            number_of_shards: "1"
+          mappings:
+            properties:
+              text:
+                type: text
+                fielddata: true
+              class:
+                type: keyword
+  - do:
+      indices.create:
+        index: goodbad-2
+        body:
+          settings:
+            number_of_shards: "1"
+          mappings:
+            properties:
+              text:
+                type: text
+                fielddata: true
+              class:
+                type: keyword
+
+  - do:
+      index:
+        index: goodbad-2
+        id: 1
+        body: { class: "bad" }
+  - do:
+      index:
+        index: goodbad-2
+        id: 2
+        body: { class: "bad" }
+
+  - do:
+      index:
+        index:  goodbad
+        id:     1
+        body:   { text: "good", class: "good" }
+  - do:
+      index:
+        index:  goodbad
+        id:     2
+        body:   { text: "good", class: "good" }
+  - do:
+      index:
+        index:  goodbad
+        id:     3
+        body:   { text: "bad", class: "bad" }
+  - do:
+      index:
+        index:  goodbad
+        id:     4
+        body:   { text: "bad", class: "bad" }
+
+  - do:
+      indices.refresh:
+        index: [goodbad, goodbad-2]
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad*
+
+  - match: {hits.total: 6}
+
+  - do:
+      search:
+        index: goodbad*
+        body: {"aggs": {"sig_terms": {"significant_terms": {"field": "text", "background_filter": {"bool": {"filter": [{"term": {"class": "good" }}]}}}}}}
+  - match: { aggregations.sig_terms.bg_count: 2 }
+---
 "IP test":
   - do:
       indices.create:

+ 5 - 2
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java

@@ -36,9 +36,12 @@ abstract class AbstractStringTermsAggregator extends TermsAggregator {
                 metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, 0, emptyList(), 0L);
     }
 
-    protected SignificantStringTerms buildEmptySignificantTermsAggregation(long subsetSize, SignificanceHeuristic significanceHeuristic) {
+    protected SignificantStringTerms buildEmptySignificantTermsAggregation(
+        long subsetSize,
+        long supersetSize,
+        SignificanceHeuristic significanceHeuristic
+    ) {
         // We need to account for the significance of a miss in our global stats - provide corpus size as context
-        int supersetSize = searcher().getIndexReader().numDocs();
         return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
                 metadata(), format, subsetSize, supersetSize, significanceHeuristic, emptyList());
     }

+ 2 - 2
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

@@ -894,12 +894,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
 
         @Override
         SignificantStringTerms buildEmptyResult() {
-            return buildEmptySignificantTermsAggregation(0, significanceHeuristic);
+            return buildEmptySignificantTermsAggregation(0, supersetSize, significanceHeuristic);
         }
 
         @Override
         SignificantStringTerms buildNoValuesResult(long owningBucketOrdinal) {
-            return buildEmptySignificantTermsAggregation(subsetSizes.get(owningBucketOrdinal), significanceHeuristic);
+            return buildEmptySignificantTermsAggregation(subsetSizes.get(owningBucketOrdinal), supersetSize, significanceHeuristic);
         }
 
         @Override

+ 1 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java

@@ -568,7 +568,7 @@ public class MapStringTermsAggregator extends AbstractStringTermsAggregator {
 
         @Override
         SignificantStringTerms buildEmptyResult() {
-            return buildEmptySignificantTermsAggregation(0, significanceHeuristic);
+            return buildEmptySignificantTermsAggregation(0, supersetSize, significanceHeuristic);
         }
 
         @Override

+ 0 - 2
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java

@@ -573,8 +573,6 @@ public class NumericTermsAggregator extends TermsAggregator {
 
         @Override
         SignificantLongTerms buildEmptyResult() {
-            // We need to account for the significance of a miss in our global stats - provide corpus size as context
-            int supersetSize = searcher().getIndexReader().numDocs();
             return new SignificantLongTerms(
                 name,
                 bucketCountThresholds.getRequiredSize(),