소스 검색

Propagate alias filters to significance aggs filters (#88221)

Propagate alias filters to significance aggs filters

If we have an alias filter, use it as part of the background filter on a
signficant terms agg. Previously, alias filters did not apply to background
filters so this will change bg_count results for some significant terms aggs
using background filter.

Closes #81585
tmgordeeva 3 년 전
부모
커밋
ab2602ecb0

+ 6 - 0
docs/changelog/88221.yaml

@@ -0,0 +1,6 @@
+pr: 88221
+summary: Propagate alias filters to significance aggs filters
+area: Aggregations
+type: bug
+issues:
+ - 81585

+ 49 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yml

@@ -233,3 +233,52 @@
                 significant_terms:
                   field: foo
                   jlp: {}
+
+---
+"Test alias background filter":
+  - skip:
+      version: " - 8.3.99"
+      reason: fixed in 8.4
+
+  - do:
+      indices.create:
+        index:  test_index
+        body:
+          mappings:
+            properties:
+              field1:
+                type: keyword
+              field2:
+                type: keyword
+
+  - do:
+      indices.put_alias:
+        index: test_index
+        name: test_alias
+        body: {"filter": {"bool": {"filter": [{"term": {"field2": "foo"}}]}}}
+  - do:
+      index:
+        index: test_index
+        id: "1"
+        body: { "field1" : "1", "field2": "foo" }
+
+  - do:
+      index:
+        index: test_index
+        id: "2"
+        body: { "field1": "2", "field2": "bar" }
+
+  - do:
+      index:
+        index: test_index
+        id: "3"
+        body: { "field1": "3", "field2": "foo" }
+
+  - do:
+      indices.refresh: {}
+
+  - do:
+      search:
+        index: test_alias
+        body: {"aggs": {"sig_terms": {"significant_terms": {"field": "field1"}}}}
+  - match: { aggregations.sig_terms.bg_count: 2 }

+ 15 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java

@@ -15,6 +15,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
@@ -74,9 +75,22 @@ class SignificanceLookup {
         // If there is no provided background filter, but we are within a sampling context, our background docs need to take the sampling
         // context into account.
         // If there is a filter, that filter needs to take the sampling into account (if we are within a sampling context)
-        this.backgroundFilter = backgroundFilter == null
+        Query backgroundQuery = backgroundFilter == null
             ? samplingContext.buildSamplingQueryIfNecessary(context).orElse(null)
             : samplingContext.buildQueryWithSampler(backgroundFilter, context);
+        // Refilter to account for alias filters, if there are any.
+        if (backgroundQuery == null) {
+            Query matchAllDocsQuery = new MatchAllDocsQuery();
+            Query contextFiltered = context.filterQuery(matchAllDocsQuery);
+            if (contextFiltered != matchAllDocsQuery) {
+                this.backgroundFilter = contextFiltered;
+            } else {
+                this.backgroundFilter = null;
+            }
+        } else {
+            Query contextFiltered = context.filterQuery(backgroundQuery);
+            this.backgroundFilter = contextFiltered;
+        }
         /*
          * We need to use a superset size that includes deleted docs or we
          * could end up blowing up with bad statistics that cause us to blow

+ 21 - 14
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregationBuilder.java

@@ -101,7 +101,7 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
 
     private IncludeExclude includeExclude = null;
     private String executionHint = null;
-    private QueryBuilder filterBuilder = null;
+    private QueryBuilder backgroundFilter = null;
     private TermsAggregator.BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS);
     private SignificanceHeuristic significanceHeuristic = DEFAULT_SIGNIFICANCE_HEURISTIC;
 
@@ -116,7 +116,7 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
         super(in);
         bucketCountThresholds = new BucketCountThresholds(in);
         executionHint = in.readOptionalString();
-        filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class);
+        backgroundFilter = in.readOptionalNamedWriteable(QueryBuilder.class);
         includeExclude = in.readOptionalWriteable(IncludeExclude::new);
         significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
     }
@@ -129,7 +129,7 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
         super(clone, factoriesBuilder, metadata);
         this.bucketCountThresholds = new BucketCountThresholds(clone.bucketCountThresholds);
         this.executionHint = clone.executionHint;
-        this.filterBuilder = clone.filterBuilder;
+        this.backgroundFilter = clone.backgroundFilter;
         this.includeExclude = clone.includeExclude;
         this.significanceHeuristic = clone.significanceHeuristic;
     }
@@ -151,9 +151,9 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
 
     @Override
     protected AggregationBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
-        if (filterBuilder != null) {
-            QueryBuilder rewrittenFilter = filterBuilder.rewrite(queryRewriteContext);
-            if (rewrittenFilter != filterBuilder) {
+        if (backgroundFilter != null) {
+            QueryBuilder rewrittenFilter = backgroundFilter.rewrite(queryRewriteContext);
+            if (rewrittenFilter != backgroundFilter) {
                 SignificantTermsAggregationBuilder rewritten = shallowCopy(factoriesBuilder, metadata);
                 rewritten.backgroundFilter(rewrittenFilter);
                 return rewritten;
@@ -166,7 +166,7 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
     protected void innerWriteTo(StreamOutput out) throws IOException {
         bucketCountThresholds.writeTo(out);
         out.writeOptionalString(executionHint);
-        out.writeOptionalNamedWriteable(filterBuilder);
+        out.writeOptionalNamedWriteable(backgroundFilter);
         out.writeOptionalWriteable(includeExclude);
         out.writeNamedWriteable(significanceHeuristic);
     }
@@ -265,12 +265,12 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
         if (backgroundFilter == null) {
             throw new IllegalArgumentException("[backgroundFilter] must not be null: [" + name + "]");
         }
-        this.filterBuilder = backgroundFilter;
+        this.backgroundFilter = backgroundFilter;
         return this;
     }
 
     public QueryBuilder backgroundFilter() {
-        return filterBuilder;
+        return backgroundFilter;
     }
 
     /**
@@ -320,7 +320,7 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
             config,
             includeExclude,
             executionHint,
-            filterBuilder,
+            backgroundFilter,
             bucketCountThresholds,
             executionHeuristic,
             context,
@@ -337,8 +337,8 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
         if (executionHint != null) {
             builder.field(TermsAggregationBuilder.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint);
         }
-        if (filterBuilder != null) {
-            builder.field(BACKGROUND_FILTER.getPreferredName(), filterBuilder);
+        if (backgroundFilter != null) {
+            builder.field(BACKGROUND_FILTER.getPreferredName(), backgroundFilter);
         }
         if (includeExclude != null) {
             includeExclude.toXContent(builder, params);
@@ -349,7 +349,14 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
 
     @Override
     public int hashCode() {
-        return Objects.hash(super.hashCode(), bucketCountThresholds, executionHint, filterBuilder, includeExclude, significanceHeuristic);
+        return Objects.hash(
+            super.hashCode(),
+            bucketCountThresholds,
+            executionHint,
+            backgroundFilter,
+            includeExclude,
+            significanceHeuristic
+        );
     }
 
     @Override
@@ -360,7 +367,7 @@ public class SignificantTermsAggregationBuilder extends ValuesSourceAggregationB
         SignificantTermsAggregationBuilder other = (SignificantTermsAggregationBuilder) obj;
         return Objects.equals(bucketCountThresholds, other.bucketCountThresholds)
             && Objects.equals(executionHint, other.executionHint)
-            && Objects.equals(filterBuilder, other.filterBuilder)
+            && Objects.equals(backgroundFilter, other.backgroundFilter)
             && Objects.equals(includeExclude, other.includeExclude)
             && Objects.equals(significanceHeuristic, other.significanceHeuristic);
     }

+ 43 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java

@@ -408,6 +408,49 @@ public class SignificantTermsAggregatorTests extends AggregatorTestCase {
         }
     }
 
+    public void testFieldBackground() throws IOException {
+        TextFieldType textFieldType = new TextFieldType("text");
+        textFieldType.setFielddata(true);
+
+        IndexWriterConfig indexWriterConfig = newIndexWriterConfig(new StandardAnalyzer());
+        indexWriterConfig.setMaxBufferedDocs(100);
+        indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment
+
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
+            addMixedTextDocs(w);
+
+            SignificantTermsAggregationBuilder agg = significantTerms("sig_text").field("text");
+            SignificantTermsAggregationBuilder backgroundAgg = significantTerms("sig_text").field("text");
+
+            String executionHint = randomExecutionHint();
+            agg.executionHint(executionHint);
+            backgroundAgg.executionHint(executionHint);
+
+            QueryBuilder backgroundFilter = QueryBuilders.termsQuery("text", "odd");
+            backgroundAgg.backgroundFilter(backgroundFilter);
+
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                assertEquals("test expects a single segment", 1, reader.leaves().size());
+                IndexSearcher searcher = new IndexSearcher(reader);
+
+                SignificantTerms evenTerms = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), agg, textFieldType);
+                SignificantTerms backgroundEvenTerms = searchAndReduce(
+                    searcher,
+                    new TermQuery(new Term("text", "even")),
+                    backgroundAgg,
+                    textFieldType
+                );
+
+                assertFalse(evenTerms.getBuckets().isEmpty());
+                assertFalse(backgroundEvenTerms.getBuckets().isEmpty());
+                assertEquals(((InternalMappedSignificantTerms) evenTerms).getSubsetSize(), 5);
+                assertEquals(((InternalMappedSignificantTerms) evenTerms).getSupersetSize(), 10);
+                assertEquals(((InternalMappedSignificantTerms) backgroundEvenTerms).getSubsetSize(), 5);
+                assertEquals(((InternalMappedSignificantTerms) backgroundEvenTerms).getSupersetSize(), 5);
+            }
+        }
+    }
+
     public void testAllDocsWithoutStringFieldviaGlobalOrds() throws IOException {
         testAllDocsWithoutStringField("global_ordinals");
     }