
Fix filter by filter execution with doc_count (#68930)

This fixes "filter by filter" execution so it no longer ignores
`doc_count`. The "filter by filter" execution is fairly performance
sensitive, but when I reran the performance numbers everything looked fine.
Nik Everett, 4 years ago
parent commit 477d287f4a
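
For context, the `_doc_count` field lets one indexed document stand in for several pre-aggregated documents, so a bucket's `doc_count` has to sum those per-document values rather than count hits. A toy sketch of the difference (plain Java; the values are illustrative, not taken from this commit's test setup):

// Toy illustration only: contrasts "count each hit as 1" with "sum the
// per-document _doc_count", which is the difference this commit fixes for
// the filter-by-filter path.
public class DocCountToy {
    public static void main(String[] args) {
        // Hypothetical _doc_count values for the documents matching one filter.
        int[] docCounts = { 5, 6 };

        long hitCount = docCounts.length;   // counting hits: 2
        long summedDocCount = 0;            // summing doc_count: 11
        for (int c : docCounts) {
            summedDocCount += c;
        }

        System.out.println("hits counted as 1 each: " + hitCount);
        System.out.println("summed _doc_count:      " + summedDocCount);
    }
}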

+ 31 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml

@@ -147,3 +147,34 @@ setup:
   - match: { aggregations.composite_agg.buckets.2.key.num_terms: 500 }
   - match: { aggregations.composite_agg.buckets.2.doc_count: 11 }
 
+---
+"Test filters agg with doc_count":
+  - skip:
+      version: " - 7.99.99"
+      reason: "fixed in 8.0.0 to be backported to 7.11.2"
+  - do:
+      search:
+        body:
+          profile: true
+          size: 0
+          aggs:
+            f:
+              filters:
+                filters:
+                  abc:
+                    match:
+                      str: abc
+                  foo:
+                    match:
+                      str: foo
+                  xyz:
+                    match:
+                      str: xyz
+
+  - match: { hits.total.value: 5 }
+  - length: { aggregations.f.buckets: 3 }
+  - match: { aggregations.f.buckets.abc.doc_count: 11 }
+  - match: { aggregations.f.buckets.foo.doc_count: 8 }
+  - match: { aggregations.f.buckets.xyz.doc_count: 5 }
+  - match: { profile.shards.0.aggregations.0.type: FiltersAggregator.FilterByFilter }
+  - gte: { profile.shards.0.aggregations.0.debug.segments_with_doc_count: 1 }

+ 1 - 0
server/src/internalClusterTest/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java

@@ -633,6 +633,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
             assertThat(delegate.get("delegate"), equalTo("FiltersAggregator.FilterByFilter"));
             Map<?, ?> delegateDebug = (Map<?, ?>) delegate.get("delegate_debug");
             assertThat(delegateDebug, hasEntry("segments_with_deleted_docs", 0));
+            assertThat(delegateDebug, hasEntry("segments_with_doc_count", 0));
             assertThat(delegateDebug, hasEntry("max_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
             assertThat(delegateDebug, hasEntry("estimated_cost", (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 2));
             assertThat((long) delegateDebug.get("estimate_cost_time"), greaterThanOrEqualTo(0L));  // ~1,276,734 nanos is normal

+ 9 - 0
server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java

@@ -43,4 +43,13 @@ public class DocCountProvider {
     public void setLeafReaderContext(LeafReaderContext ctx) throws IOException {
         docCountPostings = ctx.reader().postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME));
     }
+
+    public boolean alwaysOne() {
+        return docCountPostings == null;
+    }
+
+    @Override
+    public String toString() {
+        return "doc counts are " + (alwaysOne() ? "always one" : "based on " + docCountPostings);
+    }
 }
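
The new `alwaysOne()` check is what `FilterByFilter` uses below to count segments that actually carry `doc_count` postings. A minimal sketch of how a caller might drive the provider per segment, assuming only the methods visible in this diff (`setLeafReaderContext`, `alwaysOne`, `getDocCount`) and that the provider can be constructed directly; the surrounding class and method are hypothetical, not Elasticsearch code:

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.search.aggregations.bucket.DocCountProvider;

class DocCountSummer {
    private final DocCountProvider docCounts = new DocCountProvider();

    /** Sums doc_count over matching docs in one segment; doc ids are assumed ascending, as in a Lucene collector. */
    long sum(LeafReaderContext ctx, int[] matchingDocs) throws IOException {
        docCounts.setLeafReaderContext(ctx);   // bind the provider to this segment's postings
        if (docCounts.alwaysOne()) {
            // No _doc_count postings in this segment: every hit is worth exactly one.
            return matchingDocs.length;
        }
        long total = 0;
        for (int doc : matchingDocs) {
            total += docCounts.getDocCount(doc);   // per-document doc_count lookup
        }
        return total;
    }
}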

+ 43 - 4
server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java

@@ -15,11 +15,12 @@ import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
+import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.PointRangeQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreMode;
-import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.ParseField;
@@ -38,6 +39,7 @@ import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.LeafBucketCollector;
 import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
 import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
+import org.elasticsearch.search.aggregations.bucket.DocCountProvider;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 
 import java.io.IOException;
@@ -275,6 +277,11 @@ public abstract class FiltersAggregator extends BucketsAggregator {
          */
         private BulkScorer[][] scorers;
         private int segmentsWithDeletedDocs;
+        /**
+         * Count of segments whose documents had to consult the {@code doc_count}
+         * field.
+         */
+        private int segmentsWithDocCount;
 
         private FilterByFilter(
             String name,
@@ -354,6 +361,10 @@ public abstract class FiltersAggregator extends BucketsAggregator {
                 weights = buildWeights(topLevelQuery(), filters);
             }
             Bits live = ctx.reader().getLiveDocs();
+            Counter counter = new Counter(docCountProvider);
+            if (false == docCountProvider.alwaysOne()) {
+                segmentsWithDocCount++;
+            }
             for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) {
                 BulkScorer scorer;
                 if (scorers == null) {
@@ -367,9 +378,8 @@ public abstract class FiltersAggregator extends BucketsAggregator {
                     // the filter doesn't match any docs
                     continue;
                 }
-                TotalHitCountCollector collector = new TotalHitCountCollector();
-                scorer.score(collector, live);
-                incrementBucketDocCount(filterOrd, collector.getTotalHits());
+                scorer.score(counter, live);
+                incrementBucketDocCount(filterOrd, counter.readAndReset(ctx));
             }
             // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment.
             throw new CollectionTerminatedException();
@@ -379,6 +389,7 @@ public abstract class FiltersAggregator extends BucketsAggregator {
         public void collectDebugInfo(BiConsumer<String, Object> add) {
             super.collectDebugInfo(add);
             add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs);
+            add.accept("segments_with_doc_count", segmentsWithDocCount);
             if (estimatedCost != -1) {
                 // -1 means we didn't estimate it.
                 add.accept("estimated_cost", estimatedCost);
@@ -386,6 +397,34 @@ public abstract class FiltersAggregator extends BucketsAggregator {
                 add.accept("estimate_cost_time", estimateCostTime);
             }
         }
+
+        /**
+         * Counts collected documents, delegating to {@link DocCountProvider} for
+         * how many documents each search hit is "worth".
+         */
+        private static class Counter implements LeafCollector {
+            private final DocCountProvider docCount;
+            private long count;
+
+            Counter(DocCountProvider docCount) {
+                this.docCount = docCount;
+            }
+
+            public long readAndReset(LeafReaderContext ctx) throws IOException {
+                long result = count;
+                count = 0;
+                docCount.setLeafReaderContext(ctx);
+                return result;
+            }
+
+            @Override
+            public void collect(int doc) throws IOException {
+                count += docCount.getDocCount(doc);
+            }
+
+            @Override
+            public void setScorer(Scorable scorer) throws IOException {}
+        }
     }
 
     /**