Przeglądaj źródła

Introduce a filtered collector manager (#96824)

In order to add support for inter-segment search concurrency, we need to implement collector managers for all of our custom collectors.

This PR introduces a collector manager that is based on FilteredCollector, used when a post_filter is provided as part of a search request.

Note that the collector manager is not yet integrated in the query phase.
Luca Cavanna 2 lat temu
rodzic
commit
d98b9cb051

+ 5 - 0
docs/changelog/96824.yaml

@@ -0,0 +1,5 @@
+pr: 96824
+summary: Introduce a filtered collector manager
+area: Search
+type: enhancement
+issues: []

+ 26 - 0
server/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java

@@ -9,6 +9,7 @@ package org.elasticsearch.common.lucene.search;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
 import org.apache.lucene.search.FilterLeafCollector;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.ScoreMode;
@@ -18,6 +19,8 @@ import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.lucene.Lucene;
 
 import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
 
 /**
  * Collector that wraps another collector and collects only documents that match the provided filter.
@@ -60,4 +63,27 @@ public class FilteredCollector implements Collector {
     public ScoreMode scoreMode() {
         return collector.scoreMode();
     }
+
+    /**
+     * Creates a {@link CollectorManager} for {@link FilteredCollector}, which enables inter-segment search concurrency
+     * when a <code>post_filter</code> is provided as part of a search request.
+     * <p>
+     * Each collector handed out by the returned manager is a {@link FilteredCollector} wrapping a collector obtained from
+     * <code>collectorManager.newCollector()</code>, and every one of them is given the same <code>filter</code> instance.
+     * Reduction unwraps the inner collectors and delegates to <code>collectorManager.reduce</code>.
+     *
+     * @param collectorManager the manager that creates and reduces the wrapped (inner) collectors
+     * @param filter the filter weight passed to every {@link FilteredCollector} created by the returned manager
+     * @param <C> the type of the wrapped collectors
+     * @param <T> the type of the reduced result, as produced by <code>collectorManager</code>
+     * @return a manager that produces {@link FilteredCollector}s and whose reduced result is that of <code>collectorManager</code>
+     */
+    public static <C extends Collector, T> CollectorManager<FilteredCollector, T> createManager(
+        CollectorManager<C, T> collectorManager,
+        Weight filter
+    ) {
+        return new CollectorManager<>() {
+            @Override
+            public FilteredCollector newCollector() throws IOException {
+                // Wrap a collector from the delegate manager; the same filter instance is handed to every collector created here.
+                return new FilteredCollector(collectorManager.newCollector(), filter);
+            }
+
+            @Override
+            public T reduce(Collection<FilteredCollector> collectors) throws IOException {
+                // Unwrap the inner collectors so that the delegate manager can reduce them. The cast is unchecked: it holds as
+                // long as the collectors passed in were created by newCollector() above, which always wraps a C.
+                @SuppressWarnings("unchecked")
+                List<C> innerCollectors = collectors.stream().map(filteredCollector -> (C) filteredCollector.collector).toList();
+                return collectorManager.reduce(innerCollectors);
+            }
+        };
+    }
 }

+ 30 - 4
server/src/test/java/org/elasticsearch/common/lucene/search/FilteredCollectorTests.java

@@ -13,10 +13,12 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.CollectorManager;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.Weight;
@@ -39,7 +41,7 @@ public class FilteredCollectorTests extends ESTestCase {
         super.setUp();
         directory = newDirectory();
         RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig());
-        numDocs = randomIntBetween(10, 100);
+        numDocs = randomIntBetween(900, 1000);
         for (int i = 0; i < numDocs; i++) {
             Document doc = new Document();
             doc.add(new StringField("field1", "value", Field.Store.NO));
@@ -62,19 +64,19 @@ public class FilteredCollectorTests extends ESTestCase {
 
     public void testFiltering() throws IOException {
         {
-            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(1, 100);
+            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(1, 1000);
             searcher.search(new MatchAllDocsQuery(), topScoreDocCollector);
             assertEquals(numDocs, topScoreDocCollector.topDocs().totalHits.value);
         }
         {
-            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(1, 100);
+            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(1, 1000);
             TermQuery termQuery = new TermQuery(new Term("field2", "value"));
             Weight filterWeight = termQuery.createWeight(searcher, ScoreMode.TOP_DOCS, 1f);
             searcher.search(new MatchAllDocsQuery(), new FilteredCollector(topScoreDocCollector, filterWeight));
             assertEquals(1, topScoreDocCollector.topDocs().totalHits.value);
         }
         {
-            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(1, 100);
+            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(1, 1000);
             TermQuery termQuery = new TermQuery(new Term("field1", "value"));
             Weight filterWeight = termQuery.createWeight(searcher, ScoreMode.TOP_DOCS, 1f);
             searcher.search(new MatchAllDocsQuery(), new FilteredCollector(topScoreDocCollector, filterWeight));
@@ -96,4 +98,28 @@ public class FilteredCollectorTests extends ESTestCase {
             assertEquals(1, totalHitCountCollector.getTotalHits());
         }
     }
+
+    /*
+     * Exercises FilteredCollector.createManager through IndexSearcher#search(Query, CollectorManager), checking the total
+     * hit count reported by the reduced TopDocs in three scenarios: no filter, a filter on field2 and a filter on field1.
+     * Every top docs manager is created with 1000 as its last argument, which is never lower than numDocs.
+     */
+    public void testManager() throws IOException {
+        {
+            // Baseline without any filtering: the unwrapped manager is expected to report all numDocs documents.
+            CollectorManager<TopScoreDocCollector, TopDocs> topDocsManager = TopScoreDocCollector.createSharedManager(1, null, 1000);
+            TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), topDocsManager);
+            assertEquals(numDocs, topDocs.totalHits.value);
+        }
+        {
+            // Filter on field2:value, expected to let exactly one document through.
+            // NOTE(review): presumably setUp indexes a single document with field2 - that part is not visible here, confirm.
+            CollectorManager<TopScoreDocCollector, TopDocs> topDocsManager = TopScoreDocCollector.createSharedManager(1, null, 1000);
+            TermQuery termQuery = new TermQuery(new Term("field2", "value"));
+            Weight filterWeight = termQuery.createWeight(searcher, ScoreMode.TOP_DOCS, 1f);
+            CollectorManager<FilteredCollector, TopDocs> filteredManager = FilteredCollector.createManager(topDocsManager, filterWeight);
+            TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), filteredManager);
+            assertEquals(1, topDocs.totalHits.value);
+        }
+        {
+            // Filter on field1:value, expected to let all numDocs documents through (setUp adds field1 inside its numDocs loop).
+            CollectorManager<TopScoreDocCollector, TopDocs> topDocsManager = TopScoreDocCollector.createSharedManager(1, null, 1000);
+            TermQuery termQuery = new TermQuery(new Term("field1", "value"));
+            Weight filterWeight = termQuery.createWeight(searcher, ScoreMode.TOP_DOCS, 1f);
+            CollectorManager<FilteredCollector, TopDocs> filteredManager = FilteredCollector.createManager(topDocsManager, filterWeight);
+            TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), filteredManager);
+            assertEquals(numDocs, topDocs.totalHits.value);
+        }
+    }
 }