Browse Source

Support field collapsing with search_after (#73023)

This change adds support for using `search_after` with field collapsing. When
the two are used together, the search must be sorted on the collapse field, and
no secondary sort fields are allowed. Otherwise it would be possible for a group
to appear on multiple pages of results. This restriction keeps the behavior
simple and predictable.

Currently search after is handled directly in `CollapsingTopDocsCollector`. As
a follow-up, we could generalize the logic and move support to the Lucene
grouping framework.

Closes #53115.
Julie Tibshirani 4 years ago
parent
commit
f85a9dddb9

+ 30 - 3
docs/reference/search/search-your-data/collapse-search-results.asciidoc

@@ -118,8 +118,35 @@ The `max_concurrent_group_searches` request parameter can be used to control
 the maximum number of concurrent searches allowed in this phase.
 the maximum number of concurrent searches allowed in this phase.
 The default is based on the number of data nodes and the default search thread pool size.
 The default is based on the number of data nodes and the default search thread pool size.
 
 
-WARNING: `collapse` cannot be used in conjunction with <<scroll-search-results, scroll>>,
-<<rescore, rescore>> or <<search-after, search after>>.
+WARNING: `collapse` cannot be used in conjunction with <<scroll-search-results, scroll>> or
+<<rescore, rescore>>.
+
+[discrete]
+[[collapsing-with-search-after]]
+=== Collapsing with `search_after`
+Field collapsing can be used with the <<search-after, `search_after`>>
+parameter. Using `search_after` is only supported when sorting and collapsing
+on the same field. Secondary sorts are also not allowed. For example, we can
+collapse and sort on `user.id`, while paging through the results using
+`search_after`:
+
+[source,console]
+--------------------------------------------------
+GET /my-index-000001/_search
+{
+  "query": {
+    "match": {
+      "message": "GET /search"
+    }
+  },
+  "collapse": {
+    "field": "user.id"
+  },
+  "sort": [ "user.id" ],
+  "search_after": ["dd5ce1ad"]
+}
+--------------------------------------------------
+// TEST[setup:my_index]
 
 
 [discrete]
 [discrete]
 [[second-level-of-collapsing]]
 [[second-level-of-collapsing]]
@@ -223,4 +250,4 @@ Response:
 --------------------------------------------------
 --------------------------------------------------
 // NOTCONSOLE
 // NOTCONSOLE
 
 
-NOTE: Second level of collapsing doesn't allow `inner_hits`.
+NOTE: Second level of collapsing doesn't allow `inner_hits`.

+ 1 - 0
rest-api-spec/build.gradle

@@ -207,6 +207,7 @@ tasks.named("yamlRestCompatTest").configure {
     'mtermvectors/30_mix_typeless_typeful/mtermvectors without types on an index that has types',
     'mtermvectors/30_mix_typeless_typeful/mtermvectors without types on an index that has types',
     'search/10_source_filtering/docvalue_fields with default format', //use_field_mapping change
     'search/10_source_filtering/docvalue_fields with default format', //use_field_mapping change
     'search/40_indices_boost/Indices boost using object', //indices_boost
     'search/40_indices_boost/Indices boost using object', //indices_boost
+    'search/110_field_collapsing/field collapsing and search_after', // temporarily disabled while we backport the change
     'search/150_rewrite_on_coordinator/Ensure that we fetch the document only once', //terms_lookup
     'search/150_rewrite_on_coordinator/Ensure that we fetch the document only once', //terms_lookup
     'search/171_terms_query_with_types/Terms Query with No.of terms exceeding index.max_terms_count should FAIL', //bulk
     'search/171_terms_query_with_types/Terms Query with No.of terms exceeding index.max_terms_count should FAIL', //bulk
     'search/260_parameter_validation/test size=-1 is deprecated', //size=-1 change
     'search/260_parameter_validation/test size=-1 is deprecated', //size=-1 change

+ 54 - 4
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/110_field_collapsing.yml

@@ -233,18 +233,68 @@ setup:
           collapse: { field: numeric_group }
           collapse: { field: numeric_group }
 
 
 ---
 ---
-"field collapsing and search_after":
-
+"field collapsing and search_after with invalid sort":
+  - skip:
+      version: " - 7.99.99"
+      reason: "support for collapsing with search_after not yet backported"
   - do:
   - do:
-      catch:      /cannot use \`collapse\` in conjunction with \`search_after\`/
+      catch: /Cannot use \[collapse\] in conjunction with \[search_after\] unless the search is sorted on the same field. Multiple sort fields are not allowed./
       search:
       search:
-        rest_total_hits_as_int: true
         index: test
         index: test
         body:
         body:
           collapse: { field: numeric_group }
           collapse: { field: numeric_group }
           search_after: [6]
           search_after: [6]
           sort: [{ sort: desc }]
           sort: [{ sort: desc }]
 
 
+  - do:
+      catch: /Cannot use \[collapse\] in conjunction with \[search_after\] unless the search is sorted on the same field. Multiple sort fields are not allowed./
+      search:
+        index: test
+        body:
+          collapse: { field: numeric_group }
+          search_after: [6, 42]
+          sort: [{ numeric_group: asc, sort: desc }]
+
+---
+"field collapsing and search_after":
+  - skip:
+      version: " - 7.99.99"
+      reason: "support for collapsing with search_after not yet backported"
+  - do:
+      search:
+        index: test
+        body:
+          collapse: { field: numeric_group }
+          sort: [{ numeric_group: desc}]
+          size: 1
+
+  - match: { hits.total.value: 6 }
+  - length: { hits.hits: 1 }
+
+  - match: { hits.hits.0._index: test }
+  - match: { hits.hits.0.fields.numeric_group: [25] }
+  - match: { hits.hits.0.sort: [25] }
+
+  - do:
+      search:
+        index: test
+        body:
+          collapse: { field: numeric_group }
+          sort: [{ numeric_group: desc}]
+          search_after: [25]
+          size: 2
+
+  - match: { hits.total.value: 6 }
+  - length: { hits.hits: 2 }
+
+  - match: { hits.hits.0._index: test }
+  - match: { hits.hits.0.fields.numeric_group: [3] }
+  - match: { hits.hits.0.sort: [3] }
+
+  - match: { hits.hits.1._index: test }
+  - match: { hits.hits.1.fields.numeric_group: [1] }
+  - match: { hits.hits.1.sort: [1] }
+
 ---
 ---
 "field collapsing and rescore":
 "field collapsing and rescore":
 
 

+ 40 - 13
server/src/main/java/org/apache/lucene/search/grouping/CollapsingTopDocsCollector.java

@@ -7,13 +7,16 @@
  */
  */
 package org.apache.lucene.search.grouping;
 package org.apache.lucene.search.grouping;
 
 
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FieldDoc;
-import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.search.TotalHits;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MappedFieldType;
 
 
 import java.io.IOException;
 import java.io.IOException;
@@ -27,21 +30,35 @@ import static org.apache.lucene.search.SortField.Type.SCORE;
  * output. The collapsing is done in a single pass by selecting only the top sorted document per collapse key.
  * output. The collapsing is done in a single pass by selecting only the top sorted document per collapse key.
  * The value used for the collapse key of each group can be found in {@link CollapseTopFieldDocs#collapseValues}.
  * The value used for the collapse key of each group can be found in {@link CollapseTopFieldDocs#collapseValues}.
  *
  *
+ * This collector optionally supports searching after a previous result through the 'after' parameter.
+ *
  * TODO: If the sort is based on score we should propagate the minimum competitive score when <code>orderedGroups</code> is full.
  * TODO: If the sort is based on score we should propagate the minimum competitive score when <code>orderedGroups</code> is full.
  * This is safe for collapsing since the group <code>sort</code> is the same as the query sort.
  * This is safe for collapsing since the group <code>sort</code> is the same as the query sort.
  */
  */
 public final class CollapsingTopDocsCollector<T> extends FirstPassGroupingCollector<T> {
 public final class CollapsingTopDocsCollector<T> extends FirstPassGroupingCollector<T> {
     protected final String collapseField;
     protected final String collapseField;
-
     protected final Sort sort;
     protected final Sort sort;
-    protected Scorable scorer;
-
     private int totalHitCount;
     private int totalHitCount;
 
 
-    CollapsingTopDocsCollector(GroupSelector<T> groupSelector, String collapseField, Sort sort, int topN) {
+    private final FieldDoc after;
+    private final FieldComparator<?> comparator;
+    private final int reversed;
+    private LeafFieldComparator leafComparator;
+
+    @SuppressWarnings("unchecked")
+    CollapsingTopDocsCollector(GroupSelector<T> groupSelector, String collapseField, Sort sort, int topN, FieldDoc after) {
         super(groupSelector, sort, topN);
         super(groupSelector, sort, topN);
         this.collapseField = collapseField;
         this.collapseField = collapseField;
         this.sort = sort;
         this.sort = sort;
+        this.after = after;
+        assert after == null || (sort.getSort().length == 1 && after.doc == Integer.MAX_VALUE);
+
+        SortField sortField = sort.getSort()[0];
+        this.comparator = sortField.getComparator(0, 0);
+        if (after != null) {
+            ((FieldComparator<Object>) comparator).setTopValue(after.fields[0]);
+        }
+        this.reversed = sortField.getReverse() ? -1 : 1;
     }
     }
 
 
     /**
     /**
@@ -92,15 +109,21 @@ public final class CollapsingTopDocsCollector<T> extends FirstPassGroupingCollec
     }
     }
 
 
     @Override
     @Override
-    public void setScorer(Scorable scorer) throws IOException {
-        super.setScorer(scorer);
-        this.scorer = scorer;
+    protected void doSetNextReader(LeafReaderContext readerContext) throws IOException {
+        leafComparator = comparator.getLeafComparator(readerContext);
+        super.doSetNextReader(readerContext);
     }
     }
 
 
     @Override
     @Override
     public void collect(int doc) throws IOException {
     public void collect(int doc) throws IOException {
-        super.collect(doc);
         totalHitCount++;
         totalHitCount++;
+        if (after != null) {
+            int cmp = reversed * leafComparator.compareTop(doc);
+            if (cmp >= 0) {
+                return;
+            }
+        }
+        super.collect(doc);
     }
     }
 
 
     /**
     /**
@@ -116,13 +139,15 @@ public final class CollapsingTopDocsCollector<T> extends FirstPassGroupingCollec
      *                          This must be non-null, ie, if you want to groupSort by relevance
      *                          This must be non-null, ie, if you want to groupSort by relevance
      *                          use Sort.RELEVANCE.
      *                          use Sort.RELEVANCE.
      * @param topN              How many top groups to keep.
      * @param topN              How many top groups to keep.
+     * @param after             The field values to search after. Can be null.
      */
      */
     public static CollapsingTopDocsCollector<?> createNumeric(String collapseField,
     public static CollapsingTopDocsCollector<?> createNumeric(String collapseField,
                                                               MappedFieldType collapseFieldType,
                                                               MappedFieldType collapseFieldType,
                                                               Sort sort,
                                                               Sort sort,
-                                                              int topN)  {
+                                                              int topN,
+                                                              @Nullable FieldDoc after)  {
         return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Numeric(collapseFieldType),
         return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Numeric(collapseFieldType),
-                collapseField, sort, topN);
+                collapseField, sort, topN, after);
     }
     }
 
 
     /**
     /**
@@ -137,12 +162,14 @@ public final class CollapsingTopDocsCollector<T> extends FirstPassGroupingCollec
      *                          document per collapsed key.
      *                          document per collapsed key.
      *                          This must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
      *                          This must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
      * @param topN              How many top groups to keep.
      * @param topN              How many top groups to keep.
+     * @param after             The field values to search after. Can be null.
      */
      */
     public static CollapsingTopDocsCollector<?> createKeyword(String collapseField,
     public static CollapsingTopDocsCollector<?> createKeyword(String collapseField,
                                                               MappedFieldType collapseFieldType,
                                                               MappedFieldType collapseFieldType,
                                                               Sort sort,
                                                               Sort sort,
-                                                              int topN)  {
+                                                              int topN,
+                                                              @Nullable FieldDoc after)  {
         return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Keyword(collapseFieldType),
         return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Keyword(collapseFieldType),
-                collapseField, sort, topN);
+                collapseField, sort, topN, after);
     }
     }
 }
 }

+ 4 - 5
server/src/main/java/org/elasticsearch/search/SearchService.java

@@ -55,8 +55,8 @@ import org.elasticsearch.index.query.MatchAllQueryBuilder;
 import org.elasticsearch.index.query.MatchNoneQueryBuilder;
 import org.elasticsearch.index.query.MatchNoneQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryRewriteContext;
 import org.elasticsearch.index.query.QueryRewriteContext;
-import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.query.Rewriteable;
 import org.elasticsearch.index.query.Rewriteable;
+import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.shard.IndexEventListener;
 import org.elasticsearch.index.shard.IndexEventListener;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.index.shard.SearchOperationListener;
 import org.elasticsearch.index.shard.SearchOperationListener;
@@ -1055,7 +1055,9 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
             if (context.from() > 0) {
             if (context.from() > 0) {
                 throw new SearchException(shardTarget, "`from` parameter must be set to 0 when `search_after` is used.");
                 throw new SearchException(shardTarget, "`from` parameter must be set to 0 when `search_after` is used.");
             }
             }
-            FieldDoc fieldDoc = SearchAfterBuilder.buildFieldDoc(context.sort(), source.searchAfter());
+
+            String collapseField = source.collapse() != null ? source.collapse().getField() : null;
+            FieldDoc fieldDoc = SearchAfterBuilder.buildFieldDoc(context.sort(), source.searchAfter(), collapseField);
             context.searchAfter(fieldDoc);
             context.searchAfter(fieldDoc);
         }
         }
 
 
@@ -1082,9 +1084,6 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
             if (context.scrollContext() != null) {
             if (context.scrollContext() != null) {
                 throw new SearchException(shardTarget, "cannot use `collapse` in a scroll context");
                 throw new SearchException(shardTarget, "cannot use `collapse` in a scroll context");
             }
             }
-            if (context.searchAfter() != null) {
-                throw new SearchException(shardTarget, "cannot use `collapse` in conjunction with `search_after`");
-            }
             if (context.rescore() != null && context.rescore().isEmpty() == false) {
             if (context.rescore() != null && context.rescore().isEmpty() == false) {
                 throw new SearchException(shardTarget, "cannot use `collapse` in conjunction with `rescore`");
                 throw new SearchException(shardTarget, "cannot use `collapse` in conjunction with `rescore`");
             }
             }

+ 1 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java

@@ -348,7 +348,7 @@ final class CompositeAggregator extends BucketsAggregator {
             formats[i] = sources[i].format;
             formats[i] = sources[i].format;
         }
         }
         FieldDoc fieldDoc = SearchAfterBuilder.buildFieldDoc(new SortAndFormats(indexSortPrefix, formats),
         FieldDoc fieldDoc = SearchAfterBuilder.buildFieldDoc(new SortAndFormats(indexSortPrefix, formats),
-            Arrays.copyOfRange(rawAfterKey.values(), 0, formats.length));
+            Arrays.copyOfRange(rawAfterKey.values(), 0, formats.length), null);
         if (indexSortPrefix.getSort().length < sources.length) {
         if (indexSortPrefix.getSort().length < sources.length) {
             // include all docs that belong to the partial bucket
             // include all docs that belong to the partial bucket
             fieldDoc.doc = -1;
             fieldDoc.doc = -1;

+ 4 - 3
server/src/main/java/org/elasticsearch/search/collapse/CollapseContext.java

@@ -7,6 +7,7 @@
  */
  */
 package org.elasticsearch.search.collapse;
 package org.elasticsearch.search.collapse;
 
 
+import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.grouping.CollapsingTopDocsCollector;
 import org.apache.lucene.search.grouping.CollapsingTopDocsCollector;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -48,11 +49,11 @@ public class CollapseContext {
         return innerHits;
         return innerHits;
     }
     }
 
 
-    public CollapsingTopDocsCollector<?> createTopDocs(Sort sort, int topN) {
+    public CollapsingTopDocsCollector<?> createTopDocs(Sort sort, int topN, FieldDoc after) {
         if (fieldType.collapseType() == CollapseType.KEYWORD) {
         if (fieldType.collapseType() == CollapseType.KEYWORD) {
-            return CollapsingTopDocsCollector.createKeyword(fieldName, fieldType, sort, topN);
+            return CollapsingTopDocsCollector.createKeyword(fieldName, fieldType, sort, topN, after);
         } else if (fieldType.collapseType() == CollapseType.NUMERIC) {
         } else if (fieldType.collapseType() == CollapseType.NUMERIC) {
-            return CollapsingTopDocsCollector.createNumeric(fieldName, fieldType, sort, topN);
+            return CollapsingTopDocsCollector.createNumeric(fieldName, fieldType, sort, topN, after);
         } else {
         } else {
             throw new IllegalStateException("collapse is not supported on this field type");
             throw new IllegalStateException("collapse is not supported on this field type");
         }
         }

+ 5 - 3
server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java

@@ -166,13 +166,14 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
         private CollapsingTopDocsCollectorContext(CollapseContext collapseContext,
         private CollapsingTopDocsCollectorContext(CollapseContext collapseContext,
                                                   @Nullable SortAndFormats sortAndFormats,
                                                   @Nullable SortAndFormats sortAndFormats,
                                                   int numHits,
                                                   int numHits,
-                                                  boolean trackMaxScore) {
+                                                  boolean trackMaxScore,
+                                                  @Nullable FieldDoc after) {
             super(REASON_SEARCH_TOP_HITS, numHits);
             super(REASON_SEARCH_TOP_HITS, numHits);
             assert numHits > 0;
             assert numHits > 0;
             assert collapseContext != null;
             assert collapseContext != null;
             Sort sort = sortAndFormats == null ? Sort.RELEVANCE : sortAndFormats.sort;
             Sort sort = sortAndFormats == null ? Sort.RELEVANCE : sortAndFormats.sort;
             this.sortFmt = sortAndFormats == null ? new DocValueFormat[] { DocValueFormat.RAW } : sortAndFormats.formats;
             this.sortFmt = sortAndFormats == null ? new DocValueFormat[] { DocValueFormat.RAW } : sortAndFormats.formats;
-            this.topDocsCollector = collapseContext.createTopDocs(sort, numHits);
+            this.topDocsCollector = collapseContext.createTopDocs(sort, numHits, after);
 
 
             MaxScoreCollector maxScoreCollector;
             MaxScoreCollector maxScoreCollector;
             if (trackMaxScore) {
             if (trackMaxScore) {
@@ -436,7 +437,8 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
         } else if (searchContext.collapse() != null) {
         } else if (searchContext.collapse() != null) {
             boolean trackScores = searchContext.sort() == null ? true : searchContext.trackScores();
             boolean trackScores = searchContext.sort() == null ? true : searchContext.trackScores();
             int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
             int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
-            return new CollapsingTopDocsCollectorContext(searchContext.collapse(), searchContext.sort(), numDocs, trackScores);
+            return new CollapsingTopDocsCollectorContext(searchContext.collapse(), searchContext.sort(),
+                numDocs, trackScores, searchContext.searchAfter());
         } else {
         } else {
             int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
             int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
             final boolean rescore = searchContext.rescore().isEmpty() == false;
             final boolean rescore = searchContext.rescore().isEmpty() == false;

+ 8 - 1
server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java

@@ -13,6 +13,7 @@ import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSortField;
 import org.apache.lucene.search.SortedNumericSortField;
 import org.apache.lucene.search.SortedSetSortField;
 import org.apache.lucene.search.SortedSetSortField;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.Strings;
@@ -93,7 +94,7 @@ public class SearchAfterBuilder implements ToXContentObject, Writeable {
         return Arrays.copyOf(sortValues, sortValues.length);
         return Arrays.copyOf(sortValues, sortValues.length);
     }
     }
 
 
-    public static FieldDoc buildFieldDoc(SortAndFormats sort, Object[] values) {
+    public static FieldDoc buildFieldDoc(SortAndFormats sort, Object[] values, @Nullable String collapseField) {
         if (sort == null || sort.sort.getSort() == null || sort.sort.getSort().length == 0) {
         if (sort == null || sort.sort.getSort() == null || sort.sort.getSort().length == 0) {
             throw new IllegalArgumentException("Sort must contain at least one field.");
             throw new IllegalArgumentException("Sort must contain at least one field.");
         }
         }
@@ -104,6 +105,12 @@ public class SearchAfterBuilder implements ToXContentObject, Writeable {
                     SEARCH_AFTER.getPreferredName() + " has " + values.length + " value(s) but sort has "
                     SEARCH_AFTER.getPreferredName() + " has " + values.length + " value(s) but sort has "
                             + sort.sort.getSort().length + ".");
                             + sort.sort.getSort().length + ".");
         }
         }
+
+        if (collapseField != null && (sortFields.length > 1 || sortFields[0].getField().equals(collapseField) == false)) {
+            throw new IllegalArgumentException("Cannot use [collapse] in conjunction with [" + SEARCH_AFTER.getPreferredName()
+                + "] unless the search is sorted on the same field. Multiple sort fields are not allowed.");
+        }
+
         Object[] fieldValues = new Object[sortFields.length];
         Object[] fieldValues = new Object[sortFields.length];
         for (int i = 0; i < sortFields.length; i++) {
         for (int i = 0; i < sortFields.length; i++) {
             SortField sortField = sortFields[i];
             SortField sortField = sortFields[i];

+ 254 - 0
server/src/test/java/org/apache/lucene/grouping/CollapsingTopDocsCollectorSearchAfterTests.java

@@ -0,0 +1,254 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+package org.apache.lucene.grouping;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.search.grouping.CollapseTopFieldDocs;
+import org.apache.lucene.search.grouping.CollapsingTopDocsCollector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MockFieldMapper;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Tests {@link CollapsingTopDocsCollector} when a {@link FieldDoc after} value is supplied,
+ * which is how field collapsing is combined with {@code search_after}.
+ * <p>
+ * This test is adapted from {@link CollapsingTopDocsCollectorTests}. Each case indexes random
+ * documents, picks one of the indexed group values as the {@code after} key and compares the
+ * collapsed results with a plain {@link TopFieldCollector} that is given the same key.
+ */
+public class CollapsingTopDocsCollectorSearchAfterTests extends ESTestCase {
+    /**
+     * Describes how one doc-values type is generated, indexed and sorted, so that the same
+     * assertions can be run for every collapse field type covered by this class.
+     */
+    interface CollapsingDocValuesProducer<T extends Comparable<?>> {
+        /** Returns a random group value; {@code maxGroup} bounds the number of distinct groups. */
+        T randomGroup(int maxGroup);
+
+        /** Adds {@code value} to {@code doc} as the doc-values field that is sorted and collapsed on. */
+        void add(Document doc, T value);
+
+        /** Returns the sort on the collapse field; {@code reversed} selects descending order. */
+        SortField sortField(boolean reversed);
+    }
+
+    /**
+     * Runs the collapse / search-after check twice: once with an ascending sort and once with a
+     * descending sort.
+     *
+     * @param dvProducers produces, indexes and sorts the group values
+     * @param numeric     {@code true} to use the numeric collapsing collector, {@code false} for keyword
+     */
+    private <T extends Comparable<T>> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric)
+        throws IOException {
+        assertSearchCollapse(dvProducers, numeric, false);
+        assertSearchCollapse(dvProducers, numeric, true);
+    }
+
+    /**
+     * Indexes random documents, chooses a random indexed group value as the {@code after} key and
+     * verifies that the collapsing collector returns exactly the groups that sort after that key,
+     * in the same order as an uncollapsed {@link TopFieldCollector} using the same key.
+     *
+     * @param dvProducers produces, indexes and sorts the group values
+     * @param numeric     {@code true} to use the numeric collapsing collector, {@code false} for keyword
+     * @param reverseSort {@code true} to sort in descending order
+     */
+    private <T extends Comparable<T>> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers,
+                                                                boolean numeric, boolean reverseSort) throws IOException {
+        Directory dir = newDirectory();
+        RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+        Set<T> values = new HashSet<>();
+        int totalHits = 0;
+        boolean docsWithMissingField = false;
+
+        int numDocs = randomIntBetween(1000, 2000);
+        int maxGroup = randomIntBetween(2, 500);
+        for (int i = 0; i < numDocs; i++) {
+            Document doc = new Document();
+            if (frequently()) {
+                T value = dvProducers.randomGroup(maxGroup);
+                values.add(value);
+                dvProducers.add(doc, value);
+            } else {
+                // Introduce some documents with missing sort values.
+                doc.add(new SortedNumericDocValuesField("other-field", randomInt()));
+                docsWithMissingField = true;
+            }
+            w.addDocument(doc);
+            totalHits++;
+        }
+
+        IndexReader reader = w.getReader();
+        IndexSearcher searcher = newSearcher(reader);
+
+        SortField sortField = dvProducers.sortField(reverseSort);
+        MappedFieldType fieldType = new MockFieldMapper.FakeFieldType(sortField.getField());
+        Sort sort = new Sort(sortField);
+
+        // Order the distinct group values the same way the search sorts them.
+        Comparator<T> comparator = reverseSort ? Collections.reverseOrder() : Comparator.naturalOrder();
+        List<T> sortedValues = new ArrayList<>(values);
+        sortedValues.sort(comparator);
+
+        // Groups strictly after the chosen value are expected, plus one extra group for the
+        // documents without a value: the producers configure the missing value to sort last.
+        int randomIndex = randomIntBetween(0, sortedValues.size() - 1);
+        int expectedNumGroups = values.size() - randomIndex - 1;
+        if (docsWithMissingField) {
+            expectedNumGroups++;
+        }
+
+        // Integer.MAX_VALUE as the doc id places the key after every document sharing its sort value.
+        FieldDoc after = new FieldDoc(Integer.MAX_VALUE, 0, new Object[]{sortedValues.get(randomIndex)});
+        CollapsingTopDocsCollector<?> collapsingCollector = numeric
+            ? CollapsingTopDocsCollector.createNumeric("field", fieldType, sort, expectedNumGroups, after)
+            : CollapsingTopDocsCollector.createKeyword("field", fieldType, sort, expectedNumGroups, after);
+
+        TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, after, Integer.MAX_VALUE);
+        Query query = new MatchAllDocsQuery();
+        searcher.search(query, collapsingCollector);
+        searcher.search(query, topFieldCollector);
+        CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
+        TopFieldDocs topDocs = topFieldCollector.topDocs();
+        assertEquals(sortField.getField(), collapseTopFieldDocs.field);
+        assertEquals(totalHits, collapseTopFieldDocs.totalHits.value);
+        assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
+
+        assertEquals(TotalHits.Relation.EQUAL_TO, collapseTopFieldDocs.totalHits.relation);
+        assertEquals(totalHits, topDocs.totalHits.value);
+
+        // Walk the uncollapsed hits: the first hit of each new sort value must line up with the
+        // next collapsed group.
+        Object currentValue = null;
+        int topDocsIndex = 0;
+        for (int i = 0; i < expectedNumGroups; i++) {
+            FieldDoc fieldDoc = null;
+            for (; topDocsIndex < topDocs.scoreDocs.length; topDocsIndex++) {
+                fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
+                if (Objects.equals(fieldDoc.fields[0], currentValue) == false) {
+                    break;
+                }
+            }
+            FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
+            assertNotNull(fieldDoc);
+            assertEquals(collapseFieldDoc.fields[0], fieldDoc.fields[0]);
+            currentValue = fieldDoc.fields[0];
+        }
+
+        // Every expected group has been matched, so any uncollapsed hits that are left must
+        // belong to the last group seen; otherwise the collapsed results missed a group.
+        for (; topDocsIndex < topDocs.scoreDocs.length; topDocsIndex++) {
+            FieldDoc remainingDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
+            assertEquals(currentValue, remainingDoc.fields[0]);
+        }
+        w.close();
+        reader.close();
+        dir.close();
+    }
+
+    /** Collapses on a {@code long} doc-values field. */
+    public void testCollapseLong() throws Exception {
+        CollapsingDocValuesProducer<Long> producer = new CollapsingDocValuesProducer<>() {
+            @Override
+            public Long randomGroup(int maxGroup) {
+                return randomNonNegativeLong() % maxGroup;
+            }
+
+            @Override
+            public void add(Document doc, Long value) {
+                doc.add(new NumericDocValuesField("field", value));
+            }
+
+            @Override
+            public SortField sortField(boolean reversed) {
+                SortField sortField = new SortField("field", SortField.Type.LONG, reversed);
+                sortField.setMissingValue(reversed ? Long.MIN_VALUE : Long.MAX_VALUE);
+                return sortField;
+            }
+        };
+        assertSearchCollapse(producer, true);
+    }
+
+    /** Collapses on an {@code int} doc-values field. */
+    public void testCollapseInt() throws Exception {
+        CollapsingDocValuesProducer<Integer> producer = new CollapsingDocValuesProducer<>() {
+            @Override
+            public Integer randomGroup(int maxGroup) {
+                return randomIntBetween(0, maxGroup - 1);
+            }
+
+            @Override
+            public void add(Document doc, Integer value) {
+                doc.add(new NumericDocValuesField("field", value));
+            }
+
+            @Override
+            public SortField sortField(boolean reversed) {
+                SortField sortField = new SortField("field", SortField.Type.INT, reversed);
+                sortField.setMissingValue(reversed ? Integer.MIN_VALUE : Integer.MAX_VALUE);
+                return sortField;
+            }
+        };
+        assertSearchCollapse(producer, true);
+    }
+
+    /** Collapses on a {@code float} field stored as its int bits in numeric doc values. */
+    public void testCollapseFloat() throws Exception {
+        CollapsingDocValuesProducer<Float> producer = new CollapsingDocValuesProducer<>() {
+            @Override
+            public Float randomGroup(int maxGroup) {
+                return Float.valueOf(randomIntBetween(0, maxGroup - 1));
+            }
+
+            @Override
+            public void add(Document doc, Float value) {
+                doc.add(new NumericDocValuesField("field", Float.floatToIntBits(value)));
+            }
+
+            @Override
+            public SortField sortField(boolean reversed) {
+                SortField sortField = new SortField("field", SortField.Type.FLOAT, reversed);
+                sortField.setMissingValue(reversed ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY);
+                return sortField;
+            }
+        };
+        assertSearchCollapse(producer, true);
+    }
+
+    /** Collapses on a {@code double} field stored as its long bits in numeric doc values. */
+    public void testCollapseDouble() throws Exception {
+        CollapsingDocValuesProducer<Double> producer = new CollapsingDocValuesProducer<>() {
+            @Override
+            public Double randomGroup(int maxGroup) {
+                return Double.valueOf(randomIntBetween(0, maxGroup - 1));
+            }
+
+            @Override
+            public void add(Document doc, Double value) {
+                doc.add(new NumericDocValuesField("field", Double.doubleToLongBits(value)));
+            }
+
+            @Override
+            public SortField sortField(boolean reversed) {
+                SortField sortField = new SortField("field", SortField.Type.DOUBLE, reversed);
+                // Double.MIN_VALUE is the smallest positive double, not the lowest value, so it
+                // would sort missing docs ahead of the 0.0 group in descending order. Use the
+                // infinities, as the float case does, so missing docs always sort last.
+                sortField.setMissingValue(reversed ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY);
+                return sortField;
+            }
+        };
+        assertSearchCollapse(producer, true);
+    }
+
+    /** Collapses on a keyword field stored in sorted doc values. */
+    public void testCollapseString() throws Exception {
+        CollapsingDocValuesProducer<BytesRef> producer = new CollapsingDocValuesProducer<>() {
+            @Override
+            public BytesRef randomGroup(int maxGroup) {
+                return new BytesRef(Integer.toString(randomIntBetween(0, maxGroup - 1)));
+            }
+
+            @Override
+            public void add(Document doc, BytesRef value) {
+                doc.add(new SortedDocValuesField("field", value));
+            }
+
+            @Override
+            public SortField sortField(boolean reversed) {
+                SortField sortField = new SortField("field", SortField.Type.STRING_VAL, reversed);
+                sortField.setMissingValue(reversed ? SortField.STRING_FIRST : SortField.STRING_LAST);
+                return sortField;
+            }
+        };
+        assertSearchCollapse(producer, false);
+    }
+}

+ 6 - 6
server/src/test/java/org/apache/lucene/grouping/CollapsingTopDocsCollectorTests.java

@@ -116,10 +116,10 @@ public class CollapsingTopDocsCollectorTests extends ESTestCase {
         final CollapsingTopDocsCollector<?> collapsingCollector;
         final CollapsingTopDocsCollector<?> collapsingCollector;
         if (numeric) {
         if (numeric) {
             collapsingCollector =
             collapsingCollector =
-                CollapsingTopDocsCollector.createNumeric(collapseField.getField(), fieldType, sort, expectedNumGroups);
+                CollapsingTopDocsCollector.createNumeric(collapseField.getField(), fieldType, sort, expectedNumGroups, null);
         } else {
         } else {
             collapsingCollector =
             collapsingCollector =
-                CollapsingTopDocsCollector.createKeyword(collapseField.getField(), fieldType, sort, expectedNumGroups);
+                CollapsingTopDocsCollector.createKeyword(collapseField.getField(), fieldType, sort, expectedNumGroups, null);
         }
         }
 
 
         TopFieldCollector topFieldCollector =
         TopFieldCollector topFieldCollector =
@@ -189,9 +189,9 @@ public class CollapsingTopDocsCollectorTests extends ESTestCase {
             final SegmentSearcher subSearcher = subSearchers[shardIDX];
             final SegmentSearcher subSearcher = subSearchers[shardIDX];
             final CollapsingTopDocsCollector<?> c;
             final CollapsingTopDocsCollector<?> c;
             if (numeric) {
             if (numeric) {
-                c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), fieldType, sort, expectedNumGroups);
+                c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), fieldType, sort, expectedNumGroups, null);
             } else {
             } else {
-                c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), fieldType, sort, expectedNumGroups);
+                c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), fieldType, sort, expectedNumGroups, null);
             }
             }
             subSearcher.search(weight, c);
             subSearcher.search(weight, c);
             shardHits[shardIDX] = c.getTopDocs();
             shardHits[shardIDX] = c.getTopDocs();
@@ -376,7 +376,7 @@ public class CollapsingTopDocsCollectorTests extends ESTestCase {
         Sort sort = new Sort(sortField);
         Sort sort = new Sort(sortField);
 
 
         final CollapsingTopDocsCollector<?> collapsingCollector =
         final CollapsingTopDocsCollector<?> collapsingCollector =
-                CollapsingTopDocsCollector.createNumeric("group", fieldType, sort, 10);
+                CollapsingTopDocsCollector.createNumeric("group", fieldType, sort, 10, null);
         searcher.search(new MatchAllDocsQuery(), collapsingCollector);
         searcher.search(new MatchAllDocsQuery(), collapsingCollector);
         CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
         CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
         assertEquals(4, collapseTopFieldDocs.scoreDocs.length);
         assertEquals(4, collapseTopFieldDocs.scoreDocs.length);
@@ -416,7 +416,7 @@ public class CollapsingTopDocsCollectorTests extends ESTestCase {
         Sort sort = new Sort(new SortField("group", SortField.Type.STRING_VAL));
         Sort sort = new Sort(new SortField("group", SortField.Type.STRING_VAL));
 
 
         final CollapsingTopDocsCollector<?> collapsingCollector =
         final CollapsingTopDocsCollector<?> collapsingCollector =
-            CollapsingTopDocsCollector.createKeyword("group", fieldType, sort, 10);
+            CollapsingTopDocsCollector.createKeyword("group", fieldType, sort, 10, null);
         searcher.search(new MatchAllDocsQuery(), collapsingCollector);
         searcher.search(new MatchAllDocsQuery(), collapsingCollector);
         CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
         CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
         assertEquals(4, collapseTopFieldDocs.scoreDocs.length);
         assertEquals(4, collapseTopFieldDocs.scoreDocs.length);