Forráskód Böngészése

Aggs: Let terms run in global ords mode no match (#124782)

Allows the `terms` agg to run with global ords if the top level query
matches no docs *but* the agg is configured to collect buckets with 0
docs.
Nik Everett 7 hónapja
szülő
commit
e897a1422f

+ 5 - 0
docs/changelog/124782.yaml

@@ -0,0 +1,5 @@
+pr: 124782
+summary: "Aggs: Let terms run in global ords mode no match"
+area: Aggregations
+type: bug
+issues: []

+ 5 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

@@ -109,9 +109,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
             ExecutionMode execution = null;
             if (executionHint != null) {
                 execution = ExecutionMode.fromString(executionHint);
+            } else {
+                if (matchNoDocs(context, parent) && bucketCountThresholds.getMinDocCount() > 0) {
+                    execution = ExecutionMode.MAP;
+                }
             }
             // In some cases, using ordinals is just not supported: override it
-            if (valuesSource.hasOrdinals() == false || matchNoDocs(context, parent)) {
+            if (valuesSource.hasOrdinals() == false) {
                 execution = ExecutionMode.MAP;
             }
             if (execution == null) {

+ 100 - 7
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java

@@ -141,6 +141,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
 import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript;
 import static org.elasticsearch.test.MapMatcher.assertMap;
 import static org.elasticsearch.test.MapMatcher.matchesMap;
+import static org.hamcrest.Matchers.anyOf;
 import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.Matchers.either;
 import static org.hamcrest.Matchers.equalTo;
@@ -286,8 +287,18 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     }
 
     public void testMatchNoDocsQuery() throws Exception {
+        for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) {
+            testMatchNoDocsQuery(executionMode);
+        }
+        testMatchNoDocsQuery(null);
+    }
+
+    private void testMatchNoDocsQuery(TermsAggregatorFactory.ExecutionMode hint) throws Exception {
         MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", randomBoolean(), true, Collections.emptyMap());
         TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string");
+        if (hint != null) {
+            aggregationBuilder.executionHint(hint.toString());
+        }
         CheckedConsumer<RandomIndexWriter, IOException> createIndex = iw -> {
             iw.addDocument(doc(fieldType, "a", "b"));
             iw.addDocument(doc(fieldType, "", "c", "a"));
@@ -298,11 +309,8 @@ public class TermsAggregatorTests extends AggregatorTestCase {
             createIndex,
             (InternalTerms<?, ?> result) -> { assertEquals(0, result.getBuckets().size()); },
             new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery())
+                .withCheckAggregator(checkTopLevelAggregator(hint == null ? TermsAggregatorFactory.ExecutionMode.MAP : hint))
         );
-
-        debugTestCase(aggregationBuilder, new MatchNoDocsQuery(), createIndex, (result, impl, debug) -> {
-            assertEquals(impl, MapStringTermsAggregator.class);
-        }, fieldType);
     }
 
     public void testStringShardMinDocCount() throws IOException {
@@ -337,7 +345,6 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                 .executionHint(executionMode.toString())
                 .size(2)
                 .minDocCount(0)
-                .executionHint("map")
                 .excludeDeletedDocs(true)
                 .order(BucketOrder.key(true));
 
@@ -360,7 +367,80 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                         assertEquals("b", result.getBuckets().get(1).getKeyAsString());
                     }
                     assertEquals(0L, result.getBuckets().get(1).getDocCount());
-                }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))));
+                },
+                    new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))
+                        .withCheckAggregator(checkTopLevelAggregator(executionMode))
+                );
+            }
+
+            {
+                boolean delete = randomBoolean();
+                // force single shard/segment
+                testCase(iw -> {
+                    // force single shard/segment
+                    iw.addDocuments(
+                        Arrays.asList(doc(fieldType, "a"), doc(fieldType, "c", "d"), doc(fieldType, "b", "d"), doc(fieldType, "b"))
+                    );
+                    if (delete) {
+                        iw.deleteDocuments(new TermQuery(new Term("string", "b")));
+                    }
+                }, (InternalTerms<?, ?> result) -> {
+                    assertEquals(2, result.getBuckets().size());
+                    assertEquals("a", result.getBuckets().get(0).getKeyAsString());
+                    assertEquals(0L, result.getBuckets().get(0).getDocCount());
+                    if (delete) {
+                        assertEquals("c", result.getBuckets().get(1).getKeyAsString());
+                    } else {
+                        assertEquals("b", result.getBuckets().get(1).getKeyAsString());
+                    }
+                    assertEquals(0L, result.getBuckets().get(1).getDocCount());
+                },
+                    new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))
+                        .withCheckAggregator(checkTopLevelAggregator(executionMode))
+                );
+            }
+        }
+    }
+
+    /**
+     * Asserts that sane output for {@code min_doc_count: 0} when the top level
+     * query is {@code match_none}. Especially important is that we respect the
+     * execution hint in this case. It's a lot faster to collect {@code min_doc_count: 0}
+     * via ordinals.
+     */
+    public void testStringZeroMinDocCountMatchNone() throws IOException {
+        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", true, true, Collections.emptyMap());
+        for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) {
+            TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string")
+                .executionHint(executionMode.toString())
+                .size(2)
+                .minDocCount(0)
+                .excludeDeletedDocs(true)
+                .order(BucketOrder.key(true));
+
+            {
+                boolean delete = randomBoolean();
+                // force single shard/segment
+                testCase(iw -> {
+                    // force single shard/segment
+                    iw.addDocuments(Arrays.asList(doc(fieldType, "a"), doc(fieldType, "b"), doc(fieldType, "c"), doc(fieldType, "d")));
+                    if (delete) {
+                        iw.deleteDocuments(new TermQuery(new Term("string", "b")));
+                    }
+                }, (InternalTerms<?, ?> result) -> {
+                    assertEquals(2, result.getBuckets().size());
+                    assertEquals("a", result.getBuckets().get(0).getKeyAsString());
+                    assertEquals(0L, result.getBuckets().get(0).getDocCount());
+                    if (delete) {
+                        assertEquals("c", result.getBuckets().get(1).getKeyAsString());
+                    } else {
+                        assertEquals("b", result.getBuckets().get(1).getKeyAsString());
+                    }
+                    assertEquals(0L, result.getBuckets().get(1).getDocCount());
+                },
+                    new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery())
+                        .withCheckAggregator(checkTopLevelAggregator(executionMode))
+                );
             }
 
             {
@@ -384,7 +464,10 @@ public class TermsAggregatorTests extends AggregatorTestCase {
                         assertEquals("b", result.getBuckets().get(1).getKeyAsString());
                     }
                     assertEquals(0L, result.getBuckets().get(1).getDocCount());
-                }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))));
+                },
+                    new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery())
+                        .withCheckAggregator(checkTopLevelAggregator(executionMode))
+                );
             }
         }
     }
@@ -2279,4 +2362,14 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     private String randomHint() {
         return randomFrom(TermsAggregatorFactory.ExecutionMode.values()).toString();
     }
+
+    private Consumer<Aggregator> checkTopLevelAggregator(TermsAggregatorFactory.ExecutionMode executionMode) {
+        return switch (executionMode) {
+            case MAP -> a -> assertThat(a, instanceOf(MapStringTermsAggregator.class));
+            case GLOBAL_ORDINALS -> a -> assertThat(
+                a,
+                anyOf(instanceOf(GlobalOrdinalsStringTermsAggregator.class), instanceOf(StringTermsAggregatorFromFilters.class))
+            );
+        };
+    }
 }

+ 36 - 1
test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java

@@ -685,6 +685,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                     }
                     assertEquals(shouldBeCached, context.isCacheable());
                     List<InternalAggregation> internalAggregations = List.of(a.buildTopLevel());
+                    aggTestConfig.checkAggregator().accept(a);
                     assertRoundTrip(internalAggregations);
                     internalAggs.add(InternalAggregations.from(internalAggregations));
                 } finally {
@@ -1703,11 +1704,23 @@ public abstract class AggregatorTestCase extends ESTestCase {
 
         boolean useLogDocMergePolicy,
         boolean testReductionCancellation,
+        Consumer<Aggregator> checkAggregator,
         MappedFieldType... fieldTypes
     ) {
 
         public AggTestConfig(AggregationBuilder builder, MappedFieldType... fieldTypes) {
-            this(new MatchAllDocsQuery(), builder, DEFAULT_MAX_BUCKETS, randomBoolean(), true, randomBoolean(), false, true, fieldTypes);
+            this(
+                new MatchAllDocsQuery(),
+                builder,
+                DEFAULT_MAX_BUCKETS,
+                randomBoolean(),
+                true,
+                randomBoolean(),
+                false,
+                true,
+                a -> {},
+                fieldTypes
+            );
         }
 
         public AggTestConfig withQuery(Query query) {
@@ -1720,6 +1733,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 useLogDocMergePolicy,
                 testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }
@@ -1734,6 +1748,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 useLogDocMergePolicy,
                 testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }
@@ -1748,6 +1763,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 useLogDocMergePolicy,
                 testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }
@@ -1762,6 +1778,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 useLogDocMergePolicy,
                 testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }
@@ -1776,6 +1793,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 useLogDocMergePolicy,
                 testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }
@@ -1790,6 +1808,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 true,
                 testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }
@@ -1804,6 +1823,22 @@ public abstract class AggregatorTestCase extends ESTestCase {
                 incrementalReduce,
                 useLogDocMergePolicy,
                 false,
+                checkAggregator,
+                fieldTypes
+            );
+        }
+
+        public AggTestConfig withCheckAggregator(Consumer<Aggregator> checkAggregator) {
+            return new AggTestConfig(
+                query,
+                builder,
+                maxBuckets,
+                splitLeavesIntoSeparateAggregators,
+                shouldBeCached,
+                incrementalReduce,
+                useLogDocMergePolicy,
+                testReductionCancellation,
+                checkAggregator,
                 fieldTypes
             );
         }