1
0
Эх сурвалжийг харах

Fix an optimization in terms agg (#57438)

When the `terms` agg runs against strings and uses global ordinals it
has an optimization when it collects segments that only ever have a
single value for the particular string. This is *very* common. But I
broke it in #57241. This fixes that optimization and adds `debug`
information that you can use to see how often we collect segments of
each type. And adds a test to make sure that I don't break the
optimization again.

We also had a specialiation for when there isn't a filter on the terms
to aggregate. I had removed that specialization in #57241 which resulted
in some slow down as well. This adds it back but in a more clear way.
And, hopefully, a way that is marginally faster when there *is* a
filter.

Closes #57407
Nik Everett 5 жил өмнө
parent
commit
b072f5f002

+ 78 - 3
rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

@@ -724,10 +724,10 @@ setup:
         body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
 
 ---
-"profiler":
+"global ords profiler":
   - skip:
-      version: " - 7.8.99"
-      reason: debug information added in 7.9.0
+      version: " - 7.9.99"
+      reason: debug information added in 8.0.0 (backport to 7.9.0 pending)
   - do:
       bulk:
         index: test_1
@@ -753,6 +753,7 @@ setup:
               terms:
                 field: str
                 collect_mode: breadth_first
+                execution_hint: global_ordinals
               aggs:
                 max_number:
                   max:
@@ -768,9 +769,83 @@ setup:
   - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
   - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
   - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
+  - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
+  - gt:    { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.has_filter: false }
   - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
   - match: { profile.shards.0.aggregations.0.children.0.description: max_number }
 
+  - do:
+      indices.create:
+          index: test_3
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 0
+            mappings:
+              properties:
+                str:
+                   type: keyword
+  - do:
+      bulk:
+        index: test_3
+        refresh: true
+        body: |
+          { "index": {} }
+          { "str": ["pig", "sheep"], "number": 100 }
+
+  - do:
+      search:
+        index: test_3
+        body:
+          profile: true
+          size: 0
+          aggs:
+            str_terms:
+              terms:
+                field: str
+                collect_mode: breadth_first
+                execution_hint: global_ordinals
+              aggs:
+                max_number:
+                  max:
+                    field: number
+  - match: { aggregations.str_terms.buckets.0.key: pig }
+  - match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
+  - match: { aggregations.str_terms.buckets.1.key: sheep }
+  - match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
+  - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
+  - match: { profile.shards.0.aggregations.0.description: str_terms }
+  - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
+  - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
+  - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
+  - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
+  - match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
+  - gt:    { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.has_filter: false }
+  - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
+  - match: { profile.shards.0.aggregations.0.children.0.description: max_number }
+
+---
+"numeric profiler":
+  - skip:
+      version: " - 7.8.99"
+      reason: debug information added in 7.9.0
+  - do:
+      bulk:
+        index: test_1
+        refresh: true
+        body: |
+          { "index": {} }
+          { "number": 1 }
+          { "index": {} }
+          { "number": 3 }
+          { "index": {} }
+          { "number": 1 }
+          { "index": {} }
+          { "number": 1 }
+
   - do:
       search:
         index: test_1

+ 75 - 67
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

@@ -33,7 +33,6 @@ import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.common.util.IntArray;
 import org.elasticsearch.common.util.LongHash;
 import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
@@ -73,6 +72,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
     private final long valueCount;
     private final GlobalOrdLookupFunction lookupGlobalOrd;
     protected final CollectionStrategy collectionStrategy;
+    protected int segmentsWithSingleValuedOrds = 0;
+    protected int segmentsWithMultiValuedOrds = 0;
 
     public interface GlobalOrdLookupFunction {
         BytesRef apply(long ord) throws IOException;
@@ -102,7 +103,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet();
         this.valueCount = values.getValueCount();
         this.lookupGlobalOrd = values::lookupOrd;
-        this.acceptedGlobalOrdinals = includeExclude == null ? l -> true : includeExclude.acceptedGlobalOrdinals(values)::get;
+        this.acceptedGlobalOrdinals = includeExclude == null ? ALWAYS_TRUE : includeExclude.acceptedGlobalOrdinals(values)::get;
         this.collectionStrategy = remapGlobalOrds ? new RemapGlobalOrds() : new DenseGlobalOrds();
     }
 
@@ -110,24 +111,60 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         return collectionStrategy.describe();
     }
 
-    private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException {
-        return acceptedGlobalOrdinals == null ?
-            valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals);
-    }
-
     @Override
     public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
-        SortedSetDocValues globalOrds = getGlobalOrds(ctx);
+        SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx);
         collectionStrategy.globalOrdsReady(globalOrds);
         SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
         if (singleValues != null) {
+            segmentsWithSingleValuedOrds++;
+            if (acceptedGlobalOrdinals == ALWAYS_TRUE) {
+                /*
+                 * Optimize when there isn't a filter because that is very
+                 * common and marginally faster.
+                 */
+                return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) {
+                    @Override
+                    public void collect(int doc, long owningBucketOrd) throws IOException {
+                        assert owningBucketOrd == 0;
+                        if (false == singleValues.advanceExact(doc)) {
+                            return;
+                        }
+                        int globalOrd = singleValues.ordValue();
+                        collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
+                    }
+                });
+            }
             return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) {
                 @Override
                 public void collect(int doc, long owningBucketOrd) throws IOException {
                     assert owningBucketOrd == 0;
-                    if (singleValues.advanceExact(doc)) {
-                        int ord = singleValues.ordValue();
-                        collectionStrategy.collectGlobalOrd(doc, ord, sub);
+                    if (false == singleValues.advanceExact(doc)) {
+                        return;
+                    }
+                    int globalOrd = singleValues.ordValue();
+                    if (false == acceptedGlobalOrdinals.test(globalOrd)) {
+                        return;
+                    }
+                    collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
+                }
+            });
+        }
+        segmentsWithMultiValuedOrds++;
+        if (acceptedGlobalOrdinals == ALWAYS_TRUE) {
+            /*
+             * Optimize when there isn't a filter because that is very
+             * common and marginally faster.
+             */
+            return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) {
+                @Override
+                public void collect(int doc, long owningBucketOrd) throws IOException {
+                    assert owningBucketOrd == 0;
+                    if (false == globalOrds.advanceExact(doc)) {
+                        return;
+                    }
+                    for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
+                        collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
                     }
                 }
             });
@@ -136,10 +173,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             @Override
             public void collect(int doc, long owningBucketOrd) throws IOException {
                 assert owningBucketOrd == 0;
-                if (globalOrds.advanceExact(doc)) {
-                    for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
-                        collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
+                if (false == globalOrds.advanceExact(doc)) {
+                    return;
+                }
+                for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
+                    if (false == acceptedGlobalOrdinals.test(globalOrd)) {
+                        continue;
                     }
+                    collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
                 }
             }
         });
@@ -160,6 +201,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         super.collectDebugInfo(add);
         add.accept("collection_strategy", collectionStrategy.describe());
         add.accept("result_strategy", resultStrategy.describe());
+        add.accept("segments_with_single_valued_ords", segmentsWithSingleValuedOrds);
+        add.accept("segments_with_multi_valued_ords", segmentsWithMultiValuedOrds);
+        add.accept("has_filter", acceptedGlobalOrdinals != ALWAYS_TRUE);
     }
 
     /**
@@ -253,26 +297,31 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
             final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
             mapping = valuesSource.globalOrdinalsMapping(ctx);
+            // Dense mode doesn't support include/exclude so we don't have to check it here.
             if (singleValues != null) {
+                segmentsWithSingleValuedOrds++;
                 return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) {
                     @Override
                     public void collect(int doc, long owningBucketOrd) throws IOException {
                         assert owningBucketOrd == 0;
-                        if (singleValues.advanceExact(doc)) {
-                            final int ord = singleValues.ordValue();
-                            segmentDocCounts.increment(ord + 1, 1);
+                        if (false == singleValues.advanceExact(doc)) {
+                            return;
                         }
+                        int ord = singleValues.ordValue();
+                        segmentDocCounts.increment(ord + 1, 1);
                     }
                 });
             }
+            segmentsWithMultiValuedOrds++;
             return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) {
                 @Override
                 public void collect(int doc, long owningBucketOrd) throws IOException {
                     assert owningBucketOrd == 0;
-                    if (segmentOrds.advanceExact(doc)) {
-                        for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
-                            segmentDocCounts.increment(segmentOrd + 1, 1);
-                        }
+                    if (false == segmentOrds.advanceExact(doc)) {
+                        return;
+                    }
+                    for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
+                        segmentDocCounts.increment(segmentOrd + 1, 1);
                     }
                 }
             });
@@ -306,52 +355,6 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         }
     }
 
-    private static final class FilteredOrdinals extends AbstractSortedSetDocValues {
-
-        private final SortedSetDocValues inner;
-        private final LongPredicate accepted;
-
-        private FilteredOrdinals(SortedSetDocValues inner, LongPredicate accepted) {
-            this.inner = inner;
-            this.accepted = accepted;
-        }
-
-        @Override
-        public long getValueCount() {
-            return inner.getValueCount();
-        }
-
-        @Override
-        public BytesRef lookupOrd(long ord) throws IOException {
-            return inner.lookupOrd(ord);
-        }
-
-        @Override
-        public long nextOrd() throws IOException {
-            for (long ord = inner.nextOrd(); ord != NO_MORE_ORDS; ord = inner.nextOrd()) {
-                if (accepted.test(ord)) {
-                    return ord;
-                }
-            }
-            return NO_MORE_ORDS;
-        }
-
-        @Override
-        public boolean advanceExact(int target) throws IOException {
-            if (inner.advanceExact(target)) {
-                for (long ord = inner.nextOrd(); ord != NO_MORE_ORDS; ord = inner.nextOrd()) {
-                    if (accepted.test(ord)) {
-                        // reset the iterator
-                        boolean advanced = inner.advanceExact(target);
-                        assert advanced;
-                        return true;
-                    }
-                }
-            }
-            return false;
-        }
-    }
-
     /**
      * Strategy for collecting global ordinals.
      * <p>
@@ -800,4 +803,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             System.arraycopy(from.bytes, from.offset, to.bytes, 0, from.length);
         }
     }
+
+    /**
+     * Predicate used for {@link #acceptedGlobalOrdinals} if there is no filter.
+     */
+    private static final LongPredicate ALWAYS_TRUE = l -> true;
 }