1
0
Эх сурвалжийг харах

Speed up top_metrics on hot shards (#70579)

This moves `top_metrics` from global ordinals to segment ordinals which
should should speed it up a ton on shards receiving a lot of writes at
the cost of a little speed on shards that aren't receiving many writes.

The rally tests on hot shards so the performance cost is between .5% and
5%:
```
|                               |  before |  after  |   diff   |    |
|  50th percentile service time | 706.406 | 716.434 |  10.0277 | ms |
|  90th percentile service time | 738.705 | 742.636 |   3.9311 | ms |
| 100th percentile service time | 773.643 | 812.476 |  38.8331 | ms |
```

That seems worth it for 0 startup cost on hot shards.

Closes #70453
Nik Everett 4 жил өмнө
parent
commit
cd0778a523

+ 1 - 1
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/topmetrics/TopMetricsAggregationBuilder.java

@@ -58,7 +58,7 @@ public class TopMetricsAggregationBuilder extends AbstractAggregationBuilder<Top
         registry.register(
             REGISTRY_KEY,
             List.of(CoreValuesSourceType.KEYWORD, CoreValuesSourceType.IP),
-            TopMetricsAggregator.GlobalOrdsValues::new,
+            TopMetricsAggregator.SegmentOrdsValues::new,
             false
         );
     }

+ 45 - 27
x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/topmetrics/TopMetricsAggregator.java

@@ -19,6 +19,7 @@ import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.BitArray;
 import org.elasticsearch.common.util.DoubleArray;
 import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.ObjectArray;
 import org.elasticsearch.index.fielddata.NumericDoubleValues;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.MultiValueMode;
@@ -33,8 +34,8 @@ import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry;
 import org.elasticsearch.search.sort.BucketedSort;
 import org.elasticsearch.search.sort.SortBuilder;
 import org.elasticsearch.search.sort.SortValue;
-import org.elasticsearch.xpack.core.common.search.aggregations.MissingHelper;
 import org.elasticsearch.xpack.analytics.topmetrics.InternalTopMetrics.MetricValue;
+import org.elasticsearch.xpack.core.common.search.aggregations.MissingHelper;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -398,20 +399,32 @@ class TopMetricsAggregator extends NumericMetricsAggregator.MultiValue {
     }
 
     /**
-     * Loads metrics for whole numbers.
+     * Loads metrics fields with segment ordinals.
      */
-    static class GlobalOrdsValues extends CollectingMetricValues {
+    static class SegmentOrdsValues extends CollectingMetricValues {
         private final ValuesSource.Bytes.WithOrdinals valuesSource;
-        private SortedSetDocValues globalOrds;
-        private LongArray values;
+        /**
+         * Reference to the segment's doc values so we can convert the
+         * {@link #segmentOrds} into strings on the way out.
+         */
+        private ObjectArray<SortedSetDocValues> segmentResolve;
+        /**
+         * Terms ordinal in the segment.
+         */
+        private LongArray segmentOrds;
 
-        GlobalOrdsValues(int size, BigArrays bigArrays, String name, ValuesSourceConfig config) {
+        SegmentOrdsValues(int size, BigArrays bigArrays, String name, ValuesSourceConfig config) {
             super(bigArrays, name, config);
-            if (false == config.hasGlobalOrdinals()) {
-                throw new IllegalArgumentException("top_metrics can only collect bytes that have global ordinals");
+            try {
+                valuesSource = (ValuesSource.Bytes.WithOrdinals) config.getValuesSource();
+            } catch (ClassCastException e) {
+                throw new IllegalArgumentException(
+                    "top_metrics can only collect bytes that have segment ordinals but " + config.getDescription() + " does not",
+                    e
+                );
             }
-            this.valuesSource = (ValuesSource.Bytes.WithOrdinals) config.getValuesSource();
-            values = bigArrays.newLongArray(size, false);
+            segmentResolve = bigArrays.newObjectArray(size);
+            segmentOrds = bigArrays.newLongArray(size, false);
         }
 
         @Override
@@ -421,43 +434,48 @@ class TopMetricsAggregator extends NumericMetricsAggregator.MultiValue {
 
         @Override
         public MetricValue metricValue(long index) throws IOException {
-            if (globalOrds == null) {
-                // We didn't collect a single segment.
-                return null;
-            }
-            long ord = values.get(index);
+            long ord = segmentOrds.get(index);
             if (ord == -1) {
                 return null;
             }
-            return new MetricValue(config.format(), SortValue.from(BytesRef.deepCopyOf(globalOrds.lookupOrd(ord))));
+            SortedSetDocValues resolve = segmentResolve.get(index);
+            return new MetricValue(config.format(), SortValue.from(BytesRef.deepCopyOf(resolve.lookupOrd(ord))));
         }
 
         @Override
         public void swap(long lhs, long rhs) {
-            long tmp = values.get(lhs);
-            values.set(lhs, values.get(rhs));
-            values.set(rhs, tmp);
+            SortedSetDocValues tempSegmentResolve = segmentResolve.get(lhs);
+            segmentResolve.set(lhs, segmentResolve.get(rhs));
+            segmentResolve.set(rhs, tempSegmentResolve);
+            long tmpSegmentOrd = segmentOrds.get(lhs);
+            segmentOrds.set(lhs, segmentOrds.get(rhs));
+            segmentOrds.set(rhs, tmpSegmentOrd);
         }
 
         @Override
         public Loader loader(LeafReaderContext ctx) throws IOException {
-            globalOrds = valuesSource.globalOrdinalsValues(ctx);
+            SortedSetDocValues segmentOrdValues = valuesSource.ordinalsValues(ctx);
             // For now just return the value that sorts first.
             return (index, doc) -> {
-                if (false == globalOrds.advanceExact(doc)) {
-                    values.set(index, -1);
-                    return;
+                if (index >= segmentResolve.size()) {
+                    segmentResolve = bigArrays.grow(segmentResolve, index + 1);
                 }
-                if (index >= values.size()) {
-                    values = bigArrays.grow(values, index + 1);
+                if (index >= segmentOrds.size()) {
+                    segmentOrds = bigArrays.grow(segmentOrds, index + 1);
                 }
-                values.set(index, globalOrds.nextOrd());
+                if (false == segmentOrdValues.advanceExact(doc)) {
+                    segmentResolve.set(index, null);
+                    segmentOrds.set(index, -1);
+                    return;
+                }
+                segmentResolve.set(index, segmentOrdValues);
+                segmentOrds.set(index, segmentOrdValues.nextOrd());
             };
         }
 
         @Override
         public void close() {
-            Releasables.close(values);
+            Releasables.close(segmentResolve, segmentOrds);
         }
     }
 

+ 2 - 2
x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/topmetrics/TopMetricsAggregatorMetricsTests.java

@@ -227,9 +227,9 @@ public class TopMetricsAggregatorMetricsTests extends ESTestCase {
 
     private ValuesSourceConfig toConfig(SortedSetDocValues values) throws IOException {
         ValuesSource.Bytes.WithOrdinals source = mock(ValuesSource.Bytes.WithOrdinals.class);
-        when(source.globalOrdinalsValues(null)).thenReturn(values);
+        when(source.ordinalsValues(null)).thenReturn(values);
         ValuesSourceConfig config = toConfig(source, CoreValuesSourceType.KEYWORD, DocValueFormat.RAW, true);
-        when(config.hasGlobalOrdinals()).thenReturn(true);
+        when(config.hasGlobalOrdinals()).thenReturn(false); // We don't need global orindals. Only segment ordinals.
         return config;
     }
 

+ 66 - 0
x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/top_metrics.yml

@@ -583,3 +583,69 @@
   - match: { aggregations.tm.top.1.sort: [2] }
   - match: { aggregations.tm.top.2.metrics.animal\.keyword: chicken}
   - match: { aggregations.tm.top.2.sort: [3] }
+
+---
+"fetch runtime keyword fails with nice error message":
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"s": 1, "animal": "cat"}'
+          - '{"index": {}}'
+          - '{"s": 2, "animal": "dog"}'
+          - '{"index": {}}'
+          - '{"s": 3, "animal": "chicken"}'
+
+  - do:
+      catch: /top_metrics can only collect bytes that have segment ordinals but Field \[species\] of type \[keyword\] does not/
+      search:
+        size: 0
+        body:
+          runtime_mappings:
+            species:
+              type: keyword
+              script: |
+                emit(doc['animal.keyword'].value)
+          aggs:
+            tm:
+              top_metrics:
+                metrics:
+                  field: species
+                sort:
+                  s: asc
+                size: 3
+
+---
+"fetch keyword fields with some missing":
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"s": 1, "animal": "cat"}'
+          - '{"index": {}}'
+          - '{"s": 2, "animal": "dog"}'
+          - '{"index": {}}'
+          - '{"s": 3}'
+
+  - do:
+      search:
+        size: 0
+        body:
+          aggs:
+            tm:
+              top_metrics:
+                metrics:
+                  field: animal.keyword
+                sort:
+                  s: asc
+                size: 3
+  - match: { aggregations.tm.top.0.metrics.animal\.keyword: cat}
+  - match: { aggregations.tm.top.0.sort: [1] }
+  - match: { aggregations.tm.top.1.metrics.animal\.keyword: dog}
+  - match: { aggregations.tm.top.1.sort: [2] }
+  - match: { aggregations.tm.top.2.metrics.animal\.keyword: null}
+  - match: { aggregations.tm.top.2.sort: [3] }