Browse Source

Improve transport stats histogram (#93598)

- omits empty buckets at the start and end of the histogram
- includes a human-readable representation of the bucket boundaries if `?human` is specified
David Turner 2 years ago
parent
commit
e43e7c2f4a

+ 28 - 8
docs/reference/cluster/nodes-stats.asciidoc

@@ -1957,15 +1957,25 @@ thread, represented as a histogram.
 .Properties of `inbound_handling_time_histogram`
 [%collapsible]
 =======
+`ge`::
+(string)
+The inclusive lower bound of the bucket as a human-readable string. May be
+omitted on the first bucket if this bucket has no lower bound.
+
 `ge_millis`::
 (integer)
-The inclusive lower bound of the bucket in milliseconds. Omitted on the first
-bucket since this bucket has no lower bound.
+The inclusive lower bound of the bucket in milliseconds. May be omitted on the
+first bucket if this bucket has no lower bound.
+
+`lt`::
+(string)
+The exclusive upper bound of the bucket as a human-readable string. May be
+omitted on the last bucket if this bucket has no upper bound.
 
 `lt_millis`::
 (integer)
-The exclusive upper bound of the bucket in milliseconds. Omitted on the last
-bucket since this bucket has no upper bound.
+The exclusive upper bound of the bucket in milliseconds. May be omitted on the
+last bucket if this bucket has no upper bound.
 
 `count`::
 (integer)
@@ -1981,15 +1991,25 @@ transport thread, represented as a histogram.
 .Properties of `outbound_handling_time_histogram`
 [%collapsible]
 =======
+`ge`::
+(string)
+The inclusive lower bound of the bucket as a human-readable string. May be
+omitted on the first bucket if this bucket has no lower bound.
+
 `ge_millis`::
 (integer)
-The inclusive lower bound of the bucket in milliseconds. Omitted on the first
-bucket since this bucket has no lower bound.
+The inclusive lower bound of the bucket in milliseconds. May be omitted on the
+first bucket if this bucket has no lower bound.
+
+`lt`::
+(string)
+The exclusive upper bound of the bucket as a human-readable string. May be
+omitted on the last bucket if this bucket has no upper bound.
 
 `lt_millis`::
 (integer)
-The exclusive upper bound of the bucket in milliseconds. Omitted on the last
-bucket since this bucket has no upper bound.
+The exclusive upper bound of the bucket in milliseconds. May be omitted on the
+last bucket if this bucket has no upper bound.
 
 `count`::
 (integer)

+ 2 - 28
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/60_transport_stats.yml

@@ -37,31 +37,5 @@
       nodes.stats:
         metric: [ transport ]
 
-  - length: { nodes.$node_id.transport.inbound_handling_time_histogram: 18 }
-
-  - gte:    { nodes.$node_id.transport.inbound_handling_time_histogram.0.count: 0 }
-  - is_false: nodes.$node_id.transport.inbound_handling_time_histogram.0.ge_millis
-  - match:  { nodes.$node_id.transport.inbound_handling_time_histogram.0.lt_millis: 1 }
-
-  - gte:    { nodes.$node_id.transport.inbound_handling_time_histogram.1.count: 0 }
-  - match:  { nodes.$node_id.transport.inbound_handling_time_histogram.1.ge_millis: 1 }
-  - match:  { nodes.$node_id.transport.inbound_handling_time_histogram.1.lt_millis: 2 }
-
-  - gte:    { nodes.$node_id.transport.inbound_handling_time_histogram.17.count: 0 }
-  - match:  { nodes.$node_id.transport.inbound_handling_time_histogram.17.ge_millis: 65536 }
-  - is_false: nodes.$node_id.transport.inbound_handling_time_histogram.17.lt_millis
-
-
-  - length: { nodes.$node_id.transport.outbound_handling_time_histogram: 18 }
-
-  - gte:    { nodes.$node_id.transport.outbound_handling_time_histogram.0.count: 0 }
-  - is_false: nodes.$node_id.transport.outbound_handling_time_histogram.0.ge_millis
-  - match:  { nodes.$node_id.transport.outbound_handling_time_histogram.0.lt_millis: 1 }
-
-  - gte:    { nodes.$node_id.transport.outbound_handling_time_histogram.1.count: 0 }
-  - match:  { nodes.$node_id.transport.outbound_handling_time_histogram.1.ge_millis: 1 }
-  - match:  { nodes.$node_id.transport.outbound_handling_time_histogram.1.lt_millis: 2 }
-
-  - gte:    { nodes.$node_id.transport.outbound_handling_time_histogram.17.count: 0 }
-  - match:  { nodes.$node_id.transport.outbound_handling_time_histogram.17.ge_millis: 65536 }
-  - is_false: nodes.$node_id.transport.outbound_handling_time_histogram.17.lt_millis
+  - is_true: nodes.$node_id.transport.inbound_handling_time_histogram
+  - is_true: nodes.$node_id.transport.outbound_handling_time_histogram

+ 16 - 4
server/src/main/java/org/elasticsearch/transport/TransportStats.java

@@ -15,6 +15,7 @@ import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.network.HandlingTimeTracker;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.xcontent.ToXContentFragment;
 import org.elasticsearch.xcontent.XContentBuilder;
 
@@ -183,19 +184,30 @@ public class TransportStats implements Writeable, ToXContentFragment {
         return builder;
     }
 
-    private static void histogramToXContent(XContentBuilder builder, long[] bucketFrequencies, String fieldName) throws IOException {
+    static void histogramToXContent(XContentBuilder builder, long[] bucketFrequencies, String fieldName) throws IOException {
         final int[] bucketBounds = HandlingTimeTracker.getBucketUpperBounds();
+
+        int firstBucket = 0;
+        long remainingCount = 0L;
+        for (int i = 0; i < bucketFrequencies.length; i++) {
+            if (remainingCount == 0) {
+                firstBucket = i;
+            }
+            remainingCount += bucketFrequencies[i];
+        }
+
         assert bucketFrequencies.length == bucketBounds.length + 1;
         builder.startArray(fieldName);
-        for (int i = 0; i < bucketFrequencies.length; i++) {
+        for (int i = firstBucket; i < bucketFrequencies.length && 0 < remainingCount; i++) {
             builder.startObject();
             if (i > 0 && i <= bucketBounds.length) {
-                builder.field("ge_millis", bucketBounds[i - 1]);
+                builder.humanReadableField("ge_millis", "ge", TimeValue.timeValueMillis(bucketBounds[i - 1]));
             }
             if (i < bucketBounds.length) {
-                builder.field("lt_millis", bucketBounds[i]);
+                builder.humanReadableField("lt_millis", "lt", TimeValue.timeValueMillis(bucketBounds[i]));
             }
             builder.field("count", bucketFrequencies[i]);
+            remainingCount -= bucketFrequencies[i];
             builder.endObject();
         }
         builder.endArray();

+ 146 - 0
server/src/test/java/org/elasticsearch/transport/TransportStatsTests.java

@@ -0,0 +1,146 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.transport;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.network.HandlingTimeTracker;
+import org.elasticsearch.common.unit.ByteSizeUnit;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.ToXContentFragment;
+
+import java.util.Arrays;
+
+public class TransportStatsTests extends ESTestCase {
+    public void testToXContent() {
+        assertEquals(
+            Strings.toString(
+                new TransportStats(1, 2, 3, ByteSizeUnit.MB.toBytes(4), 5, ByteSizeUnit.MB.toBytes(6), new long[0], new long[0]),
+                false,
+                true
+            ),
+            """
+                {"transport":{"server_open":1,"total_outbound_connections":2,\
+                "rx_count":3,"rx_size":"4mb","rx_size_in_bytes":4194304,\
+                "tx_count":5,"tx_size":"6mb","tx_size_in_bytes":6291456\
+                }}"""
+        );
+
+        final var histogram = new long[HandlingTimeTracker.BUCKET_COUNT];
+        assertEquals(
+            Strings.toString(
+                new TransportStats(1, 2, 3, ByteSizeUnit.MB.toBytes(4), 5, ByteSizeUnit.MB.toBytes(6), histogram, histogram),
+                false,
+                true
+            ),
+            """
+                {"transport":{"server_open":1,"total_outbound_connections":2,\
+                "rx_count":3,"rx_size":"4mb","rx_size_in_bytes":4194304,\
+                "tx_count":5,"tx_size":"6mb","tx_size_in_bytes":6291456,\
+                "inbound_handling_time_histogram":[],\
+                "outbound_handling_time_histogram":[]\
+                }}"""
+        );
+
+        histogram[4] = 10;
+        assertEquals(
+            Strings.toString(
+                new TransportStats(1, 2, 3, ByteSizeUnit.MB.toBytes(4), 5, ByteSizeUnit.MB.toBytes(6), histogram, histogram),
+                false,
+                true
+            ),
+            """
+                {"transport":{"server_open":1,"total_outbound_connections":2,\
+                "rx_count":3,"rx_size":"4mb","rx_size_in_bytes":4194304,\
+                "tx_count":5,"tx_size":"6mb","tx_size_in_bytes":6291456,\
+                "inbound_handling_time_histogram":[{"ge":"8ms","ge_millis":8,"lt":"16ms","lt_millis":16,"count":10}],\
+                "outbound_handling_time_histogram":[{"ge":"8ms","ge_millis":8,"lt":"16ms","lt_millis":16,"count":10}]\
+                }}"""
+        );
+    }
+
+    private static void assertHistogram(long[] histogram, String expectedJson) {
+        assertEquals(expectedJson, Strings.toString((ToXContentFragment) (builder, params) -> {
+            TransportStats.histogramToXContent(builder, histogram, "h");
+            return builder;
+        }, false, true));
+    }
+
+    public void testHistogram() {
+        final var histogram = new long[HandlingTimeTracker.BUCKET_COUNT];
+
+        assertHistogram(histogram, """
+            {"h":[]}""");
+
+        histogram[0] = 10;
+        assertHistogram(histogram, """
+            {"h":[{"lt":"1ms","lt_millis":1,"count":10}]}""");
+
+        histogram[0] = 0;
+        histogram[4] = 10;
+        assertHistogram(histogram, """
+            {"h":[{"ge":"8ms","ge_millis":8,"lt":"16ms","lt_millis":16,"count":10}]}""");
+
+        histogram[6] = 20;
+        assertHistogram(histogram, """
+            {"h":[\
+            {"ge":"8ms","ge_millis":8,"lt":"16ms","lt_millis":16,"count":10},\
+            {"ge":"16ms","ge_millis":16,"lt":"32ms","lt_millis":32,"count":0},\
+            {"ge":"32ms","ge_millis":32,"lt":"64ms","lt_millis":64,"count":20}\
+            ]}""");
+
+        histogram[0] = 30;
+        assertHistogram(histogram, """
+            {"h":[\
+            {"lt":"1ms","lt_millis":1,"count":30},\
+            {"ge":"1ms","ge_millis":1,"lt":"2ms","lt_millis":2,"count":0},\
+            {"ge":"2ms","ge_millis":2,"lt":"4ms","lt_millis":4,"count":0},\
+            {"ge":"4ms","ge_millis":4,"lt":"8ms","lt_millis":8,"count":0},\
+            {"ge":"8ms","ge_millis":8,"lt":"16ms","lt_millis":16,"count":10},\
+            {"ge":"16ms","ge_millis":16,"lt":"32ms","lt_millis":32,"count":0},\
+            {"ge":"32ms","ge_millis":32,"lt":"64ms","lt_millis":64,"count":20}\
+            ]}""");
+
+        Arrays.fill(histogram, 0L);
+        histogram[HandlingTimeTracker.BUCKET_COUNT - 1] = 5;
+        assertHistogram(histogram, """
+            {"h":[{"ge":"1m","ge_millis":65536,"count":5}]}""");
+
+        histogram[HandlingTimeTracker.BUCKET_COUNT - 3] = 6;
+        assertHistogram(histogram, """
+            {"h":[\
+            {"ge":"16.3s","ge_millis":16384,"lt":"32.7s","lt_millis":32768,"count":6},\
+            {"ge":"32.7s","ge_millis":32768,"lt":"1m","lt_millis":65536,"count":0},\
+            {"ge":"1m","ge_millis":65536,"count":5}\
+            ]}""");
+
+        Arrays.fill(histogram, 1L);
+        assertHistogram(histogram, """
+            {"h":[\
+            {"lt":"1ms","lt_millis":1,"count":1},\
+            {"ge":"1ms","ge_millis":1,"lt":"2ms","lt_millis":2,"count":1},\
+            {"ge":"2ms","ge_millis":2,"lt":"4ms","lt_millis":4,"count":1},\
+            {"ge":"4ms","ge_millis":4,"lt":"8ms","lt_millis":8,"count":1},\
+            {"ge":"8ms","ge_millis":8,"lt":"16ms","lt_millis":16,"count":1},\
+            {"ge":"16ms","ge_millis":16,"lt":"32ms","lt_millis":32,"count":1},\
+            {"ge":"32ms","ge_millis":32,"lt":"64ms","lt_millis":64,"count":1},\
+            {"ge":"64ms","ge_millis":64,"lt":"128ms","lt_millis":128,"count":1},\
+            {"ge":"128ms","ge_millis":128,"lt":"256ms","lt_millis":256,"count":1},\
+            {"ge":"256ms","ge_millis":256,"lt":"512ms","lt_millis":512,"count":1},\
+            {"ge":"512ms","ge_millis":512,"lt":"1s","lt_millis":1024,"count":1},\
+            {"ge":"1s","ge_millis":1024,"lt":"2s","lt_millis":2048,"count":1},\
+            {"ge":"2s","ge_millis":2048,"lt":"4s","lt_millis":4096,"count":1},\
+            {"ge":"4s","ge_millis":4096,"lt":"8.1s","lt_millis":8192,"count":1},\
+            {"ge":"8.1s","ge_millis":8192,"lt":"16.3s","lt_millis":16384,"count":1},\
+            {"ge":"16.3s","ge_millis":16384,"lt":"32.7s","lt_millis":32768,"count":1},\
+            {"ge":"32.7s","ge_millis":32768,"lt":"1m","lt_millis":65536,"count":1},\
+            {"ge":"1m","ge_millis":65536,"count":1}\
+            ]}""");
+    }
+
+}