Browse Source

In ARS, correct default number of outstanding requests (#71022)

When computing node’s ARS rank, we use the number of outstanding search
requests to the node. If there are no connections to the node, we consider
there to be 1 outstanding request. This isn’t accurate, the default should be 0
to indicate no outstanding requests. The ARS rank we return in node stats
actually uses 0 instead of 1.

This small fix lets us remove a test workaround. It also ensures the ARS ranks
we return in node stats match the ranks we use to select shards during search.

Follow-up to #70283.
Julie Tibshirani 4 years ago
parent
commit
29af73c69c

+ 1 - 1
server/src/main/java/org/elasticsearch/cluster/routing/IndexShardRoutingTable.java

@@ -281,7 +281,7 @@ public class IndexShardRoutingTable implements Iterable<ShardRouting> {
             Optional<ResponseCollectorService.ComputedNodeStats> maybeStats = entry.getValue();
             maybeStats.ifPresent(stats -> {
                 final String nodeId = entry.getKey();
-                nodeRanks.put(nodeId, stats.rank(nodeSearchCounts.getOrDefault(nodeId, 1L)));
+                nodeRanks.put(nodeId, stats.rank(nodeSearchCounts.getOrDefault(nodeId, 0L)));
             });
         }
         return nodeRanks;

+ 4 - 8
server/src/test/java/org/elasticsearch/cluster/routing/OperationRoutingTests.java

@@ -657,8 +657,6 @@ public class OperationRoutingTests extends ESTestCase{
         collector.addNodeStatistics("node_0", 1, TimeValue.timeValueMillis(50).nanos(), TimeValue.timeValueMillis(40).nanos());
         collector.addNodeStatistics("node_1", 1, TimeValue.timeValueMillis(51).nanos(), TimeValue.timeValueMillis(40).nanos());
         Map<String, Long> outstandingRequests = new HashMap<>();
-        outstandingRequests.put("node_0", 1L);
-        outstandingRequests.put("node_1", 1L);
 
         // Check that we choose to search over both nodes
         GroupShardsIterator<ShardIterator> groupIterator = opRouting.searchShards(
@@ -668,14 +666,12 @@ public class OperationRoutingTests extends ESTestCase{
         nodeIds.add(groupIterator.get(0).nextOrNull().currentNodeId());
         nodeIds.add(groupIterator.get(1).nextOrNull().currentNodeId());
         assertThat(nodeIds, equalTo(Set.of("node_0", "node_1")));
-        assertThat(outstandingRequests.get("node_0"), equalTo(2L));
-        assertThat(outstandingRequests.get("node_1"), equalTo(2L));
+        assertThat(outstandingRequests.get("node_0"), equalTo(1L));
+        assertThat(outstandingRequests.get("node_1"), equalTo(1L));
 
         // The first node becomes much more loaded
-        collector.addNodeStatistics("node_0", 5, TimeValue.timeValueMillis(300).nanos(), TimeValue.timeValueMillis(200).nanos());
+        collector.addNodeStatistics("node_0", 6, TimeValue.timeValueMillis(300).nanos(), TimeValue.timeValueMillis(200).nanos());
         outstandingRequests = new HashMap<>();
-        outstandingRequests.put("node_0", 1L);
-        outstandingRequests.put("node_1", 1L);
 
         // Check that we always choose the second node
         groupIterator = opRouting.searchShards(
@@ -685,7 +681,7 @@ public class OperationRoutingTests extends ESTestCase{
         nodeIds.add(groupIterator.get(0).nextOrNull().currentNodeId());
         nodeIds.add(groupIterator.get(1).nextOrNull().currentNodeId());
         assertThat(nodeIds, equalTo(Set.of("node_1")));
-        assertThat(outstandingRequests.get("node_1"), equalTo(3L));
+        assertThat(outstandingRequests.get("node_1"), equalTo(2L));
 
         IOUtils.close(clusterService);
         terminate(threadPool);