Browse Source

Account for simulated utilization threshold in WriteLoadConstraintDeciderIT (#133894)

Nick Tindall 1 month ago
parent
commit
068ca764d5

+ 0 - 3
muted-tests.yml

@@ -519,9 +519,6 @@ tests:
 - class: org.elasticsearch.xpack.esql.action.RandomizedTimeSeriesIT
   method: testGroupBySubset
   issue: https://github.com/elastic/elasticsearch/issues/133220
-- class: org.elasticsearch.cluster.routing.allocation.decider.WriteLoadConstraintDeciderIT
-  method: testHighNodeWriteLoadPreventsNewShardAllocation
-  issue: https://github.com/elastic/elasticsearch/issues/133857
 - class: org.elasticsearch.xpack.kql.parser.KqlParserBooleanQueryTests
   method: testParseOrQuery
   issue: https://github.com/elastic/elasticsearch/issues/133863

+ 18 - 9
server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java

@@ -50,6 +50,7 @@ import java.util.Map;
 
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
+import static org.elasticsearch.cluster.routing.ShardMovementWriteLoadSimulator.calculateUtilizationForWriteLoad;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
 public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
@@ -68,6 +69,8 @@ public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
      */
     public void testHighNodeWriteLoadPreventsNewShardAllocation() {
         int randomUtilizationThresholdPercent = randomIntBetween(50, 100);
+        int numberOfWritePoolThreads = randomIntBetween(2, 20);
+        float shardWriteLoad = randomFloatBetween(0.0f, 0.01f, false);
         Settings settings = Settings.builder()
             .put(
                 WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
@@ -115,7 +118,14 @@ public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
         );
 
         String indexName = randomIdentifier();
-        int randomNumberOfShards = randomIntBetween(15, 40); // Pick a high number of shards, so it is clear assignment is not accidental.
+        int randomNumberOfShards = randomIntBetween(10, 20); // Pick a high number of shards, so it is clear assignment is not accidental.
+
+        // Calculate the maximum utilization a node can report while still being able to accept all relocating shards
+        double additionalLoadFromAllShards = calculateUtilizationForWriteLoad(
+            shardWriteLoad * randomNumberOfShards,
+            numberOfWritePoolThreads
+        );
+        int maxUtilizationPercent = randomUtilizationThresholdPercent - (int) (additionalLoadFromAllShards * 100) - 1;
 
         var verifyAssignmentToFirstNodeListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> {
             var indexRoutingTable = clusterState.routingTable().index(indexName);
@@ -154,20 +164,20 @@ public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
         final DiscoveryNode thirdDiscoveryNode = getDiscoveryNode(thirdDataNodeName);
         final NodeUsageStatsForThreadPools firstNodeNonHotSpottingNodeStats = createNodeUsageStatsForThreadPools(
             firstDiscoveryNode,
-            2,
-            0.5f,
+            numberOfWritePoolThreads,
+            randomIntBetween(0, maxUtilizationPercent) / 100f,
             0
         );
         final NodeUsageStatsForThreadPools secondNodeNonHotSpottingNodeStats = createNodeUsageStatsForThreadPools(
             secondDiscoveryNode,
-            2,
-            0.5f,
+            numberOfWritePoolThreads,
+            randomIntBetween(0, maxUtilizationPercent) / 100f,
             0
         );
         final NodeUsageStatsForThreadPools thirdNodeHotSpottingNodeStats = createNodeUsageStatsForThreadPools(
             thirdDiscoveryNode,
-            2,
-            randomUtilizationThresholdPercent + 1 / 100,
+            numberOfWritePoolThreads,
+            (randomUtilizationThresholdPercent + 1) / 100f,
             0
         );
 
@@ -197,12 +207,11 @@ public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
             .getMetadata()
             .getProject()
             .index(indexName);
-        double shardWriteLoadDefault = 0.2;
         MockTransportService.getInstance(firstDataNodeName)
             .addRequestHandlingBehavior(IndicesStatsAction.NAME + "[n]", (handler, request, channel, task) -> {
                 List<ShardStats> shardStats = new ArrayList<>(indexMetadata.getNumberOfShards());
                 for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
-                    shardStats.add(createShardStats(indexMetadata, i, shardWriteLoadDefault, firstDataNodeId));
+                    shardStats.add(createShardStats(indexMetadata, i, shardWriteLoad, firstDataNodeId));
                 }
                 TransportIndicesStatsAction instance = internalCluster().getInstance(TransportIndicesStatsAction.class, firstDataNodeName);
                 channel.sendResponse(instance.new NodeResponse(firstDataNodeId, indexMetadata.getNumberOfShards(), shardStats, List.of()));

+ 12 - 1
server/src/main/java/org/elasticsearch/cluster/routing/ShardMovementWriteLoadSimulator.java

@@ -124,10 +124,21 @@ public class ShardMovementWriteLoadSimulator {
         float shardWriteLoadDelta,
         int numberOfWriteThreads
     ) {
-        float newNodeUtilization = nodeUtilization + (shardWriteLoadDelta / numberOfWriteThreads);
+        float newNodeUtilization = nodeUtilization + calculateUtilizationForWriteLoad(shardWriteLoadDelta, numberOfWriteThreads);
         return (float) Math.max(newNodeUtilization, 0.0);
     }
 
+    /**
+     * Calculate what percentage utilization increase would result from adding some amount of write-load
+     *
+     * @param totalShardWriteLoad The write-load being added/removed
+     * @param numberOfThreads The number of threads in the node-being-added-to's write thread pool
+     * @return The change in percentage utilization
+     */
+    public static float calculateUtilizationForWriteLoad(float totalShardWriteLoad, int numberOfThreads) {
+        return totalShardWriteLoad / numberOfThreads;
+    }
+
     /**
      * Adjust the max thread pool queue latency by accounting for whether shard has moved away from the node.
      * @param maxThreadPoolQueueLatencyMillis The current max thread pool queue latency.