|
@@ -11,7 +11,11 @@ import org.elasticsearch.ResourceAlreadyExistsException;
|
|
|
import org.elasticsearch.Version;
|
|
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
|
|
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
|
|
|
+import org.elasticsearch.cluster.node.DiscoveryNodes;
|
|
|
+import org.elasticsearch.cluster.routing.allocation.decider.AwarenessAllocationDecider;
|
|
|
import org.elasticsearch.common.collect.MapBuilder;
|
|
|
+import org.elasticsearch.common.settings.ClusterSettings;
|
|
|
+import org.elasticsearch.common.settings.Settings;
|
|
|
import org.elasticsearch.common.unit.ByteSizeValue;
|
|
|
import org.elasticsearch.test.ESTestCase;
|
|
|
import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
|
|
@@ -20,12 +24,14 @@ import org.elasticsearch.xpack.core.ml.inference.assignment.RoutingInfo;
|
|
|
import org.elasticsearch.xpack.core.ml.inference.assignment.RoutingState;
|
|
|
import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
|
|
|
import org.elasticsearch.xpack.ml.MachineLearning;
|
|
|
+import org.elasticsearch.xpack.ml.autoscaling.NodeAvailabilityZoneMapper;
|
|
|
import org.elasticsearch.xpack.ml.job.NodeLoad;
|
|
|
|
|
|
import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
import java.util.Optional;
|
|
|
+import java.util.Set;
|
|
|
|
|
|
import static org.hamcrest.Matchers.aMapWithSize;
|
|
|
import static org.hamcrest.Matchers.anEmptyMap;
|
|
@@ -37,10 +43,17 @@ import static org.hamcrest.Matchers.notNullValue;
|
|
|
|
|
|
public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
|
|
|
+ private NodeAvailabilityZoneMapper nodeAvailabilityZoneMapper = new NodeAvailabilityZoneMapper(
|
|
|
+ Settings.EMPTY,
|
|
|
+ new ClusterSettings(Settings.EMPTY, Set.of(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING)),
|
|
|
+ DiscoveryNodes.EMPTY_NODES
|
|
|
+ );
|
|
|
+
|
|
|
public void testRebalance_GivenNoAssignments() throws Exception {
|
|
|
TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
TrainedModelAssignmentMetadata.Builder.empty().build(),
|
|
|
Map.of(),
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
Optional.empty()
|
|
|
).rebalance().build();
|
|
|
assertThat(result.modelAssignments().isEmpty(), is(true));
|
|
@@ -68,14 +81,22 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
nodeLoads.put(buildNode("node-1", oneGbBytes, 4), NodeLoad.builder("node-1").setMaxMemory(oneGbBytes).build());
|
|
|
nodeLoads.put(buildNode("node-2", oneGbBytes, 4), NodeLoad.builder("node-2").setMaxMemory(oneGbBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.empty())
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.empty()
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(currentMetadata, equalTo(result));
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenAllAssignmentsAreSatisfied_GivenOutdatedRoutingEntry_ShouldRebalance() throws Exception {
|
|
|
+ long oneGbBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", oneGbBytes, 4);
|
|
|
+ DiscoveryNode node2 = buildNode("node-2", oneGbBytes, 4);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).add(node2).build());
|
|
|
+
|
|
|
String modelId1 = "model-1";
|
|
|
String modelId2 = "model-2";
|
|
|
StartTrainedModelDeploymentAction.TaskParams taskParams1 = newParams(modelId1, 1024L, 1, 2);
|
|
@@ -93,13 +114,15 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
)
|
|
|
.build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long oneGbBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", oneGbBytes, 4), NodeLoad.builder("node-1").setMaxMemory(oneGbBytes).build());
|
|
|
- nodeLoads.put(buildNode("node-2", oneGbBytes, 4), NodeLoad.builder("node-2").setMaxMemory(oneGbBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(oneGbBytes).build());
|
|
|
+ nodeLoads.put(node2, NodeLoad.builder("node-2").setMaxMemory(oneGbBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.empty())
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.empty()
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(result.modelAssignments(), is(aMapWithSize(2)));
|
|
|
|
|
@@ -122,7 +145,8 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
.build();
|
|
|
expectThrows(
|
|
|
ResourceAlreadyExistsException.class,
|
|
|
- () -> new TrainedModelAssignmentRebalancer(currentMetadata, Map.of(), Optional.of(taskParams)).rebalance()
|
|
|
+ () -> new TrainedModelAssignmentRebalancer(currentMetadata, Map.of(), nodeAvailabilityZoneMapper, Optional.of(taskParams))
|
|
|
+ .rebalance()
|
|
|
);
|
|
|
}
|
|
|
|
|
@@ -131,9 +155,12 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
StartTrainedModelDeploymentAction.TaskParams taskParams = newParams(modelId, 1024L, 1, 1);
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build();
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, Map.of(), Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ Map.of(),
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
TrainedModelAssignment assignment = result.getModelAssignment(modelId);
|
|
|
assertThat(assignment, is(notNullValue()));
|
|
@@ -144,16 +171,23 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenFirstModelToAdd_NotEnoughProcessors() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node = buildNode("node-1", nodeMemoryBytes, 3);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node).build());
|
|
|
+
|
|
|
String modelId = "model-to-add";
|
|
|
StartTrainedModelDeploymentAction.TaskParams taskParams = newParams(modelId, 1024L, 1, 4);
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 3), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ nodeLoads.put(node, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
TrainedModelAssignment assignment = result.getModelAssignment(modelId);
|
|
|
assertThat(assignment, is(notNullValue()));
|
|
@@ -177,9 +211,12 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 3), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
TrainedModelAssignment assignment = result.getModelAssignment(modelId);
|
|
|
assertThat(assignment, is(notNullValue()));
|
|
@@ -203,9 +240,12 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).setError("error detecting load").build()
|
|
|
);
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
TrainedModelAssignment assignment = result.getModelAssignment(modelId);
|
|
|
assertThat(assignment, is(notNullValue()));
|
|
@@ -219,22 +259,23 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenProblemsOnMultipleNodes() throws Exception {
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", ByteSizeValue.ofGb(1).getBytes(), 8);
|
|
|
+ DiscoveryNode node2 = buildNode("node-2", ByteSizeValue.ofGb(10).getBytes(), 3);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).add(node2).build());
|
|
|
+
|
|
|
String modelId = "model-to-add";
|
|
|
StartTrainedModelDeploymentAction.TaskParams taskParams = newParams(modelId, ByteSizeValue.ofGb(2).getBytes(), 1, 4);
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- nodeLoads.put(
|
|
|
- buildNode("node-1", ByteSizeValue.ofGb(1).getBytes(), 8),
|
|
|
- NodeLoad.builder("node-1").setMaxMemory(ByteSizeValue.ofGb(1).getBytes()).build()
|
|
|
- );
|
|
|
- nodeLoads.put(
|
|
|
- buildNode("node-2", ByteSizeValue.ofGb(10).getBytes(), 3),
|
|
|
- NodeLoad.builder("node-2").setMaxMemory(ByteSizeValue.ofGb(10).getBytes()).build()
|
|
|
- );
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(ByteSizeValue.ofGb(1).getBytes()).build());
|
|
|
+ nodeLoads.put(node2, NodeLoad.builder("node-2").setMaxMemory(ByteSizeValue.ofGb(10).getBytes()).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
TrainedModelAssignment assignment = result.getModelAssignment(modelId);
|
|
|
assertThat(assignment, is(notNullValue()));
|
|
@@ -252,16 +293,22 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenFirstModelToAdd_FitsFully() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 4);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).build());
|
|
|
+
|
|
|
String modelId = "model-to-add";
|
|
|
StartTrainedModelDeploymentAction.TaskParams taskParams = newParams(modelId, 1024L, 1, 1);
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 4), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
TrainedModelAssignment assignment = result.getModelAssignment(modelId);
|
|
|
assertThat(assignment, is(notNullValue()));
|
|
@@ -275,6 +322,11 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenModelToAdd_AndPreviousAssignments_AndTwoNodes_AllFit() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 4);
|
|
|
+ DiscoveryNode node2 = buildNode("node-2", nodeMemoryBytes, 4);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).add(node2).build());
|
|
|
+
|
|
|
String modelToAddId = "model-to-add";
|
|
|
String previousModelId = "previous-model";
|
|
|
StartTrainedModelDeploymentAction.TaskParams taskParams = newParams(modelToAddId, 1024L, 1, 2);
|
|
@@ -287,13 +339,15 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
)
|
|
|
.build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 4), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
- nodeLoads.put(buildNode("node-2", nodeMemoryBytes, 4), NodeLoad.builder("node-2").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node2, NodeLoad.builder("node-2").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.of(taskParams))
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.of(taskParams)
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(result.modelAssignments(), is(aMapWithSize(2)));
|
|
|
|
|
@@ -326,6 +380,12 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenPreviousAssignments_AndNewNode() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 4);
|
|
|
+ DiscoveryNode node2 = buildNode("node-2", nodeMemoryBytes, 4);
|
|
|
+ DiscoveryNode node3 = buildNode("node-3", nodeMemoryBytes, 4);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).add(node2).add(node3).build());
|
|
|
+
|
|
|
String previousModel1Id = "previous-model-1";
|
|
|
String previousModel2Id = "previous-model-2";
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty()
|
|
@@ -342,14 +402,16 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
)
|
|
|
.build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 4), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
- nodeLoads.put(buildNode("node-2", nodeMemoryBytes, 4), NodeLoad.builder("node-2").setMaxMemory(nodeMemoryBytes).build());
|
|
|
- nodeLoads.put(buildNode("node-3", nodeMemoryBytes, 4), NodeLoad.builder("node-3").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node2, NodeLoad.builder("node-2").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node3, NodeLoad.builder("node-3").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.empty())
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.empty()
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(result.modelAssignments(), is(aMapWithSize(2)));
|
|
|
|
|
@@ -386,6 +448,10 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenPreviousAssignments_AndRemovedNode_AndRemainingNodeNotLargeEnough() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 4);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).build());
|
|
|
+
|
|
|
String previousModel1Id = "previous-model-1";
|
|
|
String previousModel2Id = "previous-model-2";
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty()
|
|
@@ -402,12 +468,14 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
)
|
|
|
.build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 4), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.empty())
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.empty()
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(result.modelAssignments(), is(aMapWithSize(2)));
|
|
|
|
|
@@ -450,6 +518,10 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenPreviousAssignments_AndRemovedNode_AndRemainingNodeLargeEnough() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 7);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).build());
|
|
|
+
|
|
|
String previousModel1Id = "previous-model-1";
|
|
|
String previousModel2Id = "previous-model-2";
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty()
|
|
@@ -466,12 +538,14 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
)
|
|
|
.build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 7), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.empty())
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.empty()
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(result.modelAssignments(), is(aMapWithSize(2)));
|
|
|
|
|
@@ -500,6 +574,10 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
}
|
|
|
|
|
|
public void testRebalance_GivenFailedAssignment_RestartsAssignment() throws Exception {
|
|
|
+ long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
+ DiscoveryNode node1 = buildNode("node-1", nodeMemoryBytes, 4);
|
|
|
+ givenNodeAvailabilityZoneMapper(DiscoveryNodes.builder().add(node1).build());
|
|
|
+
|
|
|
String modelId = "model-1";
|
|
|
TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty()
|
|
|
.addNewAssignment(
|
|
@@ -509,12 +587,14 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
)
|
|
|
.build();
|
|
|
Map<DiscoveryNode, NodeLoad> nodeLoads = new HashMap<>();
|
|
|
- long nodeMemoryBytes = ByteSizeValue.ofGb(1).getBytes();
|
|
|
- nodeLoads.put(buildNode("node-1", nodeMemoryBytes, 4), NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
+ nodeLoads.put(node1, NodeLoad.builder("node-1").setMaxMemory(nodeMemoryBytes).build());
|
|
|
|
|
|
- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(currentMetadata, nodeLoads, Optional.empty())
|
|
|
- .rebalance()
|
|
|
- .build();
|
|
|
+ TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer(
|
|
|
+ currentMetadata,
|
|
|
+ nodeLoads,
|
|
|
+ nodeAvailabilityZoneMapper,
|
|
|
+ Optional.empty()
|
|
|
+ ).rebalance().build();
|
|
|
|
|
|
assertThat(result.modelAssignments(), is(aMapWithSize(1)));
|
|
|
|
|
@@ -529,6 +609,15 @@ public class TrainedModelAssignmentRebalancerTests extends ESTestCase {
|
|
|
assertThat(assignment.getReason().isPresent(), is(false));
|
|
|
}
|
|
|
|
|
|
+ private void givenNodeAvailabilityZoneMapper(DiscoveryNodes discoveryNodes) {
|
|
|
+ Settings settings = Settings.EMPTY;
|
|
|
+ ClusterSettings clusterSettings = new ClusterSettings(
|
|
|
+ settings,
|
|
|
+ Set.of(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING)
|
|
|
+ );
|
|
|
+ nodeAvailabilityZoneMapper = new NodeAvailabilityZoneMapper(settings, clusterSettings, discoveryNodes);
|
|
|
+ }
|
|
|
+
|
|
|
private static StartTrainedModelDeploymentAction.TaskParams newParams(
|
|
|
String modelId,
|
|
|
long modelSize,
|