@@ -14,7 +14,6 @@ import org.elasticsearch.ResourceAlreadyExistsException;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.unit.ByteSizeValue;
-import org.elasticsearch.common.unit.Processors;
 import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
 import org.elasticsearch.xpack.core.ml.inference.assignment.RoutingInfo;
 import org.elasticsearch.xpack.core.ml.inference.assignment.RoutingState;
@@ -24,6 +23,7 @@ import org.elasticsearch.xpack.ml.autoscaling.NodeAvailabilityZoneMapper;
 import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan;
 import org.elasticsearch.xpack.ml.inference.assignment.planning.ZoneAwareAssignmentPlanner;
 import org.elasticsearch.xpack.ml.job.NodeLoad;
+import org.elasticsearch.xpack.ml.utils.MlProcessors;

 import java.util.ArrayList;
 import java.util.Collection;
@@ -142,7 +142,7 @@ class TrainedModelAssignmentRebalancer {
                     // We subtract native inference memory as the planner expects available memory for
                     // native inference including current assignments.
                     getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(load),
-                    getNodeAllocatedProcessors(discoveryNode).roundUp()
+                    MlProcessors.get(discoveryNode).roundUp()
                 )
             );
         } else {
@@ -158,20 +158,6 @@ class TrainedModelAssignmentRebalancer {
         }));
     }

-    private static Processors getNodeAllocatedProcessors(DiscoveryNode node) {
-        String allocatedProcessorsString = node.getAttributes().get(MachineLearning.ALLOCATED_PROCESSORS_NODE_ATTR);
-        try {
-            double allocatedProcessorsAsDouble = allocatedProcessorsString == null ? 0.0 : Double.parseDouble(allocatedProcessorsString);
-            return allocatedProcessorsAsDouble > 0 ? Processors.of(allocatedProcessorsAsDouble) : Processors.ZERO;
-        } catch (NumberFormatException e) {
-            assert e == null
-                : MachineLearning.ALLOCATED_PROCESSORS_NODE_ATTR
-                    + " should parse because we set it internally: invalid value was "
-                    + allocatedProcessorsString;
-            return Processors.ZERO;
-        }
-    }
-
     private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) {
         return load.getFreeMemoryExcludingPerNodeOverhead() - load.getAssignedNativeInferenceMemory();
     }
@@ -267,7 +253,7 @@ class TrainedModelAssignmentRebalancer {
             // But we should also check if we managed to assign a model during the rebalance for which
             // we check if the node has used up any of its allocated processors.
             boolean isPerNodeOverheadAccountedFor = load.getNumAssignedJobsAndModels() > 0
-                || assignmentPlan.getRemainingNodeCores(load.getNodeId()) < getNodeAllocatedProcessors(node).roundUp();
+                || assignmentPlan.getRemainingNodeCores(load.getNodeId()) < MlProcessors.get(node).roundUp();
             long requiredMemory = model.memoryBytes() + (isPerNodeOverheadAccountedFor
                 ? 0
                 : MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes());
@@ -296,7 +282,7 @@ class TrainedModelAssignmentRebalancer {
                     "This node has insufficient allocated processors. Available processors [{}], free processors [{}], "
                         + "processors required for each allocation of this model [{}]",
                     new Object[] {
-                        getNodeAllocatedProcessors(node).roundUp(),
+                        MlProcessors.get(node).roundUp(),
                         assignmentPlan.getRemainingNodeCores(node.getId()),
                         model.threadsPerAllocation() }
                 )
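
For reference, the removed private helper appears to have been hoisted into the shared `org.elasticsearch.xpack.ml.utils.MlProcessors` utility that this diff now imports and calls. The utility's source is not part of this diff; below is a minimal sketch of its likely shape, assuming it carries the removed helper's logic over unchanged:

```java
package org.elasticsearch.xpack.ml.utils;

import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.unit.Processors;
import org.elasticsearch.xpack.ml.MachineLearning;

public final class MlProcessors {

    private MlProcessors() {}

    /**
     * Reads the ML allocated-processors node attribute and returns it as
     * {@link Processors}, falling back to {@link Processors#ZERO} when the
     * attribute is absent, non-positive, or unparseable.
     */
    public static Processors get(DiscoveryNode node) {
        String allocatedProcessorsString = node.getAttributes().get(MachineLearning.ALLOCATED_PROCESSORS_NODE_ATTR);
        try {
            double allocatedProcessors = allocatedProcessorsString == null ? 0.0 : Double.parseDouble(allocatedProcessorsString);
            return allocatedProcessors > 0 ? Processors.of(allocatedProcessors) : Processors.ZERO;
        } catch (NumberFormatException e) {
            // The attribute is set internally, so a parse failure indicates a bug.
            assert e == null
                : MachineLearning.ALLOCATED_PROCESSORS_NODE_ATTR
                    + " should parse because we set it internally: invalid value was "
                    + allocatedProcessorsString;
            return Processors.ZERO;
        }
    }
}
```

Centralizing the attribute parsing in one utility lets the rebalancer and, presumably, other ML components such as the autoscaling code resolve a node's allocated processor count the same way, rather than each keeping its own copy of the parsing and fallback logic.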
|