@@ -31,9 +31,12 @@ import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderService;
 import org.elasticsearch.xpack.core.ml.MlTasks;
 import org.elasticsearch.xpack.core.ml.action.StartDatafeedAction.DatafeedParams;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
+import org.elasticsearch.xpack.core.ml.inference.allocation.AllocationState;
+import org.elasticsearch.xpack.core.ml.inference.allocation.TrainedModelAllocation;
 import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
 import org.elasticsearch.xpack.core.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.MachineLearning;
+import org.elasticsearch.xpack.ml.inference.allocation.TrainedModelAllocationMetadata;
 import org.elasticsearch.xpack.ml.job.NodeLoad;
 import org.elasticsearch.xpack.ml.job.NodeLoadDetector;
 import org.elasticsearch.xpack.ml.process.MlMemoryTracker;
@@ -348,6 +351,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
         PersistentTasksCustomMetadata tasks = clusterState.getMetadata().custom(PersistentTasksCustomMetadata.TYPE);
         Collection<PersistentTask<?>> anomalyDetectionTasks = anomalyDetectionTasks(tasks);
         Collection<PersistentTask<?>> dataframeAnalyticsTasks = dataframeAnalyticsTasks(tasks);
+        Map<String, TrainedModelAllocation> modelAllocations = TrainedModelAllocationMetadata.fromState(clusterState).modelAllocations();
         final List<String> waitingAnomalyJobs = anomalyDetectionTasks.stream()
             .filter(t -> AWAITING_LAZY_ASSIGNMENT.equals(t.getAssignment()))
             .map(t -> MlTasks.jobId(t.getId()))
@@ -356,6 +360,13 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
             .filter(t -> AWAITING_LAZY_ASSIGNMENT.equals(t.getAssignment()))
             .map(t -> MlTasks.dataFrameAnalyticsId(t.getId()))
             .collect(Collectors.toList());
+        final List<String> waitingAllocatedModels = modelAllocations
+            .entrySet()
+            .stream()
+            // TODO: Eventually care about those that are STARTED but not FULLY_ALLOCATED
+            .filter(e -> e.getValue().getAllocationState().equals(AllocationState.STARTING) && e.getValue().getNodeRoutingTable().isEmpty())
+            .map(Map.Entry::getKey)
+            .collect(Collectors.toList());
 
         final int numAnalyticsJobsInQueue = NUM_ANALYTICS_JOBS_IN_QUEUE.get(configuration);
         final int numAnomalyJobsInQueue = NUM_ANOMALY_JOBS_IN_QUEUE.get(configuration);
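The stream above defines what "waiting" means for a trained model: an allocation that is still STARTING and whose routing table names no node. A minimal sketch of that predicate pulled out as a standalone helper (the method name is illustrative, not part of the patch):

```java
import org.elasticsearch.xpack.core.ml.inference.allocation.AllocationState;
import org.elasticsearch.xpack.core.ml.inference.allocation.TrainedModelAllocation;

// Hypothetical helper mirroring the filter in the hunk above: a model
// counts as waiting only while its allocation is STARTING and no node
// has been routed for it yet. Per the TODO, allocations that are
// STARTED but not FULLY_ALLOCATED are deliberately ignored for now.
static boolean isWaitingForAllocation(TrainedModelAllocation allocation) {
    return allocation.getAllocationState().equals(AllocationState.STARTING)
        && allocation.getNodeRoutingTable().isEmpty();
}
```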
@@ -366,20 +377,22 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
         final MlScalingReason.Builder reasonBuilder = MlScalingReason.builder()
             .setWaitingAnomalyJobs(waitingAnomalyJobs)
             .setWaitingAnalyticsJobs(waitingAnalyticsJobs)
+            .setWaitingModels(waitingAllocatedModels)
             .setCurrentMlCapacity(currentScale.autoscalingCapacity(maxMachineMemoryPercent, useAuto))
             .setPassedConfiguration(configuration);
 
         // There are no ML nodes, scale up as quick as possible, no matter if memory is stale or not
         if (nodes.isEmpty()
             && (waitingAnomalyJobs.isEmpty() == false
-            || waitingAnalyticsJobs.isEmpty() == false)) {
-            return scaleUpFromZero(waitingAnomalyJobs, waitingAnalyticsJobs, reasonBuilder);
+            || waitingAnalyticsJobs.isEmpty() == false
+            || waitingAllocatedModels.isEmpty() == false)) {
+            return scaleUpFromZero(waitingAnomalyJobs, waitingAnalyticsJobs, waitingAllocatedModels, reasonBuilder);
         }
 
         // We don't need to check anything as there are no tasks
         // This is a quick path to downscale.
         // simply return `0` for scale down if delay is satisfied
-        if (anomalyDetectionTasks.isEmpty() && dataframeAnalyticsTasks.isEmpty()) {
+        if (anomalyDetectionTasks.isEmpty() && dataframeAnalyticsTasks.isEmpty() && modelAllocations.isEmpty()) {
             long msLeftToScale = msLeftToDownScale(configuration);
             if (msLeftToScale > 0) {
                 return new AutoscalingDeciderResult(
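These two early returns now treat waiting model allocations the same way as waiting jobs. A compact, illustrative restatement of the gating (not patch code):

```java
// If there are no ML nodes but anything is waiting, scale up from zero
// immediately; if nothing at all is running or allocated, take the
// quick downscale path once the configured delay has elapsed.
boolean anythingWaiting = waitingAnomalyJobs.isEmpty() == false
    || waitingAnalyticsJobs.isEmpty() == false
    || waitingAllocatedModels.isEmpty() == false;
boolean nothingRunning = anomalyDetectionTasks.isEmpty()
    && dataframeAnalyticsTasks.isEmpty()
    && modelAllocations.isEmpty();
```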
@@ -462,6 +475,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
             nodeLoads,
             waitingAnomalyJobs,
             waitingAnalyticsJobs,
+            waitingAllocatedModels,
             futureFreedCapacity.orElse(null),
             currentScale,
             reasonBuilder
@@ -492,7 +506,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                     .build()));
         }
 
-        long largestJob = Math.max(
+        long largestJobOrModel = Math.max(
             anomalyDetectionTasks.stream()
                 .filter(PersistentTask::isAssigned)
                 // Memory SHOULD be recently refreshed, so in our current state, we should at least have an idea of the memory used
@@ -513,15 +527,20 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                 })
                 .max()
                 .orElse(0L));
+        largestJobOrModel = Math.max(
+            largestJobOrModel,
+            modelAllocations.values().stream().mapToLong(t -> t.getTaskParams().estimateMemoryUsageBytes()).max().orElse(0L)
+        );
 
         // This is an exceptionally weird state
         // Our view of the memory is stale or we have tasks where the required job memory is 0, which should be impossible
-        if (largestJob == 0L && (dataframeAnalyticsTasks.size() + anomalyDetectionTasks.size() > 0)) {
+        if (largestJobOrModel == 0L && (dataframeAnalyticsTasks.size() + anomalyDetectionTasks.size() + modelAllocations.size() > 0)) {
             logger.warn(
                 "The calculated minimum required node size was unexpectedly [0] as there are "
-                    + "[{}] anomaly job tasks and [{}] data frame analytics tasks",
+                    + "[{}] anomaly job tasks, [{}] data frame analytics tasks and [{}] model allocations",
                 anomalyDetectionTasks.size(),
-                dataframeAnalyticsTasks.size()
+                dataframeAnalyticsTasks.size(),
+                modelAllocations.size()
             );
             return noScaleResultOrRefresh(reasonBuilder, true, new AutoscalingDeciderResult(
                 context.currentCapacity(),
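A quick worked example of the node-size floor computed above, with illustrative numbers:

```java
// Suppose the largest assigned anomaly or analytics task needs 2 GB and
// one model allocation estimates 3 GB via estimateMemoryUsageBytes().
// The minimum viable node size is then driven by the model.
long gb = 1024L * 1024L * 1024L;
long largestTask = 2 * gb;                                    // from the two task streams
long largestModel = 3 * gb;                                   // from modelAllocations
long largestJobOrModel = Math.max(largestTask, largestModel); // 3 GB
```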
@@ -531,7 +550,12 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                     .build()));
         }
 
-        final Optional<AutoscalingDeciderResult> maybeScaleDown = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder)
+        final Optional<AutoscalingDeciderResult> maybeScaleDown = checkForScaleDown(
+            nodeLoads,
+            largestJobOrModel,
+            currentScale,
+            reasonBuilder
+        )
             // Due to weird rounding errors, it may be that a scale down result COULD cause a scale up
             // Ensuring the scaleDown here forces the scale down result to always be lower than the current capacity.
             // This is safe as we know that ALL jobs are assigned at the current capacity
@@ -643,6 +667,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
     // can eventually start, and given the current cluster, no job can eventually start.
     AutoscalingDeciderResult scaleUpFromZero(List<String> waitingAnomalyJobs,
                                              List<String> waitingAnalyticsJobs,
+                                             List<String> waitingAllocatedModels,
                                              MlScalingReason.Builder reasonBuilder) {
         final Optional<NativeMemoryCapacity> analyticsCapacity = requiredCapacityForUnassignedJobs(waitingAnalyticsJobs,
             this::getAnalyticsMemoryRequirement,
@@ -650,9 +675,13 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
         final Optional<NativeMemoryCapacity> anomalyCapacity = requiredCapacityForUnassignedJobs(waitingAnomalyJobs,
             this::getAnomalyMemoryRequirement,
             0);
+        final Optional<NativeMemoryCapacity> allocatedModelCapacity = requiredCapacityForUnassignedJobs(waitingAllocatedModels,
+            this::getAllocatedModelRequirement,
+            0);
         NativeMemoryCapacity updatedCapacity = NativeMemoryCapacity.ZERO
             .merge(anomalyCapacity.orElse(NativeMemoryCapacity.ZERO))
-            .merge(analyticsCapacity.orElse(NativeMemoryCapacity.ZERO));
+            .merge(analyticsCapacity.orElse(NativeMemoryCapacity.ZERO))
+            .merge(allocatedModelCapacity.orElse(NativeMemoryCapacity.ZERO));
         // If we still have calculated zero, this means the ml memory tracker does not have the required info.
         // So, request a scale for the default. This is only for the 0 -> N scaling case.
         if (updatedCapacity.getNode() == 0L) {
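How the three optionals combine depends on `NativeMemoryCapacity.merge`. Assuming it sums tier-wide requirements and keeps the largest single-node requirement (an assumption about the existing class, not something this patch shows), a worked example looks like:

```java
long gb = 1024L * 1024L * 1024L;
// NativeMemoryCapacity(tier, node): hypothetical values for illustration.
NativeMemoryCapacity anomaly = new NativeMemoryCapacity(2 * gb, 1 * gb);
NativeMemoryCapacity analytics = new NativeMemoryCapacity(3 * gb, 3 * gb);
NativeMemoryCapacity models = new NativeMemoryCapacity(1 * gb, 1 * gb);
// Assumed merge semantics: tier = 2 + 3 + 1 = 6 GB, node = max(1, 3, 1) = 3 GB.
NativeMemoryCapacity updated = NativeMemoryCapacity.ZERO
    .merge(anomaly)
    .merge(analytics)
    .merge(models);
```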
@@ -681,13 +710,15 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                                                        List<NodeLoad> nodeLoads,
                                                        List<String> waitingAnomalyJobs,
                                                        List<String> waitingAnalyticsJobs,
+                                                       List<String> waitingAllocatedModels,
                                                        @Nullable NativeMemoryCapacity futureFreedCapacity,
                                                        NativeMemoryCapacity currentScale,
                                                        MlScalingReason.Builder reasonBuilder) {
 
         // Are we in breach of maximum waiting jobs?
         if (waitingAnalyticsJobs.size() > numAnalyticsJobsInQueue
-            || waitingAnomalyJobs.size() > numAnomalyJobsInQueue) {
+            || waitingAnomalyJobs.size() > numAnomalyJobsInQueue
+            || waitingAllocatedModels.size() > 0) {
 
             Tuple<NativeMemoryCapacity, List<NodeLoad>> anomalyCapacityAndNewLoad = determineUnassignableJobs(
                 waitingAnomalyJobs,
@@ -701,8 +732,15 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                 numAnalyticsJobsInQueue,
                 anomalyCapacityAndNewLoad.v2()).orElse(Tuple.tuple(NativeMemoryCapacity.ZERO, anomalyCapacityAndNewLoad.v2()));
 
+            Tuple<NativeMemoryCapacity, List<NodeLoad>> modelCapacityAndNewLoad = determineUnassignableJobs(
+                waitingAllocatedModels,
+                this::getAllocatedModelRequirement,
+                0,
+                analyticsCapacityAndNewLoad.v2()).orElse(Tuple.tuple(NativeMemoryCapacity.ZERO, analyticsCapacityAndNewLoad.v2()));
+
             if (analyticsCapacityAndNewLoad.v1().equals(NativeMemoryCapacity.ZERO)
-                && anomalyCapacityAndNewLoad.v1().equals(NativeMemoryCapacity.ZERO)) {
+                && anomalyCapacityAndNewLoad.v1().equals(NativeMemoryCapacity.ZERO)
+                && modelCapacityAndNewLoad.v1().equals(NativeMemoryCapacity.ZERO)) {
                 logger.debug("no_scale event as current capacity, even though there are waiting jobs, is adequate to run the queued jobs");
                 return Optional.empty();
             }
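Note the asymmetry in the scale-up trigger across the last two hunks: jobs tolerate a configurable queue before capacity is requested, while the model allowance is hard-coded to 0, so a single unassignable model is enough. Restated (illustrative, not patch code):

```java
boolean jobsOverQueue = waitingAnalyticsJobs.size() > numAnalyticsJobsInQueue
    || waitingAnomalyJobs.size() > numAnomalyJobsInQueue;  // configurable allowances
boolean modelsWaiting = waitingAllocatedModels.size() > 0; // zero allowance for models
boolean mustConsiderScaleUp = jobsOverQueue || modelsWaiting;
```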
@@ -710,6 +748,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
             NativeMemoryCapacity updatedCapacity = NativeMemoryCapacity.from(currentScale)
                 .merge(analyticsCapacityAndNewLoad.v1())
                 .merge(anomalyCapacityAndNewLoad.v1())
+                .merge(modelCapacityAndNewLoad.v1())
                 // Since we require new capacity, it COULD be we require a brand new node
                 // We should account for overhead in the tier capacity just in case.
                 .merge(new NativeMemoryCapacity(MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes(), 0));
@@ -720,13 +759,15 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                     .setRequiredCapacity(requiredCapacity)
                     .setSimpleReason(
                         "requesting scale up as number of jobs in queues exceeded configured limit "
-                            + "and current capacity is not large enough for waiting jobs"
+                            + "or there is at least one trained model waiting for allocation "
+                            + "and current capacity is not large enough for waiting jobs or models"
                     )
                     .build()
             ));
         }
 
         // Could the currently waiting jobs ever be assigned?
+        // NOTE: the previous predicate catches if an allocated model isn't assigned
         if (waitingAnalyticsJobs.isEmpty() == false || waitingAnomalyJobs.isEmpty() == false) {
             // we are unable to determine new tier size, but maybe we can see if our nodes are big enough.
             if (futureFreedCapacity == null) {
@@ -861,6 +902,10 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
         return mlMemoryTracker.getDataFrameAnalyticsJobMemoryRequirement(analyticsId);
     }
 
+    private Long getAllocatedModelRequirement(String modelId) {
+        return mlMemoryTracker.getTrainedModelAllocationMemoryRequirement(modelId);
+    }
+
     private Long getAnalyticsMemoryRequirement(PersistentTask<?> task) {
         return getAnalyticsMemoryRequirement(MlTasks.dataFrameAnalyticsId(task.getId()));
     }
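One subtlety: the node-sizing path earlier reads memory straight from `TrainedModelAllocation.getTaskParams().estimateMemoryUsageBytes()`, while the scale-up paths look the model up through `MlMemoryTracker` via this new helper, matching the `Function<String, Long>` shape that `requiredCapacityForUnassignedJobs` and `determineUnassignableJobs` already accept. A hedged usage sketch (the null contract is assumed from the analogous job lookups, and the model id is hypothetical):

```java
// Assumed contract: the tracker returns null until it has an estimate,
// mirroring the anomaly and analytics memory lookups above.
Long required = getAllocatedModelRequirement("example-model");
long bytes = required == null ? 0L : required; // callers must tolerate null
```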