@@ -0,0 +1,1394 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.ml.autoscaling;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterInfo;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.Metadata;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.node.DiscoveryNodeRole;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.cluster.routing.allocation.decider.AwarenessAllocationDecider;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.settings.ClusterSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.persistent.PersistentTasksCustomMetadata;
+import org.elasticsearch.snapshots.SnapshotShardSizeInfo;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCapacity;
+import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderContext;
+import org.elasticsearch.xpack.core.ml.MlTasks;
+import org.elasticsearch.xpack.core.ml.action.OpenJobAction;
+import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.StartDatafeedAction;
+import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsTaskState;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
+import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.core.ml.job.config.JobState;
+import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
+import org.elasticsearch.xpack.ml.MachineLearning;
+import org.elasticsearch.xpack.ml.job.NodeLoad;
+import org.elasticsearch.xpack.ml.job.NodeLoadDetector;
+import org.elasticsearch.xpack.ml.job.task.OpenJobPersistentTasksExecutorTests;
+import org.elasticsearch.xpack.ml.process.MlMemoryTracker;
+import org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.Set;
+import java.util.function.LongSupplier;
+
+import static java.lang.Math.min;
+import static org.elasticsearch.xpack.ml.MachineLearning.MACHINE_MEMORY_NODE_ATTR;
+import static org.elasticsearch.xpack.ml.MachineLearning.MAX_JVM_SIZE_NODE_ATTR;
+import static org.elasticsearch.xpack.ml.MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD;
+import static org.elasticsearch.xpack.ml.job.JobNodeSelector.AWAITING_LAZY_ASSIGNMENT;
+import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.JVM_SIZE_KNOT_POINT;
+import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.STATIC_JVM_UPPER_THRESHOLD;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.lessThan;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyBoolean;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class MlMemoryAutoscalingDeciderTests extends ESTestCase {
+
+    private static final long[] NODE_TIERS_NO_MONITORING = new long[] {
+        ByteSizeValue.ofGb(1).getBytes(),
+        ByteSizeValue.ofGb(2).getBytes(),
+        ByteSizeValue.ofGb(4).getBytes(),
+        ByteSizeValue.ofGb(8).getBytes(),
+        ByteSizeValue.ofGb(16).getBytes(),
+        ByteSizeValue.ofGb(32).getBytes(),
+        ByteSizeValue.ofGb(64).getBytes(),
+        ByteSizeValue.ofGb(15).getBytes(),
+        ByteSizeValue.ofGb(30).getBytes(),
+        ByteSizeValue.ofGb(60).getBytes() };
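+    // Presumably these mirror the ML node sizes offered on Elastic Cloud (the "Cloud node sizes"
+    // exercised by testScaleStability below), which is why 15GB, 30GB and 60GB appear alongside the
+    // power-of-two sizes. This note is explanatory only and not part of the original test.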
+
+    // When monitoring is enabled Filebeat and Metricbeat are given a memory allowance of 360MB,
+    // and this is deducted from the raw node size.
+    private static final long MONITORING_ALLOWANCE_BYTES = ByteSizeValue.ofMb(360).getBytes();
+
+    private static final long[] NODE_TIERS_WITH_MONITORING = Arrays.stream(NODE_TIERS_NO_MONITORING)
+        .map(m -> m - MONITORING_ALLOWANCE_BYTES)
+        .toArray();
+
+    private static final long BYTES_IN_4MB = ByteSizeValue.ofMb(4).getBytes();
+
+    // Must match the logic used in MachineDependentHeap.MachineNodeRole.ML_ONLY
+    // (including rounding down to a multiple of 4 megabytes before multiplying
+    // back up).
+    public static long mlOnlyNodeJvmBytes(long systemMemoryBytes) {
+        // 40% of memory up to 16GB, plus 10% of memory above that, up to an absolute maximum of 31GB
+        long unroundedBytes = (systemMemoryBytes <= JVM_SIZE_KNOT_POINT)
+            ? (long) (systemMemoryBytes * 0.4)
+            : (long) min(JVM_SIZE_KNOT_POINT * 0.4 + (systemMemoryBytes - JVM_SIZE_KNOT_POINT) * 0.1, STATIC_JVM_UPPER_THRESHOLD);
+        return (unroundedBytes / BYTES_IN_4MB) * BYTES_IN_4MB;
+    }
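+    // Worked example (illustrative only, assuming the 16GB knot point described above): a 16GB node
+    // has 16384MB, and 40% of that is 6553.6MB; rounding down to a multiple of 4MB gives a 6552MB JVM.
+    // Above 16GB only 10% of the additional memory is added, and the result is capped at 31GB.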
+
+    public static final List<Tuple<Long, Long>> AUTO_NODE_TIERS_NO_MONITORING = Arrays.stream(NODE_TIERS_NO_MONITORING)
+        .mapToObj(m -> Tuple.tuple(m, mlOnlyNodeJvmBytes(m)))
+        .toList();
+
+    public static final List<Tuple<Long, Long>> AUTO_NODE_TIERS_WITH_MONITORING = Arrays.stream(NODE_TIERS_WITH_MONITORING)
+        .mapToObj(m -> Tuple.tuple(m, mlOnlyNodeJvmBytes(m)))
+        .toList();
+
+    private static final long TEST_NODE_SIZE = ByteSizeValue.ofGb(20).getBytes();
+    private static final long ML_MEMORY_FOR_TEST_NODE_SIZE = NativeMemoryCalculator.allowedBytesForMl(TEST_NODE_SIZE, 0, true);
+    private static final long TEST_JVM_SIZE = mlOnlyNodeJvmBytes(TEST_NODE_SIZE);
+    private static final int TEST_ALLOCATED_PROCESSORS = 2;
+    private static final long TEST_JOB_SIZE = ByteSizeValue.ofMb(200).getBytes();
+    private static final long PER_NODE_OVERHEAD = MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes();
+
+    private NodeLoadDetector nodeLoadDetector;
+    private NodeAvailabilityZoneMapper nodeAvailabilityZoneMapper;
+    private ClusterService clusterService;
+    private Settings settings;
+    private LongSupplier timeSupplier;
+    private MlMemoryTracker mlMemoryTracker;
+
+    @Before
+    public void setup() {
+        mlMemoryTracker = mock(MlMemoryTracker.class);
+        when(mlMemoryTracker.isRecentlyRefreshed()).thenReturn(true);
+        when(mlMemoryTracker.asyncRefresh()).thenReturn(true);
+        when(mlMemoryTracker.getAnomalyDetectorJobMemoryRequirement(any())).thenReturn(TEST_JOB_SIZE);
+        when(mlMemoryTracker.getDataFrameAnalyticsJobMemoryRequirement(any())).thenReturn(TEST_JOB_SIZE);
+        when(mlMemoryTracker.getTrainedModelAssignmentMemoryRequirement(any())).thenReturn(TEST_JOB_SIZE);
+        when(mlMemoryTracker.getJobMemoryRequirement(any(), any())).thenReturn(TEST_JOB_SIZE);
+        nodeLoadDetector = mock(NodeLoadDetector.class);
+        when(nodeLoadDetector.getMlMemoryTracker()).thenReturn(mlMemoryTracker);
+        when(nodeLoadDetector.detectNodeLoad(any(), any(), anyInt(), anyInt(), anyBoolean())).thenReturn(
+            NodeLoad.builder("any").setUseMemory(true).incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes()).build()
+        );
+        nodeAvailabilityZoneMapper = mock(NodeAvailabilityZoneMapper.class);
+        clusterService = mock(ClusterService.class);
+        settings = Settings.EMPTY;
+        timeSupplier = System::currentTimeMillis;
+        ClusterSettings cSettings = new ClusterSettings(
+            settings,
+            Set.of(
+                MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
+                MachineLearning.MAX_OPEN_JOBS_PER_NODE,
+                MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT,
+                MachineLearning.MAX_ML_NODE_SIZE,
+                AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING
+            )
+        );
+        when(clusterService.getClusterSettings()).thenReturn(cSettings);
+    }
+
+    public void testScalingEdgeCase() {
+        // This scale up should push above 1gb, but under 2gb.
+        // The unassigned job just fails to fit within the current scale, by 1 megabyte: 610mb is available and 611mb is needed.
+        // The three assigned jobs have model memory limits of 200mb, 10mb and 9mb.
+        // The unassigned job has a model memory limit of 128mb.
+        // Then we have four times the process overhead of 10mb, plus the per-node overhead of 30mb, so total overhead on one node is 70mb.
+ when(mlMemoryTracker.getAnomalyDetectorJobMemoryRequirement(any())).thenReturn(
|
|
|
+ ByteSizeValue.ofMb(128).getBytes() + Job.PROCESS_MEMORY_OVERHEAD.getBytes()
|
|
|
+ );
|
|
|
+ when(mlMemoryTracker.getJobMemoryRequirement(any(), any())).thenReturn(
|
|
|
+ ByteSizeValue.ofMb(128).getBytes() + Job.PROCESS_MEMORY_OVERHEAD.getBytes()
|
|
|
+ );
|
|
|
+ List<String> jobTasks = List.of("waiting_job");
|
|
|
+ long mlMemoryFor1GbNode = autoBytesForMl(AUTO_NODE_TIERS_NO_MONITORING.get(0).v1(), AUTO_NODE_TIERS_NO_MONITORING.get(0).v2());
|
|
|
+ List<NodeLoad> nodesForScaleup = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(mlMemoryFor1GbNode)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(
|
|
|
+ ByteSizeValue.ofMb(200).getBytes() + ByteSizeValue.ofMb(10).getBytes() + ByteSizeValue.ofMb(9).getBytes()
|
|
|
+ + Job.PROCESS_MEMORY_OVERHEAD.getBytes() * 3
|
|
|
+ )
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ MlScalingReason.Builder reasonBuilder = new MlScalingReason.Builder(new MlAutoscalingContext()).setPassedConfiguration(
|
|
|
+ Settings.EMPTY
|
|
|
+ )
|
|
|
+ .setCurrentMlCapacity(
|
|
|
+ AutoscalingCapacity.builder()
|
|
|
+ .node(null, AUTO_NODE_TIERS_NO_MONITORING.get(0).v1(), null)
|
|
|
+ .total(null, AUTO_NODE_TIERS_NO_MONITORING.get(0).v1(), null)
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setUseAuto(true);
|
|
|
+ MlMemoryAutoscalingCapacity scaleUpResult = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ nodesForScaleup,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ new NativeMemoryCapacity(
|
|
|
+ mlMemoryFor1GbNode - PER_NODE_OVERHEAD,
|
|
|
+ mlMemoryFor1GbNode - PER_NODE_OVERHEAD,
|
|
|
+ AUTO_NODE_TIERS_NO_MONITORING.get(0).v2()
|
|
|
+ )
|
|
|
+ ).orElseThrow();
|
|
|
+
|
|
|
+ assertThat(
|
|
|
+ scaleUpResult.tierSize().getBytes(),
|
|
|
+ allOf(greaterThan(ByteSizeValue.ofGb(1).getBytes()), lessThan(ByteSizeValue.ofGb(2).getBytes()))
|
|
|
+ );
|
|
|
+
|
|
|
+ // Assume a scale up to 2gb nodes
|
|
|
+ // We should NOT scale down below or to 1gb given the same jobs with 2gb node
|
|
|
+ long mlMemoryFor2GbNode = autoBytesForMl(AUTO_NODE_TIERS_NO_MONITORING.get(1).v1(), AUTO_NODE_TIERS_NO_MONITORING.get(1).v2());
|
|
|
+ List<NodeLoad> nodeForScaleDown = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(mlMemoryFor2GbNode)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(
|
|
|
+ ByteSizeValue.ofMb(200).getBytes() + ByteSizeValue.ofMb(10).getBytes() + ByteSizeValue.ofMb(9).getBytes()
|
|
|
+ + ByteSizeValue.ofMb(128).getBytes() + Job.PROCESS_MEMORY_OVERHEAD.getBytes() * 4
|
|
|
+ )
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ reasonBuilder = new MlScalingReason.Builder(new MlAutoscalingContext()).setPassedConfiguration(Settings.EMPTY)
|
|
|
+ .setCurrentMlCapacity(AutoscalingCapacity.builder().node(null, 2147483648L, null).total(null, 2147483648L, null).build());
|
|
|
+ MlMemoryAutoscalingCapacity result = decider.checkForScaleDown(
|
|
|
+ nodeForScaleDown,
|
|
|
+ ByteSizeValue.ofMb(200).getBytes() + Job.PROCESS_MEMORY_OVERHEAD.getBytes(),
|
|
|
+ new NativeMemoryCapacity(mlMemoryFor2GbNode, mlMemoryFor2GbNode, AUTO_NODE_TIERS_NO_MONITORING.get(1).v2())
|
|
|
+ ).orElseThrow();
|
|
|
+ assertThat(
|
|
|
+ result.tierSize().getBytes(),
|
|
|
+ allOf(greaterThan(ByteSizeValue.ofGb(1).getBytes()), lessThan(ByteSizeValue.ofGb(2).getBytes()))
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleStability() {
|
|
|
+ for (int i = 0; i < 10; i++) {
|
|
|
+ // Run this test with the Cloud node sizes we get when monitoring is not enabled and when monitoring is enabled
|
|
|
+ final long[] nodeTiers;
|
|
|
+ final List<Tuple<Long, Long>> autoNodeTiers;
|
|
|
+ if ((i % 2) == 0) {
|
|
|
+ nodeTiers = NODE_TIERS_NO_MONITORING;
|
|
|
+ autoNodeTiers = AUTO_NODE_TIERS_NO_MONITORING;
|
|
|
+ } else {
|
|
|
+ nodeTiers = NODE_TIERS_WITH_MONITORING;
|
|
|
+ autoNodeTiers = AUTO_NODE_TIERS_WITH_MONITORING;
|
|
|
+ }
|
|
|
+ for (int tier = 0; tier < autoNodeTiers.size() - 1; tier++) {
|
|
|
+ final Tuple<Long, Long> lowerTier = autoNodeTiers.get(tier);
|
|
|
+ final long lowerTierNodeSize = lowerTier.v1();
|
|
|
+ final long lowerTierJvmSize = lowerTier.v2();
|
|
|
+ final long lowerTierMemoryForMl = autoBytesForMl(lowerTierNodeSize, lowerTierJvmSize);
|
|
|
+ final Tuple<Long, Long> higherTier = autoNodeTiers.get(tier + 1);
|
|
|
+ // The jobs that currently exist, to use in the scaleUp call
|
|
|
+ NodeLoad.Builder forScaleUp = new NodeLoad.Builder("any").setMaxMemory(lowerTierMemoryForMl)
|
|
|
+ .setMaxJobs(Integer.MAX_VALUE)
|
|
|
+ .setUseMemory(true);
|
|
|
+ // The jobs + load that exists for all jobs (after scale up), used in scaleDown call
|
|
|
+ final long higherTierMemoryForMl = autoBytesForMl(higherTier.v1(), higherTier.v2());
|
|
|
+ NodeLoad.Builder forScaleDown = new NodeLoad.Builder("any").setMaxMemory(higherTierMemoryForMl)
|
|
|
+ .setMaxJobs(Integer.MAX_VALUE)
|
|
|
+ .setUseMemory(true);
|
|
|
+ long maxJobSize = 0;
|
|
|
+ // Fill with existing tier jobs
|
|
|
+ while (forScaleUp.getFreeMemory() > Job.PROCESS_MEMORY_OVERHEAD.getBytes()) {
|
|
|
+ long jobSize = randomLongBetween(Job.PROCESS_MEMORY_OVERHEAD.getBytes(), forScaleUp.getFreeMemory());
|
|
|
+ maxJobSize = Math.max(jobSize, maxJobSize);
|
|
|
+ forScaleUp.incNumAssignedAnomalyDetectorJobs().incAssignedAnomalyDetectorMemory(jobSize);
|
|
|
+ forScaleDown.incNumAssignedAnomalyDetectorJobs().incAssignedAnomalyDetectorMemory(jobSize);
|
|
|
+ }
|
|
|
+ // Create jobs for scale up
|
|
|
+ NodeLoad nodeLoadForScaleUp = forScaleUp.build();
|
|
|
+ List<String> waitingJobs = new ArrayList<>();
|
|
|
+ while (forScaleDown.getFreeMemory() > Job.PROCESS_MEMORY_OVERHEAD.getBytes()) {
|
|
|
+ long jobSize = randomLongBetween(Job.PROCESS_MEMORY_OVERHEAD.getBytes(), forScaleDown.getFreeMemory());
|
|
|
+ maxJobSize = Math.max(jobSize, maxJobSize);
|
|
|
+ forScaleDown.incNumAssignedAnomalyDetectorJobs().incAssignedAnomalyDetectorMemory(jobSize);
|
|
|
+ String waitingJob = randomAlphaOfLength(10);
|
|
|
+ when(mlMemoryTracker.getAnomalyDetectorJobMemoryRequirement(eq(waitingJob))).thenReturn(jobSize);
|
|
|
+ when(mlMemoryTracker.getJobMemoryRequirement(eq(MlTasks.JOB_TASK_NAME), eq(waitingJob))).thenReturn(jobSize);
|
|
|
+ waitingJobs.add(waitingJob);
|
|
|
+ }
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setUseAuto(true);
|
|
|
+
|
|
|
+ MlMemoryAutoscalingCapacity scaleUpResult = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ List.of(nodeLoadForScaleUp),
|
|
|
+ waitingJobs,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ new NativeMemoryCapacity(lowerTierMemoryForMl, lowerTierMemoryForMl, lowerTierJvmSize)
|
|
|
+ ).orElseThrow();
|
|
|
+
|
|
|
+ long scaledUpTierSizeRequested = scaleUpResult.tierSize().getBytes();
|
|
|
+ assertThat(scaledUpTierSizeRequested, greaterThan(lowerTierNodeSize));
|
|
|
+ assertThat(scaleUpResult.nodeSize().getBytes(), greaterThanOrEqualTo(lowerTierNodeSize));
|
|
|
+ // It's possible that the next tier is above what we consider "higherTier"
|
|
|
+ // This is just fine for this test, as long as scale_down does not drop below this tier
|
|
|
+ int nextTier = Arrays.binarySearch(nodeTiers, scaledUpTierSizeRequested);
|
|
|
+ if (nextTier < 0) {
|
|
|
+ nextTier = -nextTier - 1;
|
|
|
+ }
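+                // (Arrays.binarySearch returns (-(insertion point) - 1) when the key is not found,
+                // so the conversion above recovers the insertion point for the requested tier size.)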
|
|
|
+ // It's possible we requested a huge scale up, this is OK, we just don't have validation
|
|
|
+ // numbers that exist past a certain point.
|
|
|
+ if (nextTier >= nodeTiers.length) {
|
|
|
+ // Start the next iteration of the outermost loop
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ // Actual scaled up size will likely be bigger than what we asked for
|
|
|
+ long scaledUpSize = nodeTiers[nextTier];
|
|
|
+ assertThat(scaledUpSize, greaterThanOrEqualTo(scaledUpTierSizeRequested));
|
|
|
+ long scaledUpJvmSize = autoNodeTiers.get(nextTier).v2();
|
|
|
+ long scaledUpBytesForMl = autoBytesForMl(scaledUpSize, scaledUpJvmSize);
|
|
|
+ NodeLoad nodeLoadForScaleDown = forScaleDown.build();
|
|
|
+ // It could be that scale down doesn't occur, this is fine as we are "perfectly scaled"
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> result = decider.checkForScaleDown(
|
|
|
+ List.of(nodeLoadForScaleDown),
|
|
|
+ maxJobSize,
|
|
|
+ new NativeMemoryCapacity(scaledUpBytesForMl, scaledUpBytesForMl, scaledUpJvmSize)
|
|
|
+ );
|
|
|
+ // If scale down is present, we don't want to drop below our current tier.
|
|
|
+ // If we do, that means that for the same jobs we scaled with, we calculated something incorrectly.
|
|
|
+ if (result.isPresent()) {
|
|
|
+ long tierSizeRequired = result.get().tierSize().getBytes();
|
|
|
+ int afterScaleDownTier = Arrays.binarySearch(nodeTiers, tierSizeRequired);
|
|
|
+ if (afterScaleDownTier < 0) {
|
|
|
+ afterScaleDownTier = -afterScaleDownTier - 1;
|
|
|
+ }
|
|
|
+ assertThat(afterScaleDownTier, equalTo(nextTier));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withNoJobsWaitingNoMlNodes() {
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+
|
|
|
+ assertThat(
|
|
|
+ decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ List.of(), // node loads when there are no ML nodes
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ NativeMemoryCapacity.ZERO // current scale when there are no ML nodes
|
|
|
+ ),
|
|
|
+ equalTo(Optional.empty())
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingJobsAndAutoMemoryAndNoRoomInNodes() {
|
|
|
+ ByteSizeValue anomalyDetectorJobSize = ByteSizeValue.ofGb(randomIntBetween(2, 4));
|
|
|
+ ByteSizeValue analyticsJobSize = ByteSizeValue.ofGb(randomIntBetween(2, 4));
|
|
|
+ when(mlMemoryTracker.getAnomalyDetectorJobMemoryRequirement(any())).thenReturn(anomalyDetectorJobSize.getBytes());
|
|
|
+ when(mlMemoryTracker.getJobMemoryRequirement(eq(MlTasks.JOB_TASK_NAME), any())).thenReturn(anomalyDetectorJobSize.getBytes());
|
|
|
+ when(mlMemoryTracker.getDataFrameAnalyticsJobMemoryRequirement(any())).thenReturn(analyticsJobSize.getBytes());
|
|
|
+ when(mlMemoryTracker.getJobMemoryRequirement(eq(MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME), any())).thenReturn(
|
|
|
+ analyticsJobSize.getBytes()
|
|
|
+ );
|
|
|
+ List<String> jobTasks = List.of("waiting_job", "waiting_job_2");
|
|
|
+ List<String> analytics = List.of("analytics_waiting");
|
|
|
+ List<NodeLoad> fullyLoadedNode = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(anomalyDetectorJobSize.getBytes())
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(anomalyDetectorJobSize.getBytes(), anomalyDetectorJobSize.getBytes());
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setUseAuto(true);
|
|
|
+ { // No time in queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
|
|
|
+ result.nodeSize().getBytes(),
|
|
|
+ randomIntBetween(5, 90), // irrelevant because auto is true
|
|
|
+ true
|
|
|
+ );
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
|
|
|
+ result.tierSize().getBytes(),
|
|
|
+ randomIntBetween(5, 90), // irrelevant because auto is true
|
|
|
+ true
|
|
|
+ );
|
|
|
+ assertThat(
|
|
|
+ allowedBytesForMlNode,
|
|
|
+ greaterThanOrEqualTo(Math.max(anomalyDetectorJobSize.getBytes(), analyticsJobSize.getBytes()) + PER_NODE_OVERHEAD)
|
|
|
+ );
|
|
|
+ assertThat(
|
|
|
+ allowedBytesForMlTier,
|
|
|
+ greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() * 3 + analyticsJobSize.getBytes() + PER_NODE_OVERHEAD)
|
|
|
+ );
|
|
|
+ }
|
|
|
+ { // we allow one job in the analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
|
|
|
+ result.nodeSize().getBytes(),
|
|
|
+ randomIntBetween(5, 90), // irrelevant because auto is true
|
|
|
+ true
|
|
|
+ );
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
|
|
|
+ result.tierSize().getBytes(),
|
|
|
+ randomIntBetween(5, 90), // irrelevant because auto is true
|
|
|
+ true
|
|
|
+ );
|
|
|
+ assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() * 3 + PER_NODE_OVERHEAD));
|
|
|
+ }
|
|
|
+ { // we allow one job in the anomaly queue and analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
|
|
|
+ result.nodeSize().getBytes(),
|
|
|
+ randomIntBetween(5, 90), // irrelevant because auto is true
|
|
|
+ true
|
|
|
+ );
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
|
|
|
+ result.tierSize().getBytes(),
|
|
|
+ randomIntBetween(5, 90), // irrelevant because auto is true
|
|
|
+ true
|
|
|
+ );
|
|
|
+ assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() * 2 + PER_NODE_OVERHEAD));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingSnapshotUpgradesAndAutoMemoryAndNoRoomInNodes() {
|
|
|
+ ByteSizeValue anomalyDetectorJobSize = ByteSizeValue.ofGb(randomIntBetween(2, 8));
|
|
|
+ ByteSizeValue analyticsJobSize = ByteSizeValue.ofGb(randomIntBetween(2, 8));
|
|
|
+ when(mlMemoryTracker.getAnomalyDetectorJobMemoryRequirement(any())).thenReturn(anomalyDetectorJobSize.getBytes());
|
|
|
+ when(mlMemoryTracker.getJobMemoryRequirement(eq(MlTasks.JOB_TASK_NAME), any())).thenReturn(anomalyDetectorJobSize.getBytes());
|
|
|
+ when(mlMemoryTracker.getDataFrameAnalyticsJobMemoryRequirement(any())).thenReturn(analyticsJobSize.getBytes());
|
|
|
+ when(mlMemoryTracker.getJobMemoryRequirement(eq(MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME), any())).thenReturn(
|
|
|
+ analyticsJobSize.getBytes()
|
|
|
+ );
|
|
|
+ List<String> snapshotUpgradeTasks = List.of("waiting_upgrade", "waiting_upgrade_2");
|
|
|
+ List<NodeLoad> fullyLoadedNode = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes() + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofGb(1).getBytes());
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setUseAuto(true);
|
|
|
+ { // No time in queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ fullyLoadedNode,
|
|
|
+ List.of(),
|
|
|
+ snapshotUpgradeTasks,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(result.nodeSize().getBytes(), 30, true);
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(result.tierSize().getBytes(), 30, true);
|
|
|
+ assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() * 2 + PER_NODE_OVERHEAD));
|
|
|
+ }
|
|
|
+ { // we allow one job in the analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ List.of(),
|
|
|
+ snapshotUpgradeTasks,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(result.nodeSize().getBytes(), 30, true);
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(result.tierSize().getBytes(), 30, true);
|
|
|
+ assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() * 2 + PER_NODE_OVERHEAD));
|
|
|
+ }
|
|
|
+ { // we allow one job in the anomaly queue and analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ List.of(),
|
|
|
+ snapshotUpgradeTasks,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ NativeMemoryCapacity.ZERO
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(result.nodeSize().getBytes(), 30, true);
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(result.tierSize().getBytes(), 30, true);
|
|
|
+ assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(anomalyDetectorJobSize.getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingJobsAndRoomInNodes() {
|
|
|
+ List<String> jobTasks = List.of("waiting_job", "waiting_job_2");
|
|
|
+ List<String> analytics = List.of("analytics_waiting");
|
|
|
+ // Two small nodes in cluster, so simulate two availability zones
|
|
|
+ when(nodeAvailabilityZoneMapper.getNumMlAvailabilityZones()).thenReturn(OptionalInt.of(2));
|
|
|
+ List<NodeLoad> nodesWithRoom = List.of(
|
|
|
+ NodeLoad.builder("partially_filled")
|
|
|
+ .setMaxMemory(2 * TEST_JOB_SIZE + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .setMaxJobs(10)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(TEST_JOB_SIZE)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("not_filled").setMaxMemory(TEST_JOB_SIZE + PER_NODE_OVERHEAD).setMaxJobs(10).setUseMemory(true).build()
|
|
|
+ );
|
|
|
+ // Current scale needs to be set to total cluster allowance for ML excluding per-node overhead
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(3 * TEST_JOB_SIZE, TEST_JOB_SIZE);
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setMaxMachineMemoryPercent(25);
|
|
|
+ // No time in queue, should be able to assign all but one job given the current node load
|
|
|
+ {
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ nodesWithRoom,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertTrue(decision.isPresent());
|
|
|
+ // It's four times because the native memory percentage is 25.
|
|
|
+ assertThat(decision.get().nodeSize().getBytes(), equalTo(4 * (TEST_JOB_SIZE + PER_NODE_OVERHEAD)));
|
|
|
+ // In the scaled up cluster we're going to have 4 jobs and 2 node overheads. Then multiply by 4 again as 25% ML memory.
|
|
|
+ assertThat(decision.get().tierSize().getBytes(), equalTo(4 * (4 * TEST_JOB_SIZE + 2 * PER_NODE_OVERHEAD)));
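+            // Rough arithmetic, assuming the ~30mb native code overhead mentioned in testScalingEdgeCase:
+            // the node size works out to 4 * (200mb + 30mb) = 920mb and the tier size to
+            // 4 * (4 * 200mb + 2 * 30mb) = 3440mb. Illustrative only; the assertions use the constants directly.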
|
|
|
+ }
|
|
|
+ // We allow one job in the analytics queue, so no need to scale as both anomaly detection jobs will fit
|
|
|
+ {
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 1,
|
|
|
+ nodesWithRoom,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isPresent());
|
|
|
+ }
|
|
|
+ // We allow one job in the anomaly detection queue, so no need to scale as one anomaly detection job and the analytics job will fit
|
|
|
+ {
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 1,
|
|
|
+ 0,
|
|
|
+ nodesWithRoom,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isPresent());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingJobsAndNoRoomInNodes() {
|
|
|
+ List<String> jobTasks = List.of("waiting_job", "waiting_job_2");
|
|
|
+ List<String> analytics = List.of("analytics_waiting");
|
|
|
+ List<NodeLoad> fullyLoadedNode = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes() + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ // Current scale needs to be set to total cluster allowance for ML excluding per-node overhead
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofGb(1).getBytes());
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setMaxMachineMemoryPercent(25);
|
|
|
+ { // No time in queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ // Existing 1GB job is bigger than the waiting TEST_JOB_SIZE, and node requirement is based on the larger value
|
|
|
+ assertThat(decision.get().nodeSize().getBytes(), equalTo(4 * (ByteSizeValue.ofGb(1).getBytes() + PER_NODE_OVERHEAD)));
|
|
|
+ assertThat(
|
|
|
+ decision.get().tierSize().getBytes(),
|
|
|
+ equalTo(4 * (ByteSizeValue.ofGb(1).getBytes() + 3 * TEST_JOB_SIZE + PER_NODE_OVERHEAD))
|
|
|
+ );
|
|
|
+ }
|
|
|
+ { // we allow one job in the analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ // Existing 1GB job is bigger than the waiting TEST_JOB_SIZE, and node requirement is based on the larger value
|
|
|
+ assertThat(decision.get().nodeSize().getBytes(), equalTo(4 * (ByteSizeValue.ofGb(1).getBytes() + PER_NODE_OVERHEAD)));
|
|
|
+ assertThat(
|
|
|
+ decision.get().tierSize().getBytes(),
|
|
|
+ equalTo(4 * (ByteSizeValue.ofGb(1).getBytes() + 2 * TEST_JOB_SIZE + PER_NODE_OVERHEAD))
|
|
|
+ );
|
|
|
+ }
|
|
|
+ { // we allow one job in the anomaly queue and analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ // Existing 1GB job is bigger than the waiting TEST_JOB_SIZE, and node requirement is based on the larger value
|
|
|
+ assertThat(decision.get().nodeSize().getBytes(), equalTo(4 * (ByteSizeValue.ofGb(1).getBytes() + PER_NODE_OVERHEAD)));
|
|
|
+ assertThat(
|
|
|
+ decision.get().tierSize().getBytes(),
|
|
|
+ equalTo(4 * (ByteSizeValue.ofGb(1).getBytes() + TEST_JOB_SIZE + PER_NODE_OVERHEAD))
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingJobsAndSomeRoomInNodes() {
|
|
|
+ List<String> jobTasks = List.of("waiting_job");
|
|
|
+ List<String> analytics = List.of("analytics_waiting");
|
|
|
+ List<NodeLoad> nearlyFullyLoadedNode = List.of(
|
|
|
+ // Free space on this node is _nearly_ enough for another job but not quite
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(2 * TEST_JOB_SIZE - ByteSizeValue.ofMb(1).getBytes() + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(TEST_JOB_SIZE)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ // Current scale needs to be set to total cluster allowance for ML excluding per-node overhead
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(
|
|
|
+ 2 * TEST_JOB_SIZE - ByteSizeValue.ofMb(1).getBytes(),
|
|
|
+ 2 * TEST_JOB_SIZE - ByteSizeValue.ofMb(1).getBytes()
|
|
|
+ );
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setMaxMachineMemoryPercent(25);
|
|
|
+ { // No time in queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ nearlyFullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ // We won't ask for a smaller node than the current scale on a scale up even
|
|
|
+ // though we theoretically could tolerate smaller nodes but more of them
|
|
|
+ assertThat(
|
|
|
+ decision.get().nodeSize().getBytes(),
|
|
|
+ equalTo(4 * (2 * TEST_JOB_SIZE - ByteSizeValue.ofMb(1).getBytes() + PER_NODE_OVERHEAD))
|
|
|
+ );
|
|
|
+ // The important thing here is that the free space that was nearly enough for another job is _not_ added in again
|
|
|
+ assertThat(decision.get().tierSize().getBytes(), equalTo(4 * (3 * TEST_JOB_SIZE + PER_NODE_OVERHEAD)));
|
|
|
+ }
|
|
|
+ { // we allow one job in the analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 1,
|
|
|
+ nearlyFullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ // We won't ask for a smaller node than the current scale on a scale up even
|
|
|
+ // though we theoretically could tolerate smaller nodes but more of them
|
|
|
+ assertThat(
|
|
|
+ decision.get().nodeSize().getBytes(),
|
|
|
+ equalTo(4 * (2 * TEST_JOB_SIZE - ByteSizeValue.ofMb(1).getBytes() + PER_NODE_OVERHEAD))
|
|
|
+ );
|
|
|
+ // The important thing here is that the free space that was nearly enough for another job is _not_ added in again
|
|
|
+ // (so we are asking for a very tiny scale up here - just enough for 1MB extra ML memory)
|
|
|
+ assertThat(decision.get().tierSize().getBytes(), equalTo(4 * (2 * TEST_JOB_SIZE + PER_NODE_OVERHEAD)));
|
|
|
+ }
|
|
|
+ { // we allow one job in the anomaly queue and analytics queue
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ nearlyFullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertTrue(decision.isEmpty());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingJobs_WithFutureCapacity() {
|
|
|
+ List<String> jobTasks = List.of("waiting_job", "waiting_job_2");
|
|
|
+ List<String> analytics = List.of("analytics_waiting");
|
|
|
+ List<NodeLoad> fullyLoadedNode = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(
|
|
|
+ ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD,
|
|
|
+ ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD
|
|
|
+ );
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setMaxMachineMemoryPercent(25);
|
|
|
+ { // with null future capacity and current capacity is full
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 2,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertTrue(decision.isEmpty()); // means "don't know" in this case
|
|
|
+ }
|
|
|
+ { // current capacity is full but the existing job is expected to terminate and free up all its resources
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 2,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ currentScale,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertTrue(decision.isEmpty()); // means "OK to wait for future capacity"
|
|
|
+ }
|
|
|
+ { // with no future capacity (i.e. current jobs expected to run forever) and current capacity is full
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 2,
|
|
|
+ 1,
|
|
|
+ fullyLoadedNode,
|
|
|
+ jobTasks,
|
|
|
+ List.of(),
|
|
|
+ analytics,
|
|
|
+ List.of(),
|
|
|
+ NativeMemoryCapacity.ZERO,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ assertThat(decision.get().nodeSize().getBytes(), equalTo(ByteSizeValue.ofGb(4).getBytes()));
|
|
|
+ // For the tier we'll need enough for the current 1GB of usage plus 3 new 200MB jobs,
|
|
|
+ // so with 25% ML memory percent we need 4 * 1624MB
|
|
|
+ assertThat(decision.get().tierSize().getBytes(), equalTo(ByteSizeValue.ofMb(6496).getBytes()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingModelAndAutoMemoryAndNoRoomInNodes() {
|
|
|
+ when(mlMemoryTracker.getTrainedModelAssignmentMemoryRequirement(any())).thenReturn(ByteSizeValue.ofGb(2).getBytes());
|
|
|
+ List<NodeLoad> fullyLoadedNode = List.of(
|
|
|
+ NodeLoad.builder("any")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes() + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(ByteSizeValue.ofGb(1).getBytes(), ByteSizeValue.ofGb(1).getBytes());
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setUseAuto(true);
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ fullyLoadedNode,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of("foo"),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertFalse(decision.isEmpty());
|
|
|
+ MlMemoryAutoscalingCapacity result = decision.get();
|
|
|
+ long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(result.nodeSize().getBytes(), 30, true);
|
|
|
+ // Note: with more than 1 job involved this calculation could be a wild overestimate. We get away
|
|
|
+ // with it here because all the jobs fit on one node. This is not how the production code works.
|
|
|
+ long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(result.tierSize().getBytes(), 30, true);
|
|
|
+ assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + PER_NODE_OVERHEAD));
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleUp_withWaitingModelsAndRoomInNodes() {
|
|
|
+ // Two small nodes in cluster, so simulate two availability zones
|
|
|
+ when(nodeAvailabilityZoneMapper.getNumMlAvailabilityZones()).thenReturn(OptionalInt.of(2));
|
|
|
+ List<NodeLoad> nodesWithRoom = List.of(
|
|
|
+ NodeLoad.builder("partially_filled")
|
|
|
+ .setMaxMemory(2 * TEST_JOB_SIZE + PER_NODE_OVERHEAD)
|
|
|
+ .setUseMemory(true)
|
|
|
+ .setMaxJobs(10)
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(TEST_JOB_SIZE)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("not_filled").setMaxMemory(TEST_JOB_SIZE + PER_NODE_OVERHEAD).setMaxJobs(10).setUseMemory(true).build()
|
|
|
+ );
|
|
|
+ NativeMemoryCapacity currentScale = new NativeMemoryCapacity(3 * TEST_JOB_SIZE, TEST_JOB_SIZE);
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setMaxMachineMemoryPercent(25);
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> decision = decider.checkForScaleUp(
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ nodesWithRoom,
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of(),
|
|
|
+ List.of("foo", "bar", "baz"),
|
|
|
+ null,
|
|
|
+ currentScale
|
|
|
+ );
|
|
|
+ assertTrue(decision.isPresent());
|
|
|
+ assertThat(decision.get().nodeSize().getBytes(), equalTo(4 * (TEST_JOB_SIZE + PER_NODE_OVERHEAD)));
|
|
|
+ assertThat(decision.get().tierSize().getBytes(), equalTo(4 * (4 * TEST_JOB_SIZE + 2 * PER_NODE_OVERHEAD)));
|
|
|
+ assertFalse(
|
|
|
+ decider.checkForScaleUp(1, 0, nodesWithRoom, List.of(), List.of(), List.of(), List.of("foo", "bar"), null, currentScale)
|
|
|
+ .isPresent()
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testScaleDown() {
|
|
|
+ when(nodeAvailabilityZoneMapper.getNumMlAvailabilityZones()).thenReturn(OptionalInt.of(3));
|
|
|
+ MlMemoryAutoscalingDecider decider = buildDecider();
|
|
|
+ decider.setMaxMachineMemoryPercent(25);
|
|
|
+ MlScalingReason.Builder reasonBuilder = new MlScalingReason.Builder(new MlAutoscalingContext()).setPassedConfiguration(
|
|
|
+ Settings.EMPTY
|
|
|
+ ).setCurrentMlCapacity(AutoscalingCapacity.ZERO);
|
|
|
+ { // Current capacity allows for smaller node
|
|
|
+ List<NodeLoad> nodeLoads = List.of(
|
|
|
+ NodeLoad.builder("foo")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(5).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("bar")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(5).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("baz")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(5).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> result = decider.checkForScaleDown(
|
|
|
+ nodeLoads,
|
|
|
+ ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD,
|
|
|
+ new NativeMemoryCapacity(
|
|
|
+ ByteSizeValue.ofGb(15).getBytes() - 3 * PER_NODE_OVERHEAD,
|
|
|
+ ByteSizeValue.ofGb(5).getBytes() - PER_NODE_OVERHEAD
|
|
|
+ )
|
|
|
+ );
|
|
|
+ assertThat(result.isEmpty(), is(false));
|
|
|
+ MlMemoryAutoscalingCapacity deciderResult = result.get();
|
|
|
+ // Four times due to 25% ML memory
|
|
|
+ assertThat(deciderResult.nodeSize().getBytes(), equalTo(4 * ByteSizeValue.ofGb(1).getBytes()));
|
|
|
+ assertThat(deciderResult.tierSize().getBytes(), equalTo(ByteSizeValue.ofGb(12).getBytes()));
|
|
|
+ }
|
|
|
+ { // Current capacity allows for smaller tier
|
|
|
+ List<NodeLoad> nodeLoads = List.of(
|
|
|
+ NodeLoad.builder("foo")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("bar")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("baz")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofGb(1).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> result = decider.checkForScaleDown(
|
|
|
+ nodeLoads,
|
|
|
+ ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD,
|
|
|
+ new NativeMemoryCapacity(
|
|
|
+ ByteSizeValue.ofGb(3).getBytes() - 3 * PER_NODE_OVERHEAD,
|
|
|
+ ByteSizeValue.ofGb(1).getBytes() - PER_NODE_OVERHEAD
|
|
|
+ )
|
|
|
+ );
|
|
|
+ assertThat(result.isEmpty(), is(false));
|
|
|
+ MlMemoryAutoscalingCapacity deciderResult = result.get();
|
|
|
+ // Four times due to 25% ML memory
|
|
|
+ assertThat(deciderResult.nodeSize().getBytes(), equalTo(4 * ByteSizeValue.ofMb(100).getBytes()));
|
|
|
+ assertThat(deciderResult.tierSize().getBytes(), equalTo(ByteSizeValue.ofMb(100).getBytes() * 12));
|
|
|
+ }
|
|
|
+ { // Scale down is not really possible
|
|
|
+ List<NodeLoad> nodeLoads = List.of(
|
|
|
+ NodeLoad.builder("foo")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofMb(100).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("bar")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofMb(100).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build(),
|
|
|
+ NodeLoad.builder("baz")
|
|
|
+ .setMaxMemory(ByteSizeValue.ofMb(100).getBytes())
|
|
|
+ .incAssignedNativeCodeOverheadMemory(PER_NODE_OVERHEAD)
|
|
|
+ .incAssignedAnomalyDetectorMemory(ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD)
|
|
|
+ .incNumAssignedAnomalyDetectorJobs()
|
|
|
+ .build()
|
|
|
+ );
|
|
|
+ Optional<MlMemoryAutoscalingCapacity> result = decider.checkForScaleDown(
|
|
|
+ nodeLoads,
|
|
|
+ ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD,
|
|
|
+ new NativeMemoryCapacity(
|
|
|
+ ByteSizeValue.ofMb(300).getBytes() - 3 * PER_NODE_OVERHEAD,
|
|
|
+ ByteSizeValue.ofMb(100).getBytes() - PER_NODE_OVERHEAD
|
|
|
+ )
|
|
|
+ );
|
|
|
+ assertThat(result.isEmpty(), is(true));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testCpuModelAssignmentRequirements() {
|
|
|
+ assertTrue(
|
|
|
+ MlMemoryAutoscalingDecider.modelAssignmentsRequireMoreThanHalfCpu(
|
|
|
+ List.of(
|
|
|
+ TrainedModelAssignment.Builder.empty(
|
|
|
+ new StartTrainedModelDeploymentAction.TaskParams("model1", TEST_JOB_SIZE, 3, 2, 100, null)
|
|
|
+ ).build(),
|
|
|
+ TrainedModelAssignment.Builder.empty(
|
|
|
+ new StartTrainedModelDeploymentAction.TaskParams("model1", TEST_JOB_SIZE, 1, 1, 100, null)
|
|
|
+ ).build()
|
|
|
+ ),
|
|
|
+ withMlNodes("ml_node_1", "ml_node_2")
|
|
|
+ )
|
|
|
+ );
|
|
|
+ assertTrue(
|
|
|
+ MlMemoryAutoscalingDecider.modelAssignmentsRequireMoreThanHalfCpu(
|
|
|
+ List.of(
|
|
|
+ TrainedModelAssignment.Builder.empty(
|
|
|
+ new StartTrainedModelDeploymentAction.TaskParams("model1", TEST_JOB_SIZE, 3, 1, 100, null)
|
|
|
+ ).build(),
|
|
|
+ TrainedModelAssignment.Builder.empty(
|
|
|
+ new StartTrainedModelDeploymentAction.TaskParams("model1", TEST_JOB_SIZE, 1, 1, 100, null)
|
|
|
+ ).build()
|
|
|
+ ),
|
|
|
+ withMlNodes("ml_node_1", "ml_node_2")
|
|
|
+ )
|
|
|
+ );
|
|
|
+ assertFalse(
|
|
|
+ MlMemoryAutoscalingDecider.modelAssignmentsRequireMoreThanHalfCpu(
|
|
|
+ List.of(
|
|
|
+ TrainedModelAssignment.Builder.empty(
|
|
|
+ new StartTrainedModelDeploymentAction.TaskParams("model1", TEST_JOB_SIZE, 3, 1, 100, null)
|
|
|
+ ).build(),
|
|
|
+ TrainedModelAssignment.Builder.empty(
|
|
|
+ new StartTrainedModelDeploymentAction.TaskParams("model1", TEST_JOB_SIZE, 1, 1, 100, null)
|
|
|
+ ).build()
|
|
|
+ ),
|
|
|
+ withMlNodes("ml_node_1", "ml_node_2", "ml_node_3", "ml_node_4")
|
|
|
+ )
|
|
|
+ );
|
|
|
+ }
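+    // The three cases above appear to need 7, 4 and 4 allocated processors in total. Assuming each test
+    // node created by withMlNodes advertises TEST_ALLOCATED_PROCESSORS (2) processors, that is more than
+    // half of the 4 processors available across two nodes, but not more than half of the 8 available
+    // across four nodes. This is an observation about the test data, not part of the original code.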
|
|
|
+
|
|
|
+ public void testEnsureScaleDown() {
|
|
|
+ assertThat(
|
|
|
+ MlMemoryAutoscalingDecider.ensureScaleDown(
|
|
|
+ MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(1), ByteSizeValue.ofGb(8)).build(),
|
|
|
+ MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(2), ByteSizeValue.ofGb(4)).build()
|
|
|
+ ),
|
|
|
+ equalTo(MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(1), ByteSizeValue.ofGb(4)).build())
|
|
|
+ );
|
|
|
+
|
|
|
+ assertThat(
|
|
|
+ MlMemoryAutoscalingDecider.ensureScaleDown(
|
|
|
+ MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(3), ByteSizeValue.ofGb(8)).build(),
|
|
|
+ MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(2), ByteSizeValue.ofGb(4)).build()
|
|
|
+ ),
|
|
|
+ equalTo(MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(2), ByteSizeValue.ofGb(4)).build())
|
|
|
+ );
|
|
|
+
|
|
|
+ assertThat(
|
|
|
+ MlMemoryAutoscalingDecider.ensureScaleDown(
|
|
|
+ MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(3), ByteSizeValue.ofGb(4)).build(),
|
|
|
+ MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(2), ByteSizeValue.ofGb(3)).build()
|
|
|
+ ),
|
|
|
+ equalTo(MlMemoryAutoscalingCapacity.builder(ByteSizeValue.ofGb(2), ByteSizeValue.ofGb(3)).build())
|
|
|
+ );
|
|
|
+ }
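+    // From the three cases above, ensureScaleDown evidently caps both the node size and the tier size of
+    // the calculated capacity at the current capacity (an element-wise minimum), so a scale down request
+    // can never exceed what the cluster already has. Explanatory note only.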
|
|
|
+
|
|
|
+    public void testFutureAvailableCapacity() {
+        nodeLoadDetector = new NodeLoadDetector(mlMemoryTracker);
+        MlMemoryAutoscalingDecider decider = buildDecider();
+        decider.setUseAuto(true);
+        boolean waitingAnalytics = randomBoolean();
+        boolean waitingAnomalyJobs = waitingAnalytics == false || randomBoolean();
+        int maxWaitingAnalytics = randomIntBetween(1, 2);
+        int maxWaitingAnomaly = randomIntBetween(1, 2);
+        List<String> assignedAnomalyJobs = randomList(0, 2, () -> randomAlphaOfLength(10));
+        List<String> batchAnomalyJobs = randomList(0, 2, () -> randomAlphaOfLength(10));
+        List<String> assignedAnalyticsJobs = randomList(0, 2, () -> randomAlphaOfLength(10));
+        ClusterState clusterState = clusterState(
+            assignedAnomalyJobs,
+            batchAnomalyJobs,
+            assignedAnalyticsJobs,
+            waitingAnomalyJobs ? randomList(1, maxWaitingAnomaly, () -> randomAlphaOfLength(10)) : List.of(),
+            waitingAnalytics ? randomList(1, maxWaitingAnalytics, () -> randomAlphaOfLength(10)) : List.of()
+        );
+
+        Collection<DiscoveryNode> mlNodesInCluster = clusterState.getNodes().getNodes().values();
+        Optional<NativeMemoryCapacity> nativeMemoryCapacity = decider.calculateFutureAvailableCapacity(mlNodesInCluster, clusterState);
+        assertThat(nativeMemoryCapacity.isEmpty(), is(false));
+        assertThat(nativeMemoryCapacity.get().getNodeMlNativeMemoryRequirementExcludingOverhead(), greaterThanOrEqualTo(TEST_JOB_SIZE));
+        assertThat(
+            nativeMemoryCapacity.get().getNodeMlNativeMemoryRequirementExcludingOverhead(),
+            lessThanOrEqualTo(ML_MEMORY_FOR_TEST_NODE_SIZE)
+        );
+        assertThat(
+            nativeMemoryCapacity.get().getTierMlNativeMemoryRequirementExcludingOverhead(),
+            greaterThanOrEqualTo(TEST_JOB_SIZE * (assignedAnalyticsJobs.size() + batchAnomalyJobs.size()))
+        );
+        assertThat(
+            nativeMemoryCapacity.get().getTierMlNativeMemoryRequirementExcludingOverhead(),
+            lessThanOrEqualTo(mlNodesInCluster.size() * (ML_MEMORY_FOR_TEST_NODE_SIZE - NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes()))
+        );
+    }
+
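+    // When the number of waiting jobs does not exceed the configured queue sizes, the decider
+    // should not request a scale up and should report the current 1GB node and tier sizes.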
+    public void testScale_WithNoScaleUpButWaitingJobs() {
+        nodeLoadDetector = new NodeLoadDetector(mlMemoryTracker);
+        MlMemoryAutoscalingDecider decider = buildDecider();
+        decider.setUseAuto(true);
+        boolean waitingAnalytics = randomBoolean();
+        boolean waitingAnomalyJobs = waitingAnalytics == false || randomBoolean();
+        int maxWaitingAnalytics = randomIntBetween(1, 2);
+        int maxWaitingAnomaly = randomIntBetween(1, 2);
+        ClusterState clusterState = clusterState(
+            randomList(0, 2, () -> randomAlphaOfLength(10)),
+            randomList(0, 2, () -> randomAlphaOfLength(10)),
+            randomList(0, 2, () -> randomAlphaOfLength(10)),
+            waitingAnomalyJobs ? randomList(1, maxWaitingAnomaly, () -> randomAlphaOfLength(10)) : List.of(),
+            waitingAnalytics ? randomList(1, maxWaitingAnalytics, () -> randomAlphaOfLength(10)) : List.of()
+        );
+
+        Settings settings = Settings.builder()
+            .put(MlAutoscalingDeciderService.NUM_ANALYTICS_JOBS_IN_QUEUE.getKey(), maxWaitingAnalytics)
+            .put(MlAutoscalingDeciderService.NUM_ANOMALY_JOBS_IN_QUEUE.getKey(), maxWaitingAnomaly)
+            .build();
+        AutoscalingCapacity autoscalingCapacity = new AutoscalingCapacity(
+            new AutoscalingCapacity.AutoscalingResources(ByteSizeValue.ofGb(1), ByteSizeValue.ofGb(1), null),
+            new AutoscalingCapacity.AutoscalingResources(ByteSizeValue.ofGb(1), ByteSizeValue.ofGb(1), null)
+        );
+
+        DeciderContext deciderContext = new DeciderContext(clusterState, autoscalingCapacity);
+        MlAutoscalingContext mlAutoscalingContext = new MlAutoscalingContext(clusterState);
+
+        MlMemoryAutoscalingCapacity result = decider.scale(settings, deciderContext, mlAutoscalingContext);
+        assertThat(result.reason(), containsString("but the number in the queue is less than the configured maximum allowed"));
+        assertThat(result.nodeSize(), equalTo(ByteSizeValue.ofGb(1)));
+        assertThat(result.tierSize(), equalTo(ByteSizeValue.ofGb(1)));
+    }
+
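+    // Builds a decider from the shared test fixtures: settings, cluster service, availability
+    // zone mapper, node load detector and a fresh scale timer.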
+    private MlMemoryAutoscalingDecider buildDecider() {
+        return new MlMemoryAutoscalingDecider(
+            settings,
+            clusterService,
+            nodeAvailabilityZoneMapper,
+            nodeLoadDetector,
+            new ScaleTimer(timeSupplier)
+        );
+    }
+
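+    // Builds a cluster state with three ML nodes and persistent tasks for the given anomaly,
+    // datafeed-backed (batch) and analytics jobs; jobs passed in the "waiting" lists are left
+    // unassigned so they show up as awaiting lazy assignment.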
+    private static ClusterState clusterState(
+        List<String> ongoingAnomalyTasks,
+        List<String> batchAnomalyTasks,
+        List<String> analyticsTasks,
+        List<String> waitingAnomalyTasks,
+        List<String> waitingAnalyticsTasks
+    ) {
+        List<String> nodeNames = List.of("_node_id1", "_node_id2", "_node_id3");
+        List<DiscoveryNode> nodeList = withMlNodes(nodeNames.toArray(String[]::new));
+        DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
+        for (DiscoveryNode node : nodeList) {
+            nodesBuilder.add(node);
+        }
+        PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder();
+        for (String jobId : ongoingAnomalyTasks) {
+            OpenJobPersistentTasksExecutorTests.addJobTask(
+                jobId,
+                randomFrom(nodeNames),
+                randomFrom(JobState.CLOSING, JobState.OPENED, JobState.OPENING, null),
+                tasksBuilder
+            );
+        }
+        for (String jobId : batchAnomalyTasks) {
+            String nodeAssignment = randomFrom(nodeNames);
+            OpenJobPersistentTasksExecutorTests.addJobTask(
+                jobId,
+                nodeAssignment,
+                randomFrom(JobState.CLOSING, JobState.OPENED, JobState.OPENING, null),
+                tasksBuilder
+            );
+            StartDatafeedAction.DatafeedParams dfParams = new StartDatafeedAction.DatafeedParams(jobId + "-datafeed", 0);
+            dfParams.setEndTime(new Date().getTime());
+            tasksBuilder.addTask(
+                MlTasks.datafeedTaskId(jobId + "-datafeed"),
+                MlTasks.DATAFEED_TASK_NAME,
+                dfParams,
+                new PersistentTasksCustomMetadata.Assignment(nodeAssignment, "test")
+            );
+        }
+        for (String analyticsId : analyticsTasks) {
+            addAnalyticsTask(
+                analyticsId,
+                randomFrom(nodeNames),
+                randomFrom(
+                    DataFrameAnalyticsState.STARTED,
+                    DataFrameAnalyticsState.REINDEXING,
+                    DataFrameAnalyticsState.ANALYZING,
+                    DataFrameAnalyticsState.STOPPING,
+                    DataFrameAnalyticsState.STARTING
+                ),
+                tasksBuilder
+            );
+        }
+        for (String job : waitingAnalyticsTasks) {
+            addAnalyticsTask(job, null, null, tasksBuilder);
+        }
+        for (String job : waitingAnomalyTasks) {
+            addJobTask(job, null, null, tasksBuilder);
+        }
+        PersistentTasksCustomMetadata tasks = tasksBuilder.build();
+        ClusterState.Builder cs = ClusterState.builder(new ClusterName("_name"));
+        cs.nodes(nodesBuilder);
+        Metadata.Builder metadata = Metadata.builder();
+        metadata.putCustom(PersistentTasksCustomMetadata.TYPE, tasks);
+        cs.metadata(metadata);
+        return cs.build();
+    }
+
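+    // Creates ML-role discovery nodes that advertise the test machine memory, JVM size and
+    // allocated processor node attributes.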
+    private static List<DiscoveryNode> withMlNodes(String... nodeName) {
+        return Arrays.stream(nodeName)
+            .map(
+                n -> new DiscoveryNode(
+                    n,
+                    buildNewFakeTransportAddress(),
+                    Map.of(
+                        MachineLearning.MACHINE_MEMORY_NODE_ATTR,
+                        String.valueOf(TEST_NODE_SIZE),
+                        MachineLearning.MAX_JVM_SIZE_NODE_ATTR,
+                        String.valueOf(TEST_JVM_SIZE),
+                        MachineLearning.ALLOCATED_PROCESSORS_NODE_ATTR,
+                        String.valueOf(TEST_ALLOCATED_PROCESSORS)
+                    ),
+                    Set.of(DiscoveryNodeRole.ML_ROLE),
+                    Version.CURRENT
+                )
+            )
+            .toList();
+    }
+
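+    // Adds a data frame analytics persistent task; a null node id leaves the task awaiting
+    // lazy assignment and a null state leaves the task state unset.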
+    public static void addAnalyticsTask(
+        String jobId,
+        String nodeId,
+        DataFrameAnalyticsState jobState,
+        PersistentTasksCustomMetadata.Builder builder
+    ) {
+        builder.addTask(
+            MlTasks.dataFrameAnalyticsTaskId(jobId),
+            MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME,
+            new StartDataFrameAnalyticsAction.TaskParams(jobId, Version.CURRENT, true),
+            nodeId == null ? AWAITING_LAZY_ASSIGNMENT : new PersistentTasksCustomMetadata.Assignment(nodeId, "test assignment")
+        );
+        if (jobState != null) {
+            builder.updateTaskState(
+                MlTasks.dataFrameAnalyticsTaskId(jobId),
+                new DataFrameAnalyticsTaskState(jobState, builder.getLastAllocationId(), null)
+            );
+        }
+    }
+
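+    // Adds an anomaly detection job persistent task with the same null handling as above.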
+    public static void addJobTask(String jobId, String nodeId, JobState jobState, PersistentTasksCustomMetadata.Builder builder) {
+        builder.addTask(
+            MlTasks.jobTaskId(jobId),
+            MlTasks.JOB_TASK_NAME,
+            new OpenJobAction.JobParams(jobId),
+            nodeId == null ? AWAITING_LAZY_ASSIGNMENT : new PersistentTasksCustomMetadata.Assignment(nodeId, "test assignment")
+        );
+        if (jobState != null) {
+            builder.updateTaskState(MlTasks.jobTaskId(jobId), new JobTaskState(jobState, builder.getLastAllocationId(), null));
+        }
+    }
+
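+    // Minimal AutoscalingDeciderContext for these tests: only the cluster state and current
+    // capacity are populated; the remaining methods are stubbed to return null.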
+    static class DeciderContext implements AutoscalingDeciderContext {
+
+        private final ClusterState state;
+        private final AutoscalingCapacity capacity;
+
+        DeciderContext(ClusterState state, AutoscalingCapacity capacity) {
+            this.state = state;
+            this.capacity = capacity;
+        }
+
+        @Override
+        public ClusterState state() {
+            return state;
+        }
+
+        @Override
+        public AutoscalingCapacity currentCapacity() {
+            return capacity;
+        }
+
+        @Override
+        public Set<DiscoveryNode> nodes() {
+            return null;
+        }
+
+        @Override
+        public Set<DiscoveryNodeRole> roles() {
+            return null;
+        }
+
+        @Override
+        public ClusterInfo info() {
+            return null;
+        }
+
+        @Override
+        public SnapshotShardSizeInfo snapshotShardSizeInfo() {
+            return null;
+        }
+
+        @Override
+        public void ensureNotCancelled() {
+
+        }
+    }
+
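+    // Computes the ML native memory for a node of the given size using the auto calculation;
+    // the explicit memory percentage is passed as 0, so a non-zero result can only come from auto.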
+    private static long autoBytesForMl(Long nodeSize, Long jvmSize) {
+        return NativeMemoryCalculator.allowedBytesForMl(
+            new DiscoveryNode(
+                "node",
+                ESTestCase.buildNewFakeTransportAddress(),
+                Map.of(MAX_JVM_SIZE_NODE_ATTR, jvmSize.toString(), MACHINE_MEMORY_NODE_ATTR, nodeSize.toString()),
+                Set.of(DiscoveryNodeRole.ML_ROLE),
+                Version.CURRENT
+            ),
+            0, // passing 0 proves auto is used
+            true
+        ).orElseThrow();
+    }
+}