Browse Source

[ML] prevent accidentally asking for more resources when scaling down and improve scaling size estimations (#74691)

This commit addresses two problems:

 - Our memory estimations are not very exact. Consequently, it's possible to request too much or too little by a handful of KBs; while this is not a large issue in ESS, for custom tier sizes it may be.
 - When scaling down, it was possible that part of the scale down was actually a scale up! This was due to some floating point rounding errors and poor estimations. Even though our estimations are better now, it is best to NOT request higher resources in a scale down, no matter what.

One of the ways we improve the calculation is during JVM size calculations. Instead of having the knot point be `2gb` it has been changed to `1.2gb`. This accounts for the "window of uncertainty" for JVM sizes.
 
closes: #74709
Benjamin Trent 4 years ago
parent
commit
f9991e6149

+ 4 - 4
x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java

@@ -35,7 +35,7 @@ import java.util.stream.Collectors;
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasKey;
 
 public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
@@ -52,7 +52,7 @@ public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
             Settings.builder().put(MlAutoscalingDeciderService.DOWN_SCALE_DELAY.getKey(), TimeValue.ZERO).build());
         final PutAutoscalingPolicyAction.Request request = new PutAutoscalingPolicyAction.Request(
             "ml_test",
-            new TreeSet<>(Arrays.asList("ml")),
+            new TreeSet<>(Arrays.asList("master","data","ingest","ml")),
             deciders
         );
         assertAcked(client().execute(PutAutoscalingPolicyAction.INSTANCE, request).actionGet());
@@ -151,8 +151,8 @@ public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {
 
         AutoscalingDeciderResult autoscalingDeciderResult = autoscalingDeciderResults.results().get("ml");
         assertThat(autoscalingDeciderResult.reason().summary(), containsString(reason));
-        assertThat(autoscalingDeciderResult.requiredCapacity().total().memory().getBytes(), equalTo(tierBytes));
-        assertThat(autoscalingDeciderResult.requiredCapacity().node().memory().getBytes(), equalTo(nodeBytes));
+        assertThat(autoscalingDeciderResult.requiredCapacity().total().memory().getBytes(), greaterThanOrEqualTo(tierBytes - 1L));
+        assertThat(autoscalingDeciderResult.requiredCapacity().node().memory().getBytes(), greaterThanOrEqualTo(nodeBytes - 1L));
     }
 
     private void putJob(String jobId, long limitMb) {

+ 44 - 7
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java

@@ -71,6 +71,8 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
     private static final Duration DEFAULT_MEMORY_REFRESH_RATE = Duration.ofMinutes(15);
     private static final String MEMORY_STALE = "unable to make scaling decision as job memory requirements are stale";
     private static final long NO_SCALE_DOWN_POSSIBLE = -1L;
+    // If ensureScaleDown changes the calculation by more than this much, log the error
+    private static final long ACCEPTABLE_DIFFERENCE = ByteSizeValue.ofMb(1).getBytes();
 
     public static final String NAME = "ml";
     public static final Setting<Integer> NUM_ANOMALY_JOBS_IN_QUEUE = Setting.intSetting("num_anomaly_jobs_in_queue", 0, 0);
@@ -359,6 +361,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
 
         final List<DiscoveryNode> nodes = getNodes(clusterState);
         final NativeMemoryCapacity currentScale = currentScale(nodes);
+
         final MlScalingReason.Builder reasonBuilder = MlScalingReason.builder()
             .setWaitingAnomalyJobs(waitingAnomalyJobs)
             .setWaitingAnalyticsJobs(waitingAnalyticsJobs)
@@ -527,9 +530,18 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                     .build()));
         }
 
-        final Optional<AutoscalingDeciderResult> scaleDownDecision = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder);
+        final Optional<AutoscalingDeciderResult> maybeScaleDown = checkForScaleDown(nodeLoads, largestJob, currentScale, reasonBuilder)
+            // Due to weird rounding errors, it may be that a scale down result COULD cause a scale up
+            // Ensuring the scaleDown here forces the scale down result to always be lower than the current capacity.
+            // This is safe as we know that ALL jobs are assigned at the current capacity
+            .map(result -> new AutoscalingDeciderResult(
+                ensureScaleDown(result.requiredCapacity(), context.currentCapacity()), result.reason()
+            ));
+
+        if (maybeScaleDown.isPresent()) {
+            final AutoscalingDeciderResult scaleDownDecisionResult = maybeScaleDown.get();
 
-        if (scaleDownDecision.isPresent()) {
+            context.currentCapacity();
             // Given maxOpenJobs, could we scale down to just one node?
             // We have no way of saying "we need X nodes"
             if (nodeLoads.size() > 1) {
@@ -546,14 +558,14 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                         MAX_OPEN_JOBS_PER_NODE.getKey());
                     logger.info(() -> new ParameterizedMessage("{} Calculated potential scaled down capacity [{}] ",
                         msg,
-                        scaleDownDecision.get().requiredCapacity()));
+                        scaleDownDecisionResult.requiredCapacity()));
                     return new AutoscalingDeciderResult(context.currentCapacity(), reasonBuilder.setSimpleReason(msg).build());
                 }
             }
 
             long msLeftToScale = msLeftToDownScale(configuration);
             if (msLeftToScale <= 0) {
-                return scaleDownDecision.get();
+                return scaleDownDecisionResult;
             }
             TimeValue downScaleDelay = DOWN_SCALE_DELAY.get(configuration);
             logger.debug(() -> new ParameterizedMessage(
@@ -561,7 +573,7 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                     " The last time scale down was detected [{}]. Calculated scaled down capacity [{}] ",
                 downScaleDelay.getStringRep(),
                 XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(scaleDownDetected),
-                scaleDownDecision.get().requiredCapacity()));
+                scaleDownDecisionResult.requiredCapacity()));
             return new AutoscalingDeciderResult(
                 context.currentCapacity(),
                 reasonBuilder
@@ -586,6 +598,28 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
                     .build()));
     }
 
+    static AutoscalingCapacity ensureScaleDown(AutoscalingCapacity scaleDownResult, AutoscalingCapacity currentCapacity) {
+        AutoscalingCapacity newCapacity = new AutoscalingCapacity(
+            new AutoscalingCapacity.AutoscalingResources(
+                currentCapacity.total().storage(),
+                ByteSizeValue.ofBytes(Math.min(scaleDownResult.total().memory().getBytes(), currentCapacity.total().memory().getBytes()))
+            ),
+            new AutoscalingCapacity.AutoscalingResources(
+                currentCapacity.node().storage(),
+                ByteSizeValue.ofBytes(Math.min(scaleDownResult.node().memory().getBytes(), currentCapacity.node().memory().getBytes()))
+            )
+        );
+        if (scaleDownResult.node().memory().getBytes() - newCapacity.node().memory().getBytes() > ACCEPTABLE_DIFFERENCE
+            || scaleDownResult.total().memory().getBytes() - newCapacity.total().memory().getBytes() > ACCEPTABLE_DIFFERENCE) {
+            logger.warn(
+                "scale down accidentally requested a scale up, auto-corrected; initial scaling [{}], corrected [{}]",
+                scaleDownResult,
+                newCapacity
+            );
+        }
+        return newCapacity;
+    }
+
     AutoscalingDeciderResult noScaleResultOrRefresh(MlScalingReason.Builder reasonBuilder,
                                                     boolean memoryTrackingStale,
                                                     AutoscalingDeciderResult potentialResult) {
@@ -842,8 +876,11 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
         // Or our largest job could be on a smaller node (meaning the same size tier but smaller nodes are possible).
         if (currentlyNecessaryTier < currentCapacity.getTier() || currentlyNecessaryNode < currentCapacity.getNode()) {
             NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity(
-                currentlyNecessaryTier,
-                currentlyNecessaryNode,
+                // Since we are in the `scaleDown` branch, we know jobs are running and we could be smaller
+                // If we have some weird rounding errors, it may be that the `currentlyNecessary` values are larger than
+                // current capacity. We never want to accidentally say "scale up" via a scale down.
+                Math.min(currentlyNecessaryTier, currentCapacity.getTier()),
+                Math.min(currentlyNecessaryNode, currentCapacity.getNode()),
                 // If our newly suggested native capacity is the same, we can use the previously stored jvm size
                 currentlyNecessaryNode == currentCapacity.getNode() ? currentCapacity.getJvmSize() : null);
             AutoscalingCapacity requiredCapacity = nativeMemoryCapacity.autoscalingCapacity(maxMachineMemoryPercent, useAuto);

+ 11 - 4
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacity.java

@@ -12,6 +12,9 @@ import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCapacity;
 import org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator;
 
 import java.util.Objects;
+import java.util.Optional;
+
+import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.dynamicallyCalculateJvmSizeFromNativeMemorySize;
 
 // Used for storing native memory capacity and then transforming it into an autoscaling capacity
 // which takes into account the whole node size
@@ -49,7 +52,11 @@ public class NativeMemoryCapacity  {
         return this;
     }
 
-    AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) {
+    public AutoscalingCapacity autoscalingCapacity(int maxMemoryPercent, boolean useAuto) {
+        // We calculate the JVM size here first to ensure it stays the same given the rest of the calculations
+        final Long jvmSize = useAuto ?
+            Optional.ofNullable(this.jvmSize).orElse(dynamicallyCalculateJvmSizeFromNativeMemorySize(node)) :
+            null;
         // We first need to calculate the actual node size given the current native memory size.
         // This way we can accurately determine the required node size AND what the overall memory percentage will be
         long actualNodeSize = NativeMemoryCalculator.calculateApproxNecessaryNodeSize(node, jvmSize, maxMemoryPercent, useAuto);
@@ -57,14 +64,14 @@ public class NativeMemoryCapacity  {
         // This simplifies calculating the tier as it means that each node in the tier
         // will have the same dynamic memory calculation. And thus the tier is simply the sum of the memory necessary
         // times that scaling factor.
-        int memoryPercentForMl = (int)Math.floor(NativeMemoryCalculator.modelMemoryPercent(
+        double memoryPercentForMl = NativeMemoryCalculator.modelMemoryPercent(
             actualNodeSize,
             jvmSize,
             maxMemoryPercent,
             useAuto
-        ));
+        );
         double inverseScale = memoryPercentForMl <= 0 ? 0 : 100.0 / memoryPercentForMl;
-        long actualTier = (long)Math.ceil(tier * inverseScale);
+        long actualTier = Math.round(tier * inverseScale);
         return new AutoscalingCapacity(
             // Tier should always be AT LEAST the largest node size.
             // This Math.max catches any strange rounding errors or weird input.

+ 22 - 29
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculator.java

@@ -23,6 +23,7 @@ import static org.elasticsearch.xpack.ml.MachineLearning.USE_AUTO_MACHINE_MEMORY
 
 public final class NativeMemoryCalculator {
 
+    private static final long STATIC_JVM_UPPER_THRESHOLD = ByteSizeValue.ofGb(2).getBytes();
     static final long MINIMUM_AUTOMATIC_NODE_SIZE = ByteSizeValue.ofGb(1).getBytes();
     private static final long OS_OVERHEAD = ByteSizeValue.ofMb(200L).getBytes();
 
@@ -80,15 +81,11 @@ public final class NativeMemoryCalculator {
         if (useAuto) {
             // TODO utilize official ergonomic JVM size calculations when available.
             jvmSize = jvmSize == null ? dynamicallyCalculateJvmSizeFromNativeMemorySize(nativeMachineMemory) : jvmSize;
-            // We use a Math.floor here to ensure we have AT LEAST enough memory given rounding.
-            int modelMemoryPercent = (int)Math.floor(modelMemoryPercent(
-                nativeMachineMemory + jvmSize + OS_OVERHEAD,
-                jvmSize,
-                maxMemoryPercent,
-                true));
-            // We calculate the inverse percentage of `nativeMachineMemory + OS_OVERHEAD` as `OS_OVERHEAD` is always present
-            // on the native memory side and we need to account for it when we invert the model memory percentage
-            return Math.max((long)Math.ceil((100.0/modelMemoryPercent) * (nativeMachineMemory + OS_OVERHEAD)), MINIMUM_AUTOMATIC_NODE_SIZE);
+            // We haven't reached our 90% threshold, so, simply summing up the values is adequate
+            if ((jvmSize + OS_OVERHEAD)/(double)nativeMachineMemory > 0.1) {
+                return Math.max(nativeMachineMemory + jvmSize + OS_OVERHEAD, MINIMUM_AUTOMATIC_NODE_SIZE);
+            }
+            return Math.round((nativeMachineMemory/0.9));
         }
         return (long) ((100.0/maxMemoryPercent) * nativeMachineMemory);
     }
@@ -118,18 +115,11 @@ public final class NativeMemoryCalculator {
         return maxMemoryPercent;
     }
 
-    public static int modelMemoryPercent(long machineMemory, int maxMemoryPercent, boolean useAuto) {
-        return (int)Math.ceil(modelMemoryPercent(machineMemory,
-            null,
-            maxMemoryPercent,
-            useAuto));
-    }
-
-    private static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxMemoryPercent, boolean useAuto) {
+    static long allowedBytesForMl(long machineMemory, Long jvmSize, int maxMemoryPercent, boolean useAuto) {
         if (useAuto && jvmSize != null) {
             // It is conceivable that there is a machine smaller than 200MB.
             // If the administrator wants to use the auto configuration, the node should be larger.
-            if (machineMemory - jvmSize < OS_OVERHEAD || machineMemory == 0) {
+            if (machineMemory - jvmSize <= OS_OVERHEAD || machineMemory == 0) {
                 return machineMemory / 100;
             }
             // This calculation is dynamic and designed to maximally take advantage of the underlying machine for machine learning
@@ -139,8 +129,8 @@ public final class NativeMemoryCalculator {
             // 2GB node -> 66%
             // 16GB node -> 87%
             // 64GB node -> 90%
-            long memoryPercent = Math.min(90, (int)Math.ceil(((machineMemory - jvmSize - OS_OVERHEAD) / (double)machineMemory) * 100.0D));
-            return (long)(machineMemory * (memoryPercent / 100.0));
+            double memoryProportion = Math.min(0.90, (machineMemory - jvmSize - OS_OVERHEAD) / (double)machineMemory);
+            return Math.round(machineMemory * memoryProportion);
         }
 
         return (long)(machineMemory * (maxMemoryPercent / 100.0));
@@ -154,17 +144,20 @@ public final class NativeMemoryCalculator {
     }
 
     // TODO replace with official ergonomic calculation
-    private static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) {
-        if (nodeSize < ByteSizeValue.ofGb(2).getBytes()) {
-            return (long) (nodeSize * 0.40);
+    public static long dynamicallyCalculateJvmSizeFromNodeSize(long nodeSize) {
+        // While the original idea here was to predicate on 2Gb, it has been found that the knot points of
+        // 2GB and 8GB cause weird issues where the JVM size will "jump the gap" from one to the other when
+        // considering true tier sizes in elastic cloud.
+        if (nodeSize < ByteSizeValue.ofMb(1280).getBytes()) {
+            return (long)(nodeSize * 0.40);
         }
         if (nodeSize < ByteSizeValue.ofGb(8).getBytes()) {
-            return (long) (nodeSize * 0.25);
+            return (long)(nodeSize * 0.25);
         }
-        return ByteSizeValue.ofGb(2).getBytes();
+        return STATIC_JVM_UPPER_THRESHOLD;
     }
 
-    private static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeMachineMemory) {
+    public static long dynamicallyCalculateJvmSizeFromNativeMemorySize(long nativeMachineMemory) {
         // See dynamicallyCalculateJvm the following JVM calculations are arithmetic inverses of JVM calculation
         //
         // Example: For < 2GB node, the JVM is 0.4 * total_node_size. This means, the rest is 0.6 the node size.
@@ -172,12 +165,12 @@ public final class NativeMemoryCalculator {
         // Consequently jvmSize = (nativeAndOverHead / 0.6)*0.4 = nativeAndOverHead * 2/3
         long nativeAndOverhead = nativeMachineMemory + OS_OVERHEAD;
         if (nativeAndOverhead < (ByteSizeValue.ofGb(2).getBytes() * 0.60)) {
-            return (long) Math.ceil(nativeAndOverhead * (2.0 / 3.0));
+            return Math.round((nativeAndOverhead / 0.6) * 0.4);
         }
         if (nativeAndOverhead < (ByteSizeValue.ofGb(8).getBytes() * 0.75)) {
-            return (long) Math.ceil(nativeAndOverhead / 3.0);
+            return Math.round((nativeAndOverhead / 0.75) * 0.25);
         }
-        return ByteSizeValue.ofGb(2).getBytes();
+        return STATIC_JVM_UPPER_THRESHOLD;
     }
 
 }

+ 90 - 7
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java

@@ -152,8 +152,19 @@ public class MlAutoscalingDeciderServiceTests extends ESTestCase {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertFalse(decision.isEmpty());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(9382475687L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() * 3 + OVERHEAD));
         }
         { // we allow one job in the analytics queue
             Optional<AutoscalingDeciderResult> decision = service.checkForScaleUp(0, 1,
@@ -164,8 +175,19 @@ public class MlAutoscalingDeciderServiceTests extends ESTestCase {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertFalse(decision.isEmpty());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(6270180545L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() * 2 + OVERHEAD));
         }
         { // we allow one job in the anomaly queue and analytics queue
             Optional<AutoscalingDeciderResult> decision = service.checkForScaleUp(1, 1,
@@ -176,8 +198,19 @@ public class MlAutoscalingDeciderServiceTests extends ESTestCase {
                 NativeMemoryCapacity.ZERO,
                 reasonBuilder);
             assertFalse(decision.isEmpty());
-            assertThat(decision.get().requiredCapacity().node().memory().getBytes(), equalTo(3512729601L));
-            assertThat(decision.get().requiredCapacity().total().memory().getBytes(), equalTo(3512729601L));
+            AutoscalingDeciderResult result = decision.get();
+            long allowedBytesForMlNode = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().node().memory().getBytes(),
+                30,
+                true
+            );
+            long allowedBytesForMlTier = NativeMemoryCalculator.allowedBytesForMl(
+                result.requiredCapacity().total().memory().getBytes(),
+                30,
+                true
+            );
+            assertThat(allowedBytesForMlNode, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
+            assertThat(allowedBytesForMlTier, greaterThanOrEqualTo(ByteSizeValue.ofGb(2).getBytes() + OVERHEAD));
         }
     }
 
@@ -361,7 +394,7 @@ public class MlAutoscalingDeciderServiceTests extends ESTestCase {
         {// Current capacity allows for smaller tier
             Optional<AutoscalingDeciderResult> result = service.checkForScaleDown(nodeLoads,
                 ByteSizeValue.ofMb(100).getBytes(),
-                new NativeMemoryCapacity(ByteSizeValue.ofGb(4).getBytes(), ByteSizeValue.ofMb(100).getBytes()),
+                new NativeMemoryCapacity(ByteSizeValue.ofGb(4).getBytes(), ByteSizeValue.ofGb(1).getBytes()),
                 reasonBuilder);
             assertThat(result.isEmpty(), is(false));
             AutoscalingDeciderResult autoscalingDeciderResult = result.get();
@@ -379,6 +412,56 @@ public class MlAutoscalingDeciderServiceTests extends ESTestCase {
         }
     }
 
+    public void testEnsureScaleDown() {
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(8)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(1))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(1))
+            ))
+        );
+
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(8)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+            ))
+        );
+
+        assertThat(
+            MlAutoscalingDeciderService.ensureScaleDown(
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3))
+                ),
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3)),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+                )
+            ), equalTo(new AutoscalingCapacity(
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(3)),
+                new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(2))
+            ))
+        );
+    }
+
     public void testFutureAvailableCapacity() {
         nodeLoadDetector = new NodeLoadDetector(mlMemoryTracker);
         MlAutoscalingDeciderService service = buildService();

+ 4 - 4
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/NativeMemoryCapacityTests.java

@@ -59,8 +59,8 @@ public class NativeMemoryCapacityTests extends ESTestCase {
         }
         { // auto is true
             AutoscalingCapacity autoscalingCapacity = capacity.autoscalingCapacity(25, true);
-            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(1604321280L));
-            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(5174659393L));
+            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(1335885824L));
+            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(5343543296L));
         }
         { // auto is true with unknown jvm size
             capacity = new NativeMemoryCapacity(
@@ -68,8 +68,8 @@ public class NativeMemoryCapacityTests extends ESTestCase {
                 ByteSizeValue.ofGb(1).getBytes()
             );
             AutoscalingCapacity autoscalingCapacity = capacity.autoscalingCapacity(25, true);
-            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(2566914048L));
-            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(6507526207L));
+            assertThat(autoscalingCapacity.node().memory().getBytes(), equalTo(2139095040L));
+            assertThat(autoscalingCapacity.total().memory().getBytes(), equalTo(8556380160L));
         }
     }
 

+ 63 - 23
x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NativeMemoryCalculatorTests.java

@@ -15,9 +15,14 @@ import org.elasticsearch.common.settings.ClusterSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.set.Sets;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCapacity;
+import org.elasticsearch.xpack.ml.autoscaling.NativeMemoryCapacity;
 
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.OptionalLong;
 import java.util.function.BiConsumer;
@@ -27,6 +32,7 @@ import static org.elasticsearch.xpack.ml.MachineLearning.MAX_JVM_SIZE_NODE_ATTR;
 import static org.elasticsearch.xpack.ml.MachineLearning.MAX_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.MINIMUM_AUTOMATIC_NODE_SIZE;
+import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.dynamicallyCalculateJvmSizeFromNodeSize;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 
@@ -49,6 +55,54 @@ public class NativeMemoryCalculatorTests extends ESTestCase{
         }
     }
 
+    public void testConsistencyInAutoCalculation() {
+        for (Tuple<Long, Long> nodeAndJvmSize : Arrays.asList(
+            Tuple.tuple(1073741824L, 432013312L), // 1GB and true JVM size
+            Tuple.tuple(2147483648L, 536870912L), // 2GB ...
+            Tuple.tuple(4294967296L, 1073741824L), // 4GB ...
+            Tuple.tuple(8589934592L, 2147483648L), // 8GB ...
+            Tuple.tuple(17179869184L, 2147483648L), // 16GB ...
+            Tuple.tuple(34359738368L, 2147483648L), // 32GB ...
+            Tuple.tuple(68719476736L, 2147483648L), // 64GB ...
+            Tuple.tuple(16106127360L, 2147483648L), // 15GB ...
+            Tuple.tuple(32212254720L, 2147483648L), // 30GB ...
+            Tuple.tuple(64424509440L, 2147483648L) // 60GB ...
+        )) {
+            final long trueJvmSize = nodeAndJvmSize.v2();
+            final long trueNodeSize = nodeAndJvmSize.v1();
+            List<Long> nodeSizes = Arrays.asList(
+                trueNodeSize + ByteSizeValue.ofMb(10).getBytes(),
+                trueNodeSize - ByteSizeValue.ofMb(10).getBytes(),
+                trueNodeSize
+            );
+            for (long nodeSize : nodeSizes) {
+                // Simulate having a true size that already exists from the node vs. us dynamically calculating it
+                long jvmSize = randomBoolean() ? dynamicallyCalculateJvmSizeFromNodeSize(nodeSize) : trueJvmSize;
+                DiscoveryNode node = newNode(jvmSize, nodeSize);
+                Settings settings = newSettings(30, true);
+                ClusterSettings clusterSettings = newClusterSettings(30, true);
+
+                long bytesForML = randomBoolean() ?
+                    NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong() :
+                    NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong();
+
+                NativeMemoryCapacity nativeMemoryCapacity = new NativeMemoryCapacity(
+                    bytesForML,
+                    bytesForML,
+                    jvmSize
+                );
+
+                AutoscalingCapacity capacity = nativeMemoryCapacity.autoscalingCapacity(30, true);
+                // We don't allow node sizes below 1GB, so we will always be at least that large
+                // Also, allow 1 byte off for weird rounding issues
+                assertThat(capacity.node().memory().getBytes(), greaterThanOrEqualTo(
+                    Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L));
+                assertThat(capacity.total().memory().getBytes(), greaterThanOrEqualTo(
+                    Math.max(nodeSize, ByteSizeValue.ofGb(1).getBytes()) - 1L));
+            }
+        }
+    }
+
     public void testAllowedBytesForMlWhenAutoIsTrue() {
         for (int i = 0; i < NUM_TEST_RUNS; i++) {
             long nodeSize = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(64).getBytes());
@@ -58,10 +112,10 @@ public class NativeMemoryCalculatorTests extends ESTestCase{
             Settings settings = newSettings(percent, true);
             ClusterSettings clusterSettings = newClusterSettings(percent, true);
 
-            int truePercent = Math.min(
+            double truePercent = Math.min(
                 90,
-                (int)Math.ceil(((nodeSize - jvmSize - ByteSizeValue.ofMb(200).getBytes()) / (double)nodeSize) * 100.0D));
-            long expected = (long)(nodeSize * (truePercent / 100.0));
+                ((nodeSize - jvmSize - ByteSizeValue.ofMb(200).getBytes()) / (double)nodeSize) * 100.0D);
+            long expected = Math.round(nodeSize * (truePercent / 100.0));
 
             assertThat(NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong(), equalTo(expected));
             assertThat(NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong(), equalTo(expected));
@@ -69,20 +123,6 @@ public class NativeMemoryCalculatorTests extends ESTestCase{
         }
     }
 
-    public void testAllowedBytesForMlWhenAutoIsTrueButJVMSizeIsUnknown() {
-        long nodeSize = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(64).getBytes());
-        int percent = randomIntBetween(5, 200);
-        DiscoveryNode node = newNode(null, nodeSize);
-        Settings settings = newSettings(percent, true);
-        ClusterSettings clusterSettings = newClusterSettings(percent, true);
-
-        long expected = (long)(nodeSize * (percent / 100.0));
-
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, settings).getAsLong(), equalTo(expected));
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings).getAsLong(), equalTo(expected));
-        assertThat(NativeMemoryCalculator.allowedBytesForMl(node, percent, false).getAsLong(), equalTo(expected));
-    }
-
     public void testAllowedBytesForMlWhenBothJVMAndNodeSizeAreUnknown() {
         int percent = randomIntBetween(5, 200);
         DiscoveryNode node = newNode(null, null);
@@ -110,7 +150,6 @@ public class NativeMemoryCalculatorTests extends ESTestCase{
     }
 
     public void testActualNodeSizeCalculationConsistency() {
-
         final TriConsumer<Long, Integer, Long> consistentAutoAssertions = (nativeMemory, memoryPercentage, delta) -> {
             long autoNodeSize = NativeMemoryCalculator.calculateApproxNecessaryNodeSize(nativeMemory, null, memoryPercentage, true);
             // It should always be greater than the minimum supported node size
@@ -119,12 +158,13 @@ public class NativeMemoryCalculatorTests extends ESTestCase{
                 greaterThanOrEqualTo(MINIMUM_AUTOMATIC_NODE_SIZE));
             // Our approximate real node size should always return a usable native memory size that is at least the original native memory
             // size. Rounding errors may cause it to be non-exact.
+            long allowedBytesForMl = NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true);
             assertThat("native memory ["
-                    + NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true)
+                    + allowedBytesForMl
                     + "] smaller than original native memory ["
                     + nativeMemory
                     + "]",
-                NativeMemoryCalculator.allowedBytesForMl(autoNodeSize, memoryPercentage, true),
+                allowedBytesForMl,
                 greaterThanOrEqualTo(nativeMemory - delta));
         };
 
@@ -155,18 +195,18 @@ public class NativeMemoryCalculatorTests extends ESTestCase{
             int memoryPercentage = randomIntBetween(5, 200);
             { // tiny memory
                 long nodeMemory = randomLongBetween(ByteSizeValue.ofKb(100).getBytes(), ByteSizeValue.ofMb(500).getBytes());
-                consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 0L);
+                consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
                 consistentManualAssertions.accept(nodeMemory, memoryPercentage);
             }
             { // normal-ish memory
                 long nodeMemory = randomLongBetween(ByteSizeValue.ofMb(500).getBytes(), ByteSizeValue.ofGb(4).getBytes());
                 // periodically, the calculated assertions end up being about 6% off, allowing this small delta to account for flakiness
-                consistentAutoAssertions.apply(nodeMemory, memoryPercentage, (long) (0.06 * nodeMemory));
+                consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
                 consistentManualAssertions.accept(nodeMemory, memoryPercentage);
             }
             { // huge memory
                 long nodeMemory = randomLongBetween(ByteSizeValue.ofGb(30).getBytes(), ByteSizeValue.ofGb(60).getBytes());
-                consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 0L);
+                consistentAutoAssertions.apply(nodeMemory, memoryPercentage, 1L);
                 consistentManualAssertions.accept(nodeMemory, memoryPercentage);
             }
         }