|
@@ -36,8 +36,8 @@ import org.elasticsearch.xpack.ml.notifications.InferenceAuditor;
|
|
|
import org.junit.After;
|
|
|
import org.junit.Before;
|
|
|
|
|
|
-import java.io.IOException;
|
|
|
import java.time.Instant;
|
|
|
+import java.time.temporal.ChronoUnit;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
import java.util.Set;
|
|
@@ -114,7 +114,12 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
return clusterState;
|
|
|
}
|
|
|
|
|
|
- private GetDeploymentStatsAction.Response getDeploymentStatsResponse(int numAllocations, int inferenceCount, double latency) {
|
|
|
+ private GetDeploymentStatsAction.Response getDeploymentStatsResponse(
|
|
|
+ int numAllocations,
|
|
|
+ int inferenceCount,
|
|
|
+ double latency,
|
|
|
+ boolean recentStartup
|
|
|
+ ) {
|
|
|
return new GetDeploymentStatsAction.Response(
|
|
|
List.of(),
|
|
|
List.of(),
|
|
@@ -127,7 +132,7 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
new AdaptiveAllocationsSettings(true, null, null),
|
|
|
1024,
|
|
|
ByteSizeValue.ZERO,
|
|
|
- Instant.now(),
|
|
|
+ Instant.now().minus(1, ChronoUnit.DAYS),
|
|
|
List.of(
|
|
|
AssignmentStats.NodeStats.forStartedState(
|
|
|
randomBoolean() ? DiscoveryNodeUtils.create("node_1") : null,
|
|
@@ -140,7 +145,7 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
0,
|
|
|
0,
|
|
|
Instant.now(),
|
|
|
- Instant.now(),
|
|
|
+ recentStartup ? Instant.now() : Instant.now().minus(1, ChronoUnit.HOURS),
|
|
|
1,
|
|
|
numAllocations,
|
|
|
inferenceCount,
|
|
@@ -156,7 +161,7 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- public void test() throws IOException {
|
|
|
+ public void test_scaleUp() {
|
|
|
// Initialize the cluster with a deployment with 1 allocation.
|
|
|
ClusterState clusterState = getClusterState(1);
|
|
|
when(clusterService.state()).thenReturn(clusterState);
|
|
@@ -168,7 +173,9 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
inferenceAuditor,
|
|
|
meterRegistry,
|
|
|
true,
|
|
|
- 1
|
|
|
+ 1,
|
|
|
+ 60,
|
|
|
+ 60_000
|
|
|
);
|
|
|
service.start();
|
|
|
|
|
@@ -182,7 +189,7 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
doAnswer(invocationOnMock -> {
|
|
|
@SuppressWarnings("unchecked")
|
|
|
var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
- listener.onResponse(getDeploymentStatsResponse(1, 1, 11.0));
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 1, 11.0, false));
|
|
|
return Void.TYPE;
|
|
|
}).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
|
|
@@ -198,7 +205,7 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
doAnswer(invocationOnMock -> {
|
|
|
@SuppressWarnings("unchecked")
|
|
|
var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
- listener.onResponse(getDeploymentStatsResponse(1, 150, 10.0));
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 150, 10.0, false));
|
|
|
return Void.TYPE;
|
|
|
}).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
doAnswer(invocationOnMock -> {
|
|
@@ -229,7 +236,137 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
doAnswer(invocationOnMock -> {
|
|
|
@SuppressWarnings("unchecked")
|
|
|
var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
- listener.onResponse(getDeploymentStatsResponse(2, 0, 9.0));
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(2, 0, 9.0, false));
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<CreateTrainedModelAssignmentAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(null);
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), any(), any());
|
|
|
+
|
|
|
+ safeSleep(1000);
|
|
|
+
|
|
|
+ verify(client, times(1)).threadPool();
|
|
|
+ verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
+
|
|
|
+ service.stop();
|
|
|
+ }
|
|
|
+
|
|
|
+ public void test_scaleDownToZero_whenNoRequests() {
|
|
|
+ // Initialize the cluster with a deployment with 1 allocation.
|
|
|
+ ClusterState clusterState = getClusterState(1);
|
|
|
+ when(clusterService.state()).thenReturn(clusterState);
|
|
|
+
|
|
|
+ AdaptiveAllocationsScalerService service = new AdaptiveAllocationsScalerService(
|
|
|
+ threadPool,
|
|
|
+ clusterService,
|
|
|
+ client,
|
|
|
+ inferenceAuditor,
|
|
|
+ meterRegistry,
|
|
|
+ true,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 2_000
|
|
|
+ );
|
|
|
+ service.start();
|
|
|
+
|
|
|
+ verify(clusterService).state();
|
|
|
+ verify(clusterService).addListener(same(service));
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
+ reset(client, clusterService);
|
|
|
+
|
|
|
+ // First cycle: 1 inference request, so no need for scaling.
|
|
|
+ when(client.threadPool()).thenReturn(threadPool);
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 1, 11.0, false));
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
+
|
|
|
+ safeSleep(1200);
|
|
|
+
|
|
|
+ verify(client, times(1)).threadPool();
|
|
|
+ verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
+ reset(client, clusterService);
|
|
|
+
|
|
|
+ // Second cycle: 0 inference requests for 1 second, so scale down to 0 allocations.
|
|
|
+ when(client.threadPool()).thenReturn(threadPool);
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 0, 10.0, false));
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<CreateTrainedModelAssignmentAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(null);
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), any(), any());
|
|
|
+
|
|
|
+ safeSleep(1000);
|
|
|
+
|
|
|
+ verify(client, times(2)).threadPool();
|
|
|
+ verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
|
|
|
+ var updateRequest = new UpdateTrainedModelDeploymentAction.Request("test-deployment");
|
|
|
+ updateRequest.setNumberOfAllocations(0);
|
|
|
+ updateRequest.setIsInternal(true);
|
|
|
+ verify(client, times(1)).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), eq(updateRequest), any());
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
+
|
|
|
+ service.stop();
|
|
|
+ }
|
|
|
+
|
|
|
+ public void test_noScaleDownToZero_whenRecentlyScaledUpByOtherNode() {
|
|
|
+ // Initialize the cluster with a deployment with 1 allocation.
|
|
|
+ ClusterState clusterState = getClusterState(1);
|
|
|
+ when(clusterService.state()).thenReturn(clusterState);
|
|
|
+
|
|
|
+ AdaptiveAllocationsScalerService service = new AdaptiveAllocationsScalerService(
|
|
|
+ threadPool,
|
|
|
+ clusterService,
|
|
|
+ client,
|
|
|
+ inferenceAuditor,
|
|
|
+ meterRegistry,
|
|
|
+ true,
|
|
|
+ 1,
|
|
|
+ 1,
|
|
|
+ 2_000
|
|
|
+ );
|
|
|
+ service.start();
|
|
|
+
|
|
|
+ verify(clusterService).state();
|
|
|
+ verify(clusterService).addListener(same(service));
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
+ reset(client, clusterService);
|
|
|
+
|
|
|
+ // First cycle: 1 inference request, so no need for scaling.
|
|
|
+ when(client.threadPool()).thenReturn(threadPool);
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 1, 11.0, true));
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
+
|
|
|
+ safeSleep(1200);
|
|
|
+
|
|
|
+ verify(client, times(1)).threadPool();
|
|
|
+ verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
+ reset(client, clusterService);
|
|
|
+
|
|
|
+ // Second cycle: 0 inference requests for 1 second, but a recent scale up by another node.
|
|
|
+ when(client.threadPool()).thenReturn(threadPool);
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 0, 10.0, true));
|
|
|
return Void.TYPE;
|
|
|
}).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
doAnswer(invocationOnMock -> {
|
|
@@ -244,6 +381,32 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
verify(client, times(1)).threadPool();
|
|
|
verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
|
|
|
verifyNoMoreInteractions(client, clusterService);
|
|
|
+ reset(client, clusterService);
|
|
|
+
|
|
|
+ // Third cycle: 0 inference requests for 1 second and no recent scale up, so scale down to 0 allocations.
|
|
|
+ when(client.threadPool()).thenReturn(threadPool);
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<GetDeploymentStatsAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(getDeploymentStatsResponse(1, 0, 10.0, false));
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(GetDeploymentStatsAction.INSTANCE), eq(new GetDeploymentStatsAction.Request("test-deployment")), any());
|
|
|
+ doAnswer(invocationOnMock -> {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ var listener = (ActionListener<CreateTrainedModelAssignmentAction.Response>) invocationOnMock.getArguments()[2];
|
|
|
+ listener.onResponse(null);
|
|
|
+ return Void.TYPE;
|
|
|
+ }).when(client).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), any(), any());
|
|
|
+
|
|
|
+ safeSleep(1000);
|
|
|
+
|
|
|
+ verify(client, times(2)).threadPool();
|
|
|
+ verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
|
|
|
+ var updateRequest = new UpdateTrainedModelDeploymentAction.Request("test-deployment");
|
|
|
+ updateRequest.setNumberOfAllocations(0);
|
|
|
+ updateRequest.setIsInternal(true);
|
|
|
+ verify(client, times(1)).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), eq(updateRequest), any());
|
|
|
+ verifyNoMoreInteractions(client, clusterService);
|
|
|
|
|
|
service.stop();
|
|
|
}
|
|
@@ -256,7 +419,9 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
inferenceAuditor,
|
|
|
meterRegistry,
|
|
|
true,
|
|
|
- 1
|
|
|
+ 1,
|
|
|
+ 60,
|
|
|
+ 60_000
|
|
|
);
|
|
|
|
|
|
when(client.threadPool()).thenReturn(threadPool);
|
|
@@ -289,7 +454,9 @@ public class AdaptiveAllocationsScalerServiceTests extends ESTestCase {
|
|
|
inferenceAuditor,
|
|
|
meterRegistry,
|
|
|
true,
|
|
|
- 1
|
|
|
+ 1,
|
|
|
+ 60,
|
|
|
+ 60_000
|
|
|
);
|
|
|
|
|
|
var latch = new CountDownLatch(1);
|