@@ -81,13 +81,13 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
 
     public void testFailOver() throws Exception {
         internalCluster().ensureAtLeastNumDataNodes(3);
-        ensureStableClusterOnAllNodes(3);
+        ensureStableCluster();
         run("fail-over-job", () -> {
             GetJobsStatsAction.Request request = new GetJobsStatsAction.Request("fail-over-job");
             GetJobsStatsAction.Response response = client().execute(GetJobsStatsAction.INSTANCE, request).actionGet();
             DiscoveryNode discoveryNode = response.getResponse().results().get(0).getNode();
             internalCluster().stopRandomNode(settings -> discoveryNode.getName().equals(settings.get("node.name")));
-            ensureStableClusterOnAllNodes(2);
+            ensureStableCluster();
         });
     }
 
@@ -97,7 +97,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         internalCluster().startMasterOnlyNode();
         logger.info("Starting ml and data node...");
         String mlAndDataNode = internalCluster().startNode(onlyRoles(Set.of(DiscoveryNodeRole.DATA_ROLE, MachineLearning.ML_ROLE)));
-        ensureStableClusterOnAllNodes(2);
+        ensureStableCluster();
         run("lose-dedicated-master-node-job", () -> {
             logger.info("Stopping dedicated master node");
             Settings masterDataPathSettings = internalCluster().dataPathSettings(internalCluster().getMasterName());
@@ -112,19 +112,18 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
                 .put(masterDataPathSettings)
                 .put(masterOnlyNode())
                 .build());
-            ensureStableClusterOnAllNodes(2);
+            ensureStableCluster();
         });
     }
 
-    @AwaitsFix( bugUrl = "https://github.com/elastic/elasticsearch/issues/58532")
     public void testFullClusterRestart() throws Exception {
         internalCluster().ensureAtLeastNumDataNodes(3);
-        ensureStableClusterOnAllNodes(3);
+        ensureStableCluster();
         run("full-cluster-restart-job", () -> {
             logger.info("Restarting all nodes");
             internalCluster().fullRestart();
             logger.info("Restarted all nodes");
-            ensureStableClusterOnAllNodes(3);
+            ensureStableCluster();
         });
     }
 
@@ -134,7 +133,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         internalCluster().startNode(onlyRoles(Set.of(DiscoveryNodeRole.DATA_ROLE, DiscoveryNodeRole.MASTER_ROLE)));
         logger.info("Starting ml and data node...");
         internalCluster().startNode(onlyRoles(Set.of(DiscoveryNodeRole.DATA_ROLE, MachineLearning.ML_ROLE)));
-        ensureStableClusterOnAllNodes(2);
+        ensureStableCluster();
 
         // index some datafeed data
         client().admin().indices().prepareCreate("data")
@@ -211,7 +210,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         }
         logger.info("Starting dedicated ml node...");
         internalCluster().startNode(onlyRole(MachineLearning.ML_ROLE));
-        ensureStableClusterOnAllNodes(4);
+        ensureStableCluster();
 
         // index some datafeed data
         client().admin().indices().prepareCreate("data")
@@ -318,7 +317,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         internalCluster().startMasterOnlyNode();
         logger.info("Starting ml and data node...");
         internalCluster().startNode(onlyRoles(Set.of(DiscoveryNodeRole.DATA_ROLE, MachineLearning.ML_ROLE)));
-        ensureStableClusterOnAllNodes(2);
+        ensureStableCluster();
 
         // index some datafeed data
         client().admin().indices().prepareCreate("data")
@@ -364,7 +363,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
     public void testJobRelocationIsMemoryAware() throws Exception {
 
         internalCluster().ensureAtLeastNumDataNodes(1);
-        ensureStableClusterOnAllNodes(1);
+        ensureStableCluster();
 
         // Open 4 small jobs. Since there is only 1 node in the cluster they'll have to go on that node.
 
@@ -377,7 +376,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         // same node because we don't rebalance jobs that are happily running.
 
         internalCluster().ensureAtLeastNumDataNodes(3);
-        ensureStableClusterOnAllNodes(3);
+        ensureStableCluster();
 
         // Wait for the cluster to be green - this means the indices have been replicated.
 
@@ -390,7 +389,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         // Stop the current master node - this should be the one with the 4 small jobs on.
 
         internalCluster().stopCurrentMasterNode();
-        ensureStableClusterOnAllNodes(2);
+        ensureStableCluster();
 
         // If memory requirements are used to reallocate the 4 small jobs (as we expect) then they should
         // all reallocate to the same node, that being the one that doesn't have the big job on. If job counts
@@ -542,7 +541,7 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
         }, 30, TimeUnit.SECONDS);
     }
 
-    private void ensureStableClusterOnAllNodes(int nodeCount) {
-        ensureStableCluster(nodeCount, TimeValue.timeValueSeconds(60));
+    private void ensureStableCluster() {
+        ensureStableCluster(internalCluster().getNodeNames().length, TimeValue.timeValueSeconds(60));
     }
 }
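
The refactored helper sizes the stability check from the live test cluster, so call sites no longer track node counts by hand. A minimal call-site sketch of the effect, assuming InternalTestCluster's stopRandomDataNode() (a hypothetical usage, not part of this diff):

    // Hypothetical call site (illustration only): the helper reads
    // internalCluster().getNodeNames().length, so the expected count
    // follows the cluster as nodes start and stop.
    internalCluster().ensureAtLeastNumDataNodes(3);
    ensureStableCluster();                   // expects the current node count (3 in this sketch)
    internalCluster().stopRandomDataNode();  // cluster drops to 2 nodes
    ensureStableCluster();                   // now expects 2, with no manual recount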