|
@@ -48,6 +48,7 @@ import java.util.stream.Collectors;
|
|
|
|
|
|
import static org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.EXTREME_DELAY_VARIABILITY;
|
|
|
import static org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService.ClusterFormationStateOrException;
|
|
|
+import static org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService.CoordinationDiagnosticsStatus;
|
|
|
import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
|
|
|
import static org.hamcrest.Matchers.containsString;
|
|
|
import static org.hamcrest.Matchers.emptyOrNullString;
|
|
@@ -496,6 +497,84 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+    public void testRedForNoMasterQueryingNonMaster() {
|
|
|
+        /*
|
|
|
+         * This test simulates a cluster with 3 master-eligible nodes and 2 non-master-eligible nodes. It disconnects every
|
|
|
+         * master-eligible node except the one returned by cluster.getAnyLeader(), and then asserts that diagnoseMasterStability()
|
|
|
+         * on each non-master-eligible node is RED with a "no master / unable to form a quorum" summary. It then sets three values for
|
|
|
+         * remoteCoordinationDiagnosisResult on each of the non-master-eligible nodes (simulating different
|
|
|
+         * responses from a master-eligible node that it has polled: a non-GREEN result, a GREEN result, and an exception), and
|
|
|
+         * asserts after each one that the expected status and summary come back from diagnoseMasterStability().
|
|
|
+         */
|
|
|
+        try (Cluster cluster = new Cluster(3, true, Settings.EMPTY)) {
|
|
|
+            createAndAddNonMasterNode(cluster);
|
|
|
+            createAndAddNonMasterNode(cluster);
|
|
|
+            cluster.runRandomly(false, true, EXTREME_DELAY_VARIABILITY);
|
|
|
+            cluster.stabilise();
|
|
|
+            DiscoveryNode nonKilledMasterNode = cluster.getAnyLeader().getLocalNode();
|
|
|
+            for (Cluster.ClusterNode node : cluster.clusterNodes) {
|
|
|
+                if (node.getLocalNode().isMasterNode() && node.getLocalNode().equals(nonKilledMasterNode) == false) {
|
|
|
+                    node.disconnect();
|
|
|
+                }
|
|
|
+            }
|
|
|
+            cluster.runFor(DEFAULT_STABILISATION_TIME, "Cannot call stabilise() because there is no master");
|
|
|
+            for (Cluster.ClusterNode node : cluster.clusterNodes.stream()
|
|
|
+                .filter(node -> node.getLocalNode().isMasterNode() == false)
|
|
|
+                .toList()) {
|
|
|
+                CoordinationDiagnosticsService.CoordinationDiagnosticsResult healthIndicatorResult = node.coordinationDiagnosticsService
|
|
|
+                    .diagnoseMasterStability(true);
|
|
|
+                assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsStatus.RED));
|
|
|
+                String summary = healthIndicatorResult.summary();
|
|
|
+                assertThat(
|
|
|
+                    summary,
|
|
|
+                    containsString("No master node observed in the last 30s, and the master eligible nodes are unable to form a quorum")
|
|
|
+                );
|
|
|
+                CoordinationDiagnosticsStatus artificialRemoteStatus = randomValueOtherThan( // remote result with a non-GREEN status
|
|
|
+                    CoordinationDiagnosticsStatus.GREEN,
|
|
|
+                    () -> randomFrom(CoordinationDiagnosticsStatus.values())
|
|
|
+                );
|
|
|
+                String artificialRemoteStatusSummary = "Artificial failure";
|
|
|
+                CoordinationDiagnosticsService.CoordinationDiagnosticsResult artificialRemoteResult =
|
|
|
+                    new CoordinationDiagnosticsService.CoordinationDiagnosticsResult(
|
|
|
+                        artificialRemoteStatus,
|
|
|
+                        artificialRemoteStatusSummary,
|
|
|
+                        null
|
|
|
+                    );
|
|
|
+                node.coordinationDiagnosticsService.remoteCoordinationDiagnosisResult = new AtomicReference<>(
|
|
|
+                    new CoordinationDiagnosticsService.RemoteMasterHealthResult(nonKilledMasterNode, artificialRemoteResult, null)
|
|
|
+                );
|
|
|
+                healthIndicatorResult = node.coordinationDiagnosticsService.diagnoseMasterStability(true);
|
|
|
+                assertThat(healthIndicatorResult.status(), equalTo(artificialRemoteStatus));
|
|
|
+                assertThat(healthIndicatorResult.summary(), containsString(artificialRemoteStatusSummary));
|
|
|
+                // Remote result is GREEN: the status must still be RED, with a summary noting that the remote node reported GREEN.
|
|
|
+                artificialRemoteResult = new CoordinationDiagnosticsService.CoordinationDiagnosticsResult(
|
|
|
+                    CoordinationDiagnosticsStatus.GREEN,
|
|
|
+                    artificialRemoteStatusSummary,
|
|
|
+                    null
|
|
|
+                );
|
|
|
+                node.coordinationDiagnosticsService.remoteCoordinationDiagnosisResult = new AtomicReference<>(
|
|
|
+                    new CoordinationDiagnosticsService.RemoteMasterHealthResult(nonKilledMasterNode, artificialRemoteResult, null)
|
|
|
+                );
|
|
|
+                healthIndicatorResult = node.coordinationDiagnosticsService.diagnoseMasterStability(true);
|
|
|
+                assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+                assertThat(healthIndicatorResult.summary(), containsString("reports that the status is GREEN"));
|
|
|
+                // Remote poll yielded an exception instead of a result: the status must be RED and the summary must mention it.
|
|
|
+                Exception artificialRemoteResultException = new RuntimeException(artificialRemoteStatusSummary);
|
|
|
+                node.coordinationDiagnosticsService.remoteCoordinationDiagnosisResult = new AtomicReference<>(
|
|
|
+                    new CoordinationDiagnosticsService.RemoteMasterHealthResult(nonKilledMasterNode, null, artificialRemoteResultException)
|
|
|
+                );
|
|
|
+                healthIndicatorResult = node.coordinationDiagnosticsService.diagnoseMasterStability(true);
|
|
|
+                assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsStatus.RED));
|
|
|
+                assertThat(healthIndicatorResult.summary(), containsString("received an exception"));
|
|
|
+            }
|
|
|
+            // Keep running the cluster for as long as any node reports that it delivered blackholed requests.
|
|
|
+            while (cluster.clusterNodes.stream().anyMatch(Cluster.ClusterNode::deliverBlackholedRequests)) {
|
|
|
+                logger.debug("--> stabilising again after delivering blackholed requests");
|
|
|
+                cluster.runFor(DEFAULT_STABILISATION_TIME, "Cannot call stabilise() because there is no master");
|
|
|
+            }
|
|
|
+        }
|
|
|
+    }
|
|
|
+
|
|
|
    public void testYellowWithTooManyMasterChanges() { // thin wrapper: the whole body is one call to testChangeMasterThreeTimes
|
|
|
         testChangeMasterThreeTimes(2, 100, "The elected master node has changed");
|
|
|
     }
|
|
@@ -1064,6 +1143,18 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+    public void testRemoteMasterHealthResult() { // each constructor argument combination below must be rejected
|
|
|
+        expectThrows(IllegalArgumentException.class, () -> new CoordinationDiagnosticsService.RemoteMasterHealthResult(null, null, null));
|
|
|
+        expectThrows( // null node, even though an exception is supplied
|
|
|
+            IllegalArgumentException.class,
|
|
|
+            () -> new CoordinationDiagnosticsService.RemoteMasterHealthResult(null, null, new RuntimeException())
|
|
|
+        );
|
|
|
+        expectThrows( // node supplied, but neither a result nor an exception
|
|
|
+            IllegalArgumentException.class,
|
|
|
+            () -> new CoordinationDiagnosticsService.RemoteMasterHealthResult(mock(DiscoveryNode.class), null, null)
|
|
|
+        );
|
|
|
+    }
|
|
|
+
|
|
|
public void testResultSerialization() {
|
|
|
CoordinationDiagnosticsService.CoordinationDiagnosticsStatus status = getRandomStatus();
|
|
|
CoordinationDiagnosticsService.CoordinationDiagnosticsDetails details = getRandomDetails();
|