|
|
@@ -18,7 +18,10 @@ import org.elasticsearch.cluster.node.DiscoveryNodeRole;
|
|
|
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
|
|
import org.elasticsearch.cluster.routing.RoutingTable;
|
|
|
import org.elasticsearch.cluster.service.ClusterService;
|
|
|
+import org.elasticsearch.common.collect.ImmutableOpenMap;
|
|
|
import org.elasticsearch.common.settings.Settings;
|
|
|
+import org.elasticsearch.core.TimeValue;
|
|
|
+import org.elasticsearch.core.Tuple;
|
|
|
import org.elasticsearch.monitor.StatusInfo;
|
|
|
import org.elasticsearch.test.EqualsHashCodeTestUtils;
|
|
|
import org.elasticsearch.threadpool.Scheduler;
|
|
|
@@ -29,12 +32,16 @@ import org.junit.Before;
|
|
|
import java.io.IOException;
|
|
|
import java.util.ArrayList;
|
|
|
import java.util.Arrays;
|
|
|
+import java.util.Collection;
|
|
|
import java.util.Collections;
|
|
|
import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
import java.util.UUID;
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
import java.util.concurrent.ConcurrentMap;
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
|
import static org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.EXTREME_DELAY_VARIABILITY;
|
|
|
import static org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService.ClusterFormationStateOrException;
|
|
|
@@ -44,6 +51,7 @@ import static org.hamcrest.Matchers.emptyOrNullString;
|
|
|
import static org.hamcrest.Matchers.endsWith;
|
|
|
import static org.hamcrest.Matchers.equalTo;
|
|
|
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
|
|
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
|
|
|
import static org.hamcrest.Matchers.not;
|
|
|
import static org.hamcrest.Matchers.notNullValue;
|
|
|
import static org.hamcrest.Matchers.startsWith;
|
|
|
@@ -370,25 +378,114 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- public void testRedForNoMaster() {
|
|
|
+ public void testRedForNoMasterNoQuorum() {
|
|
|
+ /*
|
|
|
+ * This test brings up a 4-node cluster (3 master-eligible, one not). After the cluster stabilizes, one node is elected master.
|
|
|
+ * Then we disconnect all master nodes from all other nodes and allow the cluster to simulate running for a little longer. At
|
|
|
+ * that point, the non-master node thinks that there are no master nodes. The two master eligible nodes that were not initially
|
|
|
+ * elected master think that no quorum can be formed. Depending on timing, the node that had originally been elected master
|
|
|
+ * either thinks that it is still master, or that no quorum can be formed.
|
|
|
+ */
|
|
|
try (Cluster cluster = new Cluster(4, false, Settings.EMPTY)) {
|
|
|
// The allNodesMasterEligible=false passed to the Cluster constructor does not guarantee a non-master node in the cluster:
|
|
|
createAndAddNonMasterNode(cluster);
|
|
|
cluster.runRandomly();
|
|
|
cluster.stabilise();
|
|
|
+ int masterNodeCount = 0;
|
|
|
for (Cluster.ClusterNode node : cluster.clusterNodes) {
|
|
|
if (node.getLocalNode().isMasterNode()) {
|
|
|
node.disconnect();
|
|
|
+ masterNodeCount++;
|
|
|
}
|
|
|
}
|
|
|
+ int redNonMasterCount = 0;
|
|
|
+ int redMasterCount = 0;
|
|
|
+ int greenMasterCount = 0;
|
|
|
cluster.runFor(DEFAULT_STABILISATION_TIME, "Cannot call stabilise() because there is no master");
|
|
|
for (Cluster.ClusterNode node : cluster.clusterNodes) {
|
|
|
CoordinationDiagnosticsService.CoordinationDiagnosticsResult healthIndicatorResult = node.coordinationDiagnosticsService
|
|
|
.diagnoseMasterStability(true);
|
|
|
if (node.getLocalNode().isMasterNode() == false) {
|
|
|
assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+ assertThat(healthIndicatorResult.summary(), containsString("No master eligible nodes found in the cluster"));
|
|
|
+ redNonMasterCount++;
|
|
|
+ } else if (healthIndicatorResult.status().equals(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED)) {
|
|
|
+ assertThat(healthIndicatorResult.summary(), containsString("unable to form a quorum"));
|
|
|
+ redMasterCount++;
|
|
|
+ } else {
|
|
|
+ assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.GREEN));
|
|
|
+ greenMasterCount++;
|
|
|
}
|
|
|
}
|
|
|
+ // Non-master nodes see only themselves and think that there are no master nodes:
|
|
|
+ assertThat(redNonMasterCount, equalTo(5 - masterNodeCount));
|
|
|
+ // The original master node sees only itself, but might think it is still master:
|
|
|
+ assertThat(greenMasterCount, lessThanOrEqualTo(1));
|
|
|
+ // The other master nodes only see themselves and cannot form a quorum (and sometimes the original master already sees this):
|
|
|
+ assertThat(redMasterCount, greaterThanOrEqualTo(masterNodeCount - 1));
|
|
|
+
|
|
|
+ while (cluster.clusterNodes.stream().anyMatch(Cluster.ClusterNode::deliverBlackholedRequests)) {
|
|
|
+ logger.debug("--> stabilising again after delivering blackholed requests");
|
|
|
+ cluster.runFor(DEFAULT_STABILISATION_TIME, "Cannot call stabilise() because there is no master");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testRedForDiscoveryProblems() {
|
|
|
+ /*
|
|
|
+ * In this test we bring up a 4-node cluster (3 master-eligible) and allow it to stabilize. Then we disconnect all master nodes.
|
|
|
+ * This actually causes a quorum failure (see testRedForNoMasterNoQuorum()). The reason for this is so that we get into the state
|
|
|
+ * where we have not had a master node recently. Then we swap out he clusterFormationResponses so that the reason appears to be
|
|
|
+ * a discovery problem instead of a quorum problem.
|
|
|
+ */
|
|
|
+ try (Cluster cluster = new Cluster(3, true, Settings.EMPTY)) {
|
|
|
+ // The allNodesMasterEligible=false passed to the Cluster constructor does not guarantee a non-master node in the cluster:
|
|
|
+ createAndAddNonMasterNode(cluster);
|
|
|
+ cluster.runRandomly();
|
|
|
+ cluster.stabilise();
|
|
|
+ int masterNodeCount = 0;
|
|
|
+ ConcurrentHashMap<DiscoveryNode, CoordinationDiagnosticsService.ClusterFormationStateOrException> clusterFormationStates =
|
|
|
+ new ConcurrentHashMap<>();
|
|
|
+ for (Cluster.ClusterNode node : cluster.clusterNodes) {
|
|
|
+ if (node.getLocalNode().isMasterNode()) {
|
|
|
+ clusterFormationStates.put(
|
|
|
+ node.getLocalNode(),
|
|
|
+ new CoordinationDiagnosticsService.ClusterFormationStateOrException(getClusterFormationState(false, true))
|
|
|
+ );
|
|
|
+ node.disconnect();
|
|
|
+ masterNodeCount++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ int redNonMasterCount = 0;
|
|
|
+ int redMasterCount = 0;
|
|
|
+ int greenMasterCount = 0;
|
|
|
+ cluster.runFor(DEFAULT_STABILISATION_TIME, "Cannot call stabilise() because there is no master");
|
|
|
+ for (Cluster.ClusterNode node : cluster.clusterNodes) {
|
|
|
+ if (node.getLocalNode().isMasterNode()) {
|
|
|
+ // This is artificially forcing a discovery problem:
|
|
|
+ node.coordinationDiagnosticsService.clusterFormationResponses = clusterFormationStates;
|
|
|
+ }
|
|
|
+ CoordinationDiagnosticsService.CoordinationDiagnosticsResult healthIndicatorResult = node.coordinationDiagnosticsService
|
|
|
+ .diagnoseMasterStability(true);
|
|
|
+ if (node.getLocalNode().isMasterNode() == false) {
|
|
|
+ assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+ assertThat(healthIndicatorResult.summary(), containsString("No master eligible nodes found in the cluster"));
|
|
|
+ redNonMasterCount++;
|
|
|
+ } else if (healthIndicatorResult.status().equals(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED)) {
|
|
|
+ assertThat(healthIndicatorResult.summary(), containsString("unable to discover other master eligible nodes"));
|
|
|
+ redMasterCount++;
|
|
|
+ } else {
|
|
|
+ assertThat(healthIndicatorResult.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.GREEN));
|
|
|
+ greenMasterCount++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // Non-master nodes see only themselves and think that there are no master nodes:
|
|
|
+ assertThat(redNonMasterCount, equalTo(1));
|
|
|
+ // The original master node sees only itself, but might think it is still master:
|
|
|
+ assertThat(greenMasterCount, lessThanOrEqualTo(1));
|
|
|
+ // The other master nodes only see themselves and cannot form a quorum (and sometimes the original master already sees this):
|
|
|
+ assertThat(redMasterCount, greaterThanOrEqualTo(masterNodeCount - 1));
|
|
|
+
|
|
|
while (cluster.clusterNodes.stream().anyMatch(Cluster.ClusterNode::deliverBlackholedRequests)) {
|
|
|
logger.debug("--> stabilising again after delivering blackholed requests");
|
|
|
cluster.runFor(DEFAULT_STABILISATION_TIME, "Cannot call stabilise() because there is no master");
|
|
|
@@ -501,7 +598,11 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
assertThat(recentMaster.getName(), notNullValue());
|
|
|
assertThat(recentMaster.getId(), not(emptyOrNullString()));
|
|
|
}
|
|
|
- assertThat(result.details().clusterFormationDescription(), startsWith("master not discovered yet"));
|
|
|
+ assertThat(result.details().nodeToClusterFormationDescriptionMap().size(), equalTo(1));
|
|
|
+ assertThat(
|
|
|
+ result.details().nodeToClusterFormationDescriptionMap().values().iterator().next(),
|
|
|
+ startsWith("master not" + " discovered")
|
|
|
+ );
|
|
|
}
|
|
|
cluster.clusterNodes.addAll(removedClusterNodes);
|
|
|
while (cluster.clusterNodes.stream().anyMatch(Cluster.ClusterNode::deliverBlackholedRequests)) {
|
|
|
@@ -545,7 +646,11 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
assertThat(recentMaster.getName(), notNullValue());
|
|
|
assertThat(recentMaster.getId(), not(emptyOrNullString()));
|
|
|
}
|
|
|
- assertThat(result.details().clusterFormationDescription(), startsWith("master not discovered"));
|
|
|
+ assertThat(result.details().nodeToClusterFormationDescriptionMap().size(), equalTo(1));
|
|
|
+ assertThat(
|
|
|
+ result.details().nodeToClusterFormationDescriptionMap().values().iterator().next(),
|
|
|
+ startsWith("master not" + " discovered")
|
|
|
+ );
|
|
|
// This restores the PeerFinder so that the test cleanup doesn't fail:
|
|
|
node.coordinator.getPeerFinder().activate(lastAcceptedNodes);
|
|
|
}
|
|
|
@@ -583,7 +688,11 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
assertThat(recentMaster.getName(), notNullValue());
|
|
|
assertThat(recentMaster.getId(), not(emptyOrNullString()));
|
|
|
}
|
|
|
- assertThat(result.details().clusterFormationDescription(), startsWith("master not discovered"));
|
|
|
+ assertThat(result.details().nodeToClusterFormationDescriptionMap().size(), equalTo(1));
|
|
|
+ assertThat(
|
|
|
+ result.details().nodeToClusterFormationDescriptionMap().values().iterator().next(),
|
|
|
+ startsWith("master not" + " discovered")
|
|
|
+ );
|
|
|
}
|
|
|
}
|
|
|
while (cluster.clusterNodes.stream().anyMatch(Cluster.ClusterNode::deliverBlackholedRequests)) {
|
|
|
@@ -593,6 +702,210 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ public void testDiagnoseOnHaveNotSeenMasterRecentlyAndWeAreMasterEligible() {
|
|
|
+ MasterHistory localMasterHistory = mock(MasterHistory.class);
|
|
|
+ Collection<DiscoveryNode> masterEligibleNodes = List.of(node1, node2, node3);
|
|
|
+ boolean explain = true;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * In this test, we have an exception rather than the ClusterFormationState for one of the nodes, we expect the status to be RED
|
|
|
+ * and the summary to indicate that there was an exception.
|
|
|
+ */
|
|
|
+ ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(
|
|
|
+ node1,
|
|
|
+ new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), randomBoolean()))
|
|
|
+ );
|
|
|
+ clusterFormationResponses.put(
|
|
|
+ node2,
|
|
|
+ new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), randomBoolean()))
|
|
|
+ );
|
|
|
+ clusterFormationResponses.put(node3, new ClusterFormationStateOrException(new RuntimeException()));
|
|
|
+ TimeValue nodeHasMasterLookupTimeframe = new TimeValue(1, TimeUnit.MILLISECONDS);
|
|
|
+ Coordinator coordinator = mock(Coordinator.class);
|
|
|
+ ClusterFormationFailureHelper.ClusterFormationState localClusterFormationState = getClusterFormationState(true, true);
|
|
|
+ when(coordinator.getClusterFormationState()).thenReturn(localClusterFormationState);
|
|
|
+ when(coordinator.getLocalNode()).thenReturn(node1);
|
|
|
+ CoordinationDiagnosticsService.CoordinationDiagnosticsResult result = CoordinationDiagnosticsService
|
|
|
+ .diagnoseOnHaveNotSeenMasterRecentlyAndWeAreMasterEligible(
|
|
|
+ localMasterHistory,
|
|
|
+ masterEligibleNodes,
|
|
|
+ coordinator,
|
|
|
+ clusterFormationResponses,
|
|
|
+ nodeHasMasterLookupTimeframe,
|
|
|
+ explain
|
|
|
+ );
|
|
|
+ assertThat(result.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+ assertThat(result.summary(), containsString(" an exception occurred while reaching out to "));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * In this test, the ClusterFormationStates of all of the nodes appear fine. Since we only run this method when there has been no
|
|
|
+ * master recently, we expect the status to be RED and the summary to indicate that we don't know what the problem is.
|
|
|
+ */
|
|
|
+ clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(node1, new ClusterFormationStateOrException(getClusterFormationState(true, true)));
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(true, true)));
|
|
|
+ clusterFormationResponses.put(node3, new ClusterFormationStateOrException(getClusterFormationState(true, true)));
|
|
|
+ result = CoordinationDiagnosticsService.diagnoseOnHaveNotSeenMasterRecentlyAndWeAreMasterEligible(
|
|
|
+ localMasterHistory,
|
|
|
+ masterEligibleNodes,
|
|
|
+ coordinator,
|
|
|
+ clusterFormationResponses,
|
|
|
+ nodeHasMasterLookupTimeframe,
|
|
|
+ explain
|
|
|
+ );
|
|
|
+ assertThat(result.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+ assertThat(result.summary(), containsString(" the cause has not been determined"));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * In this test, we have the ClusterFormationState of one of the node reports that it cannot form a quorum, so we expect the
|
|
|
+ * status to be RED and the summary to indicate that we are unable to form a quorum.
|
|
|
+ */
|
|
|
+ clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(node1, new ClusterFormationStateOrException(getClusterFormationState(true, true)));
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(true, true)));
|
|
|
+ clusterFormationResponses.put(node3, new ClusterFormationStateOrException(getClusterFormationState(true, false)));
|
|
|
+ result = CoordinationDiagnosticsService.diagnoseOnHaveNotSeenMasterRecentlyAndWeAreMasterEligible(
|
|
|
+ localMasterHistory,
|
|
|
+ masterEligibleNodes,
|
|
|
+ coordinator,
|
|
|
+ clusterFormationResponses,
|
|
|
+ nodeHasMasterLookupTimeframe,
|
|
|
+ explain
|
|
|
+ );
|
|
|
+ assertThat(result.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+ assertThat(result.summary(), containsString(" the master eligible nodes are unable to form a quorum"));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * In this test, we have the ClusterFormationState of one of the node reports that its foundPeers does not include all known
|
|
|
+ * master eligible nodes, so we expect the status to be RED and the summary to indicate that we are unable to discover all nodes.
|
|
|
+ */
|
|
|
+ clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(node1, new ClusterFormationStateOrException(getClusterFormationState(true, true)));
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(false, true)));
|
|
|
+ clusterFormationResponses.put(node3, new ClusterFormationStateOrException(getClusterFormationState(true, false)));
|
|
|
+ result = CoordinationDiagnosticsService.diagnoseOnHaveNotSeenMasterRecentlyAndWeAreMasterEligible(
|
|
|
+ localMasterHistory,
|
|
|
+ masterEligibleNodes,
|
|
|
+ coordinator,
|
|
|
+ clusterFormationResponses,
|
|
|
+ nodeHasMasterLookupTimeframe,
|
|
|
+ explain
|
|
|
+ );
|
|
|
+ assertThat(result.status(), equalTo(CoordinationDiagnosticsService.CoordinationDiagnosticsStatus.RED));
|
|
|
+ assertThat(result.summary(), containsString(" some master eligible nodes are unable to discover other master eligible nodes"));
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testAnyNodeInClusterReportsDiscoveryProblems() {
|
|
|
+ Collection<DiscoveryNode> masterEligibleNodes = List.of(node1, node2, node3);
|
|
|
+ /*
|
|
|
+ * In this test, the foundPeers of the ClusterFormationStates of all nodes contain all known master eligible nodes, so we expect
|
|
|
+ * anyNodeInClusterReportsDiscoveryProblems() to be false since there are no discovery problems.
|
|
|
+ */
|
|
|
+ ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(node1, new ClusterFormationStateOrException(getClusterFormationState(true, randomBoolean())));
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(true, randomBoolean())));
|
|
|
+ clusterFormationResponses.put(node3, new ClusterFormationStateOrException(getClusterFormationState(true, randomBoolean())));
|
|
|
+ Map<DiscoveryNode, ClusterFormationFailureHelper.ClusterFormationState> nodeClusterFormationStateMap = clusterFormationResponses
|
|
|
+ .entrySet()
|
|
|
+ .stream()
|
|
|
+ .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().clusterFormationState()));
|
|
|
+ assertFalse(
|
|
|
+ CoordinationDiagnosticsService.anyNodeInClusterReportsDiscoveryProblems(masterEligibleNodes, nodeClusterFormationStateMap)
|
|
|
+ );
|
|
|
+
|
|
|
+ /*
|
|
|
+ * In this test, the foundPeers of at least one ClusterFormationState does _not_ include all known master eligible nodes, so we
|
|
|
+ * expect anyNodeInClusterReportsDiscoveryProblems() to be true.
|
|
|
+ */
|
|
|
+ clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(
|
|
|
+ node1,
|
|
|
+ new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), randomBoolean()))
|
|
|
+ );
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(false, randomBoolean())));
|
|
|
+ clusterFormationResponses.put(
|
|
|
+ node3,
|
|
|
+ new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), randomBoolean()))
|
|
|
+ );
|
|
|
+ nodeClusterFormationStateMap = clusterFormationResponses.entrySet()
|
|
|
+ .stream()
|
|
|
+ .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().clusterFormationState()));
|
|
|
+ assertTrue(
|
|
|
+ CoordinationDiagnosticsService.anyNodeInClusterReportsDiscoveryProblems(masterEligibleNodes, nodeClusterFormationStateMap)
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testAnyNodeInClusterReportsQuorumProblems() {
|
|
|
+ /*
|
|
|
+ * In this test the hasDiscoveredQuorum field of all ClusterFormationStates is true, so we expect
|
|
|
+ * anyNodeInClusterReportsQuorumProblems() to return false.
|
|
|
+ */
|
|
|
+ ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(node1, new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), true)));
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), true)));
|
|
|
+ clusterFormationResponses.put(node3, new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), true)));
|
|
|
+ Map<DiscoveryNode, ClusterFormationFailureHelper.ClusterFormationState> nodeClusterFormationStateMap = clusterFormationResponses
|
|
|
+ .entrySet()
|
|
|
+ .stream()
|
|
|
+ .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().clusterFormationState()));
|
|
|
+ assertFalse(CoordinationDiagnosticsService.anyNodeInClusterReportsQuorumProblems(nodeClusterFormationStateMap));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * In this test the hasDiscoveredQuorum field of at least one ClusterFormationState is false, so we expect
|
|
|
+ * anyNodeInClusterReportsQuorumProblems() to return true.
|
|
|
+ */
|
|
|
+ clusterFormationResponses = new ConcurrentHashMap<>();
|
|
|
+ clusterFormationResponses.put(
|
|
|
+ node1,
|
|
|
+ new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), randomBoolean()))
|
|
|
+ );
|
|
|
+ clusterFormationResponses.put(node2, new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), false)));
|
|
|
+ clusterFormationResponses.put(
|
|
|
+ node3,
|
|
|
+ new ClusterFormationStateOrException(getClusterFormationState(randomBoolean(), randomBoolean()))
|
|
|
+ );
|
|
|
+ nodeClusterFormationStateMap = clusterFormationResponses.entrySet()
|
|
|
+ .stream()
|
|
|
+ .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().clusterFormationState()));
|
|
|
+ assertTrue(CoordinationDiagnosticsService.anyNodeInClusterReportsQuorumProblems(nodeClusterFormationStateMap));
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Creates a new ClusterFormationState for testing. If hasDiscoveredAllNodes is false, then the foundPeers on the returned
|
|
|
+ * ClusterFormationState will be missing at least one known master eligible node. The hasDiscoveredQuorum field in the returned
|
|
|
+ * ClusterFormationState is set to the hasDiscoveredQuorum argument of this method.
|
|
|
+ */
|
|
|
+ private ClusterFormationFailureHelper.ClusterFormationState getClusterFormationState(
|
|
|
+ boolean hasDiscoveredAllNodes,
|
|
|
+ boolean hasDiscoveredQuorum
|
|
|
+ ) {
|
|
|
+ Map<String, DiscoveryNode> masterEligibleNodesMap = Map.of("node1", node1, "node2", node2, "node3", node3);
|
|
|
+ List<String> initialMasterNodesSetting = Arrays.stream(generateRandomStringArray(7, 30, false, false)).toList();
|
|
|
+ DiscoveryNode localNode = masterEligibleNodesMap.values().stream().findAny().get();
|
|
|
+ ImmutableOpenMap.Builder<String, DiscoveryNode> masterEligibleNodesBuilder = new ImmutableOpenMap.Builder<>();
|
|
|
+ masterEligibleNodesMap.forEach(masterEligibleNodesBuilder::put);
|
|
|
+ ImmutableOpenMap<String, DiscoveryNode> masterEligibleNodes = masterEligibleNodesBuilder.build();
|
|
|
+ List<DiscoveryNode> allMasterEligibleNodes = List.of(node1, node2, node3);
|
|
|
+ return new ClusterFormationFailureHelper.ClusterFormationState(
|
|
|
+ initialMasterNodesSetting,
|
|
|
+ localNode,
|
|
|
+ masterEligibleNodes,
|
|
|
+ randomLong(),
|
|
|
+ randomLong(),
|
|
|
+ new CoordinationMetadata.VotingConfiguration(Collections.emptySet()),
|
|
|
+ new CoordinationMetadata.VotingConfiguration(Collections.emptySet()),
|
|
|
+ Collections.emptyList(),
|
|
|
+ hasDiscoveredAllNodes
|
|
|
+ ? allMasterEligibleNodes
|
|
|
+ : randomSubsetOf(randomInt(allMasterEligibleNodes.size() - 1), allMasterEligibleNodes),
|
|
|
+ randomLong(),
|
|
|
+ hasDiscoveredQuorum,
|
|
|
+ new StatusInfo(randomFrom(StatusInfo.Status.HEALTHY, StatusInfo.Status.UNHEALTHY), randomAlphaOfLength(20)),
|
|
|
+ Collections.emptyList()
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
public void testBeginPollingClusterFormationInfo() {
|
|
|
/*
|
|
|
* This test sets up a 4-node cluster (3 master eligible). We call beginPollingClusterFormationInfo() on each node. This is allowed
|
|
|
@@ -747,7 +1060,7 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
originalDetails.recentMasters(),
|
|
|
originalDetails.remoteExceptionMessage(),
|
|
|
originalDetails.remoteExceptionStackTrace(),
|
|
|
- originalDetails.clusterFormationDescription()
|
|
|
+ originalDetails.nodeToClusterFormationDescriptionMap()
|
|
|
);
|
|
|
}
|
|
|
case 2 -> {
|
|
|
@@ -756,7 +1069,7 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
List.of(node1, node2, node3),
|
|
|
originalDetails.remoteExceptionMessage(),
|
|
|
originalDetails.remoteExceptionStackTrace(),
|
|
|
- originalDetails.clusterFormationDescription()
|
|
|
+ originalDetails.nodeToClusterFormationDescriptionMap()
|
|
|
);
|
|
|
}
|
|
|
case 3 -> {
|
|
|
@@ -765,7 +1078,7 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
originalDetails.recentMasters(),
|
|
|
randomAlphaOfLength(30),
|
|
|
originalDetails.remoteExceptionStackTrace(),
|
|
|
- originalDetails.clusterFormationDescription()
|
|
|
+ originalDetails.nodeToClusterFormationDescriptionMap()
|
|
|
);
|
|
|
}
|
|
|
case 4 -> {
|
|
|
@@ -774,7 +1087,7 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
originalDetails.recentMasters(),
|
|
|
originalDetails.remoteExceptionMessage(),
|
|
|
randomAlphaOfLength(100),
|
|
|
- originalDetails.clusterFormationDescription()
|
|
|
+ originalDetails.nodeToClusterFormationDescriptionMap()
|
|
|
);
|
|
|
}
|
|
|
case 5 -> {
|
|
|
@@ -783,7 +1096,7 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
originalDetails.recentMasters(),
|
|
|
originalDetails.remoteExceptionMessage(),
|
|
|
originalDetails.remoteExceptionStackTrace(),
|
|
|
- randomAlphaOfLength(100)
|
|
|
+ randomMap(0, 7, () -> new Tuple<>(randomNodeId(), randomAlphaOfLength(100)))
|
|
|
);
|
|
|
}
|
|
|
default -> throw new IllegalStateException();
|
|
|
@@ -848,7 +1161,7 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
|
|
|
List.of(node1, node2),
|
|
|
randomNullableStringOfLengthBetween(0, 30),
|
|
|
randomNullableStringOfLengthBetween(0, 30),
|
|
|
- randomAlphaOfLengthBetween(0, 30)
|
|
|
+ Map.of(randomNodeId(), randomAlphaOfLengthBetween(0, 30))
|
|
|
);
|
|
|
}
|
|
|
|