浏览代码

Periodic warning for 1-node cluster w/ seed hosts (#88013)

For fully-formed single-node clusters, emit a periodic warning if seed_hosts has been set to a non-empty list.

Closes #85222
Iraklis Psaroudakis 3 年之前
父节点
当前提交
50d2cf31b8

+ 6 - 0
docs/changelog/88013.yaml

@@ -0,0 +1,6 @@
+pr: 88013
+summary: Periodic warning for 1-node cluster w/ seed hosts
+area: Cluster Coordination
+type: enhancement
+issues:
+ - 85222

+ 5 - 0
docs/reference/modules/discovery/discovery-settings.asciidoc

@@ -201,6 +201,11 @@ Sets how long the master node waits for each cluster state update to be
 completely published to all nodes, unless `discovery.type` is set to
 `single-node`. The default value is `30s`. See <<cluster-state-publishing>>.
 
+`cluster.discovery_configuration_check.interval `::
+(<<static-cluster-setting,Static>>)
+Sets the interval of some checks that will log warnings about an
+incorrect discovery configuration. The default value is `30s`.
+
 `cluster.join_validation.cache_timeout`::
 (<<static-cluster-setting,Static>>)
 When a node requests to join the cluster, the elected master node sends it a

+ 58 - 0
server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

@@ -92,6 +92,7 @@ import java.util.stream.StreamSupport;
 
 import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ID;
 import static org.elasticsearch.core.Strings.format;
+import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
 import static org.elasticsearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered;
 import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
 import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
@@ -116,6 +117,13 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
         Setting.Property.NodeScope
     );
 
+    public static final Setting<TimeValue> SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING = Setting.timeSetting(
+        "cluster.discovery_configuration_check.interval",
+        TimeValue.timeValueMillis(30000),
+        TimeValue.timeValueMillis(1),
+        Setting.Property.NodeScope
+    );
+
     public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state";
 
     private final Settings settings;
@@ -140,6 +148,9 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
     private final SeedHostsResolver configuredHostsResolver;
     private final TimeValue publishTimeout;
     private final TimeValue publishInfoTimeout;
+    private final TimeValue singleNodeClusterSeedHostsCheckInterval;
+    @Nullable
+    private Scheduler.Cancellable singleNodeClusterChecker = null;
     private final PublicationTransportHandler publicationHandler;
     private final LeaderChecker leaderChecker;
     private final FollowersChecker followersChecker;
@@ -218,6 +229,7 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
         this.joinAccumulator = new InitialJoinAccumulator();
         this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings);
         this.publishInfoTimeout = PUBLISH_INFO_TIMEOUT_SETTING.get(settings);
+        this.singleNodeClusterSeedHostsCheckInterval = SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings);
         this.random = random;
         this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool());
         this.preVoteCollector = new PreVoteCollector(
@@ -739,6 +751,38 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
         }
     }
 
+    private void cancelSingleNodeClusterChecker() {
+        assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
+        if (singleNodeClusterChecker != null) {
+            singleNodeClusterChecker.cancel();
+            singleNodeClusterChecker = null;
+        }
+    }
+
+    private void checkSingleNodeCluster() {
+        if (applierState.nodes().size() > 1) {
+            return;
+        }
+
+        if (DISCOVERY_SEED_HOSTS_SETTING.exists(settings)) {
+            if (DISCOVERY_SEED_HOSTS_SETTING.get(settings).isEmpty()) {
+                // For a single-node cluster, the only acceptable setting is an empty list.
+                return;
+            } else {
+                logger.warn(
+                    """
+                        This node is a fully-formed single-node cluster with cluster UUID [{}], but it is configured as if to \
+                        discover other nodes and form a multi-node cluster via the [{}] setting. Fully-formed clusters do not \
+                        attempt to discover other nodes, and nodes with different cluster UUIDs cannot belong to the same cluster. \
+                        The cluster UUID persists across restarts and can only be changed by deleting the contents of the node's \
+                        data path(s). Remove the discovery configuration to suppress this message.""",
+                    applierState.metadata().clusterUUID(),
+                    DISCOVERY_SEED_HOSTS_SETTING.getKey() + "=" + DISCOVERY_SEED_HOSTS_SETTING.get(settings)
+                );
+            }
+        }
+    }
+
     void becomeCandidate(String method) {
         assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
         logger.debug(
@@ -748,6 +792,7 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
             mode,
             lastKnownLeader
         );
+        cancelSingleNodeClusterChecker();
 
         if (mode != Mode.CANDIDATE) {
             final Mode prevMode = mode;
@@ -803,6 +848,13 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
 
         assert leaderChecker.leader() == null : leaderChecker.leader();
         followersChecker.updateFastResponseState(getCurrentTerm(), mode);
+
+        if (applierState.nodes().size() > 1) {
+            cancelSingleNodeClusterChecker();
+        } else if (singleNodeClusterChecker == null) {
+            singleNodeClusterChecker = transportService.getThreadPool()
+                .scheduleWithFixedDelay(() -> { checkSingleNodeCluster(); }, this.singleNodeClusterSeedHostsCheckInterval, Names.SAME);
+        }
     }
 
     void becomeFollower(String method, DiscoveryNode leaderNode) {
@@ -822,6 +874,7 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
                 lastKnownLeader
             );
         }
+        cancelSingleNodeClusterChecker();
 
         final boolean restartLeaderChecker = (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) == false;
 
@@ -1028,6 +1081,10 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
                     : coordinationState.get().getLastAcceptedConfiguration()
                         + " != "
                         + coordinationState.get().getLastCommittedConfiguration();
+
+                if (coordinationState.get().getLastAcceptedState().nodes().size() == 1) {
+                    assert singleNodeClusterChecker != null;
+                }
             } else if (mode == Mode.FOLLOWER) {
                 assert coordinationState.get().electionWon() == false : getLocalNode() + " is FOLLOWER so electionWon() should be false";
                 assert lastKnownLeader.isPresent() && (lastKnownLeader.get().equals(getLocalNode()) == false);
@@ -1045,6 +1102,7 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
                 assert currentPublication.map(Publication::isCommitted).orElse(true);
                 assert preVoteCollector.getLeader().equals(lastKnownLeader.get()) : preVoteCollector;
                 assert clusterFormationFailureHelper.isRunning() == false;
+                assert singleNodeClusterChecker == null;
             } else {
                 assert mode == Mode.CANDIDATE;
                 assert joinAccumulator instanceof JoinHelper.CandidateJoinAccumulator;

+ 1 - 0
server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

@@ -492,6 +492,7 @@ public final class ClusterSettings extends AbstractScopedSettings {
         ElectionSchedulerFactory.ELECTION_DURATION_SETTING,
         Coordinator.PUBLISH_TIMEOUT_SETTING,
         Coordinator.PUBLISH_INFO_TIMEOUT_SETTING,
+        Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING,
         JoinValidationService.JOIN_VALIDATION_CACHE_TIMEOUT_SETTING,
         FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING,
         FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING,

+ 56 - 0
server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java

@@ -77,6 +77,7 @@ import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MAS
 import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
 import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION;
 import static org.elasticsearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING;
+import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
 import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
 import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
 import static org.elasticsearch.test.NodeRoles.nonMasterNode;
@@ -2109,6 +2110,61 @@ public class CoordinatorTests extends AbstractCoordinatorTestCase {
         }
     }
 
+    @TestLogging(
+        reason = "testing warning of a single-node cluster having discovery seed hosts",
+        value = "org.elasticsearch.cluster.coordination.Coordinator:WARN"
+    )
+    public void testLogsWarningPeriodicallyIfSingleNodeClusterHasSeedHosts() throws IllegalAccessException {
+        final long warningDelayMillis;
+        final Settings settings;
+        final String fakeSeedHost = buildNewFakeTransportAddress().toString();
+        if (randomBoolean()) {
+            settings = Settings.builder().putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost).build();
+            warningDelayMillis = Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings).millis();
+        } else {
+            warningDelayMillis = randomLongBetween(1, 100000);
+            settings = Settings.builder()
+                .put(ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.getKey(), warningDelayMillis + "ms")
+                .putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost)
+                .build();
+        }
+        logger.info("--> emitting warnings every [{}ms]", warningDelayMillis);
+
+        try (Cluster cluster = new Cluster(1, true, settings)) {
+            cluster.runRandomly();
+            cluster.stabilise();
+
+            for (int i = scaledRandomIntBetween(1, 10); i >= 0; i--) {
+                final MockLogAppender mockLogAppender = new MockLogAppender();
+                try {
+                    mockLogAppender.start();
+                    Loggers.addAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
+                    mockLogAppender.addExpectation(new MockLogAppender.LoggingExpectation() {
+                        String loggedClusterUuid;
+
+                        @Override
+                        public void match(LogEvent event) {
+                            final String message = event.getMessage().getFormattedMessage();
+                            assertThat(message, startsWith("This node is a fully-formed single-node cluster with cluster UUID"));
+                            loggedClusterUuid = (String) event.getMessage().getParameters()[0];
+                        }
+
+                        @Override
+                        public void assertMatched() {
+                            final String clusterUuid = cluster.getAnyNode().getLastAppliedClusterState().metadata().clusterUUID();
+                            assertThat(loggedClusterUuid + " vs " + clusterUuid, clusterUuid, equalTo(clusterUuid));
+                        }
+                    });
+                    cluster.runFor(warningDelayMillis + DEFAULT_DELAY_VARIABILITY, "waiting for warning to be emitted");
+                    mockLogAppender.assertAllExpectationsMatched();
+                } finally {
+                    Loggers.removeAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
+                    mockLogAppender.stop();
+                }
+            }
+        }
+    }
+
     @TestLogging(
         reason = "testing LagDetector and CoordinatorPublication logging",
         value = "org.elasticsearch.cluster.coordination.LagDetector:DEBUG,"