Browse Source

Polling cluster formation state for master-is-stable health indicator (#88397)

This change polls all known master-eligible nodes for cluster formation information in the event
that the existing master node goes null. This is so that the information is available to the health
API when it is needed.
Keith Massey 3 năm trước cách đây
mục cha
commit
3c5bf7ee44

+ 5 - 0
docs/changelog/88397.yaml

@@ -0,0 +1,5 @@
+pr: 88397
+summary: Polling cluster formation state for master-is-stable health indicator
+area: Health
+type: enhancement
+issues: []

+ 196 - 0
server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationDiagnosticsService.java

@@ -10,16 +10,29 @@ package org.elasticsearch.cluster.coordination;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.elasticsearch.Version;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.ActionListenerResponseHandler;
+import org.elasticsearch.action.StepListener;
+import org.elasticsearch.action.admin.cluster.coordination.ClusterFormationInfoAction;
 import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.ClusterStateListener;
 import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.service.ClusterApplierService;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
 import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.threadpool.Scheduler;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.ConnectionProfile;
+import org.elasticsearch.transport.TransportRequestOptions;
+import org.elasticsearch.transport.TransportService;
 
 import java.io.IOException;
 import java.io.PrintWriter;
@@ -30,7 +43,12 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.TimeUnit;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
 import java.util.stream.Collectors;
 
 /**
@@ -47,6 +65,7 @@ import java.util.stream.Collectors;
  */
 public class CoordinationDiagnosticsService implements ClusterStateListener {
     private final ClusterService clusterService;
+    private final TransportService transportService;
     private final Coordinator coordinator;
     private final MasterHistoryService masterHistoryService;
     /**
@@ -63,6 +82,19 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
      */
     private final int unacceptableIdentityChanges;
 
+    /*
+     * This is a list of tasks that are periodically reaching out to other master eligible nodes to get their ClusterFormationStates for
+     * diagnosis.
+     * The field is accessed (reads/writes) from multiple threads, but the reference itself is only ever changed on the cluster change
+     * event thread.
+     */
+    private volatile List<Scheduler.Cancellable> clusterFormationInfoTasks = null;
+    /*
+     * This field holds the results of the tasks in the clusterFormationInfoTasks field above. The field is accessed (reads/writes) from
+     * multiple threads, but the reference itself is only ever changed on the cluster change event thread.
+     */
+    private volatile ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> clusterFormationResponses = null;
+
     private static final Logger logger = LogManager.getLogger(CoordinationDiagnosticsService.class);
 
     /**
@@ -98,10 +130,12 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
 
     public CoordinationDiagnosticsService(
         ClusterService clusterService,
+        TransportService transportService,
         Coordinator coordinator,
         MasterHistoryService masterHistoryService
     ) {
         this.clusterService = clusterService;
+        this.transportService = transportService;
         this.coordinator = coordinator;
         this.masterHistoryService = masterHistoryService;
         this.nodeHasMasterLookupTimeframe = NODE_HAS_MASTER_LOOKUP_TIMEFRAME_SETTING.get(clusterService.getSettings());
@@ -410,6 +444,168 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
                 }
             }
         }
+        if (currentMaster == null && clusterService.localNode().isMasterNode()) {
+            /*
+             * This begins polling all master-eligible nodes for cluster formation information. However there's a 10-second delay before it
+             * starts, so in the normal situation where during a master transition it flips from master1 -> null -> master2, the
+             * polling tasks will be canceled before any requests are actually made.
+             */
+            beginPollingClusterFormationInfo();
+        } else {
+            cancelPollingClusterFormationInfo();
+        }
+    }
+
+    /**
+     * This method begins polling all known master-eligible nodes for cluster formation information. After a 10-second initial delay, it
+     * polls each node every 10 seconds until cancelPollingClusterFormationInfo() is called.
+     */
+    private void beginPollingClusterFormationInfo() {
+        assert ThreadPool.assertCurrentThreadPool(ClusterApplierService.CLUSTER_UPDATE_THREAD_NAME);
+        // Stop any polling that is already in progress before starting a new round.
+        cancelPollingClusterFormationInfo();
+        ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> resultsByNode = new ConcurrentHashMap<>();
+        List<Scheduler.Cancellable> scheduledTasks = new CopyOnWriteArrayList<>();
+        Collection<DiscoveryNode> nodesToPoll = getMasterEligibleNodes();
+        beginPollingClusterFormationInfo(
+            nodesToPoll,
+            (polledNode, result) -> resultsByNode.put(polledNode, result),
+            task -> scheduledTasks.add(task)
+        );
+        // The fields are assigned only after the initial fetches have been scheduled.
+        clusterFormationResponses = resultsByNode;
+        clusterFormationInfoTasks = scheduledTasks;
+    }
+
+    /**
+     * This method returns quickly, but in the background schedules to query the remote node's cluster formation state in 10 seconds, and
+     * repeats doing that until cancel() is called on all of the Cancellable that this method inserts into cancellables. This method
+     * exists (rather than being just part of the beginPollingClusterFormationInfo() above) in order to facilitate unit testing.
+     * @param masterEligibleNodes The nodes to poll for cluster formation information
+     * @param nodeResponseConsumer A consumer for any results produced for a node by this method
+     * @param cancellableConsumer A consumer for any Cancellable tasks produced by this method
+     */
+    // Non-private for testing
+    void beginPollingClusterFormationInfo(
+        Collection<DiscoveryNode> masterEligibleNodes,
+        BiConsumer<DiscoveryNode, ClusterFormationStateOrException> nodeResponseConsumer,
+        Consumer<Scheduler.Cancellable> cancellableConsumer
+    ) {
+        for (DiscoveryNode masterEligibleNode : masterEligibleNodes) {
+            // Bind the node, so that the rest of the chain only has to deal with the result itself.
+            Consumer<ClusterFormationStateOrException> recordResult = result -> nodeResponseConsumer.accept(masterEligibleNode, result);
+            // Once a result has been recorded, the next fetch for the same node gets scheduled.
+            Consumer<ClusterFormationStateOrException> recordThenReschedule = recordResult.andThen(
+                rescheduleFetchConsumer(masterEligibleNode, recordResult, cancellableConsumer)
+            );
+            Scheduler.Cancellable firstFetch = fetchClusterFormationInfo(masterEligibleNode, recordThenReschedule);
+            cancellableConsumer.accept(firstFetch);
+        }
+    }
+
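+    /**
+     * This returns a Consumer that, when invoked, schedules another fetchClusterFormationInfo() call for the given node and passes the
+     * resulting Cancellable to cancellableConsumer. The newly scheduled fetch reports its result to responseConsumer and is then
+     * followed by a Consumer built in the same way, so the fetch is repeated after each result.
+     * @param masterEligibleNode The node being polled
+     * @param responseConsumer The consumer of each result fetched for the node
+     * @param cancellableConsumer A consumer for the Cancellable of each newly scheduled fetch
+     * @return A Consumer that ignores its argument and schedules the next fetch for masterEligibleNode
+     */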
+    private Consumer<CoordinationDiagnosticsService.ClusterFormationStateOrException> rescheduleFetchConsumer(
+        DiscoveryNode masterEligibleNode,
+        Consumer<CoordinationDiagnosticsService.ClusterFormationStateOrException> responseConsumer,
+        Consumer<Scheduler.Cancellable> cancellableConsumer
+    ) {
+        // The lambda's argument is never read: being invoked is the only signal it needs.
+        return previousResult -> {
+            // Build the chain for the next round: consume the result, then schedule yet another fetch.
+            Consumer<ClusterFormationStateOrException> consumeThenReschedule = responseConsumer.andThen(
+                rescheduleFetchConsumer(masterEligibleNode, responseConsumer, cancellableConsumer)
+            );
+            Scheduler.Cancellable nextFetch = fetchClusterFormationInfo(masterEligibleNode, consumeThenReschedule);
+            cancellableConsumer.accept(nextFetch);
+        };
+    }
+
+    private void cancelPollingClusterFormationInfo() {
+        assert ThreadPool.assertCurrentThreadPool(ClusterApplierService.CLUSTER_UPDATE_THREAD_NAME);
+        if (clusterFormationResponses != null) {
+            clusterFormationInfoTasks.forEach(Scheduler.Cancellable::cancel);
+            clusterFormationResponses = null;
+            clusterFormationInfoTasks = null;
+        }
+    }
+
+    /**
+     * This method returns quickly, but in the background schedules to query the remote node's cluster formation state in 10 seconds
+     * unless cancel() is called on the Cancellable that this method returns.
+     * @param node The node to poll for cluster formation information
+     * @param responseConsumer The consumer of the cluster formation info for the node, or the exception encountered while contacting it
+     * @return A Cancellable for the task that is scheduled to fetch cluster formation information
+     */
+    private Scheduler.Cancellable fetchClusterFormationInfo(
+        DiscoveryNode node,
+        Consumer<ClusterFormationStateOrException> responseConsumer
+    ) {
+        return transportService.getThreadPool().schedule(() -> {
+            Version minSupportedVersion = Version.V_8_4_0;
+            if (node.getVersion().onOrAfter(minSupportedVersion) == false) { // This was introduced in 8.4.0
+                // Note: responseConsumer is not invoked on this path, so no result is recorded for this node
+                logger.trace(
+                    "Cannot get cluster coordination info for {} because it is at version {} and {} is required",
+                    node,
+                    node.getVersion(),
+                    minSupportedVersion
+                );
+                return;
+            }
+            StepListener<Releasable> connectionListener = new StepListener<>();
+            StepListener<ClusterFormationInfoAction.Response> fetchClusterInfoListener = new StepListener<>();
+            // Start the clock when the task actually runs, so that the duration logged below excludes the 10-second scheduling delay
+            long startTime = System.nanoTime();
+            connectionListener.whenComplete(releasable -> {
+                logger.trace("Opened connection to {}, making cluster coordination info request", node);
+                // If we don't get a response in 10 seconds that is a failure worth capturing on its own:
+                final TimeValue transportTimeout = TimeValue.timeValueSeconds(10);
+                transportService.sendRequest(
+                    node,
+                    ClusterFormationInfoAction.NAME,
+                    new ClusterFormationInfoAction.Request(),
+                    TransportRequestOptions.timeout(transportTimeout),
+                    new ActionListenerResponseHandler<>(
+                        // The connection is released before the response (or failure) is passed on to fetchClusterInfoListener
+                        ActionListener.runBefore(fetchClusterInfoListener, () -> Releasables.close(releasable)),
+                        ClusterFormationInfoAction.Response::new
+                    )
+                );
+            }, e -> {
+                // Several nodes are polled, so say which one could not be reached
+                logger.warn("Exception connecting to master node " + node, e);
+                responseConsumer.accept(new ClusterFormationStateOrException(e));
+            });
+
+            fetchClusterInfoListener.whenComplete(response -> {
+                long endTime = System.nanoTime();
+                logger.trace("Received cluster coordination info from {} in {}", node, TimeValue.timeValueNanos(endTime - startTime));
+                responseConsumer.accept(new ClusterFormationStateOrException(response.getClusterFormationState()));
+            }, e -> {
+                logger.warn("Exception in cluster coordination info request to master node " + node, e);
+                responseConsumer.accept(new ClusterFormationStateOrException(e));
+            });
+
+            transportService.connectToNode(
+                // Note: This connection must be explicitly closed in the connectionListener
+                node,
+                ConnectionProfile.buildDefaultConnectionProfile(clusterService.getSettings()),
+                connectionListener
+            );
+        }, TimeValue.timeValueSeconds(10), ThreadPool.Names.SAME);
+    }
+
+    // Non-private for testing
+    record ClusterFormationStateOrException(
+        ClusterFormationFailureHelper.ClusterFormationState clusterFormationState,
+        Exception exception
+    ) {
+        ClusterFormationStateOrException {
+            if (clusterFormationState != null && exception != null) {
+                throw new IllegalArgumentException("Cluster formation state and exception cannot both be non-null");
+            }
+        }
+
+        ClusterFormationStateOrException(ClusterFormationFailureHelper.ClusterFormationState clusterFormationState) {
+            this(clusterFormationState, null);
+        }
+
+        ClusterFormationStateOrException(Exception exception) {
+            this(null, exception);
+        }
     }
 
     public record CoordinationDiagnosticsResult(

+ 1 - 0
server/src/main/java/org/elasticsearch/node/Node.java

@@ -917,6 +917,7 @@ public class Node implements Closeable {
             MasterHistoryService masterHistoryService = new MasterHistoryService(transportService, threadPool, clusterService);
             CoordinationDiagnosticsService coordinationDiagnosticsService = new CoordinationDiagnosticsService(
                 clusterService,
+                transportService,
                 discoveryModule.getCoordinator(),
                 masterHistoryService
             );

+ 119 - 1
server/src/test/java/org/elasticsearch/cluster/coordination/CoordinationDiagnosticsServiceTests.java

@@ -21,7 +21,9 @@ import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.monitor.StatusInfo;
 import org.elasticsearch.test.EqualsHashCodeTestUtils;
+import org.elasticsearch.threadpool.Scheduler;
 import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportService;
 import org.junit.Before;
 
 import java.io.IOException;
@@ -30,7 +32,12 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicInteger;
 
+import static org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.EXTREME_DELAY_VARIABILITY;
+import static org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService.ClusterFormationStateOrException;
 import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.emptyOrNullString;
@@ -586,6 +593,116 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
         }
     }
 
+    /**
+     * Checks that beginPollingClusterFormationInfo() records a cluster formation state for the master-eligible nodes while the cluster
+     * is healthy, and records exceptions for the affected nodes after one master-eligible node has been disconnected.
+     */
+    public void testBeginPollingClusterFormationInfo() {
+        /*
+         * This test sets up a 4-node cluster (3 master eligible). We call beginPollingClusterFormationInfo() on each node. This is allowed
+         * to run for a bit, and then we assert that we have cluster formation information from each master eligible node. Then we
+         * disconnect a random master eligible node, allow the polling to continue to run (we never cancelled it), and assert that we
+         * have the expected exceptions in the polling results.
+         */
+        try (Cluster cluster = new Cluster(3, true, Settings.EMPTY)) {
+            createAndAddNonMasterNode(cluster);
+            cluster.runRandomly();
+            cluster.stabilise();
+            List<DiscoveryNode> masterNodes = cluster.clusterNodes.stream()
+                .map(Cluster.ClusterNode::getLocalNode)
+                .filter(DiscoveryNode::isMasterNode)
+                .toList();
+            cluster.clusterNodes.stream().filter(node -> node.getLocalNode().isMasterNode()).forEach(node -> {
+                ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> nodeToClusterFormationStateMap = new ConcurrentHashMap<>();
+                // The Cancellables are discarded here, so the polling started for this node is never cancelled by the test
+                node.coordinationDiagnosticsService.beginPollingClusterFormationInfo(
+                    masterNodes,
+                    nodeToClusterFormationStateMap::put,
+                    cancellable -> {}
+                );
+
+                cluster.runRandomly(false, true, EXTREME_DELAY_VARIABILITY);
+                cluster.stabilise();
+
+                /*
+                 * The cluster has now run normally for some period of time, so check that the outputs of
+                 * beginPollingClusterFormationInfo() are present with no exceptions:
+                 */
+                assertThat(nodeToClusterFormationStateMap.size(), equalTo(masterNodes.size()));
+                // Only the entries for the other master-eligible nodes are inspected in detail; the local node's entry is skipped
+                masterNodes.stream().filter(masterNode -> node.getLocalNode().equals(masterNode) == false).forEach(masterNode -> {
+                    ClusterFormationStateOrException clusterFormationStateOrException = nodeToClusterFormationStateMap.get(masterNode);
+                    assertNotNull(clusterFormationStateOrException);
+                    assertNotNull(clusterFormationStateOrException.clusterFormationState());
+                    assertNull(clusterFormationStateOrException.exception());
+                    ClusterFormationFailureHelper.ClusterFormationState clusterFormationState = clusterFormationStateOrException
+                        .clusterFormationState();
+                    assertThat(clusterFormationState.getDescription(), not(emptyOrNullString()));
+                });
+
+                /*
+                 * Now we disconnect a random node, simulate running the cluster for a little while, and make sure that the results of
+                 * beginPollingClusterFormationInfo() contain the expected exceptions.
+                 */
+                // NOTE(review): findAny() is not a random choice; presumably any master-eligible node is acceptable here -- confirm
+                Cluster.ClusterNode nodeToDisconnect = cluster.clusterNodes.stream()
+                    .filter(clusterNode -> clusterNode.getLocalNode().isMasterNode())
+                    .findAny()
+                    .get();
+                nodeToDisconnect.disconnect();
+                cluster.stabilise();
+                assertThat(nodeToClusterFormationStateMap.size(), equalTo(masterNodes.size()));
+                // Counts how many of the other master-eligible nodes now have an exception recorded instead of a state
+                AtomicInteger exceptions = new AtomicInteger();
+                masterNodes.stream().filter(masterNode -> node.getLocalNode().equals(masterNode) == false).forEach(masterNode -> {
+                    ClusterFormationStateOrException clusterFormationStateOrException = nodeToClusterFormationStateMap.get(masterNode);
+                    assertNotNull(clusterFormationStateOrException);
+                    if (clusterFormationStateOrException.clusterFormationState() != null) {
+                        assertNull(clusterFormationStateOrException.exception());
+                        ClusterFormationFailureHelper.ClusterFormationState clusterFormationState = clusterFormationStateOrException
+                            .clusterFormationState();
+                        assertThat(clusterFormationState.getDescription(), not(emptyOrNullString()));
+                    } else {
+                        assertNotNull(clusterFormationStateOrException.exception());
+                        exceptions.getAndIncrement();
+                    }
+                });
+                if (node.equals(nodeToDisconnect)) {
+                    // If this was the disconnected node, it will have encountered exceptions contacting all nodes except itself:
+                    assertThat(exceptions.get(), equalTo(masterNodes.size() - 1));
+                } else {
+                    // Other nodes will only have encountered an exception contacting the disconnected node:
+                    assertThat(exceptions.get(), equalTo(1));
+                }
+                // Reconnect the node so that the next iteration starts from a fully connected cluster
+                nodeToDisconnect.heal();
+            });
+        }
+    }
+
+    /**
+     * Checks that no results are recorded when every Cancellable produced by beginPollingClusterFormationInfo() is cancelled
+     * immediately after polling has been started.
+     */
+    public void testBeginPollingClusterFormationInfoCancel() {
+        /*
+         * This test sets up a 4-node cluster (3 master eligible). We call beginPollingClusterFormationInfo() on each node. We then
+         * cancel all tasks. This simulates what will happen most often in practice -- polling is triggered when the master node goes
+         * null, and then polling is cancelled immediately when a new master node is elected, well within the 10 second initial delay. We
+         * then simulate the cluster running for a little while, and assert that there are no results from
+         * beginPollingClusterFormationInfo().
+         */
+        try (Cluster cluster = new Cluster(3, true, Settings.EMPTY)) {
+            createAndAddNonMasterNode(cluster);
+            cluster.runRandomly();
+            cluster.stabilise();
+            List<DiscoveryNode> masterNodes = cluster.clusterNodes.stream()
+                .map(Cluster.ClusterNode::getLocalNode)
+                .filter(DiscoveryNode::isMasterNode)
+                .toList();
+            cluster.clusterNodes.stream().filter(node -> node.getLocalNode().isMasterNode()).forEach(node -> {
+                ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> nodeToClusterFormationStateMap = new ConcurrentHashMap<>();
+                // Unlike in the test above, the Cancellables are collected so that they can be cancelled below
+                List<Scheduler.Cancellable> cancellables = new ArrayList<>();
+                node.coordinationDiagnosticsService.beginPollingClusterFormationInfo(
+                    masterNodes,
+                    nodeToClusterFormationStateMap::put,
+                    cancellables::add
+                );
+                cancellables.forEach(Scheduler.Cancellable::cancel); // This is what will most often happen in practice
+                cluster.runRandomly(false, true, EXTREME_DELAY_VARIABILITY);
+                cluster.stabilise();
+                assertThat(nodeToClusterFormationStateMap.size(), equalTo(0));  // Everything was cancelled
+            });
+        }
+    }
+
     public void testResultSerialization() {
         CoordinationDiagnosticsService.CoordinationDiagnosticsStatus status = getRandomStatus();
         CoordinationDiagnosticsService.CoordinationDiagnosticsDetails details = getRandomDetails();
@@ -790,7 +907,8 @@ public class CoordinationDiagnosticsServiceTests extends AbstractCoordinatorTest
         when(localNode.isMasterNode()).thenReturn(false);
         Coordinator coordinator = mock(Coordinator.class);
         when(coordinator.getFoundPeers()).thenReturn(Collections.emptyList());
-        return new CoordinationDiagnosticsService(clusterService, coordinator, masterHistoryService);
+        TransportService transportService = mock(TransportService.class);
+        return new CoordinationDiagnosticsService(clusterService, transportService, coordinator, masterHistoryService);
     }
 
     private void createAndAddNonMasterNode(Cluster cluster) {

+ 3 - 1
server/src/test/java/org/elasticsearch/cluster/coordination/StableMasterHealthIndicatorServiceTests.java

@@ -26,6 +26,7 @@ import org.elasticsearch.health.HealthIndicatorResult;
 import org.elasticsearch.health.HealthStatus;
 import org.elasticsearch.health.UserAction;
 import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportService;
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
@@ -284,8 +285,9 @@ public class StableMasterHealthIndicatorServiceTests extends AbstractCoordinator
         when(localNode.isMasterNode()).thenReturn(false);
         Coordinator coordinator = mock(Coordinator.class);
         when(coordinator.getFoundPeers()).thenReturn(Collections.emptyList());
+        TransportService transportService = mock(TransportService.class);
         return new StableMasterHealthIndicatorService(
-            new CoordinationDiagnosticsService(clusterService, coordinator, masterHistoryService)
+            new CoordinationDiagnosticsService(clusterService, transportService, coordinator, masterHistoryService)
         );
     }
 

+ 22 - 14
test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java

@@ -15,6 +15,7 @@ import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.admin.cluster.coordination.ClusterFormationInfoAction;
 import org.elasticsearch.action.admin.cluster.coordination.MasterHistoryAction;
 import org.elasticsearch.action.admin.cluster.node.hotthreads.NodesHotThreadsAction;
 import org.elasticsearch.action.admin.cluster.node.hotthreads.TransportNodesHotThreadsAction;
@@ -1232,19 +1233,6 @@ public class AbstractCoordinatorTestCase extends ESTestCase {
                 );
                 final AllocationService allocationService = ESAllocationTestCase.createAllocationService(Settings.EMPTY);
                 final NodeClient client = new NodeClient(Settings.EMPTY, threadPool);
-                client.initialize(
-                    Map.of(
-                        NodesHotThreadsAction.INSTANCE,
-                        new TransportNodesHotThreadsAction(threadPool, clusterService, transportService, new ActionFilters(emptySet())),
-                        MasterHistoryAction.INSTANCE,
-                        new MasterHistoryAction.TransportAction(transportService, new ActionFilters(Set.of()), masterHistoryService)
-                    ),
-                    transportService.getTaskManager(),
-                    localNode::getId,
-                    transportService.getLocalNodeConnection(),
-                    null,
-                    getNamedWriteableRegistry()
-                );
                 coordinator = new Coordinator(
                     "test_node",
                     settings,
@@ -1263,7 +1251,27 @@ public class AbstractCoordinatorTestCase extends ESTestCase {
                     getElectionStrategy(),
                     nodeHealthService
                 );
-                coordinationDiagnosticsService = new CoordinationDiagnosticsService(clusterService, coordinator, masterHistoryService);
+                client.initialize(
+                    Map.of(
+                        NodesHotThreadsAction.INSTANCE,
+                        new TransportNodesHotThreadsAction(threadPool, clusterService, transportService, new ActionFilters(emptySet())),
+                        MasterHistoryAction.INSTANCE,
+                        new MasterHistoryAction.TransportAction(transportService, new ActionFilters(Set.of()), masterHistoryService),
+                        ClusterFormationInfoAction.INSTANCE,
+                        new ClusterFormationInfoAction.TransportAction(transportService, new ActionFilters(Set.of()), coordinator)
+                    ),
+                    transportService.getTaskManager(),
+                    localNode::getId,
+                    transportService.getLocalNodeConnection(),
+                    null,
+                    getNamedWriteableRegistry()
+                );
+                coordinationDiagnosticsService = new CoordinationDiagnosticsService(
+                    clusterService,
+                    transportService,
+                    coordinator,
+                    masterHistoryService
+                );
                 stableMasterHealthIndicatorService = new StableMasterHealthIndicatorService(coordinationDiagnosticsService);
                 masterService.setClusterStatePublisher(coordinator);
                 final GatewayService gatewayService = new GatewayService(