|
@@ -13,8 +13,12 @@ import org.apache.logging.log4j.Logger;
|
|
|
import org.elasticsearch.Version;
|
|
|
import org.elasticsearch.action.ActionListener;
|
|
|
import org.elasticsearch.action.ActionListenerResponseHandler;
|
|
|
+import org.elasticsearch.action.ActionRequest;
|
|
|
+import org.elasticsearch.action.ActionResponse;
|
|
|
+import org.elasticsearch.action.ActionType;
|
|
|
import org.elasticsearch.action.StepListener;
|
|
|
import org.elasticsearch.action.admin.cluster.coordination.ClusterFormationInfoAction;
|
|
|
+import org.elasticsearch.action.admin.cluster.coordination.CoordinationDiagnosticsAction;
|
|
|
import org.elasticsearch.cluster.ClusterChangedEvent;
|
|
|
import org.elasticsearch.cluster.ClusterStateListener;
|
|
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
|
@@ -49,7 +53,9 @@ import java.util.Set;
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
import java.util.concurrent.ConcurrentMap;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
+import java.util.concurrent.atomic.AtomicReference;
|
|
|
import java.util.function.BiConsumer;
|
|
|
+import java.util.function.BiFunction;
|
|
|
import java.util.function.Consumer;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
@@ -99,6 +105,18 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
// Non-private for testing
|
|
|
volatile ConcurrentMap<DiscoveryNode, ClusterFormationStateOrException> clusterFormationResponses = null;
|
|
|
|
|
|
+ /*
|
|
|
+ * This is a reference to the task that is periodically reaching out to a master eligible node to get its CoordinationDiagnosticsResult
|
|
|
+ * for diagnosis. It is null when no polling is occurring.
|
|
|
+ * The field is accessed (reads/writes) from multiple threads, and is also reassigned on multiple threads.
|
|
|
+ */
|
|
|
+ volatile AtomicReference<Scheduler.Cancellable> remoteCoordinationDiagnosisTask = null;
|
|
|
+ /*
|
|
|
+ * This field holds the result of the task in the remoteCoordinationDiagnosisTask field above. The field is accessed
|
|
|
+ * (reads/writes) from multiple threads, but is only ever reassigned on a single thread (the cluster change event thread).
|
|
|
+ */
|
|
|
+ volatile AtomicReference<RemoteMasterHealthResult> remoteCoordinationDiagnosisResult = null;
|
|
|
+
|
|
|
/**
|
|
|
* This is the amount of time that we wait before scheduling a remote request to gather diagnostic information. It is not
|
|
|
* user-configurable, but is non-final so that integration tests don't have to waste 10 seconds.
|
|
@@ -636,6 +654,13 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
} else {
|
|
|
cancelPollingClusterFormationInfo();
|
|
|
}
|
|
|
+ if (clusterService.localNode().isMasterNode() == false) {
|
|
|
+ if (currentMaster == null) {
|
|
|
+ beginPollingRemoteMasterStabilityDiagnostic();
|
|
|
+ } else {
|
|
|
+ cancelPollingRemoteMasterStabilityDiagnostic();
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -649,7 +674,7 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
Map<DiscoveryNode, Scheduler.Cancellable> cancellables = new ConcurrentHashMap<>();
|
|
|
/*
|
|
|
* Assignment of clusterFormationInfoTasks must be done before the call to beginPollingClusterFormationInfo because it is used
|
|
|
- * asynchronously by rescheduleFetchConsumer, called from beginPollingClusterFormationInfo.
|
|
|
+ * asynchronously by rescheduleClusterFormationFetchConsumer, called from beginPollingClusterFormationInfo.
|
|
|
*/
|
|
|
clusterFormationInfoTasks = cancellables;
|
|
|
clusterFormationResponses = responses;
|
|
@@ -676,7 +701,9 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
masterEligibleNode,
|
|
|
fetchClusterFormationInfo(
|
|
|
masterEligibleNode,
|
|
|
- responseConsumer.andThen(rescheduleFetchConsumer(masterEligibleNode, responseConsumer, cancellables))
|
|
|
+ responseConsumer.andThen(
|
|
|
+ rescheduleClusterFormationFetchConsumer(masterEligibleNode, responseConsumer, cancellables)
|
|
|
+ )
|
|
|
)
|
|
|
);
|
|
|
} catch (EsRejectedExecutionException e) {
|
|
@@ -690,14 +717,14 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * This wraps the responseConsumer in a Consumer that will run rescheduleFetchConsumer() after responseConsumer has
|
|
|
- * completed, adding the resulting Cancellable to cancellableConsumer.
|
|
|
+ * This wraps the responseConsumer in a Consumer that will run rescheduleClusterFormationFetchConsumer() after responseConsumer has
|
|
|
+ * completed, adding the resulting Cancellable to cancellables.
|
|
|
* @param masterEligibleNode The node being polled
|
|
|
* @param responseConsumer The response consumer to be wrapped
|
|
|
* @param cancellables The Map of Cancellables, one for each node being polled
|
|
|
* @return
|
|
|
*/
|
|
|
- private Consumer<CoordinationDiagnosticsService.ClusterFormationStateOrException> rescheduleFetchConsumer(
|
|
|
+ private Consumer<CoordinationDiagnosticsService.ClusterFormationStateOrException> rescheduleClusterFormationFetchConsumer(
|
|
|
DiscoveryNode masterEligibleNode,
|
|
|
Consumer<CoordinationDiagnosticsService.ClusterFormationStateOrException> responseConsumer,
|
|
|
Map<DiscoveryNode, Scheduler.Cancellable> cancellables
|
|
@@ -718,15 +745,18 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
if (cancellables.equals(clusterFormationInfoTasks)) {
|
|
|
/*
|
|
|
* As mentioned in the comment in cancelPollingClusterFormationInfo(), there is a slim possibility here that we will
|
|
|
- * add a task here for a poll that has already been cancelled. But when it completes and runs rescheduleFetchConsumer()
|
|
|
- * we will then see that clusterFormationInfoTasks does not equal cancellables, so it will not be run again.
|
|
|
+ * add a task here for a poll that has already been cancelled. But when it completes and runs
|
|
|
+ * rescheduleClusterFormationFetchConsumer() we will then see that clusterFormationInfoTasks does not equal
|
|
|
+ * cancellables, so it will not be run again.
|
|
|
*/
|
|
|
try {
|
|
|
cancellables.put(
|
|
|
masterEligibleNode,
|
|
|
fetchClusterFormationInfo(
|
|
|
masterEligibleNode,
|
|
|
- responseConsumer.andThen(rescheduleFetchConsumer(masterEligibleNode, responseConsumer, cancellables))
|
|
|
+ responseConsumer.andThen(
|
|
|
+ rescheduleClusterFormationFetchConsumer(masterEligibleNode, responseConsumer, cancellables)
|
|
|
+ )
|
|
|
)
|
|
|
);
|
|
|
} catch (EsRejectedExecutionException e) {
|
|
@@ -771,58 +801,237 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
private Scheduler.Cancellable fetchClusterFormationInfo(
|
|
|
DiscoveryNode node,
|
|
|
Consumer<ClusterFormationStateOrException> responseConsumer
|
|
|
+ ) {
|
|
|
+ return sendTransportRequest(
|
|
|
+ node,
|
|
|
+ responseConsumer,
|
|
|
+ ClusterFormationInfoAction.INSTANCE,
|
|
|
+ new ClusterFormationInfoAction.Request(),
|
|
|
+ (response, e) -> {
|
|
|
+ assert response != null || e != null : "a response or an exception must be provided";
|
|
|
+ if (response != null) {
|
|
|
+ return new ClusterFormationStateOrException(response.getClusterFormationState());
|
|
|
+ } else {
|
|
|
+ return new ClusterFormationStateOrException(e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ void beginPollingRemoteMasterStabilityDiagnostic() {
|
|
|
+ assert ThreadPool.assertCurrentThreadPool(ClusterApplierService.CLUSTER_UPDATE_THREAD_NAME);
|
|
|
+ AtomicReference<Scheduler.Cancellable> cancellableReference = new AtomicReference<>();
|
|
|
+ AtomicReference<RemoteMasterHealthResult> resultReference = new AtomicReference<>();
|
|
|
+ remoteCoordinationDiagnosisTask = cancellableReference;
|
|
|
+ remoteCoordinationDiagnosisResult = resultReference;
|
|
|
+ beginPollingRemoteMasterStabilityDiagnostic(resultReference::set, cancellableReference);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method returns quickly, but in the background schedules to query a remote master node's cluster diagnostics in 10 seconds, and
|
|
|
+ * repeats doing that until cancelPollingRemoteMasterStabilityDiagnostic() is called. This method
|
|
|
+ * exists (rather than being just part of the beginPollingRemoteMasterStabilityDiagnostic() above) in order to facilitate
|
|
|
+ * unit testing.
|
|
|
+ * @param responseConsumer A consumer for any results produced for a node by this method
|
|
|
+ * @param cancellableReference The Cancellable reference to assign the current Cancellable for this polling attempt
|
|
|
+ */
|
|
|
+ // Non-private for testing
|
|
|
+ void beginPollingRemoteMasterStabilityDiagnostic(
|
|
|
+ Consumer<RemoteMasterHealthResult> responseConsumer,
|
|
|
+ AtomicReference<Scheduler.Cancellable> cancellableReference
|
|
|
+ ) {
|
|
|
+ DiscoveryNode masterEligibleNode = getMasterEligibleNodes().stream().findAny().orElse(null);
|
|
|
+ try {
|
|
|
+ cancellableReference.set(
|
|
|
+ fetchCoordinationDiagnostics(
|
|
|
+ masterEligibleNode,
|
|
|
+ responseConsumer.andThen(rescheduleDiagnosticsFetchConsumer(responseConsumer, cancellableReference))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ } catch (EsRejectedExecutionException e) {
|
|
|
+ if (e.isExecutorShutdown()) {
|
|
|
+ logger.trace("Not rescheduling request for cluster coordination info because this node is being shutdown", e);
|
|
|
+ } else {
|
|
|
+ throw e;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This wraps the responseConsumer in a Consumer that will run rescheduleDiagnosticsFetchConsumer() after responseConsumer has
|
|
|
+ * completed, adding the resulting Cancellable to cancellableReference.
|
|
|
+ * @param responseConsumer The response consumer to be wrapped
|
|
|
+ * @param cancellableReference The Cancellable reference to assign the current Cancellable for this polling attempt
|
|
|
+ * @return A wrapped Consumer that will run fetchCoordinationDiagnostics()
|
|
|
+ */
|
|
|
+ private Consumer<RemoteMasterHealthResult> rescheduleDiagnosticsFetchConsumer(
|
|
|
+ Consumer<RemoteMasterHealthResult> responseConsumer,
|
|
|
+ AtomicReference<Scheduler.Cancellable> cancellableReference
|
|
|
+ ) {
|
|
|
+ return response -> {
|
|
|
+ /*
|
|
|
+ * If the cancellableReference for this poll attempt is equal to remoteCoordinationDiagnosisTask, then that means that
|
|
|
+ * this poll attempt is the current one. If they are not equal, that means that
|
|
|
+ * cancelPollingRemoteMasterStabilityDiagnostic() has been called on this poll attempt but this thread is not yet
|
|
|
+ * aware. So we cancel the Cancellable in cancellableReference if it is not null. Note that
|
|
|
+ * remoteCoordinationDiagnosisTask can be null.
|
|
|
+ */
|
|
|
+ if (cancellableReference.equals(remoteCoordinationDiagnosisTask)) {
|
|
|
+ /*
|
|
|
+ * Because this is not synchronized with the cancelPollingRemoteMasterStabilityDiagnostic() method, there is a
|
|
|
+ * slim chance that we will add a task here for a poll that has already been cancelled. But when it completes and runs
|
|
|
+ * rescheduleDiagnosticsFetchConsumer() we will then see that remoteCoordinationDiagnosisTask does not equal
|
|
|
+ * cancellableReference, so it will not be run again.
|
|
|
+ */
|
|
|
+ try {
|
|
|
+ DiscoveryNode masterEligibleNode = getMasterEligibleNodes().stream().findAny().orElse(null);
|
|
|
+ cancellableReference.set(
|
|
|
+ fetchCoordinationDiagnostics(
|
|
|
+ masterEligibleNode,
|
|
|
+ responseConsumer.andThen(rescheduleDiagnosticsFetchConsumer(responseConsumer, cancellableReference))
|
|
|
+ )
|
|
|
+ );
|
|
|
+ } catch (EsRejectedExecutionException e) {
|
|
|
+ if (e.isExecutorShutdown()) {
|
|
|
+ logger.trace("Not rescheduling request for cluster coordination info because this node is being shutdown", e);
|
|
|
+ } else {
|
|
|
+ throw e;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ Scheduler.Cancellable cancellable = cancellableReference.get();
|
|
|
+ if (cancellable != null) {
|
|
|
+ cancellable.cancel();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method returns quickly, but in the background schedules to query the remote masterEligibleNode's cluster diagnostics in 10
|
|
|
+ * seconds unless cancel() is called on the Cancellable that this method returns.
|
|
|
+ * @param masterEligibleNode The masterEligibleNode to poll for cluster diagnostics. This masterEligibleNode can be null in the case
|
|
|
+ * when there are not yet any master-eligible nodes known to this masterEligibleNode's PeerFinder.
|
|
|
+ * @param responseConsumer The consumer of the cluster diagnostics for the masterEligibleNode, or the exception encountered while
|
|
|
+ * contacting it
|
|
|
+ * @return A Cancellable for the task that is scheduled to fetch cluster diagnostics
|
|
|
+ */
|
|
|
+ private Scheduler.Cancellable fetchCoordinationDiagnostics(
|
|
|
+ @Nullable DiscoveryNode masterEligibleNode,
|
|
|
+ Consumer<RemoteMasterHealthResult> responseConsumer
|
|
|
+ ) {
|
|
|
+ return sendTransportRequest(
|
|
|
+ masterEligibleNode,
|
|
|
+ responseConsumer,
|
|
|
+ CoordinationDiagnosticsAction.INSTANCE,
|
|
|
+ new CoordinationDiagnosticsAction.Request(true),
|
|
|
+ (response, e) -> {
|
|
|
+ assert response != null || e != null : "a response or an exception must be provided";
|
|
|
+ if (response != null) {
|
|
|
+ return new RemoteMasterHealthResult(masterEligibleNode, response.getCoordinationDiagnosticsResult(), null);
|
|
|
+ } else {
|
|
|
+ return new RemoteMasterHealthResult(masterEligibleNode, null, e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method connects to masterEligibleNode and sends it a transport request for a response of type R. The response or exception
|
|
|
+ * are transformed into a common type T with responseToResultFunction or exceptionToResultFunction, and then consumed by
|
|
|
+ * responseConsumer. This method is meant to be used when there is potentially no elected master node, so it first calls
|
|
|
+ * connectToNode before sending the request.
|
|
|
+ * @param masterEligibleNode The master eligible node to be queried, or null if we do not yet know of a master eligible node.
|
|
|
+ * If this is null, the responseConsumer will be given a null response
|
|
|
+ * @param responseConsumer The consumer of the transformed response
|
|
|
+ * @param transportActionType The ActionType for the transport action
|
|
|
+ * @param transportActionRequest The ActionRequest to be sent
|
|
|
+ * @param responseTransformationFunction A function that converts a response or exception to the response type expected by the
|
|
|
+ * responseConsumer
|
|
|
+ * @return A Cancellable for the task that is scheduled to fetch the remote information
|
|
|
+ */
|
|
|
+ private <R extends ActionResponse, T> Scheduler.Cancellable sendTransportRequest(
|
|
|
+ @Nullable DiscoveryNode masterEligibleNode,
|
|
|
+ Consumer<T> responseConsumer,
|
|
|
+ ActionType<R> transportActionType,
|
|
|
+ ActionRequest transportActionRequest,
|
|
|
+ BiFunction<R, Exception, T> responseTransformationFunction
|
|
|
) {
|
|
|
StepListener<Releasable> connectionListener = new StepListener<>();
|
|
|
- StepListener<ClusterFormationInfoAction.Response> fetchClusterInfoListener = new StepListener<>();
|
|
|
+ StepListener<R> fetchRemoteResultListener = new StepListener<>();
|
|
|
long startTime = System.nanoTime();
|
|
|
connectionListener.whenComplete(releasable -> {
|
|
|
- logger.trace("Opened connection to {}, making cluster coordination info request", node);
|
|
|
- // If we don't get a response in 10 seconds that is a failure worth capturing on its own:
|
|
|
- final TimeValue transportTimeout = TimeValue.timeValueSeconds(10);
|
|
|
- transportService.sendRequest(
|
|
|
- node,
|
|
|
- ClusterFormationInfoAction.NAME,
|
|
|
- new ClusterFormationInfoAction.Request(),
|
|
|
- TransportRequestOptions.timeout(transportTimeout),
|
|
|
- new ActionListenerResponseHandler<>(
|
|
|
- ActionListener.runBefore(fetchClusterInfoListener, () -> Releasables.close(releasable)),
|
|
|
- ClusterFormationInfoAction.Response::new
|
|
|
- )
|
|
|
- );
|
|
|
+ if (masterEligibleNode == null) {
|
|
|
+ responseConsumer.accept(null);
|
|
|
+ } else {
|
|
|
+ logger.trace("Opened connection to {}, making transport request", masterEligibleNode);
|
|
|
+ // If we don't get a response in 10 seconds that is a failure worth capturing on its own:
|
|
|
+ final TimeValue transportTimeout = TimeValue.timeValueSeconds(10);
|
|
|
+ transportService.sendRequest(
|
|
|
+ masterEligibleNode,
|
|
|
+ transportActionType.name(),
|
|
|
+ transportActionRequest,
|
|
|
+ TransportRequestOptions.timeout(transportTimeout),
|
|
|
+ new ActionListenerResponseHandler<>(
|
|
|
+ ActionListener.runBefore(fetchRemoteResultListener, () -> Releasables.close(releasable)),
|
|
|
+ transportActionType.getResponseReader()
|
|
|
+ )
|
|
|
+ );
|
|
|
+ }
|
|
|
}, e -> {
|
|
|
- logger.warn("Exception connecting to master node", e);
|
|
|
- responseConsumer.accept(new ClusterFormationStateOrException(e));
|
|
|
+ logger.warn("Exception connecting to master masterEligibleNode", e);
|
|
|
+ responseConsumer.accept(responseTransformationFunction.apply(null, e));
|
|
|
});
|
|
|
|
|
|
- fetchClusterInfoListener.whenComplete(response -> {
|
|
|
+ fetchRemoteResultListener.whenComplete(response -> {
|
|
|
long endTime = System.nanoTime();
|
|
|
- logger.trace("Received cluster coordination info from {} in {}", node, TimeValue.timeValueNanos(endTime - startTime));
|
|
|
- responseConsumer.accept(new ClusterFormationStateOrException(response.getClusterFormationState()));
|
|
|
+ logger.trace("Received remote response from {} in {}", masterEligibleNode, TimeValue.timeValueNanos(endTime - startTime));
|
|
|
+ responseConsumer.accept(responseTransformationFunction.apply(response, null));
|
|
|
}, e -> {
|
|
|
- logger.warn("Exception in cluster coordination info request to master node", e);
|
|
|
- responseConsumer.accept(new ClusterFormationStateOrException(e));
|
|
|
+ logger.warn("Exception in remote request to master masterEligibleNode", e);
|
|
|
+ responseConsumer.accept(responseTransformationFunction.apply(null, e));
|
|
|
});
|
|
|
|
|
|
return transportService.getThreadPool().schedule(() -> {
|
|
|
- Version minSupportedVersion = Version.V_8_4_0;
|
|
|
- if (node.getVersion().onOrAfter(minSupportedVersion) == false) { // This was introduced in 8.4.0
|
|
|
- logger.trace(
|
|
|
- "Cannot get cluster coordination info for {} because it is at version {} and {} is required",
|
|
|
- node,
|
|
|
- node.getVersion(),
|
|
|
- minSupportedVersion
|
|
|
- );
|
|
|
+ if (masterEligibleNode == null) {
|
|
|
+ /*
|
|
|
+ * This node's PeerFinder hasn't yet discovered the master-eligible nodes. By notifying the responseConsumer with a null
|
|
|
+ * value we effectively do nothing, and allow this request to be recheduled.
|
|
|
+ */
|
|
|
+ responseConsumer.accept(null);
|
|
|
} else {
|
|
|
- transportService.connectToNode(
|
|
|
- // Note: This connection must be explicitly closed in the connectionListener
|
|
|
- node,
|
|
|
- ConnectionProfile.buildDefaultConnectionProfile(clusterService.getSettings()),
|
|
|
- connectionListener
|
|
|
- );
|
|
|
+ Version minSupportedVersion = Version.V_8_4_0;
|
|
|
+ if (masterEligibleNode.getVersion().onOrAfter(minSupportedVersion) == false) {
|
|
|
+ logger.trace(
|
|
|
+ "Cannot get remote result from {} because it is at version {} and {} is required",
|
|
|
+ masterEligibleNode,
|
|
|
+ masterEligibleNode.getVersion(),
|
|
|
+ minSupportedVersion
|
|
|
+ );
|
|
|
+ } else {
|
|
|
+ transportService.connectToNode(
|
|
|
+ // Note: This connection must be explicitly closed in the connectionListener
|
|
|
+ masterEligibleNode,
|
|
|
+ ConnectionProfile.buildDefaultConnectionProfile(clusterService.getSettings()),
|
|
|
+ connectionListener
|
|
|
+ );
|
|
|
+ }
|
|
|
}
|
|
|
}, remoteRequestInitialDelay, ThreadPool.Names.SAME);
|
|
|
}
|
|
|
|
|
|
+ void cancelPollingRemoteMasterStabilityDiagnostic() {
|
|
|
+ assert ThreadPool.assertCurrentThreadPool(ClusterApplierService.CLUSTER_UPDATE_THREAD_NAME);
|
|
|
+ if (remoteCoordinationDiagnosisTask != null) {
|
|
|
+ Scheduler.Cancellable task = remoteCoordinationDiagnosisTask.get();
|
|
|
+ if (task != null) {
|
|
|
+ task.cancel();
|
|
|
+ }
|
|
|
+ remoteCoordinationDiagnosisResult = null;
|
|
|
+ remoteCoordinationDiagnosisTask = null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
// Non-private for testing
|
|
|
record ClusterFormationStateOrException(
|
|
|
ClusterFormationFailureHelper.ClusterFormationState clusterFormationState,
|
|
@@ -975,4 +1184,7 @@ public class CoordinationDiagnosticsService implements ClusterStateListener {
|
|
|
}
|
|
|
|
|
|
}
|
|
|
+
|
|
|
+ // Non-private for testing:
|
|
|
+ record RemoteMasterHealthResult(DiscoveryNode node, CoordinationDiagnosticsResult result, Exception remoteException) {}
|
|
|
}
|