|
|
@@ -35,6 +35,7 @@ import org.elasticsearch.cluster.NotMasterException;
|
|
|
import org.elasticsearch.cluster.block.ClusterBlockException;
|
|
|
import org.elasticsearch.cluster.health.ClusterHealthStatus;
|
|
|
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
|
|
|
+import org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException;
|
|
|
import org.elasticsearch.cluster.routing.UnassignedInfo;
|
|
|
import org.elasticsearch.cluster.routing.allocation.AllocationService;
|
|
|
import org.elasticsearch.cluster.service.ClusterService;
|
|
|
@@ -96,7 +97,7 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
waitForEventsAndExecuteHealth(request, listener, waitCount, threadPool.relativeTimeInMillis() + request.timeout().millis());
|
|
|
} else {
|
|
|
executeHealth(request, clusterService.state(), listener, waitCount,
|
|
|
- clusterState -> listener.onResponse(getResponse(request, clusterState, waitCount, false)));
|
|
|
+ clusterState -> listener.onResponse(getResponse(request, clusterState, waitCount, TimeoutState.OK)));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -129,6 +130,7 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
}
|
|
|
});
|
|
|
} else {
|
|
|
+ final TimeValue taskTimeout = TimeValue.timeValueMillis(Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis()));
|
|
|
clusterService.submitStateUpdateTask("cluster_health (wait_for_events [" + request.waitForEvents() + "])",
|
|
|
new ClusterStateUpdateTask(request.waitForEvents()) {
|
|
|
@Override
|
|
|
@@ -136,6 +138,11 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
return currentState;
|
|
|
}
|
|
|
|
|
|
+ @Override
|
|
|
+ public TimeValue timeout() {
|
|
|
+ return taskTimeout;
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
|
|
|
final long timeoutInMillis = Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis());
|
|
|
@@ -161,8 +168,12 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
|
|
|
@Override
|
|
|
public void onFailure(String source, Exception e) {
|
|
|
- logger.error(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
|
|
|
- listener.onFailure(e);
|
|
|
+ if (e instanceof ProcessClusterEventTimeoutException) {
|
|
|
+ listener.onResponse(getResponse(request, clusterService.state(), waitCount, TimeoutState.TIMED_OUT));
|
|
|
+ } else {
|
|
|
+ logger.error(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
|
|
|
+ listener.onFailure(e);
|
|
|
+ }
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
@@ -175,13 +186,13 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
final Consumer<ClusterState> onNewClusterStateAfterDelay) {
|
|
|
|
|
|
if (request.timeout().millis() == 0) {
|
|
|
- listener.onResponse(getResponse(request, currentState, waitCount, true));
|
|
|
+ listener.onResponse(getResponse(request, currentState, waitCount, TimeoutState.ZERO_TIMEOUT));
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
final Predicate<ClusterState> validationPredicate = newState -> validateRequest(request, newState, waitCount);
|
|
|
if (validationPredicate.test(currentState)) {
|
|
|
- listener.onResponse(getResponse(request, currentState, waitCount, false));
|
|
|
+ listener.onResponse(getResponse(request, currentState, waitCount, TimeoutState.OK));
|
|
|
} else {
|
|
|
final ClusterStateObserver observer
|
|
|
= new ClusterStateObserver(currentState, clusterService, null, logger, threadPool.getThreadContext());
|
|
|
@@ -198,7 +209,7 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
|
|
|
@Override
|
|
|
public void onTimeout(TimeValue timeout) {
|
|
|
- listener.onResponse(getResponse(request, observer.setAndGetObservedState(), waitCount, true));
|
|
|
+ listener.onResponse(getResponse(request, observer.setAndGetObservedState(), waitCount, TimeoutState.TIMED_OUT));
|
|
|
}
|
|
|
};
|
|
|
observer.waitForNextChange(stateListener, validationPredicate, request.timeout());
|
|
|
@@ -234,19 +245,23 @@ public class TransportClusterHealthAction extends TransportMasterNodeReadAction<
|
|
|
return prepareResponse(request, response, clusterState, indexNameExpressionResolver) == waitCount;
|
|
|
}
|
|
|
|
|
|
+ private enum TimeoutState {
|
|
|
+ OK,
|
|
|
+ TIMED_OUT,
|
|
|
+ ZERO_TIMEOUT
|
|
|
+ }
|
|
|
+
|
|
|
private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState,
|
|
|
- final int waitFor, boolean timedOut) {
|
|
|
+ final int waitFor, TimeoutState timeoutState) {
|
|
|
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.getMasterService().numberOfPendingTasks(),
|
|
|
allocationService.getNumberOfInFlightFetches(), clusterService.getMasterService().getMaxTaskWaitTime());
|
|
|
int readyCounter = prepareResponse(request, response, clusterState, indexNameExpressionResolver);
|
|
|
boolean valid = (readyCounter == waitFor);
|
|
|
- assert valid || timedOut;
|
|
|
- // we check for a timeout here since this method might be called from the wait_for_events
|
|
|
- // response handler which might have timed out already.
|
|
|
- // if the state is sufficient for what we where waiting for we don't need to mark this as timedOut.
|
|
|
- // We spend too much time in waiting for events such that we might already reached a valid state.
|
|
|
- // this should not mark the request as timed out
|
|
|
- response.setTimedOut(timedOut && valid == false);
|
|
|
+ assert valid || (timeoutState != TimeoutState.OK);
|
|
|
+ // If valid && timeoutState == TimeoutState.ZERO_TIMEOUT then we immediately found **and processed** a valid state, so we don't
|
|
|
+ // consider this a timeout. However if timeoutState == TimeoutState.TIMED_OUT then we didn't process a valid state (perhaps we
|
|
|
+ // failed on wait_for_events) so this does count as a timeout.
|
|
|
+ response.setTimedOut(valid == false || timeoutState == TimeoutState.TIMED_OUT);
|
|
|
return response;
|
|
|
}
|
|
|
|