|
@@ -32,6 +32,7 @@ import org.elasticsearch.common.settings.ClusterSettings;
|
|
|
import org.elasticsearch.common.settings.Setting;
|
|
|
import org.elasticsearch.common.settings.Settings;
|
|
|
import org.elasticsearch.common.text.Text;
|
|
|
+import org.elasticsearch.common.util.CollectionUtils;
|
|
|
import org.elasticsearch.common.util.concurrent.CountDown;
|
|
|
import org.elasticsearch.common.util.concurrent.EsExecutors;
|
|
|
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
|
|
@@ -39,6 +40,8 @@ import org.elasticsearch.common.util.concurrent.FutureUtils;
|
|
|
import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor;
|
|
|
import org.elasticsearch.common.util.concurrent.ThreadContext;
|
|
|
import org.elasticsearch.core.Nullable;
|
|
|
+import org.elasticsearch.core.Releasable;
|
|
|
+import org.elasticsearch.core.Releasables;
|
|
|
import org.elasticsearch.core.SuppressForbidden;
|
|
|
import org.elasticsearch.core.TimeValue;
|
|
|
import org.elasticsearch.node.Node;
|
|
@@ -50,7 +53,9 @@ import org.elasticsearch.threadpool.Scheduler;
|
|
|
import org.elasticsearch.threadpool.ThreadPool;
|
|
|
|
|
|
import java.util.Arrays;
|
|
|
+import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
import java.util.Objects;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
import java.util.function.Consumer;
|
|
@@ -157,7 +162,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
@Override
|
|
|
protected void onTimeout(BatchedTask task, TimeValue timeout) {
|
|
|
threadPool.generic()
|
|
|
- .execute(() -> ((UpdateTask) task).onFailure(new ProcessClusterEventTimeoutException(timeout, task.source)));
|
|
|
+ .execute(() -> ((UpdateTask) task).onFailure(new ProcessClusterEventTimeoutException(timeout, task.source), () -> {}));
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -188,8 +193,9 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- public void onFailure(Exception e) {
|
|
|
+ public void onFailure(Exception e, Runnable restoreResponseHeaders) {
|
|
|
try (ThreadContext.StoredContext ignore = threadContextSupplier.get()) {
|
|
|
+ restoreResponseHeaders.run();
|
|
|
listener.onFailure(e);
|
|
|
} catch (Exception inner) {
|
|
|
inner.addSuppressed(e);
|
|
@@ -198,10 +204,21 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
}
|
|
|
|
|
|
@Nullable
|
|
|
- public ContextPreservingAckListener wrapInTaskContext(@Nullable ClusterStateAckListener clusterStateAckListener) {
|
|
|
+ public ContextPreservingAckListener wrapInTaskContext(
|
|
|
+ @Nullable ClusterStateAckListener clusterStateAckListener,
|
|
|
+ Runnable restoreResponseHeaders
|
|
|
+ ) {
|
|
|
return clusterStateAckListener == null
|
|
|
? null
|
|
|
- : new ContextPreservingAckListener(Objects.requireNonNull(clusterStateAckListener), threadContextSupplier);
|
|
|
+ : new ContextPreservingAckListener(
|
|
|
+ Objects.requireNonNull(clusterStateAckListener),
|
|
|
+ threadContextSupplier,
|
|
|
+ restoreResponseHeaders
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ ThreadContext getThreadContext() {
|
|
|
+ return threadPool.getThreadContext();
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -250,7 +267,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
|
|
|
if (previousClusterState.nodes().isLocalNodeElectedMaster() == false && executor.runOnlyOnMaster()) {
|
|
|
logger.debug("failing [{}]: local node is no longer master", summary);
|
|
|
- updateTasks.forEach(t -> t.onFailure(new NotMasterException("no longer master, failing [" + t.source() + "]")));
|
|
|
+ updateTasks.forEach(t -> t.onFailure(new NotMasterException("no longer master, failing [" + t.source() + "]"), () -> {}));
|
|
|
return;
|
|
|
}
|
|
|
|
|
@@ -258,12 +275,12 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
final var executionResults = updateTasks.stream().map(ExecutionResult::new).toList();
|
|
|
final var newClusterState = patchVersions(
|
|
|
previousClusterState,
|
|
|
- executeTasks(previousClusterState, executionResults, executor, summary)
|
|
|
+ executeTasks(previousClusterState, executionResults, executor, summary, threadPool.getThreadContext())
|
|
|
);
|
|
|
// fail all tasks that have failed
|
|
|
for (final var executionResult : executionResults) {
|
|
|
if (executionResult.failure != null) {
|
|
|
- executionResult.updateTask.onFailure(executionResult.failure);
|
|
|
+ executionResult.updateTask.onFailure(executionResult.failure, executionResult::restoreResponseHeaders);
|
|
|
}
|
|
|
}
|
|
|
final TimeValue computationTime = getTimeSince(computationStartTime);
|
|
@@ -529,7 +546,10 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
: "this only supports a single task but received " + batchExecutionContext.taskContexts();
|
|
|
final var taskContext = batchExecutionContext.taskContexts().get(0);
|
|
|
final var task = taskContext.getTask();
|
|
|
- final var newState = task.execute(batchExecutionContext.initialState());
|
|
|
+ final ClusterState newState;
|
|
|
+ try (var ignored = taskContext.captureResponseHeaders()) {
|
|
|
+ newState = task.execute(batchExecutionContext.initialState());
|
|
|
+ }
|
|
|
final Consumer<ClusterState> publishListener = publishedState -> task.clusterStateProcessed(
|
|
|
batchExecutionContext.initialState(),
|
|
|
publishedState
|
|
@@ -644,7 +664,11 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
* callbacks, and also logs and swallows any exceptions thrown. One of these is created for each task in the batch that passes a
|
|
|
* {@link ClusterStateAckListener} to {@link ClusterStateTaskExecutor.TaskContext#success}.
|
|
|
*/
|
|
|
- private record ContextPreservingAckListener(ClusterStateAckListener listener, Supplier<ThreadContext.StoredContext> context) {
|
|
|
+ private record ContextPreservingAckListener(
|
|
|
+ ClusterStateAckListener listener,
|
|
|
+ Supplier<ThreadContext.StoredContext> context,
|
|
|
+ Runnable restoreResponseHeaders
|
|
|
+ ) {
|
|
|
|
|
|
public boolean mustAck(DiscoveryNode discoveryNode) {
|
|
|
return listener.mustAck(discoveryNode);
|
|
@@ -652,6 +676,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
|
|
|
public void onAckSuccess() {
|
|
|
try (ThreadContext.StoredContext ignore = context.get()) {
|
|
|
+ restoreResponseHeaders.run();
|
|
|
listener.onAllNodesAcked();
|
|
|
} catch (Exception inner) {
|
|
|
logger.error("exception thrown by listener while notifying on all nodes acked", inner);
|
|
@@ -660,6 +685,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
|
|
|
public void onAckFailure(@Nullable Exception e) {
|
|
|
try (ThreadContext.StoredContext ignore = context.get()) {
|
|
|
+ restoreResponseHeaders.run();
|
|
|
listener.onAckFailure(e);
|
|
|
} catch (Exception inner) {
|
|
|
inner.addSuppressed(e);
|
|
@@ -669,6 +695,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
|
|
|
public void onAckTimeout() {
|
|
|
try (ThreadContext.StoredContext ignore = context.get()) {
|
|
|
+ restoreResponseHeaders.run();
|
|
|
listener.onAckTimeout();
|
|
|
} catch (Exception e) {
|
|
|
logger.error("exception thrown by listener while notifying on ack timeout", e);
|
|
@@ -807,6 +834,9 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
@Nullable // if the task is incomplete or succeeded
|
|
|
Exception failure;
|
|
|
|
|
|
+ @Nullable
|
|
|
+ Map<String, List<String>> responseHeaders;
|
|
|
+
|
|
|
ExecutionResult(Batcher.UpdateTask updateTask) {
|
|
|
this.updateTask = updateTask;
|
|
|
}
|
|
@@ -877,6 +907,40 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
this.failure = Objects.requireNonNull(failure);
|
|
|
}
|
|
|
|
|
|
+ @Override
|
|
|
+ public Releasable captureResponseHeaders() {
|
|
|
+ final var threadContext = updateTask.getThreadContext();
|
|
|
+ final var storedContext = threadContext.newStoredContext(false);
|
|
|
+ return Releasables.wrap(() -> {
|
|
|
+ final var newResponseHeaders = threadContext.getResponseHeaders();
|
|
|
+ if (newResponseHeaders.isEmpty()) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ if (responseHeaders == null) {
|
|
|
+ responseHeaders = new HashMap<>(newResponseHeaders);
|
|
|
+ } else {
|
|
|
+ for (final var newResponseHeader : newResponseHeaders.entrySet()) {
|
|
|
+ responseHeaders.compute(newResponseHeader.getKey(), (ignored, oldValue) -> {
|
|
|
+ if (oldValue == null) {
|
|
|
+ return newResponseHeader.getValue();
|
|
|
+ }
|
|
|
+ return CollectionUtils.concatLists(oldValue, newResponseHeader.getValue());
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }, storedContext);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void restoreResponseHeaders() {
|
|
|
+ if (responseHeaders != null) {
|
|
|
+ for (final var responseHeader : responseHeaders.entrySet()) {
|
|
|
+ for (final var value : responseHeader.getValue()) {
|
|
|
+ updateTask.getThreadContext().addResponseHeader(responseHeader.getKey(), value);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
void onBatchFailure(Exception failure) {
|
|
|
// if the whole batch resulted in an exception then this overrides any task-level results whether successful or not
|
|
|
this.failure = Objects.requireNonNull(failure);
|
|
@@ -890,6 +954,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
return;
|
|
|
}
|
|
|
try (ThreadContext.StoredContext ignored = updateTask.threadContextSupplier.get()) {
|
|
|
+ restoreResponseHeaders();
|
|
|
if (onPublicationSuccess == null) {
|
|
|
publishedStateConsumer.accept(newClusterState);
|
|
|
} else {
|
|
@@ -906,6 +971,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
return;
|
|
|
}
|
|
|
try (ThreadContext.StoredContext ignored = updateTask.threadContextSupplier.get()) {
|
|
|
+ restoreResponseHeaders();
|
|
|
if (onPublicationSuccess == null) {
|
|
|
publishedStateConsumer.accept(clusterState);
|
|
|
} else {
|
|
@@ -922,6 +988,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
return;
|
|
|
}
|
|
|
try (ThreadContext.StoredContext ignored = updateTask.threadContextSupplier.get()) {
|
|
|
+ restoreResponseHeaders();
|
|
|
getTask().onFailure(e);
|
|
|
} catch (Exception inner) {
|
|
|
inner.addSuppressed(e);
|
|
@@ -931,7 +998,7 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
|
|
|
ContextPreservingAckListener getContextPreservingAckListener() {
|
|
|
assert incomplete() == false;
|
|
|
- return updateTask.wrapInTaskContext(clusterStateAckListener);
|
|
|
+ return updateTask.wrapInTaskContext(clusterStateAckListener, this::restoreResponseHeaders);
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -944,9 +1011,10 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
ClusterState previousClusterState,
|
|
|
List<ExecutionResult<ClusterStateTaskListener>> executionResults,
|
|
|
ClusterStateTaskExecutor<ClusterStateTaskListener> executor,
|
|
|
- BatchSummary summary
|
|
|
+ BatchSummary summary,
|
|
|
+ ThreadContext threadContext
|
|
|
) {
|
|
|
- final var resultingState = innerExecuteTasks(previousClusterState, executionResults, executor, summary);
|
|
|
+ final var resultingState = innerExecuteTasks(previousClusterState, executionResults, executor, summary, threadContext);
|
|
|
if (previousClusterState != resultingState
|
|
|
&& previousClusterState.nodes().isLocalNodeElectedMaster()
|
|
|
&& (resultingState.nodes().isLocalNodeElectedMaster() == false)) {
|
|
@@ -972,28 +1040,49 @@ public class MasterService extends AbstractLifecycleComponent {
|
|
|
ClusterState previousClusterState,
|
|
|
List<ExecutionResult<ClusterStateTaskListener>> executionResults,
|
|
|
ClusterStateTaskExecutor<ClusterStateTaskListener> executor,
|
|
|
- BatchSummary summary
|
|
|
+ BatchSummary summary,
|
|
|
+ ThreadContext threadContext
|
|
|
) {
|
|
|
final var taskContexts = castTaskContexts(executionResults);
|
|
|
- try {
|
|
|
- return executor.execute(new ClusterStateTaskExecutor.BatchExecutionContext<>(previousClusterState, taskContexts));
|
|
|
- } catch (Exception e) {
|
|
|
- logger.trace(
|
|
|
- () -> format(
|
|
|
- "failed to execute cluster state update (on version: [%s], uuid: [%s]) for [%s]\n%s%s%s",
|
|
|
- previousClusterState.version(),
|
|
|
- previousClusterState.stateUUID(),
|
|
|
- summary,
|
|
|
- previousClusterState.nodes(),
|
|
|
- previousClusterState.routingTable(),
|
|
|
- previousClusterState.getRoutingNodes()
|
|
|
- ),
|
|
|
- e
|
|
|
- );
|
|
|
- for (final var executionResult : executionResults) {
|
|
|
- executionResult.onBatchFailure(e);
|
|
|
+ try (var ignored = threadContext.newStoredContext(false)) {
|
|
|
+ // if the executor leaks a response header then this will cause a test failure, but we also store the context here to be sure
|
|
|
+ // to avoid leaking headers in production that were missed by tests
|
|
|
+
|
|
|
+ try {
|
|
|
+ return executor.execute(
|
|
|
+ new ClusterStateTaskExecutor.BatchExecutionContext<>(
|
|
|
+ previousClusterState,
|
|
|
+ taskContexts,
|
|
|
+ () -> threadContext.newStoredContext(false)
|
|
|
+ )
|
|
|
+ );
|
|
|
+ } catch (Exception e) {
|
|
|
+ logger.trace(
|
|
|
+ () -> format(
|
|
|
+ "failed to execute cluster state update (on version: [%s], uuid: [%s]) for [%s]\n%s%s%s",
|
|
|
+ previousClusterState.version(),
|
|
|
+ previousClusterState.stateUUID(),
|
|
|
+ summary,
|
|
|
+ previousClusterState.nodes(),
|
|
|
+ previousClusterState.routingTable(),
|
|
|
+ previousClusterState.getRoutingNodes()
|
|
|
+ ),
|
|
|
+ e
|
|
|
+ );
|
|
|
+ for (final var executionResult : executionResults) {
|
|
|
+ executionResult.onBatchFailure(e);
|
|
|
+ }
|
|
|
+ return previousClusterState;
|
|
|
+ } finally {
|
|
|
+ assert threadContext.getResponseHeaders().isEmpty()
|
|
|
+ : """
|
|
|
+ Batched task executors must marshal response headers to the appropriate task context (e.g. using \
|
|
|
+ TaskContext#captureResponseHeaders) or suppress them (e.g. using BatchExecutionContext#dropHeadersContext) and \
|
|
|
+ must not leak them to the master service, but executor ["""
|
|
|
+ + executor
|
|
|
+ + "] leaked the following headers: "
|
|
|
+ + threadContext.getResponseHeaders();
|
|
|
}
|
|
|
- return previousClusterState;
|
|
|
}
|
|
|
}
|
|
|
|