Browse Source

Avoid hoarding cluster state references during rollover (#124107)

By keeping a list of all the rollover results in a rollover request
batch, we were keeping references to all the intermediate cluster states
that we built. We've seen this list take up ~1.4GB with 600 rollover
requests in one batch.

We only kept the list of results to compute the "reason" for the
allocation reroute, so we can easily drop the cluster state reference
from the list and only keep what we need.

Fixes #123893
Niels Bauman 7 tháng trước cách đây
mục cha
commit
ff6465b83b

+ 6 - 0
docs/changelog/124107.yaml

@@ -0,0 +1,6 @@
+pr: 124107
+summary: Avoid hoarding cluster state references during rollover
+area: Indices APIs
+type: bug
+issues:
+ - 123893

+ 4 - 12
server/src/main/java/org/elasticsearch/action/admin/indices/rollover/LazyRolloverAction.java

@@ -36,7 +36,6 @@ import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.collect.Iterators;
 import org.elasticsearch.injection.guice.Inject;
 import org.elasticsearch.tasks.CancellableTask;
 import org.elasticsearch.tasks.Task;
@@ -189,7 +188,7 @@ public final class LazyRolloverAction extends ActionType<RolloverResponse> {
         @Override
         public ClusterState execute(BatchExecutionContext<LazyRolloverTask> batchExecutionContext) {
             final var listener = new AllocationActionMultiListener<RolloverResponse>(threadPool.getThreadContext());
-            final var results = new ArrayList<MetadataRolloverService.RolloverResult>(batchExecutionContext.taskContexts().size());
+            final var results = new ArrayList<String>(batchExecutionContext.taskContexts().size());
             var state = batchExecutionContext.initialState();
             Map<ProjectId, Map<RolloverRequest, List<TaskContext<LazyRolloverTask>>>> groupedRequests = new HashMap<>();
             for (final var taskContext : batchExecutionContext.taskContexts()) {
@@ -214,14 +213,7 @@ public final class LazyRolloverAction extends ActionType<RolloverResponse> {
 
             if (state != batchExecutionContext.initialState()) {
                 var reason = new StringBuilder();
-                Strings.collectionToDelimitedStringWithLimit(
-                    (Iterable<String>) () -> Iterators.map(results.iterator(), t -> t.sourceIndexName() + "->" + t.rolloverIndexName()),
-                    ",",
-                    "lazy bulk rollover [",
-                    "]",
-                    1024,
-                    reason
-                );
+                Strings.collectionToDelimitedStringWithLimit(results, ",", "lazy bulk rollover [", "]", 1024, reason);
                 try (var ignored = batchExecutionContext.dropHeadersContext()) {
                     state = allocationService.reroute(state, reason.toString(), listener.reroute());
                 }
@@ -234,7 +226,7 @@ public final class LazyRolloverAction extends ActionType<RolloverResponse> {
         public ClusterState executeTask(
             ProjectState currentState,
             RolloverRequest rolloverRequest,
-            List<MetadataRolloverService.RolloverResult> results,
+            ArrayList<String> results,
             List<TaskContext<LazyRolloverTask>> rolloverTaskContexts,
             AllocationActionMultiListener<RolloverResponse> allocationActionMultiListener
         ) throws Exception {
@@ -271,7 +263,7 @@ public final class LazyRolloverAction extends ActionType<RolloverResponse> {
                 null,
                 isFailureStoreRollover
             );
-            results.add(rolloverResult);
+            results.add(rolloverResult.sourceIndexName() + "->" + rolloverResult.rolloverIndexName());
             logger.trace("lazy rollover result [{}]", rolloverResult);
 
             final var rolloverIndexName = rolloverResult.rolloverIndexName();

+ 4 - 12
server/src/main/java/org/elasticsearch/action/admin/indices/rollover/TransportRolloverAction.java

@@ -49,7 +49,6 @@ import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.collect.Iterators;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.core.Nullable;
@@ -494,7 +493,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
         @Override
         public ClusterState execute(BatchExecutionContext<RolloverTask> batchExecutionContext) {
             final var listener = new AllocationActionMultiListener<RolloverResponse>(threadPool.getThreadContext());
-            final var results = new ArrayList<MetadataRolloverService.RolloverResult>(batchExecutionContext.taskContexts().size());
+            final var results = new ArrayList<String>(batchExecutionContext.taskContexts().size());
             var state = batchExecutionContext.initialState();
             for (final var taskContext : batchExecutionContext.taskContexts()) {
                 try (var ignored = taskContext.captureResponseHeaders()) {
@@ -506,14 +505,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
 
             if (state != batchExecutionContext.initialState()) {
                 var reason = new StringBuilder();
-                Strings.collectionToDelimitedStringWithLimit(
-                    (Iterable<String>) () -> Iterators.map(results.iterator(), t -> t.sourceIndexName() + "->" + t.rolloverIndexName()),
-                    ",",
-                    "bulk rollover [",
-                    "]",
-                    1024,
-                    reason
-                );
+                Strings.collectionToDelimitedStringWithLimit(results, ",", "bulk rollover [", "]", 1024, reason);
                 try (var ignored = batchExecutionContext.dropHeadersContext()) {
                     state = allocationService.reroute(state, reason.toString(), listener.reroute());
                 }
@@ -525,7 +517,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
 
         public ClusterState executeTask(
             ClusterState currentState,
-            List<MetadataRolloverService.RolloverResult> results,
+            ArrayList<String> results,
             TaskContext<RolloverTask> rolloverTaskContext,
             AllocationActionMultiListener<RolloverResponse> allocationActionMultiListener
         ) throws Exception {
@@ -596,7 +588,7 @@ public class TransportRolloverAction extends TransportMasterNodeAction<RolloverR
                     rolloverTask.autoShardingResult(),
                     targetFailureStore
                 );
-                results.add(rolloverResult);
+                results.add(rolloverResult.sourceIndexName() + "->" + rolloverResult.rolloverIndexName());
                 logger.trace("rollover result [{}]", rolloverResult);
 
                 final var rolloverIndexName = rolloverResult.rolloverIndexName();