
Ensuring that the ShrinkAction does not hang if total shards per node is too low (#76732)

We added configuration to AllocateAction that sets the total shards per node property on the index. A user could therefore set this to a value lower than the total number of shards in the index that is about to be shrunk, in which case the shards could never all be moved to a single node and the ShrinkAction would hang. This commit unsets the total shards per node property in the ShrinkAction so that we fall back to the default value (-1, unlimited) and avoid this.
Relates to #44070
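
For context, the hang can only happen when an index's per-node shard limit is lower than its shard count. A minimal sketch of such a configuration, assuming nothing beyond the Settings API and the ShardsLimitAllocationDecider setting key (the values and class name are illustrative, not part of this commit):

import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
import org.elasticsearch.common.settings.Settings;

public class TotalShardsPerNodeSketch {
    public static void main(String[] args) {
        // Hypothetical example: 4 primary shards but at most 2 shards per node.
        // No single node can hold all 4 shards, so the shrink step that tries to
        // collocate them on one node would wait forever without this fix.
        Settings problematic = Settings.builder()
            .put("index.number_of_shards", 4)
            .put("index.number_of_replicas", 0)
            .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), 2)
            .build();
        System.out.println(problematic);
    }
}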
Keith Massey, 4 years ago
parent
commit
0aab3c01c9

+ 5 - 0
docs/reference/ilm/actions/ilm-shrink.asciidoc

@@ -17,6 +17,11 @@ stream. You cannot perform the `shrink` action on a write index.
 To use the `shrink` action in the `hot` phase, the `rollover` action *must* be
 present. If no rollover action is configured, {ilm-init} will reject the policy.
 
+The shrink action will unset the index's `index.routing.allocation.total_shards_per_node`
+setting, meaning that there will be no limit. This is to ensure that all shards of the
+index can be copied to a single node. This setting change will persist on the index
+even after the step completes.
+
 [IMPORTANT]
 If the shrink action is used on a <<ccr-put-follow,follower index>>, policy
 execution waits until the leader index rolls over (or is <<skipping-rollover,
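
The paragraph added above notes that the setting stays unset even after the step completes, so operators who still want a per-node limit can re-apply it to the shrunken index once ILM is done. A hedged sketch using the low-level REST client; the host, port, index name, and limit are placeholders, not something this commit prescribes:

import org.apache.http.HttpHost;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RestClient;

public class RestoreTotalShardsPerNode {
    public static void main(String[] args) throws Exception {
        // Hypothetical follow-up: put a per-node shard limit back on the shrunken
        // index, since the shrink action leaves the setting unset.
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            Request request = new Request("PUT", "/shrink-my-index/_settings");
            request.setJsonEntity("{\"index.routing.allocation.total_shards_per_node\": 2}");
            client.performRequest(request);
        }
    }
}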

+ 3 - 1
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java

@@ -24,6 +24,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider;
 import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider;
 import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider;
+import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
 import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.settings.ClusterSettings;
 import org.elasticsearch.common.settings.Settings;
@@ -95,7 +96,8 @@ public class SetSingleNodeAllocateStep extends AsyncActionStep {
 
             if (nodeId.isPresent()) {
                 Settings settings = Settings.builder()
-                        .put(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id", nodeId.get()).build();
+                        .put(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id", nodeId.get())
+                        .putNull(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey()).build();
                 UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(indexName)
                         .masterNodeTimeout(TimeValue.MAX_VALUE)
                         .settings(settings);
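
The settings object the step now submits carries two entries: the _id routing requirement and an explicit null for the per-node shard limit. The nulled key still counts as an entry, which is why the test below moves from expecting one setting to two. A standalone sketch of what the step builds (the node id and class name are placeholders):

import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
import org.elasticsearch.common.settings.Settings;

public class SingleNodeAllocateSettingsSketch {
    public static void main(String[] args) {
        // Mirror of the update built in SetSingleNodeAllocateStep after this change:
        // pin the index to one node and clear the per-node shard limit in one request.
        Settings settings = Settings.builder()
            .put(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id", "node-id-placeholder")
            .putNull(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey())
            .build();
        System.out.println(settings.size()); // prints 2: the explicit null is still a key
    }
}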

+ 1 - 1
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java

@@ -85,7 +85,7 @@ public class SetSingleNodeAllocateStepTests extends AbstractStepTestCase<SetSing
         assertArrayEquals(expectedIndices, request.indices());
         assertThat(request.settings().get(settingsKey), anyOf(acceptableValues.stream().map(e -> equalTo(e)).collect(Collectors.toList())));
         if (assertOnlyKeyInSettings) {
-            assertEquals(1, request.settings().size());
+            assertEquals(2, request.settings().size());
         }
     }
 

+ 30 - 0
x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/ShrinkActionIT.java

@@ -12,6 +12,7 @@ import org.apache.http.entity.StringEntity;
 import org.elasticsearch.client.Request;
 import org.elasticsearch.client.ResponseException;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
@@ -302,4 +303,33 @@ public class ShrinkActionIT extends ESRestTestCase {
         });
         expectThrows(ResponseException.class, () -> indexDocument(client(), index));
     }
+
+    /*
+     * This test verifies that we can still shrink an index even if the total number of shards in the index is greater than
+     * index.routing.allocation.total_shards_per_node.
+     */
+    public void testTotalShardsPerNodeTooLow() throws Exception {
+        int numShards = 4;
+        int divisor = randomFrom(2, 4);
+        int expectedFinalShards = numShards / divisor;
+        createIndexWithSettings(client(), index, alias, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, numShards)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
+            .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), numShards - 2));
+        createNewSingletonPolicy(client(), policy, "warm", new ShrinkAction(expectedFinalShards, null));
+        updatePolicy(client(), index, policy);
+
+        String shrunkenIndexName = waitAndGetShrinkIndexName(client(), index);
+        assertBusy(() -> assertTrue(indexExists(shrunkenIndexName)), 60, TimeUnit.SECONDS);
+        assertBusy(() -> assertTrue(aliasExists(shrunkenIndexName, index)));
+        assertBusy(() -> assertThat(getStepKeyForIndex(client(), shrunkenIndexName),
+            equalTo(PhaseCompleteStep.finalStep("warm").getKey())));
+        assertBusy(() -> {
+            Map<String, Object> settings = getOnlyIndexSettings(client(), shrunkenIndexName);
+            assertThat(settings.get(SETTING_NUMBER_OF_SHARDS), equalTo(String.valueOf(expectedFinalShards)));
+            assertThat(settings.get(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey()), equalTo("true"));
+            assertThat(settings.get(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id"), nullValue());
+            assertNull(settings.get(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey()));
+        });
+        expectThrows(ResponseException.class, () -> indexDocument(client(), index));
+    }
 }