Quellcode durchsuchen

Atomic remove of archived setting & readonly block (#64113)

Today if you upgrade a cluster containing invalid cluster settings and a
read-only block then you are stuck: you cannot remove the now-archived
settings because of the read-only block, but you cannot set any settings
to remove the read-only block because of the archived settings.

This commit adds functionality to permit removing both the read-only
block and the archived settings in a single step.

Closes #62920
Howard vor 4 Jahren
Ursprung
Commit
65300d6494

+ 143 - 0
server/src/internalClusterTest/java/org/elasticsearch/cluster/settings/ClusterSettingsIT.java

@@ -24,6 +24,8 @@ import org.apache.logging.log4j.LogManager;
 import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequestBuilder;
 import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.block.ClusterBlockException;
 import org.elasticsearch.cluster.metadata.Metadata;
 import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
 import org.elasticsearch.common.settings.Setting;
@@ -287,6 +289,147 @@ public class ClusterSettingsIT extends ESIntegTestCase {
         assertThat(clusterService().getClusterSettings().get(INITIAL_RECOVERIES), equalTo(42));
     }
 
+    public void testRemoveArchiveSettingsWithBlocks() throws Exception {
+        testRemoveArchiveSettingsWithBlocks(true, false);
+        testRemoveArchiveSettingsWithBlocks(false, true);
+        testRemoveArchiveSettingsWithBlocks(true, true);
+    }
+
+    private void testRemoveArchiveSettingsWithBlocks(boolean readOnly, boolean readOnlyAllowDelete) throws Exception {
+        Settings.Builder settingsBuilder = Settings.builder();
+        if (readOnly) {
+            settingsBuilder.put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), "true");
+        }
+        if (readOnlyAllowDelete) {
+            settingsBuilder.put(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), "true");
+        }
+        assertAcked(client().admin().cluster().prepareUpdateSettings()
+            .setPersistentSettings(settingsBuilder).setTransientSettings(settingsBuilder).get());
+
+        ClusterState state = client().admin().cluster().prepareState().get().getState();
+        if (readOnly) {
+            assertTrue(Metadata.SETTING_READ_ONLY_SETTING.get(state.getMetadata().transientSettings()));
+            assertTrue(Metadata.SETTING_READ_ONLY_SETTING.get(state.getMetadata().persistentSettings()));
+        }
+        if (readOnlyAllowDelete) {
+            assertTrue(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.get(state.getMetadata().transientSettings()));
+            assertTrue(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.get(state.getMetadata().persistentSettings()));
+        }
+
+        // create archived setting
+        final Metadata metadata = state.getMetadata();
+        final Metadata brokenMeta = Metadata.builder(metadata).persistentSettings(Settings.builder()
+            .put(metadata.persistentSettings()).put("this.is.unknown", true).build()).build();
+        restartNodesOnBrokenClusterState(ClusterState.builder(state).metadata(brokenMeta));
+        ensureGreen(); // wait for state recovery
+        state = client().admin().cluster().prepareState().get().getState();
+        assertTrue(state.getMetadata().persistentSettings().getAsBoolean("archived.this.is.unknown", false));
+
+        // cannot remove read only block due to archived settings
+        final IllegalArgumentException e1 =
+            expectThrows(
+                IllegalArgumentException.class,
+                () -> {
+                    Settings.Builder builder = Settings.builder();
+                    clearOrSetFalse(builder, readOnly, Metadata.SETTING_READ_ONLY_SETTING);
+                    clearOrSetFalse(builder, readOnlyAllowDelete, Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING);
+                    assertAcked(client().admin().cluster().prepareUpdateSettings()
+                        .setPersistentSettings(builder).setTransientSettings(builder).get());
+                });
+        assertTrue(e1.getMessage().contains("unknown setting [archived.this.is.unknown]"));
+
+        // fail to clear archived settings with non-archived settings
+        final ClusterBlockException e2 =
+            expectThrows(
+                ClusterBlockException.class,
+                () -> assertAcked(client().admin().cluster().prepareUpdateSettings()
+                        .setPersistentSettings(Settings.builder().putNull("cluster.routing.allocation.enable"))
+                        .setTransientSettings(Settings.builder().putNull("archived.*")).get()));
+        if (readOnly) {
+            assertTrue(e2.getMessage().contains("cluster read-only (api)"));
+        }
+        if (readOnlyAllowDelete) {
+            assertTrue(e2.getMessage().contains("cluster read-only / allow delete (api)"));
+        }
+
+        // fail to clear archived settings due to cluster read only block
+        final ClusterBlockException e3 =
+            expectThrows(
+                ClusterBlockException.class,
+                () -> assertAcked(client().admin().cluster().prepareUpdateSettings()
+                    .setPersistentSettings(Settings.builder().putNull("archived.*")).get()));
+        if (readOnly) {
+            assertTrue(e3.getMessage().contains("cluster read-only (api)"));
+        }
+        if (readOnlyAllowDelete) {
+            assertTrue(e3.getMessage().contains("cluster read-only / allow delete (api)"));
+        }
+
+        // fail to clear archived settings with adding cluster block
+        final ClusterBlockException e4 =
+            expectThrows(
+                ClusterBlockException.class,
+                () -> {
+                    Settings.Builder builder = Settings.builder().putNull("archived.*");
+                    if (randomBoolean()) {
+                        builder.put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), "true");
+                        builder.put(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), "true");
+                    } else if (randomBoolean()) {
+                        builder.put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), "true");
+                    } else {
+                        builder.put(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), "true");
+                    }
+                    assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(builder).get());
+                });
+        if (readOnly) {
+            assertTrue(e4.getMessage().contains("cluster read-only (api)"));
+        }
+        if (readOnlyAllowDelete) {
+            assertTrue(e4.getMessage().contains("cluster read-only / allow delete (api)"));
+        }
+
+        // fail to set archived settings to non-null value even with clearing blocks together
+        final ClusterBlockException e5 =
+            expectThrows(
+                ClusterBlockException.class,
+                () -> {
+                    Settings.Builder builder = Settings.builder().put("archived.this.is.unknown", "false");
+                    clearOrSetFalse(builder, readOnly, Metadata.SETTING_READ_ONLY_SETTING);
+                    clearOrSetFalse(builder, readOnlyAllowDelete, Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING);
+                    assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(builder).get());
+                });
+        if (readOnly) {
+            assertTrue(e5.getMessage().contains("cluster read-only (api)"));
+        }
+        if (readOnlyAllowDelete) {
+            assertTrue(e5.getMessage().contains("cluster read-only / allow delete (api)"));
+        }
+
+        // we can clear read-only block with archived settings together
+        Settings.Builder builder = Settings.builder().putNull("archived.*");
+        clearOrSetFalse(builder, readOnly, Metadata.SETTING_READ_ONLY_SETTING);
+        clearOrSetFalse(builder, readOnlyAllowDelete, Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING);
+        assertAcked(client().admin().cluster().prepareUpdateSettings()
+            .setPersistentSettings(builder).setTransientSettings(builder).get());
+
+        state = client().admin().cluster().prepareState().get().getState();
+        assertFalse(Metadata.SETTING_READ_ONLY_SETTING.get(state.getMetadata().transientSettings()));
+        assertFalse(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.get(state.getMetadata().transientSettings()));
+        assertFalse(Metadata.SETTING_READ_ONLY_SETTING.get(state.getMetadata().persistentSettings()));
+        assertFalse(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.get(state.getMetadata().persistentSettings()));
+        assertNull(state.getMetadata().persistentSettings().get("archived.this.is.unknown"));
+    }
+
+    private static void clearOrSetFalse(Settings.Builder settings, boolean applySetting, Setting<Boolean> setting) {
+        if (applySetting) {
+            if (randomBoolean()) {
+                settings.put(setting.getKey(), "false");
+            } else {
+                settings.putNull(setting.getKey());
+            }
+        }
+    }
+
     public void testClusterUpdateSettingsWithBlocks() {
         String key1 = "cluster.routing.allocation.enable";
         Settings transientSettings = Settings.builder().put(key1, EnableAllocationDecider.Allocation.NONE.name()).build();

+ 0 - 17
server/src/internalClusterTest/java/org/elasticsearch/gateway/GatewayIndexStateIT.java

@@ -557,21 +557,4 @@ public class GatewayIndexStateIT extends ESIntegTestCase {
             }
         });
     }
-
-    private void restartNodesOnBrokenClusterState(ClusterState.Builder clusterStateBuilder) throws Exception {
-        Map<String, PersistedClusterStateService> lucenePersistedStateFactories = Stream.of(internalCluster().getNodeNames())
-            .collect(Collectors.toMap(Function.identity(),
-                nodeName -> internalCluster().getInstance(PersistedClusterStateService.class, nodeName)));
-        final ClusterState clusterState = clusterStateBuilder.build();
-        internalCluster().fullRestart(new RestartCallback(){
-            @Override
-            public Settings onNodeStopped(String nodeName) throws Exception {
-                final PersistedClusterStateService lucenePersistedStateFactory = lucenePersistedStateFactories.get(nodeName);
-                try (PersistedClusterStateService.Writer writer = lucenePersistedStateFactory.createWriter()) {
-                    writer.writeFullStateAndCommit(clusterState.term(), clusterState);
-                }
-                return super.onNodeStopped(nodeName);
-            }
-        });
-    }
 }

+ 54 - 8
server/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java

@@ -39,10 +39,16 @@ import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.ClusterSettings;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.elasticsearch.common.settings.AbstractScopedSettings.ARCHIVED_SETTINGS_PREFIX;
+
 public class TransportClusterUpdateSettingsAction extends
     TransportMasterNodeAction<ClusterUpdateSettingsRequest, ClusterUpdateSettingsResponse> {
 
@@ -62,22 +68,62 @@ public class TransportClusterUpdateSettingsAction extends
         this.clusterSettings = clusterSettings;
     }
 
+    /**
+     skip check block if:
+     * Only at least one of cluster.blocks.read_only or cluster.blocks.read_only_allow_delete is being cleared (set to null or false).
+     * Or all of the following are true:
+     * 1. At least one of cluster.blocks.read_only or cluster.blocks.read_only_allow_delete is being cleared (set to null or false).
+     * 2. Neither cluster.blocks.read_only nor cluster.blocks.read_only_allow_delete is being set to true.
+     * 3. The only other settings in this update are archived ones being set to null.
+     */
     @Override
     protected ClusterBlockException checkBlock(ClusterUpdateSettingsRequest request, ClusterState state) {
-        // allow for dedicated changes to the metadata blocks, so we don't block those to allow to "re-enable" it
-        if (request.transientSettings().size() + request.persistentSettings().size() == 1) {
-            // only one setting
-            if (Metadata.SETTING_READ_ONLY_SETTING.exists(request.persistentSettings())
-                || Metadata.SETTING_READ_ONLY_SETTING.exists(request.transientSettings())
-                || Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.exists(request.transientSettings())
-                || Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.exists(request.persistentSettings())) {
-                // one of the settings above as the only setting in the request means - resetting the block!
+        Set<String> clearedBlockAndArchivedSettings = new HashSet<>();
+        if (checkClearedBlockAndArchivedSettings(request.transientSettings(), clearedBlockAndArchivedSettings)
+            && checkClearedBlockAndArchivedSettings(request.persistentSettings(), clearedBlockAndArchivedSettings)) {
+            if (clearedBlockAndArchivedSettings.contains(Metadata.SETTING_READ_ONLY_SETTING.getKey())
+                || clearedBlockAndArchivedSettings.contains(Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.getKey())) {
                 return null;
             }
         }
+
         return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE);
     }
 
+    /**
+     * Check settings that only contains block and archived settings.
+     * @param settings target settings to be checked.
+     * @param clearedBlockAndArchivedSettings block settings that have been set to null or false,
+     *                                        archived settings that have been set to null.
+     * @return true if all settings are clear blocks or archived settings.
+     */
+    private boolean checkClearedBlockAndArchivedSettings(final Settings settings,
+                                                         final Set<String> clearedBlockAndArchivedSettings) {
+        for (String key : settings.keySet()) {
+            if (Metadata.SETTING_READ_ONLY_SETTING.getKey().equals(key)) {
+                if (Metadata.SETTING_READ_ONLY_SETTING.get(settings)) {
+                    // set block as true
+                    return false;
+                }
+            } else if (Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.getKey().equals(key)) {
+                if (Metadata.SETTING_READ_ONLY_ALLOW_DELETE_SETTING.get(settings)) {
+                    // set block as true
+                    return false;
+                }
+            } else if (key.startsWith(ARCHIVED_SETTINGS_PREFIX)) {
+                if (settings.get(key) != null) {
+                    // archived setting value is not null
+                    return false;
+                }
+            } else {
+                // other settings
+                return false;
+            }
+            clearedBlockAndArchivedSettings.add(key);
+        }
+        return true;
+    }
+
     @Override
     protected void masterOperation(Task task, final ClusterUpdateSettingsRequest request, final ClusterState state,
                                    final ActionListener<ClusterUpdateSettingsResponse> listener) {

+ 21 - 1
test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

@@ -107,6 +107,7 @@ import org.elasticsearch.common.xcontent.smile.SmileXContent;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.env.TestEnvironment;
+import org.elasticsearch.gateway.PersistedClusterStateService;
 import org.elasticsearch.http.HttpInfo;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexModule;
@@ -177,6 +178,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
@@ -1527,7 +1529,8 @@ public abstract class ESIntegTestCase extends ESTestCase {
     public static void setClusterReadOnly(boolean value) {
         Settings settings = value ? Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), value).build() :
             Settings.builder().putNull(Metadata.SETTING_READ_ONLY_SETTING.getKey()).build()  ;
-        assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(settings).get());
+        assertAcked(client().admin().cluster().prepareUpdateSettings()
+            .setPersistentSettings(settings).setTransientSettings(settings).get());
     }
 
     private static CountDownLatch newLatch(List<CountDownLatch> latches) {
@@ -2235,6 +2238,23 @@ public abstract class ESIntegTestCase extends ESTestCase {
         return Boolean.parseBoolean(System.getProperty(FIPS_SYSPROP));
     }
 
+    protected void restartNodesOnBrokenClusterState(ClusterState.Builder clusterStateBuilder) throws Exception {
+        Map<String, PersistedClusterStateService> lucenePersistedStateFactories = Stream.of(internalCluster().getNodeNames())
+            .collect(Collectors.toMap(Function.identity(),
+                nodeName -> internalCluster().getInstance(PersistedClusterStateService.class, nodeName)));
+        final ClusterState clusterState = clusterStateBuilder.build();
+        internalCluster().fullRestart(new InternalTestCluster.RestartCallback() {
+            @Override
+            public Settings onNodeStopped(String nodeName) throws Exception {
+                final PersistedClusterStateService lucenePersistedStateFactory = lucenePersistedStateFactories.get(nodeName);
+                try (PersistedClusterStateService.Writer writer = lucenePersistedStateFactory.createWriter()) {
+                    writer.writeFullStateAndCommit(clusterState.term(), clusterState);
+                }
+                return super.onNodeStopped(nodeName);
+            }
+        });
+    }
+
     /**
      * On Debian 8 the "memory" subsystem is not mounted by default
      * when cgroups are enabled, and this confuses many versions of