Jelajahi Sumber

Don't announce ready until file settings are applied (#92856)

Instead of failing startup on incorrect file-based settings, prevent the node
from declaring readiness. From the outside perspective of the readiness probe
the two are equivalent; however, this approach doesn't block on the cluster
state update task.
Nikola Grcevski 2 tahun lalu
induk
melakukan
808ce7272b
21 mengubah file dengan 495 tambahan dan 436 penghapusan
  1. 6 0
      docs/changelog/92856.yaml
  2. 3 9
      server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestFileSettingsIT.java
  3. 167 0
      server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java
  4. 0 5
      server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java
  5. 105 12
      server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java
  6. 0 6
      server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java
  7. 0 6
      server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java
  8. 2 14
      server/src/main/java/org/elasticsearch/action/ingest/ReservedPipelineAction.java
  9. 9 6
      server/src/main/java/org/elasticsearch/node/Node.java
  10. 21 4
      server/src/main/java/org/elasticsearch/readiness/ReadinessService.java
  11. 18 0
      server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsChangedListener.java
  12. 50 95
      server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java
  13. 2 42
      server/src/test/java/org/elasticsearch/action/ingest/ReservedPipelineActionTests.java
  14. 1 0
      server/src/test/java/org/elasticsearch/readiness/ReadinessServiceTests.java
  15. 61 194
      server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java
  16. 0 7
      test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java
  17. 0 6
      x-pack/plugin/autoscaling/src/internalClusterTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingFileSettingsIT.java
  18. 0 1
      x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMFileSettingsIT.java
  19. 0 5
      x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java
  20. 0 6
      x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java
  21. 50 18
      x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsStartupIT.java

+ 6 - 0
docs/changelog/92856.yaml

@@ -0,0 +1,6 @@
+pr: 92856
+summary: Don't announce ready until file settings are applied
+area: Infra/Core
+type: bug
+issues:
+ - 92812

+ 3 - 9
server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestFileSettingsIT.java

@@ -20,7 +20,6 @@ import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.plugins.Plugin;
@@ -51,11 +50,6 @@ import static org.hamcrest.Matchers.notNullValue;
 
 public class IngestFileSettingsIT extends ESIntegTestCase {
 
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
         return Arrays.asList(CustomIngestTestPlugin.class);
@@ -107,7 +101,7 @@ public class IngestFileSettingsIT extends ESIntegTestCase {
                  "ingest_pipelines": {
                    "my_ingest_pipeline": {
                        "description": "_description",
-                       "processors": [
+                       "processors":
                           {
                             "foo" : {
                               "field": "pipeline",
@@ -210,11 +204,11 @@ public class IngestFileSettingsIT extends ESIntegTestCase {
                     clusterService.removeListener(this);
                     metadataVersion.set(event.state().metadata().version());
                     savedClusterState.countDown();
-                    assertEquals(ReservedStateErrorMetadata.ErrorKind.VALIDATION, reservedState.errorMetadata().errorKind());
+                    assertEquals(ReservedStateErrorMetadata.ErrorKind.PARSING, reservedState.errorMetadata().errorKind());
                     assertThat(reservedState.errorMetadata().errors(), allOf(notNullValue(), hasSize(1)));
                     assertThat(
                         reservedState.errorMetadata().errors().get(0),
-                        containsString("org.elasticsearch.ElasticsearchParseException: No processor type exists with name [foo]")
+                        containsString("org.elasticsearch.xcontent.XContentParseException: [17:16] [reserved_state_chunk] failed")
                     );
                 }
             }

+ 167 - 0
server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java

@@ -8,24 +8,73 @@
 package org.elasticsearch.readiness;
 
 import org.elasticsearch.client.internal.Client;
+import org.elasticsearch.cluster.ClusterChangedEvent;
+import org.elasticsearch.cluster.ClusterStateListener;
+import org.elasticsearch.cluster.metadata.ReservedStateErrorMetadata;
+import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
+import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
+import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.core.Strings;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.discovery.MasterNotDiscoveredException;
+import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
+import org.elasticsearch.reservedstate.service.FileSettingsService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
 import org.elasticsearch.test.InternalTestCluster;
 import org.elasticsearch.test.readiness.ReadinessClientProbe;
 
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.List;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
 
+import static org.elasticsearch.node.Node.INITIAL_STATE_TIMEOUT_SETTING;
 import static org.elasticsearch.test.NodeRoles.dataOnlyNode;
 import static org.elasticsearch.test.NodeRoles.masterNode;
 import static org.elasticsearch.test.NodeRoles.nonDataNode;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.notNullValue;
 
 @ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
 public class ReadinessClusterIT extends ESIntegTestCase implements ReadinessClientProbe {
 
+    private static AtomicLong versionCounter = new AtomicLong(1);
+
+    private static String testErrorJSON = """
+        {
+             "metadata": {
+                 "version": "%s",
+                 "compatibility": "8.4.0"
+             },
+             "state": {
+                 "not_cluster_settings": {
+                     "search.allow_expensive_queries": "false"
+                 }
+             }
+        }""";
+
+    private static String testJSON = """
+        {
+             "metadata": {
+                 "version": "%s",
+                 "compatibility": "8.4.0"
+             },
+             "state": {
+                 "cluster_settings": {
+                     "indices.recovery.max_bytes_per_sec": "50mb"
+                 }
+             }
+        }""";
+
     @Override
     protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
         Settings.Builder settings = Settings.builder()
@@ -152,4 +201,122 @@ public class ReadinessClusterIT extends ESIntegTestCase implements ReadinessClie
             tcpReadinessProbeTrue(s);
         }
     }
+
+    private Tuple<CountDownLatch, AtomicLong> setupClusterStateListenerForError(String node) {
+        ClusterService clusterService = internalCluster().clusterService(node);
+        CountDownLatch savedClusterState = new CountDownLatch(1);
+        AtomicLong metadataVersion = new AtomicLong(-1);
+        clusterService.addListener(new ClusterStateListener() {
+            @Override
+            public void clusterChanged(ClusterChangedEvent event) {
+                ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
+                if (reservedState != null && reservedState.errorMetadata() != null) {
+                    assertEquals(ReservedStateErrorMetadata.ErrorKind.PARSING, reservedState.errorMetadata().errorKind());
+                    assertThat(reservedState.errorMetadata().errors(), allOf(notNullValue(), hasSize(1)));
+                    assertThat(
+                        reservedState.errorMetadata().errors().get(0),
+                        containsString("Missing handler definition for content key [not_cluster_settings]")
+                    );
+                    clusterService.removeListener(this);
+                    metadataVersion.set(event.state().metadata().version());
+                    savedClusterState.countDown();
+                }
+            }
+        });
+
+        return new Tuple<>(savedClusterState, metadataVersion);
+    }
+
+    private void writeJSONFile(String node, String json) throws Exception {
+        long version = versionCounter.incrementAndGet();
+
+        FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
+
+        Files.createDirectories(fileSettingsService.operatorSettingsDir());
+        Path tempFilePath = createTempFile();
+
+        Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
+        Files.move(tempFilePath, fileSettingsService.operatorSettingsFile(), StandardCopyOption.ATOMIC_MOVE);
+        logger.info("--> New file settings: [{}]", Strings.format(json, version));
+    }
+
+    public void testNotReadyOnBadFileSettings() throws Exception {
+        internalCluster().setBootstrapMasterNodeIndex(0);
+        logger.info("--> start data node / non master node");
+        String dataNode = internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
+        FileSettingsService dataFileSettingsService = internalCluster().getInstance(FileSettingsService.class, dataNode);
+
+        assertFalse(dataFileSettingsService.watching());
+
+        logger.info("--> write bad file settings before we boot master node");
+        writeJSONFile(dataNode, testErrorJSON);
+
+        logger.info("--> start master node");
+        final String masterNode = internalCluster().startMasterOnlyNode(
+            Settings.builder().put(INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s").build()
+        );
+        assertMasterNode(internalCluster().nonMasterClient(), masterNode);
+        var savedClusterState = setupClusterStateListenerForError(masterNode);
+
+        FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
+
+        assertTrue(masterFileSettingsService.watching());
+        assertFalse(dataFileSettingsService.watching());
+
+        boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
+        assertTrue(awaitSuccessful);
+
+        ReadinessService s = internalCluster().getInstance(ReadinessService.class, internalCluster().getMasterName());
+        assertNull(s.boundAddress());
+    }
+
+    private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {
+        ClusterService clusterService = internalCluster().clusterService(node);
+        CountDownLatch savedClusterState = new CountDownLatch(1);
+        AtomicLong metadataVersion = new AtomicLong(-1);
+        clusterService.addListener(new ClusterStateListener() {
+            @Override
+            public void clusterChanged(ClusterChangedEvent event) {
+                ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
+                if (reservedState != null) {
+                    ReservedStateHandlerMetadata handlerMetadata = reservedState.handlers().get(ReservedClusterSettingsAction.NAME);
+                    if (handlerMetadata != null && handlerMetadata.keys().contains("indices.recovery.max_bytes_per_sec")) {
+                        clusterService.removeListener(this);
+                        metadataVersion.set(event.state().metadata().version());
+                        savedClusterState.countDown();
+                    }
+                }
+            }
+        });
+
+        return new Tuple<>(savedClusterState, metadataVersion);
+    }
+
+    public void testReadyAfterCorrectFileSettings() throws Exception {
+        internalCluster().setBootstrapMasterNodeIndex(0);
+        logger.info("--> start data node / non master node");
+        String dataNode = internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
+        FileSettingsService dataFileSettingsService = internalCluster().getInstance(FileSettingsService.class, dataNode);
+
+        assertFalse(dataFileSettingsService.watching());
+        var savedClusterState = setupClusterStateListener(dataNode);
+
+        logger.info("--> write correct file settings before we boot master node");
+        writeJSONFile(dataNode, testJSON);
+
+        logger.info("--> start master node");
+        final String masterNode = internalCluster().startMasterOnlyNode();
+        assertMasterNode(internalCluster().nonMasterClient(), masterNode);
+
+        FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
+
+        assertTrue(masterFileSettingsService.watching());
+        assertFalse(dataFileSettingsService.watching());
+
+        boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
+        assertTrue(awaitSuccessful);
+
+        ReadinessService s = internalCluster().getInstance(ReadinessService.class, internalCluster().getMasterName());
+        tcpReadinessProbeTrue(s);
+    }
 }

+ 0 - 5
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java

@@ -52,11 +52,6 @@ import static org.hamcrest.Matchers.notNullValue;
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
 public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase {
 
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String emptyJSON = """

+ 105 - 12
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java

@@ -34,7 +34,9 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
 import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING;
+import static org.elasticsearch.node.Node.INITIAL_STATE_TIMEOUT_SETTING;
 import static org.elasticsearch.test.NodeRoles.dataOnlyNode;
+import static org.elasticsearch.test.NodeRoles.masterNode;
 import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@@ -45,11 +47,6 @@ import static org.hamcrest.Matchers.nullValue;
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
 public class FileSettingsServiceIT extends ESIntegTestCase {
 
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String testJSON = """
@@ -65,6 +62,30 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
              }
         }""";
 
+    private static String testJSON43mb = """
+        {
+             "metadata": {
+                 "version": "%s",
+                 "compatibility": "8.4.0"
+             },
+             "state": {
+                 "cluster_settings": {
+                     "indices.recovery.max_bytes_per_sec": "43mb"
+                 }
+             }
+        }""";
+
+    private static String testCleanupJSON = """
+        {
+             "metadata": {
+                 "version": "%s",
+                 "compatibility": "8.4.0"
+             },
+             "state": {
+                 "cluster_settings": {}
+             }
+        }""";
+
     private static String testErrorJSON = """
         {
              "metadata": {
@@ -95,6 +116,29 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
 
         Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
         Files.move(tempFilePath, fileSettingsService.operatorSettingsFile(), StandardCopyOption.ATOMIC_MOVE);
+        logger.info("--> New file settings: [{}]", Strings.format(json, version));
+    }
+
+    private Tuple<CountDownLatch, AtomicLong> setupCleanupClusterStateListener(String node) {
+        ClusterService clusterService = internalCluster().clusterService(node);
+        CountDownLatch savedClusterState = new CountDownLatch(1);
+        AtomicLong metadataVersion = new AtomicLong(-1);
+        clusterService.addListener(new ClusterStateListener() {
+            @Override
+            public void clusterChanged(ClusterChangedEvent event) {
+                ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
+                if (reservedState != null) {
+                    ReservedStateHandlerMetadata handlerMetadata = reservedState.handlers().get(ReservedClusterSettingsAction.NAME);
+                    if (handlerMetadata != null && handlerMetadata.keys().contains("indices.recovery.max_bytes_per_sec") == false) {
+                        clusterService.removeListener(this);
+                        metadataVersion.set(event.state().metadata().version());
+                        savedClusterState.countDown();
+                    }
+                }
+            }
+        });
+
+        return new Tuple<>(savedClusterState, metadataVersion);
     }
 
     private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {
@@ -119,7 +163,8 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         return new Tuple<>(savedClusterState, metadataVersion);
     }
 
-    private void assertClusterStateSaveOK(CountDownLatch savedClusterState, AtomicLong metadataVersion) throws Exception {
+    private void assertClusterStateSaveOK(CountDownLatch savedClusterState, AtomicLong metadataVersion, String expectedBytesPerSec)
+        throws Exception {
         boolean awaitSuccessful = savedClusterState.await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
@@ -130,7 +175,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
 
         assertThat(
             clusterStateResponse.getState().metadata().persistentSettings().get(INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING.getKey()),
-            equalTo("50mb")
+            equalTo(expectedBytesPerSec)
         );
 
         ClusterUpdateSettingsRequest req = new ClusterUpdateSettingsRequest().persistentSettings(
@@ -163,7 +208,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertFalse(dataFileSettingsService.watching());
 
         writeJSONFile(masterNode, testJSON);
-        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2());
+        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
     }
 
     public void testSettingsAppliedOnStart() throws Exception {
@@ -188,13 +233,15 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertTrue(masterFileSettingsService.watching());
         assertFalse(dataFileSettingsService.watching());
 
-        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2());
+        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
     }
 
     public void testReservedStatePersistsOnRestart() throws Exception {
         internalCluster().setBootstrapMasterNodeIndex(0);
         logger.info("--> start master node");
-        final String masterNode = internalCluster().startMasterOnlyNode();
+        final String masterNode = internalCluster().startMasterOnlyNode(
+            Settings.builder().put(INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s").build()
+        );
         assertMasterNode(internalCluster().masterClient(), masterNode);
         var savedClusterState = setupClusterStateListener(masterNode);
 
@@ -204,7 +251,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
 
         logger.info("--> write some settings");
         writeJSONFile(masterNode, testJSON);
-        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2());
+        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
 
         logger.info("--> restart master");
         internalCluster().restartNode(masterNode);
@@ -275,7 +322,9 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertFalse(dataFileSettingsService.watching());
 
         logger.info("--> start master node");
-        final String masterNode = internalCluster().startMasterOnlyNode();
+        final String masterNode = internalCluster().startMasterOnlyNode(
+            Settings.builder().put(INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s").build()
+        );
         assertMasterNode(internalCluster().nonMasterClient(), masterNode);
         var savedClusterState = setupClusterStateListenerForError(masterNode);
 
@@ -287,4 +336,48 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         writeJSONFile(masterNode, testErrorJSON);
         assertClusterStateNotSaved(savedClusterState.v1(), savedClusterState.v2());
     }
+
+    public void testSettingsAppliedOnMasterReElection() throws Exception {
+        internalCluster().setBootstrapMasterNodeIndex(0);
+        logger.info("--> start master node");
+        final String masterNode = internalCluster().startMasterOnlyNode();
+
+        logger.info("--> start master eligible nodes, 2 more for quorum");
+        String masterNode1 = internalCluster().startNode(Settings.builder().put(masterNode()).put("discovery.initial_state_timeout", "1s"));
+        String masterNode2 = internalCluster().startNode(Settings.builder().put(masterNode()).put("discovery.initial_state_timeout", "1s"));
+        FileSettingsService master1FS = internalCluster().getInstance(FileSettingsService.class, masterNode1);
+        FileSettingsService master2FS = internalCluster().getInstance(FileSettingsService.class, masterNode2);
+
+        assertFalse(master1FS.watching());
+        assertFalse(master2FS.watching());
+
+        var savedClusterState = setupClusterStateListener(masterNode);
+        FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
+
+        assertTrue(masterFileSettingsService.watching());
+
+        writeJSONFile(masterNode, testJSON);
+        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
+
+        internalCluster().stopCurrentMasterNode();
+        ensureStableCluster(2);
+
+        FileSettingsService masterFS = internalCluster().getCurrentMasterNodeInstance(FileSettingsService.class);
+        assertTrue(masterFS.watching());
+        logger.info("--> start another master eligible node to form a quorum");
+        internalCluster().startNode(Settings.builder().put(masterNode()).put("discovery.initial_state_timeout", "1s"));
+        ensureStableCluster(3);
+
+        savedClusterState = setupCleanupClusterStateListener(internalCluster().getMasterName());
+        writeJSONFile(internalCluster().getMasterName(), testCleanupJSON);
+
+        boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
+        assertTrue(awaitSuccessful);
+
+        savedClusterState = setupClusterStateListener(internalCluster().getMasterName());
+        writeJSONFile(internalCluster().getMasterName(), testJSON43mb);
+
+        assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "43mb");
+    }
+
 }

+ 0 - 6
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java

@@ -49,12 +49,6 @@ import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
 public class RepositoriesFileSettingsIT extends ESIntegTestCase {
-
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String testJSON = """

+ 0 - 6
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java

@@ -47,12 +47,6 @@ import static org.hamcrest.Matchers.equalTo;
  */
 @LuceneTestCase.SuppressFileSystems("*")
 public class SnapshotsAndFileSettingsIT extends AbstractSnapshotIntegTestCase {
-
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String testFileSettingsJSON = """

+ 2 - 14
server/src/main/java/org/elasticsearch/action/ingest/ReservedPipelineAction.java

@@ -9,14 +9,12 @@
 package org.elasticsearch.action.ingest;
 
 import org.elasticsearch.ElasticsearchGenerationException;
-import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.ingest.IngestMetadata;
 import org.elasticsearch.ingest.IngestService;
 import org.elasticsearch.reservedstate.ReservedClusterStateHandler;
 import org.elasticsearch.reservedstate.TransformState;
-import org.elasticsearch.reservedstate.service.FileSettingsService;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
 import org.elasticsearch.xcontent.XContentParser;
@@ -40,19 +38,11 @@ import java.util.stream.Collectors;
 public class ReservedPipelineAction implements ReservedClusterStateHandler<List<PutPipelineRequest>> {
     public static final String NAME = "ingest_pipelines";
 
-    private final IngestService ingestService;
-    private final FileSettingsService fileSettingsService;
-
     /**
      * Creates a ReservedPipelineAction
      *
-     * @param ingestService requires {@link IngestService} for storing/deleting the pipelines
-     * @param fileSettingsService required for supplying the latest node infos
      */
-    public ReservedPipelineAction(IngestService ingestService, FileSettingsService fileSettingsService) {
-        this.ingestService = ingestService;
-        this.fileSettingsService = fileSettingsService;
-    }
+    public ReservedPipelineAction() {}
 
     @Override
     public String name() {
@@ -61,11 +51,9 @@ public class ReservedPipelineAction implements ReservedClusterStateHandler<List<
 
     private Collection<PutPipelineRequest> prepare(List<PutPipelineRequest> requests) {
         var exceptions = new ArrayList<Exception>();
-        NodesInfoResponse nodeInfos = fileSettingsService.nodeInfos();
-        assert nodeInfos != null;
         for (var pipeline : requests) {
             try {
-                ingestService.validatePipelineRequest(pipeline, nodeInfos);
+                validate(pipeline);
             } catch (Exception e) {
                 exceptions.add(e);
             }

+ 9 - 6
server/src/main/java/org/elasticsearch/node/Node.java

@@ -858,17 +858,14 @@ public class Node implements Closeable {
             );
 
             actionModule.getReservedClusterStateService().installStateHandler(new ReservedRepositoryAction(repositoryService));
+            actionModule.getReservedClusterStateService().installStateHandler(new ReservedPipelineAction());
 
             FileSettingsService fileSettingsService = new FileSettingsService(
                 clusterService,
                 actionModule.getReservedClusterStateService(),
-                environment,
-                client
+                environment
             );
 
-            actionModule.getReservedClusterStateService()
-                .installStateHandler(new ReservedPipelineAction(ingestService, fileSettingsService));
-
             RestoreService restoreService = new RestoreService(
                 clusterService,
                 repositoryService,
@@ -1395,6 +1392,13 @@ public class Node implements Closeable {
             pluginsService.flatMap(Plugin::getBootstrapChecks).toList()
         );
 
+        final FileSettingsService fileSettingsService = injector.getInstance(FileSettingsService.class);
+        fileSettingsService.start();
+        // if we are using the readiness service, listen for the file settings being applied
+        if (ReadinessService.enabled(environment)) {
+            fileSettingsService.addFileSettingsChangedListener(injector.getInstance(ReadinessService.class));
+        }
+
         clusterService.addStateApplier(transportService.getTaskManager());
         // start after transport service so the local disco is known
         coordinator.start(); // start before cluster service so that it can set initial state on ClusterApplierService
@@ -1445,7 +1449,6 @@ public class Node implements Closeable {
             }
         }
 
-        injector.getInstance(FileSettingsService.class).start();
         injector.getInstance(HttpServerTransport.class).start();
 
         if (WRITE_PORTS_FILE_SETTING.get(settings())) {

+ 21 - 4
server/src/main/java/org/elasticsearch/readiness/ReadinessService.java

@@ -20,6 +20,7 @@ import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.transport.BoundTransportAddress;
 import org.elasticsearch.common.transport.TransportAddress;
 import org.elasticsearch.env.Environment;
+import org.elasticsearch.reservedstate.service.FileSettingsChangedListener;
 import org.elasticsearch.shutdown.PluginShutdownService;
 import org.elasticsearch.transport.BindTransportException;
 
@@ -36,7 +37,7 @@ import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicReference;
 
-public class ReadinessService extends AbstractLifecycleComponent implements ClusterStateListener {
+public class ReadinessService extends AbstractLifecycleComponent implements ClusterStateListener, FileSettingsChangedListener {
     private static final Logger logger = LogManager.getLogger(ReadinessService.class);
 
     private final Environment environment;
@@ -47,6 +48,9 @@ public class ReadinessService extends AbstractLifecycleComponent implements Clus
     volatile CountDownLatch listenerThreadLatch = new CountDownLatch(0);
     final AtomicReference<InetSocketAddress> boundSocket = new AtomicReference<>();
     private final Collection<BoundAddressListener> boundAddressListeners = new CopyOnWriteArrayList<>();
+    private volatile boolean fileSettingsApplied = false;
+    private volatile boolean masterElected = false;
+    private volatile boolean shuttingDown = false;
 
     public static final Setting<Integer> PORT = Setting.intSetting("readiness.port", -1, Setting.Property.NodeScope);
 
@@ -213,13 +217,20 @@ public class ReadinessService extends AbstractLifecycleComponent implements Clus
     @Override
     public void clusterChanged(ClusterChangedEvent event) {
         ClusterState clusterState = event.state();
-
         Set<String> shutdownNodeIds = PluginShutdownService.shutdownNodes(clusterState);
-        if (shutdownNodeIds.contains(clusterState.nodes().getLocalNodeId())) {
+
+        this.masterElected = clusterState.nodes().getMasterNodeId() != null;
+        this.shuttingDown = shutdownNodeIds.contains(clusterState.nodes().getLocalNodeId());
+
+        if (shuttingDown) {
             setReady(false);
             logger.info("marking node as not ready because it's shutting down");
         } else {
-            setReady(clusterState.nodes().getMasterNodeId() != null);
+            if (clusterState.nodes().getLocalNodeId().equals(clusterState.nodes().getMasterNodeId())) {
+                setReady(fileSettingsApplied);
+            } else {
+                setReady(masterElected);
+            }
         }
     }
 
@@ -239,6 +250,12 @@ public class ReadinessService extends AbstractLifecycleComponent implements Clus
         boundAddressListeners.add(listener);
     }
 
+    @Override
+    public void settingsChanged() {
+        fileSettingsApplied = true;
+        setReady(masterElected && (shuttingDown == false));
+    }
+
     /**
      * A listener to be notified when the readiness service establishes the port it's listening on.
      * The {@link #addressBound(BoundTransportAddress)} method is called after the readiness service socket

+ 18 - 0
server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsChangedListener.java

@@ -0,0 +1,18 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.reservedstate.service;
+
+/**
+ * Listener interface for the file settings service. Listeners are notified
+ * after file settings have been successfully applied, or on initial start
+ * when no settings file exists.
+ */
+public interface FileSettingsChangedListener {
+    void settingsChanged();
+}

+ 50 - 95
server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java

@@ -10,16 +10,13 @@ package org.elasticsearch.reservedstate.service;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.action.admin.cluster.node.info.NodesInfoRequest;
-import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
-import org.elasticsearch.client.internal.ClusterAdminClient;
-import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.action.support.PlainActionFuture;
 import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.ClusterStateListener;
 import org.elasticsearch.cluster.metadata.Metadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
+import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.component.AbstractLifecycleComponent;
@@ -37,7 +34,8 @@ import java.nio.file.WatchService;
 import java.nio.file.attribute.BasicFileAttributes;
 import java.nio.file.attribute.FileTime;
 import java.time.Instant;
-import java.util.concurrent.CompletableFuture;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.ExecutionException;
 
 import static org.elasticsearch.xcontent.XContentType.JSON;
@@ -63,7 +61,6 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
     private final ClusterService clusterService;
     private final ReservedClusterStateService stateService;
     private final Path operatorSettingsDir;
-    private final NodeClient nodeClient;
 
     private WatchService watchService; // null;
     private Thread watcherThread;
@@ -72,12 +69,10 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
     private WatchKey configDirWatchKey;
 
     private volatile boolean active = false;
-    private volatile boolean initialState = true;
 
     public static final String OPERATOR_DIRECTORY = "operator";
 
-    private volatile NodesInfoResponse nodesInfoResponse = null;
-    private volatile boolean nodeInfosRefreshRequired = true;
+    private final List<FileSettingsChangedListener> eventListeners;
 
     /**
      * Constructs the {@link FileSettingsService}
@@ -86,16 +81,11 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
      * @param stateService an instance of the immutable cluster state controller, so we can perform the cluster state changes
      * @param environment we need the environment to pull the location of the config and operator directories
      */
-    public FileSettingsService(
-        ClusterService clusterService,
-        ReservedClusterStateService stateService,
-        Environment environment,
-        NodeClient nodeClient
-    ) {
+    public FileSettingsService(ClusterService clusterService, ReservedClusterStateService stateService, Environment environment) {
         this.clusterService = clusterService;
         this.stateService = stateService;
         this.operatorSettingsDir = environment.configFile().toAbsolutePath().resolve(OPERATOR_DIRECTORY);
-        this.nodeClient = nodeClient;
+        this.eventListeners = new CopyOnWriteArrayList<>();
     }
 
     public Path operatorSettingsDir() {
@@ -127,8 +117,13 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
         // We need the additional active flag, since cluster state can change after we've shutdown the service
         // causing the watcher to start again.
         this.active = Files.exists(operatorSettingsDir().getParent());
-        startIfMaster(clusterService.state());
-        clusterService.addListener(this);
+        if (active == false) {
+            // we don't have a config directory, we can't possibly launch the file settings service
+            return;
+        }
+        if (DiscoveryNode.isMasterNode(clusterService.getSettings())) {
+            clusterService.addListener(this);
+        }
     }
 
     @Override
@@ -149,16 +144,14 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
     public void clusterChanged(ClusterChangedEvent event) {
         ClusterState clusterState = event.state();
         startIfMaster(clusterState);
-        checkForNodeChanges(event);
     }
 
     private void startIfMaster(ClusterState clusterState) {
         if (currentNodeMaster(clusterState)) {
-            startWatcher(clusterState, initialState);
+            startWatcher(clusterState);
         } else {
             stopWatcher();
         }
-        initialState = false;
     }
 
     /**
@@ -221,7 +214,7 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
         return watcherThread != null;
     }
 
-    synchronized void startWatcher(ClusterState clusterState, boolean onStartup) {
+    synchronized void startWatcher(ClusterState clusterState) {
         if (watching() || active == false) {
             refreshExistingFileStateIfNeeded(clusterState);
 
@@ -239,21 +232,6 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
             Path settingsDirPath = operatorSettingsDir();
             this.watchService = settingsDirPath.getParent().getFileSystem().newWatchService();
             if (Files.exists(settingsDirPath)) {
-                Path settingsFilePath = operatorSettingsFile();
-                if (Files.exists(settingsFilePath)) {
-                    logger.debug("found initial operator settings file [{}], applying...", settingsFilePath);
-                    // we make a distinction here for startup, so that if we had operator settings before the node started
-                    // we would fail startup.
-                    try {
-                        processFileSettings(settingsFilePath).get();
-                    } catch (ExecutionException e) {
-                        if (onStartup) {
-                            throw new FileSettingsStartupException("Error applying operator settings", e.getCause());
-                        } else {
-                            logger.error("Error processing operator settings json file", e.getCause());
-                        }
-                    }
-                }
                 settingsDirWatchKey = enableSettingsWatcher(settingsDirWatchKey, settingsDirPath);
             } else {
                 logger.debug("operator settings directory [{}] not found, will watch for its creation...", settingsDirPath);
@@ -285,6 +263,18 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
         try {
             logger.info("file settings service up and running [tid={}]", Thread.currentThread().getId());
 
+            Path path = operatorSettingsFile();
+
+            if (Files.exists(path)) {
+                logger.debug("found initial operator settings file [{}], applying...", path);
+                processSettingsAndNotifyListeners();
+            } else {
+                // Notify everyone we don't have any initial file settings
+                for (var listener : eventListeners) {
+                    listener.settingsChanged();
+                }
+            }
+
             WatchKey key;
             while ((key = watchService.take()) != null) {
                 /*
@@ -304,8 +294,6 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
                 Path settingsPath = operatorSettingsDir();
                 if (Files.exists(settingsPath)) {
                     try {
-                        Path path = operatorSettingsFile();
-
                         if (logger.isDebugEnabled()) {
                             key.pollEvents().forEach(e -> logger.debug("{}:{}", e.kind().toString(), e.context().toString()));
                         } else {
@@ -320,11 +308,7 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
                         settingsDirWatchKey = enableSettingsWatcher(settingsDirWatchKey, settingsPath);
 
                         if (watchedFileChanged(path)) {
-                            try {
-                                processFileSettings(path).get();
-                            } catch (ExecutionException e) {
-                                logger.error("Error processing operator settings json file", e.getCause());
-                            }
+                            processSettingsAndNotifyListeners();
                         }
                     } catch (IOException e) {
                         logger.warn("encountered I/O error while watching file settings", e);
@@ -341,6 +325,18 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
         }
     }
 
+    // package private for testing
+    void processSettingsAndNotifyListeners() throws InterruptedException {
+        try {
+            processFileSettings(operatorSettingsFile()).get();
+            for (var listener : eventListeners) {
+                listener.settingsChanged();
+            }
+        } catch (ExecutionException e) {
+            logger.error("Error processing operator settings json file", e.getCause());
+        }
+    }
+
     synchronized void stopWatcher() {
         if (watching()) {
             logger.debug("stopping watcher ...");
@@ -404,52 +400,27 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
         } while (true);
     }
 
-    CompletableFuture<Void> processFileSettings(Path path) {
-        CompletableFuture<Void> completion = new CompletableFuture<>();
+    PlainActionFuture<Void> processFileSettings(Path path) {
+        PlainActionFuture<Void> completion = PlainActionFuture.newFuture();
         logger.info("processing path [{}] for [{}]", path, NAMESPACE);
         try (
             var fis = Files.newInputStream(path);
             var bis = new BufferedInputStream(fis);
             var parser = JSON.xContent().createParser(XContentParserConfiguration.EMPTY, bis)
         ) {
-            ReservedStateChunk parsedState = stateService.parse(NAMESPACE, parser);
-            if (nodeInfosRefreshRequired || nodesInfoResponse == null) {
-                var nodesInfoRequest = NodesInfoRequest.requestWithMetrics(NodesInfoRequest.Metric.INGEST);
-
-                clusterAdminClient().nodesInfo(nodesInfoRequest, new ActionListener<>() {
-                    @Override
-                    public void onResponse(NodesInfoResponse response) {
-                        // stash the latest node infos response and continue with processing the file
-                        nodesInfoResponse = response;
-                        nodeInfosRefreshRequired = false;
-                        stateService.process(NAMESPACE, parsedState, (e) -> completeProcessing(e, completion));
-                    }
-
-                    @Override
-                    public void onFailure(Exception e) {
-                        completion.completeExceptionally(e);
-                    }
-                });
-            } else {
-                stateService.process(NAMESPACE, parsedState, (e) -> completeProcessing(e, completion));
-            }
+            stateService.process(NAMESPACE, parser, (e) -> completeProcessing(e, completion));
         } catch (Exception e) {
-            completion.completeExceptionally(e);
+            completion.onFailure(e);
         }
 
         return completion;
     }
 
-    // package private for testing, separate method so that it can be mocked in tests
-    ClusterAdminClient clusterAdminClient() {
-        return nodeClient.admin().cluster();
-    }
-
-    private void completeProcessing(Exception e, CompletableFuture<Void> completion) {
+    private void completeProcessing(Exception e, PlainActionFuture<Void> completion) {
         if (e != null) {
-            completion.completeExceptionally(e);
+            completion.onFailure(e);
         } else {
-            completion.complete(null);
+            completion.onResponse(null);
         }
     }
 
@@ -459,23 +430,7 @@ public class FileSettingsService extends AbstractLifecycleComponent implements C
      */
     record FileUpdateState(long timestamp, String path, Object fileKey) {}
 
-    /**
-     * Error subclass that is thrown when we encounter a fatal error while applying
-     * the operator cluster state at Elasticsearch boot time.
-     */
-    public static class FileSettingsStartupException extends RuntimeException {
-        public FileSettingsStartupException(String message, Throwable t) {
-            super(message, t);
-        }
-    }
-
-    void checkForNodeChanges(ClusterChangedEvent event) {
-        if (currentNodeMaster(event.state()) && event.nodesChanged()) {
-            nodeInfosRefreshRequired = true;
-        }
-    }
-
-    public NodesInfoResponse nodeInfos() {
-        return nodesInfoResponse;
+    public void addFileSettingsChangedListener(FileSettingsChangedListener listener) {
+        eventListeners.add(listener);
     }
 }

+ 2 - 42
server/src/test/java/org/elasticsearch/action/ingest/ReservedPipelineActionTests.java

@@ -13,7 +13,6 @@ import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
 import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
 import org.elasticsearch.client.internal.Client;
-import org.elasticsearch.client.internal.node.NodeClient;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -47,7 +46,6 @@ import static java.util.Collections.emptySet;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.empty;
 import static org.mockito.ArgumentMatchers.anyString;
-import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
@@ -130,15 +128,8 @@ public class ReservedPipelineActionTests extends ESTestCase {
         );
 
         fileSettingsService = spy(
-            new FileSettingsService(
-                clusterService,
-                mock(ReservedClusterStateService.class),
-                newEnvironment(Settings.EMPTY),
-                mock(NodeClient.class)
-            )
+            new FileSettingsService(clusterService, mock(ReservedClusterStateService.class), newEnvironment(Settings.EMPTY))
         );
-
-        doReturn(response).when(fileSettingsService).nodeInfos();
     }
 
     private TransformState processJSON(ReservedPipelineAction action, TransformState prevState, String json) throws Exception {
@@ -147,36 +138,10 @@ public class ReservedPipelineActionTests extends ESTestCase {
         }
     }
 
-    public void testValidation() throws Exception {
-        ClusterState state = ClusterState.builder(new ClusterName("elasticsearch")).build();
-        TransformState prevState = new TransformState(state, Collections.emptySet());
-        ReservedPipelineAction action = makeSpiedAction();
-
-        String badPolicyJSON = """
-            {
-               "my_ingest_pipeline": {
-                   "description": "_description",
-                   "processors": [
-                      {
-                        "foo" : {
-                          "field": "pipeline",
-                          "value": "pipeline"
-                        }
-                      }
-                   ]
-               }
-            }""";
-
-        assertEquals(
-            "Error processing ingest pipelines",
-            expectThrows(IllegalArgumentException.class, () -> processJSON(action, prevState, badPolicyJSON)).getMessage()
-        );
-    }
-
     public void testAddRemoveIngestPipeline() throws Exception {
         ClusterState state = ClusterState.builder(new ClusterName("elasticsearch")).build();
         TransformState prevState = new TransformState(state, Collections.emptySet());
-        ReservedPipelineAction action = makeSpiedAction();
+        ReservedPipelineAction action = new ReservedPipelineAction();
 
         String emptyJSON = "";
 
@@ -235,9 +200,4 @@ public class ReservedPipelineActionTests extends ESTestCase {
         updatedState = processJSON(action, prevState, emptyJSON);
         assertThat(updatedState.keys(), empty());
     }
-
-    @SuppressWarnings("unchecked")
-    private ReservedPipelineAction makeSpiedAction() {
-        return spy(new ReservedPipelineAction(ingestService, fileSettingsService));
-    }
 }

+ 1 - 0
server/src/test/java/org/elasticsearch/readiness/ReadinessServiceTests.java

@@ -239,6 +239,7 @@ public class ReadinessServiceTests extends ESTestCase implements ReadinessClient
             .build();
         ClusterChangedEvent event = new ClusterChangedEvent("test", newState, previousState);
         readinessService.clusterChanged(event);
+        readinessService.settingsChanged();
 
         // sending a cluster state with active master should bring up the service
         assertTrue(readinessService.ready());

+ 61 - 194
server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java

@@ -8,14 +8,7 @@
 
 package org.elasticsearch.reservedstate.service;
 
-import org.elasticsearch.Build;
 import org.elasticsearch.Version;
-import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
-import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
-import org.elasticsearch.client.internal.ClusterAdminClient;
-import org.elasticsearch.client.internal.node.NodeClient;
-import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -25,12 +18,11 @@ import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.ClusterSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
-import org.elasticsearch.ingest.IngestInfo;
-import org.elasticsearch.ingest.ProcessorInfo;
 import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.threadpool.TestThreadPool;
 import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.xcontent.XContentParser;
 import org.junit.After;
 import org.junit.Before;
 import org.mockito.Mockito;
@@ -52,16 +44,11 @@ import java.util.List;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Consumer;
 
-import static java.util.Collections.emptyMap;
-import static java.util.Collections.emptySet;
-import static org.hamcrest.Matchers.allOf;
-import static org.hamcrest.Matchers.hasToString;
-import static org.hamcrest.Matchers.instanceOf;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.anyInt;
-import static org.mockito.Mockito.clearInvocations;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
@@ -75,9 +62,6 @@ public class FileSettingsServiceTests extends ESTestCase {
     private FileSettingsService fileSettingsService;
     private ReservedClusterStateService controller;
     private ThreadPool threadpool;
-    private NodeClient nodeClient;
-    private ClusterAdminClient clusterAdminClient;
-    private NodeInfo nodeInfo;
 
     @Before
     @SuppressWarnings("unchecked")
@@ -109,42 +93,7 @@ public class FileSettingsServiceTests extends ESTestCase {
         ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
 
         controller = new ReservedClusterStateService(clusterService, List.of(new ReservedClusterSettingsAction(clusterSettings)));
-
-        DiscoveryNode discoveryNode = new DiscoveryNode(
-            "_node_id",
-            buildNewFakeTransportAddress(),
-            emptyMap(),
-            emptySet(),
-            Version.CURRENT
-        );
-
-        nodeInfo = new NodeInfo(
-            Version.CURRENT,
-            Build.CURRENT,
-            discoveryNode,
-            Settings.EMPTY,
-            null,
-            null,
-            null,
-            null,
-            null,
-            null,
-            null,
-            new IngestInfo(Collections.singletonList(new ProcessorInfo("set"))),
-            null,
-            null
-        );
-        NodesInfoResponse response = new NodesInfoResponse(new ClusterName("elasticsearch"), List.of(nodeInfo), List.of());
-
-        clusterAdminClient = mock(ClusterAdminClient.class);
-        doAnswer(i -> {
-            ((ActionListener<NodesInfoResponse>) i.getArgument(1)).onResponse(response);
-            return null;
-        }).when(clusterAdminClient).nodesInfo(any(), any());
-
-        nodeClient = mock(NodeClient.class);
-        fileSettingsService = spy(new FileSettingsService(clusterService, controller, env, nodeClient));
-        doAnswer(i -> clusterAdminClient).when(fileSettingsService).clusterAdminClient();
+        fileSettingsService = spy(new FileSettingsService(clusterService, controller, env));
     }
 
     @After
@@ -188,6 +137,7 @@ public class FileSettingsServiceTests extends ESTestCase {
 
     public void testStartStop() {
         fileSettingsService.start();
+        fileSettingsService.startWatcher(clusterService.state());
         assertTrue(fileSettingsService.watching());
         fileSettingsService.stop();
         assertFalse(fileSettingsService.watching());
@@ -204,6 +154,7 @@ public class FileSettingsServiceTests extends ESTestCase {
         }).when(service).processFileSettings(any());
 
         service.start();
+        service.startWatcher(clusterService.state());
         assertTrue(service.watching());
 
         Files.createDirectories(service.operatorSettingsDir());
@@ -214,8 +165,8 @@ public class FileSettingsServiceTests extends ESTestCase {
         // on Linux is instantaneous. Windows is instantaneous too.
         processFileLatch.await(30, TimeUnit.SECONDS);
 
-        verify(service, Mockito.atLeast(1)).watchedFileChanged(any());
-        verify(service, times(1)).processFileSettings(any());
+        verify(service, Mockito.atLeast(1)).processSettingsAndNotifyListeners();
+        verify(service, Mockito.atLeast(1)).processFileSettings(any());
 
         service.stop();
         assertFalse(service.watching());
@@ -223,50 +174,81 @@ public class FileSettingsServiceTests extends ESTestCase {
     }
 
     @SuppressWarnings("unchecked")
-    public void testInitialFile() throws Exception {
+    public void testInitialFileError() throws Exception {
         ReservedClusterStateService stateService = mock(ReservedClusterStateService.class);
 
         doAnswer((Answer<Void>) invocation -> {
             ((Consumer<Exception>) invocation.getArgument(2)).accept(new IllegalStateException("Some exception"));
             return null;
-        }).when(stateService).process(any(), (ReservedStateChunk) any(), any());
+        }).when(stateService).process(any(), (XContentParser) any(), any());
 
-        FileSettingsService service = spy(new FileSettingsService(clusterService, stateService, env, nodeClient));
-        doAnswer(i -> clusterAdminClient).when(service).clusterAdminClient();
+        AtomicBoolean settingsChanged = new AtomicBoolean(false);
+        CountDownLatch latch = new CountDownLatch(1);
 
-        Files.createDirectories(service.operatorSettingsDir());
+        final FileSettingsService service = spy(new FileSettingsService(clusterService, stateService, env));
+
+        service.addFileSettingsChangedListener(() -> settingsChanged.set(true));
+
+        doAnswer((Answer<Void>) invocation -> {
+            invocation.callRealMethod();
+            latch.countDown();
+            return null;
+        }).when(service).processSettingsAndNotifyListeners();
 
+        Files.createDirectories(service.operatorSettingsDir());
         // contents of the JSON don't matter, we just need a file to exist
         writeTestFile(service.operatorSettingsFile(), "{}");
 
-        Exception startupException = expectThrows(IllegalStateException.class, () -> service.start());
-        assertThat(
-            startupException.getCause(),
-            allOf(
-                instanceOf(FileSettingsService.FileSettingsStartupException.class),
-                hasToString(
-                    "org.elasticsearch.reservedstate.service.FileSettingsService$FileSettingsStartupException: "
-                        + "Error applying operator settings"
-                )
-            )
-        );
+        service.start();
+        service.startWatcher(clusterService.state());
+
+        // wait until the watcher thread has started, and it has discovered the file
+        assertTrue(latch.await(20, TimeUnit.SECONDS));
 
         verify(service, times(1)).processFileSettings(any());
+        // assert we never notified any listeners of successful application of file based settings
+        assertFalse(settingsChanged.get());
 
         service.stop();
+        service.close();
+    }
 
-        clearInvocations(service);
+    @SuppressWarnings("unchecked")
+    public void testInitialFileWorks() throws Exception {
+        ReservedClusterStateService stateService = mock(ReservedClusterStateService.class);
 
         // Let's check that if we didn't throw an error that everything works
         doAnswer((Answer<Void>) invocation -> {
             ((Consumer<Exception>) invocation.getArgument(2)).accept(null);
             return null;
-        }).when(stateService).process(any(), (ReservedStateChunk) any(), any());
+        }).when(stateService).process(any(), (XContentParser) any(), any());
+
+        AtomicBoolean settingsChanged = new AtomicBoolean(false);
+        CountDownLatch latch = new CountDownLatch(1);
+
+        final FileSettingsService service = spy(new FileSettingsService(clusterService, stateService, env));
+
+        service.addFileSettingsChangedListener(() -> settingsChanged.set(true));
+
+        doAnswer((Answer<Void>) invocation -> {
+            invocation.callRealMethod();
+            latch.countDown();
+            return null;
+        }).when(service).processSettingsAndNotifyListeners();
+
+        Files.createDirectories(service.operatorSettingsDir());
+        // contents of the JSON don't matter, we just need a file to exist
+        writeTestFile(service.operatorSettingsFile(), "{}");
 
         service.start();
-        service.startWatcher(clusterService.state(), true);
+        service.startWatcher(clusterService.state());
+
+        // wait until the watcher thread has started, and it has discovered the file
+        assertTrue(latch.await(20, TimeUnit.SECONDS));
 
         verify(service, times(1)).processFileSettings(any());
+        // assert the listeners were notified that the file settings were successfully applied
+        assertTrue(settingsChanged.get());
 
         service.stop();
         service.close();
@@ -275,9 +257,8 @@ public class FileSettingsServiceTests extends ESTestCase {
     @SuppressWarnings("unchecked")
     public void testStopWorksInMiddleOfProcessing() throws Exception {
         var spiedController = spy(controller);
-        var fsService = new FileSettingsService(clusterService, spiedController, env, nodeClient);
+        var fsService = new FileSettingsService(clusterService, spiedController, env);
         FileSettingsService service = spy(fsService);
-        doAnswer(i -> clusterAdminClient).when(service).clusterAdminClient();
 
         CountDownLatch processFileLatch = new CountDownLatch(1);
         CountDownLatch deadThreadLatch = new CountDownLatch(1);
@@ -297,6 +278,7 @@ public class FileSettingsServiceTests extends ESTestCase {
         }).when(spiedController).parse(any(String.class), any());
 
         service.start();
+        service.startWatcher(clusterService.state());
         assertTrue(service.watching());
 
         Files.createDirectories(service.operatorSettingsDir());
@@ -319,10 +301,8 @@ public class FileSettingsServiceTests extends ESTestCase {
     @SuppressWarnings("unchecked")
     public void testStopWorksIfProcessingDidntReturnYet() throws Exception {
         var spiedController = spy(controller);
-        var fsService = new FileSettingsService(clusterService, spiedController, env, nodeClient);
+        var service = new FileSettingsService(clusterService, spiedController, env);
 
-        FileSettingsService service = spy(fsService);
-        doAnswer(i -> clusterAdminClient).when(service).clusterAdminClient();
         CountDownLatch processFileLatch = new CountDownLatch(1);
         CountDownLatch deadThreadLatch = new CountDownLatch(1);
 
@@ -343,6 +323,7 @@ public class FileSettingsServiceTests extends ESTestCase {
         }).when(spiedController).parse(any(String.class), any());
 
         service.start();
+        service.startWatcher(clusterService.state());
         assertTrue(service.watching());
 
         Files.createDirectories(service.operatorSettingsDir());
@@ -362,120 +343,6 @@ public class FileSettingsServiceTests extends ESTestCase {
         deadThreadLatch.countDown();
     }
 
-    @SuppressWarnings("unchecked")
-    public void testNodeInfosRefresh() throws Exception {
-        var spiedController = spy(controller);
-        var csAdminClient = spy(clusterAdminClient);
-        var response = new NodesInfoResponse(new ClusterName("elasticsearch"), List.of(nodeInfo), List.of());
-
-        doAnswer(i -> {
-            ((ActionListener<NodesInfoResponse>) i.getArgument(1)).onResponse(response);
-            return null;
-        }).when(csAdminClient).nodesInfo(any(), any());
-
-        var service = spy(new FileSettingsService(clusterService, spiedController, env, nodeClient));
-        doAnswer(i -> csAdminClient).when(service).clusterAdminClient();
-
-        doAnswer(
-            (Answer<ReservedStateChunk>) invocation -> new ReservedStateChunk(
-                Collections.emptyMap(),
-                new ReservedStateVersion(1L, Version.CURRENT)
-            )
-        ).when(spiedController).parse(any(String.class), any());
-
-        Files.createDirectories(service.operatorSettingsDir());
-        // Make some fake settings file to cause the file settings service to process it
-        writeTestFile(service.operatorSettingsFile(), "{}");
-
-        clearInvocations(csAdminClient);
-        clearInvocations(spiedController);
-
-        // we haven't fetched the node infos ever, since we haven't done any file processing
-        assertNull(service.nodeInfos());
-
-        // call the processing twice
-        service.processFileSettings(service.operatorSettingsFile()).whenComplete((o, e) -> {
-            if (e != null) {
-                fail("shouldn't get an exception");
-            }
-        });
-        // after the first processing we should have node infos
-        assertEquals(1, service.nodeInfos().getNodes().size());
-
-        service.processFileSettings(service.operatorSettingsFile()).whenComplete((o, e) -> {
-            if (e != null) {
-                fail("shouldn't get an exception");
-            }
-        });
-
-        // node infos should have been fetched only once
-        verify(csAdminClient, times(1)).nodesInfo(any(), any());
-        verify(spiedController, times(2)).process(any(), any(ReservedStateChunk.class), any());
-
-        // pretend we added a new node
-
-        final DiscoveryNode localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), Version.CURRENT);
-
-        NodeInfo localNodeInfo = new NodeInfo(
-            Version.CURRENT,
-            Build.CURRENT,
-            localNode,
-            Settings.EMPTY,
-            null,
-            null,
-            null,
-            null,
-            null,
-            null,
-            null,
-            new IngestInfo(Collections.singletonList(new ProcessorInfo("set"))),
-            null,
-            null
-        );
-        var newResponse = new NodesInfoResponse(new ClusterName("elasticsearch"), List.of(nodeInfo, localNodeInfo), List.of());
-
-        final ClusterState prevState = clusterService.state();
-        final ClusterState clusterState = ClusterState.builder(prevState)
-            .nodes(
-                DiscoveryNodes.builder(prevState.getNodes()).add(localNode).localNodeId(localNode.getId()).masterNodeId(localNode.getId())
-            )
-            .build();
-
-        ClusterChangedEvent event = new ClusterChangedEvent("transport", clusterState, prevState);
-        assertTrue(event.nodesChanged());
-        service.clusterChanged(event);
-
-        doAnswer(i -> {
-            ((ActionListener<NodesInfoResponse>) i.getArgument(1)).onResponse(newResponse);
-            return null;
-        }).when(csAdminClient).nodesInfo(any(), any());
-
-        // this wouldn't change yet, node fetch transport action is invoked on demand, when we need to process file changes,
-        // not every time we update the cluster state
-        assertEquals(1, service.nodeInfos().getNodes().size());
-
-        // call the processing twice
-        service.processFileSettings(service.operatorSettingsFile()).whenComplete((o, e) -> {
-            if (e != null) {
-                fail("shouldn't get an exception");
-            }
-        });
-
-        assertEquals(2, service.nodeInfos().getNodes().size());
-
-        service.processFileSettings(service.operatorSettingsFile()).whenComplete((o, e) -> {
-            if (e != null) {
-                fail("shouldn't get an exception");
-            }
-        });
-
-        assertEquals(2, service.nodeInfos().getNodes().size());
-
-        // node infos should have been fetched one more time
-        verify(csAdminClient, times(2)).nodesInfo(any(), any());
-        verify(spiedController, times(4)).process(any(), any(ReservedStateChunk.class), any());
-    }
-
     public void testRegisterWatchKeyRetry() throws IOException, InterruptedException {
         var service = spy(fileSettingsService);
         doAnswer(i -> 0L).when(service).retryDelayMillis(anyInt());

+ 0 - 7
test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

@@ -21,7 +21,6 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.ExceptionsHelper;
-import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionFuture;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.DocWriteResponse;
@@ -186,7 +185,6 @@ import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_SEED_PROVIDE
 import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
 import static org.elasticsearch.index.IndexSettings.INDEX_SOFT_DELETES_RETENTION_LEASE_PERIOD_SETTING;
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
-import static org.elasticsearch.node.Node.INITIAL_STATE_TIMEOUT_SETTING;
 import static org.elasticsearch.test.XContentTestUtils.convertToMap;
 import static org.elasticsearch.test.XContentTestUtils.differenceBetweenMapsIgnoringArrayOrder;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
@@ -1917,11 +1915,6 @@ public abstract class ESIntegTestCase extends ESTestCase {
         return builder.build();
     }
 
-    protected static Settings applyWorkaroundForIssue92812(Settings settings) {
-        assertTrue("this setting hides a blocking bug, we must remove it ASAP", Version.CURRENT.onOrBefore(Version.V_8_7_0));
-        return Settings.builder().put(settings).put(INITIAL_STATE_TIMEOUT_SETTING.getKey(), "30s").build();
-    }
-
     protected Path nodeConfigPath(int nodeOrdinal) {
         return null;
     }

+ 0 - 6
x-pack/plugin/autoscaling/src/internalClusterTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingFileSettingsIT.java

@@ -15,7 +15,6 @@ import org.elasticsearch.cluster.metadata.ReservedStateErrorMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.reservedstate.service.FileSettingsService;
@@ -45,11 +44,6 @@ import static org.hamcrest.Matchers.notNullValue;
  */
 public class AutoscalingFileSettingsIT extends AutoscalingIntegTestCase {
 
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String testJSON = """

+ 0 - 1
x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMFileSettingsIT.java

@@ -148,7 +148,6 @@ public class SLMFileSettingsIT extends AbstractSnapshotIntegTestCase {
     @Override
     protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
         return Settings.builder()
-            .put(applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings)))
             .put(LifecycleSettings.LIFECYCLE_HISTORY_INDEX_ENABLED, false)
             .put(LifecycleSettings.SLM_HISTORY_INDEX_ENABLED, false)
             .build();

+ 0 - 5
x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java

@@ -62,11 +62,6 @@ import static org.hamcrest.Matchers.notNullValue;
  */
 public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
 
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String emptyJSON = """

+ 0 - 6
x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java

@@ -13,7 +13,6 @@ import org.elasticsearch.cluster.ClusterStateListener;
 import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.reservedstate.service.FileSettingsService;
@@ -40,11 +39,6 @@ import static org.hamcrest.Matchers.notNullValue;
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
 public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase {
 
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
-
     private static AtomicLong versionCounter = new AtomicLong(1);
 
     private static String testJSONOnlyRoleMappings = """

+ 50 - 18
x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsStartupIT.java

@@ -8,14 +8,20 @@
 package org.elasticsearch.xpack.security;
 
 import org.elasticsearch.analysis.common.CommonAnalysisPlugin;
-import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.cluster.ClusterChangedEvent;
+import org.elasticsearch.cluster.ClusterStateListener;
+import org.elasticsearch.cluster.metadata.ReservedStateErrorMetadata;
+import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
+import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.core.Strings;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.reindex.ReindexPlugin;
 import org.elasticsearch.reservedstate.service.FileSettingsService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.InternalSettingsPlugin;
+import org.elasticsearch.test.SecurityIntegTestCase;
 import org.elasticsearch.transport.netty4.Netty4Plugin;
 
 import java.nio.charset.StandardCharsets;
@@ -24,17 +30,17 @@ import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
-import static org.elasticsearch.test.NodeRoles.dataOnlyNode;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
-public class FileSettingsRoleMappingsStartupIT extends ESIntegTestCase {
-
-    @Override
-    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
-        return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
-    }
+public class FileSettingsRoleMappingsStartupIT extends SecurityIntegTestCase {
 
     private static AtomicLong versionCounter = new AtomicLong(1);
     private static String testJSONForFailedCase = """
@@ -58,6 +64,14 @@ public class FileSettingsRoleMappingsStartupIT extends ESIntegTestCase {
              }
         }""";
 
+    /**
+     * Intentionally empty override: this test class performs no x-pack installation assertion of its own.
+     * NOTE(review): presumably the inherited check does not fit nodes that are started manually by the tests
+     * in this class — confirm against the base class.
+     */
+    @Override
+    protected void doAssertXPackIsInstalled() {}
+
+    /**
+     * Returns {@code null} for every node ordinal.
+     * NOTE(review): presumably this restores the "no dedicated config path" behaviour instead of whatever the
+     * security test base class supplies — confirm against the base class.
+     *
+     * @param nodeOrdinal ordinal of the node being configured; not used here
+     * @return always {@code null}
+     */
+    @Override
+    protected Path nodeConfigPath(int nodeOrdinal) {
+        return null;
+    }
+
     private void writeJSONFile(String node, String json) throws Exception {
         long version = versionCounter.incrementAndGet();
 
@@ -74,20 +88,38 @@ public class FileSettingsRoleMappingsStartupIT extends ESIntegTestCase {
         Files.move(tempFilePath, fileSettingsService.operatorSettingsFile(), StandardCopyOption.ATOMIC_MOVE);
     }
 
+    /**
+     * Registers a one-shot cluster state listener on the given node that waits for error metadata to appear
+     * in the reserved state of the {@link FileSettingsService#NAMESPACE} namespace.
+     * <p>
+     * Once an error is present, the listener asserts it is a single validation error mentioning
+     * "Fake exception", removes itself, records the metadata version and releases the latch.
+     *
+     * @param node name of the node whose cluster service is observed
+     * @return the latch released when the error was seen, and the metadata version at that point
+     *         ({@code -1} until then)
+     */
+    private Tuple<CountDownLatch, AtomicLong> setupClusterStateListenerForError(String node) {
+        final ClusterService nodeClusterService = internalCluster().clusterService(node);
+        final CountDownLatch errorObserved = new CountDownLatch(1);
+        final AtomicLong errorMetadataVersion = new AtomicLong(-1);
+
+        nodeClusterService.addListener(new ClusterStateListener() {
+            @Override
+            public void clusterChanged(ClusterChangedEvent event) {
+                ReservedStateMetadata fileSettingsState = event.state()
+                    .metadata()
+                    .reservedStateMetadata()
+                    .get(FileSettingsService.NAMESPACE);
+                if (fileSettingsState == null || fileSettingsState.errorMetadata() == null) {
+                    // no file settings error recorded in this state, keep listening
+                    return;
+                }
+
+                ReservedStateErrorMetadata errorMetadata = fileSettingsState.errorMetadata();
+                assertEquals(ReservedStateErrorMetadata.ErrorKind.VALIDATION, errorMetadata.errorKind());
+                assertThat(errorMetadata.errors(), allOf(notNullValue(), hasSize(1)));
+                assertThat(errorMetadata.errors().get(0), containsString("Fake exception"));
+
+                // one-shot: unregister before publishing the result to the waiting test thread
+                nodeClusterService.removeListener(this);
+                errorMetadataVersion.set(event.state().metadata().version());
+                errorObserved.countDown();
+            }
+        });
+
+        return new Tuple<>(errorObserved, errorMetadataVersion);
+    }
+
     public void testFailsOnStartMasterNodeWithError() throws Exception {
         internalCluster().setBootstrapMasterNodeIndex(0);
 
-        String dataNode = internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
+        internalCluster().startMasterOnlyNode();
         logger.info("--> write some role mappings, no other file settings");
-        writeJSONFile(dataNode, testJSONForFailedCase);
-
-        logger.info("--> stop data node");
-        internalCluster().stopNode(dataNode);
-        logger.info("--> start master node");
-        assertEquals(
-            "unable to launch a new watch service",
-            expectThrows(IllegalStateException.class, () -> internalCluster().startMasterOnlyNode()).getMessage()
-        );
+        writeJSONFile(internalCluster().getMasterName(), testJSONForFailedCase);
+        var savedClusterState = setupClusterStateListenerForError(internalCluster().getMasterName());
+
+        boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
+        assertTrue(awaitSuccessful);
     }
 
     public Collection<Class<? extends Plugin>> nodePlugins() {