Преглед на файлове

Use retry logic and real file system in file settings ITs (#116392) (#116709)

Several file-settings ITs fail (rarely) with exceptions like:

```
java.nio.file.AccessDeniedException: C:\Users\jenkins\workspace\platform-support\14\server\build\testrun\internalClusterTest\temp\org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT_5733F2A737542BE-001\tempFile-001.tmp -> C:\Users\jenkins\workspace\platform-support\14\server\build\testrun\internalClusterTest\temp\org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT_5733F2A737542BE-001\tempDir-002\config\operator\settings.json
    at sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:89)
    at sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103)
    at sun.nio.fs.WindowsFileCopy.move(WindowsFileCopy.java:317)
    at sun.nio.fs.WindowsFileSystemProvider.move(WindowsFileSystemProvider.java:293)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at java.nio.file.Files.move(Files.java:1430)
    at org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT.writeJSONFile(SnaphotsAndFileSettingsIT.java:86)
    at org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT.testRestoreWithPersistedFileSettings(SnaphotsAndFileSettingsIT.java:321)
```

This happens on Windows file systems, due to a race condition where the
file settings service is reading the settings file at the same time as
the test is trying to replace it (which Windows does not allow). It turns
out we have already addressed this with a retry for one test suite
(https://github.com/elastic/elasticsearch/pull/91863), and separately
addressed a related issue with mock Windows file systems misbehaving
(https://github.com/elastic/elasticsearch/pull/92653).

This PR extends both fixes (the retry around the file move, and
suppressing the mock file systems so the real one is used) to all
file-settings-related ITs.

(cherry picked from commit 91559da015abc4fd5851eb768d3af8884efa9c7c)
Nikolaj Volgushev преди 11 месеца
родител
ревизия
7668eee283

+ 3 - 13
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java

@@ -9,6 +9,7 @@
 
 package org.elasticsearch.reservedstate.service;
 
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.admin.indices.template.get.GetComponentTemplateAction;
@@ -26,16 +27,12 @@ import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.xcontent.XContentParserConfiguration;
 
 import java.io.ByteArrayInputStream;
 import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
 import java.util.Map;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
@@ -54,6 +51,7 @@ import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
+@LuceneTestCase.SuppressFileSystems("*")
 public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase {
 
     private static AtomicLong versionCounter = new AtomicLong(1);
@@ -359,15 +357,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase {
     }
 
     private void writeJSONFile(String node, String json) throws Exception {
-        long version = versionCounter.incrementAndGet();
-
-        FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
-
-        Files.createDirectories(fileSettingsService.watchedFileDir());
-        Path tempFilePath = createTempFile();
-
-        Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
-        Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
+        FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet());
     }
 
     private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {

+ 39 - 28
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java

@@ -10,6 +10,7 @@
 package org.elasticsearch.reservedstate.service;
 
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
@@ -20,6 +21,7 @@ import org.elasticsearch.cluster.metadata.ReservedStateErrorMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
@@ -27,7 +29,7 @@ import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.junit.Before;
 
-import java.nio.charset.StandardCharsets;
+import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
@@ -50,6 +52,7 @@ import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
+@LuceneTestCase.SuppressFileSystems("*")
 public class FileSettingsServiceIT extends ESIntegTestCase {
 
     private final AtomicLong versionCounter = new AtomicLong(1);
@@ -129,29 +132,37 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         );
     }
 
-    public static void writeJSONFile(String node, String json, AtomicLong versionCounter, Logger logger, boolean incrementVersion)
-        throws Exception {
-        long version = incrementVersion ? versionCounter.incrementAndGet() : versionCounter.get();
-
+    public static void writeJSONFile(String node, String json, Logger logger, Long version) throws Exception {
         FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
 
         Files.createDirectories(fileSettingsService.watchedFileDir());
         Path tempFilePath = createTempFile();
 
-        String settingsFileContent = Strings.format(json, version);
-        Files.write(tempFilePath, settingsFileContent.getBytes(StandardCharsets.UTF_8));
-        logger.info("--> Before writing new settings file with version [{}]", version);
-        Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
-        logger.info("--> After writing new settings file: [{}]", settingsFileContent);
-    }
-
-    public static void writeJSONFile(String node, String json, AtomicLong versionCounter, Logger logger) throws Exception {
-        writeJSONFile(node, json, versionCounter, logger, true);
+        String jsonWithVersion = Strings.format(json, version);
+        logger.info("--> before writing JSON config to node {} with path {}", node, tempFilePath);
+        logger.info(jsonWithVersion);
+
+        Files.writeString(tempFilePath, jsonWithVersion);
+        int retryCount = 0;
+        do {
+            try {
+                // this can fail on Windows because of timing
+                Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
+                logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath);
+                return;
+            } catch (IOException e) {
+                logger.info("--> retrying writing a settings file [{}]", retryCount);
+                if (retryCount == 4) { // retry 5 times
+                    throw e;
+                }
+                Thread.sleep(retryDelay(retryCount));
+                retryCount++;
+            }
+        } while (true);
     }
 
-    public static void writeJSONFileWithoutVersionIncrement(String node, String json, AtomicLong versionCounter, Logger logger)
-        throws Exception {
-        writeJSONFile(node, json, versionCounter, logger, false);
+    private static long retryDelay(int retryCount) {
+        return 100 * (1 << retryCount) + Randomness.get().nextInt(10);
     }
 
     private Tuple<CountDownLatch, AtomicLong> setupCleanupClusterStateListener(String node) {
@@ -245,7 +256,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertTrue(masterFileSettingsService.watching());
         assertFalse(dataFileSettingsService.watching());
 
-        writeJSONFile(masterNode, testJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet());
         assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
     }
 
@@ -260,7 +271,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
 
         // In internal cluster tests, the nodes share the config directory, so when we write with the data node path
         // the master will pick it up on start
-        writeJSONFile(dataNode, testJSON, versionCounter, logger);
+        writeJSONFile(dataNode, testJSON, logger, versionCounter.incrementAndGet());
 
         logger.info("--> start master node");
         final String masterNode = internalCluster().startMasterOnlyNode();
@@ -288,7 +299,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertBusy(() -> assertTrue(masterFileSettingsService.watching()));
 
         logger.info("--> write some settings");
-        writeJSONFile(masterNode, testJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet());
         assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
 
         logger.info("--> restart master");
@@ -366,7 +377,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertTrue(masterFileSettingsService.watching());
         assertFalse(dataFileSettingsService.watching());
 
-        writeJSONFile(masterNode, testErrorJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
         assertClusterStateNotSaved(savedClusterState.v1(), savedClusterState.v2());
     }
 
@@ -390,14 +401,14 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertTrue(masterFileSettingsService.watching());
         assertFalse(dataFileSettingsService.watching());
 
-        writeJSONFile(masterNode, testErrorJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
         AtomicLong metadataVersion = savedClusterState.v2();
         assertClusterStateNotSaved(savedClusterState.v1(), metadataVersion);
         assertHasErrors(metadataVersion, "not_cluster_settings");
 
         // write valid json without version increment to simulate ES being able to process settings after a restart (usually, this would be
         // due to a code change)
-        writeJSONFileWithoutVersionIncrement(masterNode, testJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testJSON, logger, versionCounter.get());
         internalCluster().restartNode(masterNode);
         ensureGreen();
 
@@ -426,14 +437,14 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         assertTrue(masterFileSettingsService.watching());
         assertFalse(dataFileSettingsService.watching());
 
-        writeJSONFile(masterNode, testErrorJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
         AtomicLong metadataVersion = savedClusterState.v2();
         assertClusterStateNotSaved(savedClusterState.v1(), metadataVersion);
         assertHasErrors(metadataVersion, "not_cluster_settings");
 
         // write json with new error without version increment to simulate ES failing to process settings after a restart for a new reason
         // (usually, this would be due to a code change)
-        writeJSONFileWithoutVersionIncrement(masterNode, testOtherErrorJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testOtherErrorJSON, logger, versionCounter.get());
         assertHasErrors(metadataVersion, "not_cluster_settings");
         internalCluster().restartNode(masterNode);
         ensureGreen();
@@ -461,7 +472,7 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
 
         assertTrue(masterFileSettingsService.watching());
 
-        writeJSONFile(masterNode, testJSON, versionCounter, logger);
+        writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet());
         assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb");
 
         internalCluster().stopCurrentMasterNode();
@@ -476,13 +487,13 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
         ensureStableCluster(3);
 
         savedClusterState = setupCleanupClusterStateListener(internalCluster().getMasterName());
-        writeJSONFile(internalCluster().getMasterName(), testCleanupJSON, versionCounter, logger);
+        writeJSONFile(internalCluster().getMasterName(), testCleanupJSON, logger, versionCounter.incrementAndGet());
 
         boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
         savedClusterState = setupClusterStateListener(internalCluster().getMasterName());
-        writeJSONFile(internalCluster().getMasterName(), testJSON43mb, versionCounter, logger);
+        writeJSONFile(internalCluster().getMasterName(), testJSON43mb, logger, versionCounter.incrementAndGet());
 
         assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "43mb");
     }

+ 3 - 13
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java

@@ -9,6 +9,7 @@
 
 package org.elasticsearch.reservedstate.service;
 
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesAction;
 import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesRequest;
 import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest;
@@ -22,7 +23,6 @@ import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.repositories.RepositoryMissingException;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -30,9 +30,6 @@ import org.elasticsearch.xcontent.XContentParserConfiguration;
 
 import java.io.ByteArrayInputStream;
 import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
@@ -49,6 +46,7 @@ import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
+@LuceneTestCase.SuppressFileSystems("*")
 public class RepositoriesFileSettingsIT extends ESIntegTestCase {
     private static AtomicLong versionCounter = new AtomicLong(1);
 
@@ -102,15 +100,7 @@ public class RepositoriesFileSettingsIT extends ESIntegTestCase {
     }
 
     private void writeJSONFile(String node, String json) throws Exception {
-        long version = versionCounter.incrementAndGet();
-
-        FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
-
-        Files.createDirectories(fileSettingsService.watchedFileDir());
-        Path tempFilePath = createTempFile();
-
-        Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
-        Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
+        FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet());
     }
 
     private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {

+ 1 - 33
server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java

@@ -19,9 +19,7 @@ import org.elasticsearch.cluster.InternalClusterInfoService;
 import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
-import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
@@ -29,11 +27,7 @@ import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase;
 import org.elasticsearch.snapshots.SnapshotState;
 import org.junit.After;
 
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
@@ -78,34 +72,8 @@ public class SnapshotsAndFileSettingsIT extends AbstractSnapshotIntegTestCase {
         awaitNoMoreRunningOperations();
     }
 
-    private long retryDelay(int retryCount) {
-        return 100 * (1 << retryCount) + Randomness.get().nextInt(10);
-    }
-
     private void writeJSONFile(String node, String json) throws Exception {
-        long version = versionCounter.incrementAndGet();
-
-        FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
-
-        Files.createDirectories(fileSettingsService.watchedFileDir());
-        Path tempFilePath = createTempFile();
-
-        Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
-        int retryCount = 0;
-        do {
-            try {
-                // this can fail on Windows because of timing
-                Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
-                return;
-            } catch (IOException e) {
-                logger.info("--> retrying writing a settings file [" + retryCount + "]");
-                if (retryCount == 4) { // retry 5 times
-                    throw e;
-                }
-                Thread.sleep(retryDelay(retryCount));
-                retryCount++;
-            }
-        } while (true);
+        FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet());
     }
 
     private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {

+ 41 - 28
x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java

@@ -8,6 +8,7 @@
 package org.elasticsearch.integration;
 
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
@@ -20,6 +21,7 @@ import org.elasticsearch.cluster.metadata.ReservedStateErrorMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
 import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Strings;
 import org.elasticsearch.core.Tuple;
@@ -42,6 +44,7 @@ import org.elasticsearch.xpack.security.action.rolemapping.ReservedRoleMappingAc
 import org.junit.After;
 
 import java.io.ByteArrayInputStream;
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -73,6 +76,7 @@ import static org.mockito.Mockito.mock;
 /**
  * Tests that file settings service can properly add role mappings.
  */
+@LuceneTestCase.SuppressFileSystems("*")
 public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
 
     private static AtomicLong versionCounter = new AtomicLong(1);
@@ -154,32 +158,37 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
         updateClusterSettings(Settings.builder().putNull("indices.recovery.max_bytes_per_sec"));
     }
 
-    public static void writeJSONFile(String node, String json, Logger logger, AtomicLong versionCounter) throws Exception {
-        writeJSONFile(node, json, logger, versionCounter, true);
-    }
-
-    public static void writeJSONFileWithoutVersionIncrement(String node, String json, Logger logger, AtomicLong versionCounter)
-        throws Exception {
-        writeJSONFile(node, json, logger, versionCounter, false);
-    }
-
-    private static void writeJSONFile(String node, String json, Logger logger, AtomicLong versionCounter, boolean incrementVersion)
-        throws Exception {
-        long version = incrementVersion ? versionCounter.incrementAndGet() : versionCounter.get();
-
+    public static void writeJSONFile(String node, String json, Logger logger, Long version) throws Exception {
         FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
-        assertTrue(fileSettingsService.watching());
-
-        Files.deleteIfExists(fileSettingsService.watchedFile());
 
         Files.createDirectories(fileSettingsService.watchedFileDir());
         Path tempFilePath = createTempFile();
 
+        String jsonWithVersion = Strings.format(json, version);
         logger.info("--> before writing JSON config to node {} with path {}", node, tempFilePath);
-        logger.info(Strings.format(json, version));
-        Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
-        Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
-        logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath);
+        logger.info(jsonWithVersion);
+
+        Files.writeString(tempFilePath, jsonWithVersion);
+        int retryCount = 0;
+        do {
+            try {
+                // this can fail on Windows because of timing
+                Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE);
+                logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath);
+                return;
+            } catch (IOException e) {
+                logger.info("--> retrying writing a settings file [{}]", retryCount);
+                if (retryCount == 4) { // retry 5 times
+                    throw e;
+                }
+                Thread.sleep(retryDelay(retryCount));
+                retryCount++;
+            }
+        } while (true);
+    }
+
+    private static long retryDelay(int retryCount) {
+        return 100 * (1 << retryCount) + Randomness.get().nextInt(10);
     }
 
     public static Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node, String expectedKey) {
@@ -320,7 +329,7 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
         ensureGreen();
 
         var savedClusterState = setupClusterStateListener(internalCluster().getMasterName(), "everyone_kibana");
-        writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter);
+        writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter.incrementAndGet());
 
         assertRoleMappingsSaveOK(savedClusterState.v1(), savedClusterState.v2());
         logger.info("---> cleanup cluster settings...");
@@ -333,7 +342,7 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
 
         savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName());
 
-        writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter);
+        writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter.incrementAndGet());
         boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
@@ -373,7 +382,7 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
         }
 
         var savedClusterState = setupClusterStateListener(internalCluster().getMasterName(), "everyone_kibana");
-        writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter);
+        writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter.incrementAndGet());
         boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
@@ -415,7 +424,8 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
         );
 
         savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName());
-        writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter);
+        String node = internalCluster().getMasterName();
+        writeJSONFile(node, emptyJSON, logger, versionCounter.incrementAndGet());
         awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
@@ -465,7 +475,7 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
         // save an empty file to clear any prior state, this ensures we don't get a stale file left over by another test
         var savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName());
 
-        writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter);
+        writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter.incrementAndGet());
         boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
@@ -490,7 +500,8 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
             }
         );
 
-        writeJSONFile(internalCluster().getMasterName(), testErrorJSON, logger, versionCounter);
+        String node = internalCluster().getMasterName();
+        writeJSONFile(node, testErrorJSON, logger, versionCounter.incrementAndGet());
         awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
 
@@ -515,7 +526,8 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
             var closeIndexResponse = indicesAdmin().close(new CloseIndexRequest(INTERNAL_SECURITY_MAIN_INDEX_7)).get();
             assertTrue(closeIndexResponse.isAcknowledged());
 
-            writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter);
+            String node = internalCluster().getMasterName();
+            writeJSONFile(node, testJSON, logger, versionCounter.incrementAndGet());
             boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
             assertTrue(awaitSuccessful);
 
@@ -550,7 +562,8 @@ public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase {
             }
         } finally {
             savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName());
-            writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter);
+            String node = internalCluster().getMasterName();
+            writeJSONFile(node, emptyJSON, logger, versionCounter.incrementAndGet());
             boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
             assertTrue(awaitSuccessful);
 

+ 7 - 6
x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java

@@ -7,9 +7,11 @@
 
 package org.elasticsearch.xpack.security;
 
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.core.Tuple;
+import org.elasticsearch.integration.RoleMappingFileSettingsIT;
 import org.elasticsearch.reservedstate.service.FileSettingsService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.SecurityIntegTestCase;
@@ -29,12 +31,11 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import static org.elasticsearch.integration.RoleMappingFileSettingsIT.setupClusterStateListener;
 import static org.elasticsearch.integration.RoleMappingFileSettingsIT.setupClusterStateListenerForCleanup;
-import static org.elasticsearch.integration.RoleMappingFileSettingsIT.writeJSONFile;
-import static org.elasticsearch.integration.RoleMappingFileSettingsIT.writeJSONFileWithoutVersionIncrement;
 import static org.elasticsearch.xpack.core.security.authz.RoleMappingMetadata.METADATA_NAME_FIELD;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
+@LuceneTestCase.SuppressFileSystems("*")
 public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase {
 
     private static final int MAX_WAIT_TIME_SECONDS = 20;
@@ -116,7 +117,7 @@ public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase {
 
         awaitFileSettingsWatcher();
         logger.info("--> write some role mappings, no other file settings");
-        writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter.incrementAndGet());
 
         assertRoleMappingsInClusterStateWithAwait(
             savedClusterState,
@@ -196,7 +197,7 @@ public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase {
         Tuple<CountDownLatch, AtomicLong> savedClusterState = setupClusterStateListener(masterNode, "everyone_kibana_alone");
         awaitFileSettingsWatcher();
         logger.info("--> write some role mappings, no other file settings");
-        writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter.incrementAndGet());
 
         assertRoleMappingsInClusterStateWithAwait(
             savedClusterState,
@@ -226,7 +227,7 @@ public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase {
         );
 
         // write without version increment and assert that change gets applied on restart
-        writeJSONFileWithoutVersionIncrement(masterNode, testJSONOnlyUpdatedRoleMappings, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, testJSONOnlyUpdatedRoleMappings, logger, versionCounter.get());
         logger.info("--> restart master");
         internalCluster().restartNode(masterNode);
         ensureGreen();
@@ -288,7 +289,7 @@ public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase {
         var savedClusterState = setupClusterStateListenerForCleanup(masterNode);
         awaitFileSettingsWatcher();
         logger.info("--> remove the role mappings with an empty settings file");
-        writeJSONFile(masterNode, emptyJSON, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, emptyJSON, logger, versionCounter.incrementAndGet());
         boolean awaitSuccessful = savedClusterState.v1().await(MAX_WAIT_TIME_SECONDS, TimeUnit.SECONDS);
         assertTrue(awaitSuccessful);
         // ensure cluster-state update got propagated to expected version

+ 7 - 7
x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/support/CleanupRoleMappingDuplicatesMigrationIT.java

@@ -14,6 +14,7 @@ import org.elasticsearch.cluster.ClusterStateListener;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.integration.RoleMappingFileSettingsIT;
 import org.elasticsearch.reservedstate.service.FileSettingsService;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.SecurityIntegTestCase;
@@ -40,7 +41,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
 import static org.elasticsearch.integration.RoleMappingFileSettingsIT.setupClusterStateListener;
-import static org.elasticsearch.integration.RoleMappingFileSettingsIT.writeJSONFile;
 import static org.elasticsearch.xpack.core.security.action.UpdateIndexMigrationVersionAction.MIGRATION_VERSION_CUSTOM_DATA_KEY;
 import static org.elasticsearch.xpack.core.security.action.UpdateIndexMigrationVersionAction.MIGRATION_VERSION_CUSTOM_KEY;
 import static org.elasticsearch.xpack.core.security.test.TestRestrictedIndices.INTERNAL_SECURITY_MAIN_INDEX_7;
@@ -138,7 +138,7 @@ public class CleanupRoleMappingDuplicatesMigrationIT extends SecurityIntegTestCa
         // Setup listener to wait for role mapping
         var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode, "everyone_kibana_alone");
         // Write role mappings
-        writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet());
         assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS));
         waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION);
 
@@ -170,7 +170,7 @@ public class CleanupRoleMappingDuplicatesMigrationIT extends SecurityIntegTestCa
         // Setup listener to wait for role mapping
         var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode, "everyone_kibana_alone");
         // Write role mappings with fallback name, this should block any security migration
-        writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet());
         assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS));
         waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION);
 
@@ -202,7 +202,7 @@ public class CleanupRoleMappingDuplicatesMigrationIT extends SecurityIntegTestCa
         // Setup listener to wait for role mapping
         var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode, "everyone_kibana_alone");
         // Write role mappings with fallback name, this should block any security migration
-        writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet());
         assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS));
         waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION);
 
@@ -228,7 +228,7 @@ public class CleanupRoleMappingDuplicatesMigrationIT extends SecurityIntegTestCa
         // Setup listener to wait for role mapping
         var nameNotAvailableListener = setupClusterStateListener(masterNode, "name_not_available_after_deserialization");
         // Write role mappings with fallback name, this should block any security migration
-        writeJSONFile(masterNode, TEST_JSON_WITH_FALLBACK_NAME, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_FALLBACK_NAME, logger, versionCounter.incrementAndGet());
         assertTrue(nameNotAvailableListener.v1().await(20, TimeUnit.SECONDS));
 
         // Create a native role mapping to create security index and trigger migration
@@ -249,7 +249,7 @@ public class CleanupRoleMappingDuplicatesMigrationIT extends SecurityIntegTestCa
         assertThat(status, equalTo(SecurityIndexManager.RoleMappingsCleanupMigrationStatus.NOT_READY));
 
         // Write file without fallback name in it to unblock migration
-        writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet());
         waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION);
     }
 
@@ -282,7 +282,7 @@ public class CleanupRoleMappingDuplicatesMigrationIT extends SecurityIntegTestCa
         // Setup listener to wait for any role mapping
         var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode);
         // Write role mappings
-        writeJSONFile(masterNode, TEST_JSON_WITH_EMPTY_ROLE_MAPPINGS, logger, versionCounter);
+        RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_EMPTY_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet());
         assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS));
 
         // Create a native role mapping to create security index and trigger migration (skipped initially)