Browse Source

Fix BlobStoreRepositoryTest Leaking a RepoData Write Operation (#68441)

We were leaking a repo data write operation here by never waiting on the future.
Since we normally don't invoke `writeIndexGen` manually there are no safety measures
around removing the repository from the cluster state and adding it back with a different
path concurrently, leading to a collision between repo generation CS updates since all tests
use the same repository name.
Fix the missing write and added some trace logging that was very helpful in tracking this down.

Closes #68437
Armin Braun 4 years ago
parent
commit
86005dedaf

+ 8 - 1
server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

@@ -1414,7 +1414,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 };
                 threadPool.generic().execute(() -> doGetRepositoryData(
                         ActionListener.wrap(repoData -> clusterService.submitStateUpdateTask(
-                                "set safe repository generation [" + metadata.name() + "][" + repoData + "]",
+                                "set initial safe repository generation [" + metadata.name() + "][" + repoData.getGenId() + "]",
                                 new ClusterStateUpdateTask() {
                                     @Override
                                     public ClusterState execute(ClusterState currentState) {
@@ -1441,6 +1441,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
 
                                     @Override
                                     public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
+                                        logger.trace("[{}] initialized repository generation in cluster state to [{}]",
+                                                metadata.name(), repoData.getGenId());
                                         // Resolve listeners on generic pool since some callbacks for repository data do additional IO
                                         threadPool.generic().execute(() -> {
                                             final ActionListener<RepositoryData> existingListener;
@@ -1449,6 +1451,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                                                 repoDataInitialized = null;
                                             }
                                             existingListener.onResponse(repoData);
+                                            logger.trace("[{}] called listeners after initializing repository to generation [{}]"
+                                                    , metadata.name(), repoData.getGenId());
                                         });
                                     }
                                 }), onFailure)));
@@ -1719,6 +1723,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
      */
     protected void writeIndexGen(RepositoryData repositoryData, long expectedGen, Version version,
                                  Function<ClusterState, ClusterState> stateFilter, ActionListener<RepositoryData> listener) {
+        logger.trace("[{}] writing repository data on top of expected generation [{}]", metadata.name(), expectedGen);
         assert isReadOnly() == false; // can not write to a read only repository
         final long currentGen = repositoryData.getGenId();
         if (currentGen != expectedGen) {
@@ -1776,6 +1781,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
 
                 @Override
                 public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
+                    logger.trace("[{}] successfully set pending repository generation to [{}]", metadata.name(), newGen);
                     setPendingStep.onResponse(newGen);
                 }
             });
@@ -1872,6 +1878,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
 
                     @Override
                     public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
+                        logger.trace("[{}] successfully set safe repository generation to [{}]", metadata.name(), newGen);
                         cacheRepositoryData(repoDataToCache, newGen);
                         threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(listener, () -> {
                             // Delete all now outdated index files up to 1000 blobs back from the new generation.

+ 2 - 3
server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryTests.java

@@ -183,14 +183,13 @@ public class BlobStoreRepositoryTests extends ESSingleNodeTestCase {
         assertThat(repository.readSnapshotIndexLatestBlob(), equalTo(expectedGeneration + 2L));
     }
 
-    public void testRepositoryDataConcurrentModificationNotAllowed() {
+    public void testRepositoryDataConcurrentModificationNotAllowed() throws Exception {
         final BlobStoreRepository repository = setupRepo();
 
         // write to index generational file
         RepositoryData repositoryData = generateRandomRepoData();
         final long startingGeneration = repositoryData.getGenId();
-        final PlainActionFuture<RepositoryData> future = PlainActionFuture.newFuture();
-        repository.writeIndexGen(repositoryData, startingGeneration, Version.CURRENT, Function.identity(), future);
+        writeIndexGen(repository, repositoryData, startingGeneration);
 
         // write repo data again to index generational file, errors because we already wrote to the
         // N+1 generation from which this repository data instance was created