浏览代码

Do not fail snapshot when deleting a missing snapshotted file (#30332)

When deleting or creating a snapshot for a given shard, elasticsearch 
usually starts by listing all the existing snapshotted files in the repository. 
Then it computes a diff and deletes the snapshotted files that are not 
needed anymore. During this deletion, an exception is thrown if the file 
to be deleted does not exist anymore.

This behavior is challenging with cloud based repository implementations 
like S3 where a file that has been deleted can still appear in the bucket for 
few seconds/minutes (because the deletion can take some time to be fully 
replicated on S3). If the deleted file appears in the listing of files, then the 
following deletion will fail with a NoSuchFileException and the snapshot 
will be partially created/deleted.

This pull request makes the deletion of these files a bit less strict, ie not 
failing if the file we want to delete does not exist anymore. It introduces a 
new BlobContainer.deleteIgnoringIfNotExists() method that can be used 
at some specific places where not failing when deleting a file is 
considered harmless.

Closes #28322
Tanguy Leroux 7 年之前
父节点
当前提交
1987d6261f

+ 1 - 0
docs/CHANGELOG.asciidoc

@@ -117,6 +117,7 @@ Fail snapshot operations early when creating or deleting a snapshot on a reposit
 written to by an older Elasticsearch after writing to it with a newer Elasticsearch version. ({pull}30140[#30140])
 
 Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641])
+Do not fail snapshot when deleting a missing snapshotted file ({pull}30332[#30332])
 
 //[float]
 //=== Regressions

+ 17 - 1
server/src/main/java/org/elasticsearch/common/blobstore/BlobContainer.java

@@ -75,7 +75,8 @@ public interface BlobContainer {
     void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;
 
     /**
-     * Deletes a blob with giving name, if the blob exists.  If the blob does not exist, this method throws an IOException.
+     * Deletes a blob with giving name, if the blob exists. If the blob does not exist,
+     * this method throws a NoSuchFileException.
      *
      * @param   blobName
      *          The name of the blob to delete.
@@ -84,6 +85,21 @@ public interface BlobContainer {
      */
     void deleteBlob(String blobName) throws IOException;
 
+    /**
+     * Deletes a blob with giving name, ignoring if the blob does not exist.
+     *
+     * @param   blobName
+     *          The name of the blob to delete.
+     * @throws  IOException if the blob exists but could not be deleted.
+     */
+    default void deleteBlobIgnoringIfNotExists(String blobName) throws IOException {
+        try {
+            deleteBlob(blobName);
+        } catch (final NoSuchFileException ignored) {
+            // This exception is ignored
+        }
+    }
+
     /**
      * Lists all blobs in the container.
      *

+ 0 - 7
server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java

@@ -102,13 +102,6 @@ public class BlobStoreIndexShardSnapshots implements Iterable<SnapshotFiles>, To
         this.physicalFiles = unmodifiableMap(mapBuilder);
     }
 
-    private BlobStoreIndexShardSnapshots() {
-        shardSnapshots = Collections.emptyList();
-        files = Collections.emptyMap();
-        physicalFiles = Collections.emptyMap();
-    }
-
-
     /**
      * Returns list of snapshots
      *

+ 0 - 1
server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreFormat.java

@@ -90,7 +90,6 @@ public abstract class BlobStoreFormat<T extends ToXContent> {
         return readBlob(blobContainer, blobName);
     }
 
-
     /**
      * Deletes obj in the blob container
      */

+ 44 - 40
server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

@@ -120,6 +120,7 @@ import java.util.stream.Collectors;
 
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.unmodifiableMap;
+import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;
 
 /**
  * BlobStore - based implementation of Snapshot Repository
@@ -780,7 +781,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
         } catch (IOException ex) {
             // temporary blob creation or move failed - try cleaning up
             try {
-                snapshotsBlobContainer.deleteBlob(tempBlobName);
+                snapshotsBlobContainer.deleteBlobIgnoringIfNotExists(tempBlobName);
             } catch (IOException e) {
                 ex.addSuppressed(e);
             }
@@ -915,13 +916,13 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 }
             }
             // finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
-            finalize(newSnapshotsList, fileListGeneration + 1, blobs);
+            finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot deletion [" + snapshotId + "]");
         }
 
         /**
          * Loads information about shard snapshot
          */
-        public BlobStoreIndexShardSnapshot loadSnapshot() {
+        BlobStoreIndexShardSnapshot loadSnapshot() {
             try {
                 return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getUUID());
             } catch (IOException ex) {
@@ -930,54 +931,57 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
         }
 
         /**
-         * Removes all unreferenced files from the repository and writes new index file
+         * Writes a new index file for the shard and removes all unreferenced files from the repository.
          *
-         * We need to be really careful in handling index files in case of failures to make sure we have index file that
-         * points to files that were deleted.
+         * We need to be really careful in handling index files in case of failures to make sure we don't
+         * have index file that points to files that were deleted.
          *
-         *
-         * @param snapshots list of active snapshots in the container
+         * @param snapshots          list of active snapshots in the container
          * @param fileListGeneration the generation number of the snapshot index file
-         * @param blobs     list of blobs in the container
+         * @param blobs              list of blobs in the container
+         * @param reason             a reason explaining why the shard index file is written
          */
-        protected void finalize(List<SnapshotFiles> snapshots, int fileListGeneration, Map<String, BlobMetaData> blobs) {
-            BlobStoreIndexShardSnapshots newSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
-            // delete old index files first
-            for (String blobName : blobs.keySet()) {
-                if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
-                    try {
-                        blobContainer.deleteBlob(blobName);
-                    } catch (IOException e) {
-                        // We cannot delete index file - this is fatal, we cannot continue, otherwise we might end up
-                        // with references to non-existing files
-                        throw new IndexShardSnapshotFailedException(shardId, "error deleting index file ["
-                            + blobName + "] during cleanup", e);
-                    }
+        protected void finalize(final List<SnapshotFiles> snapshots,
+                                final int fileListGeneration,
+                                final Map<String, BlobMetaData> blobs,
+                                final String reason) {
+            final String indexGeneration = Integer.toString(fileListGeneration);
+            final String currentIndexGen = indexShardSnapshotsFormat.blobName(indexGeneration);
+
+            final BlobStoreIndexShardSnapshots updatedSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
+            try {
+                // If we deleted all snapshots, we don't need to create a new index file
+                if (snapshots.size() > 0) {
+                    indexShardSnapshotsFormat.writeAtomic(updatedSnapshots, blobContainer, indexGeneration);
                 }
-            }
 
-            // now go over all the blobs, and if they don't exist in a snapshot, delete them
-            for (String blobName : blobs.keySet()) {
-                // delete unused files
-                if (blobName.startsWith(DATA_BLOB_PREFIX)) {
-                    if (newSnapshots.findNameFile(BlobStoreIndexShardSnapshot.FileInfo.canonicalName(blobName)) == null) {
+                // Delete old index files
+                for (final String blobName : blobs.keySet()) {
+                    if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
                         try {
-                            blobContainer.deleteBlob(blobName);
+                            blobContainer.deleteBlobIgnoringIfNotExists(blobName);
                         } catch (IOException e) {
-                            // TODO: don't catch and let the user handle it?
-                            logger.debug(() -> new ParameterizedMessage("[{}] [{}] error deleting blob [{}] during cleanup", snapshotId, shardId, blobName), e);
+                            logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete index blob [{}] during finalization",
+                                snapshotId, shardId, blobName), e);
+                            throw e;
                         }
                     }
                 }
-            }
 
-            // If we deleted all snapshots - we don't need to create the index file
-            if (snapshots.size() > 0) {
-                try {
-                    indexShardSnapshotsFormat.writeAtomic(newSnapshots, blobContainer, Integer.toString(fileListGeneration));
-                } catch (IOException e) {
-                    throw new IndexShardSnapshotFailedException(shardId, "Failed to write file list", e);
+                // Delete all blobs that don't exist in a snapshot
+                for (final String blobName : blobs.keySet()) {
+                    if (blobName.startsWith(DATA_BLOB_PREFIX) && (updatedSnapshots.findNameFile(canonicalName(blobName)) == null)) {
+                        try {
+                            blobContainer.deleteBlobIgnoringIfNotExists(blobName);
+                        } catch (IOException e) {
+                            logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete data blob [{}] during finalization",
+                                snapshotId, shardId, blobName), e);
+                        }
+                    }
                 }
+            } catch (IOException e) {
+                String message = "Failed to finalize " + reason + " with shard index [" + currentIndexGen + "]";
+                throw new IndexShardSnapshotFailedException(shardId, message, e);
             }
         }
 
@@ -1003,7 +1007,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 if (!name.startsWith(DATA_BLOB_PREFIX)) {
                     continue;
                 }
-                name = BlobStoreIndexShardSnapshot.FileInfo.canonicalName(name);
+                name = canonicalName(name);
                 try {
                     long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
                     if (currentGen > generation) {
@@ -1217,7 +1221,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 newSnapshotsList.add(point);
             }
             // finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
-            finalize(newSnapshotsList, fileListGeneration + 1, blobs);
+            finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot creation [" + snapshotId + "]");
             snapshotStatus.moveToDone(System.currentTimeMillis());
 
         }

+ 16 - 3
test/framework/src/main/java/org/elasticsearch/repositories/ESBlobStoreContainerTestCase.java

@@ -29,13 +29,14 @@ import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.NoSuchFileException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
-import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
 import static org.elasticsearch.repositories.ESBlobStoreTestCase.randomBytes;
 import static org.elasticsearch.repositories.ESBlobStoreTestCase.readBlobFully;
+import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.notNullValue;
 
@@ -116,7 +117,7 @@ public abstract class ESBlobStoreContainerTestCase extends ESTestCase {
         try (BlobStore store = newBlobStore()) {
             final String blobName = "foobar";
             final BlobContainer container = store.blobContainer(new BlobPath());
-            expectThrows(IOException.class, () -> container.deleteBlob(blobName));
+            expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
 
             byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
             final BytesArray bytesArray = new BytesArray(data);
@@ -124,7 +125,19 @@ public abstract class ESBlobStoreContainerTestCase extends ESTestCase {
             container.deleteBlob(blobName); // should not raise
 
             // blob deleted, so should raise again
-            expectThrows(IOException.class, () -> container.deleteBlob(blobName));
+            expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
+        }
+    }
+
+    public void testDeleteBlobIgnoringIfNotExists() throws IOException {
+        try (BlobStore store = newBlobStore()) {
+            BlobPath blobPath = new BlobPath();
+            if (randomBoolean()) {
+                blobPath = blobPath.add(randomAlphaOfLengthBetween(1, 10));
+            }
+
+            final BlobContainer container = store.blobContainer(blobPath);
+            container.deleteBlobIgnoringIfNotExists("does_not_exist");
         }
     }