Browse Source

More consistent logging messages for snapshot deletion (#101024)

This PR makes sure that the start message ("deleting snapshots ...") is
logged after the cluster state is processed and any failures before
finishing updating the new repositoryData are always logged at warning
level.

Resolves: #99057 Resolves: #100481
Yang Wang 2 years ago
parent
commit
168a464f41

+ 5 - 0
docs/changelog/101024.yaml

@@ -0,0 +1,5 @@
+pr: 101024
+summary: More consistent logging messages for snapshot deletion
+area: Snapshot/Restore
+type: bug
+issues: []

+ 124 - 0
server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceIT.java

@@ -0,0 +1,124 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.snapshots;
+
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.elasticsearch.action.ActionFuture;
+import org.elasticsearch.action.support.master.AcknowledgedResponse;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.snapshots.mockstore.MockRepository;
+import org.elasticsearch.test.MockLogAppender;
+
+import java.util.List;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.is;
+
+public class SnapshotsServiceIT extends AbstractSnapshotIntegTestCase {
+
+    public void testDeletingSnapshotsIsLoggedAfterClusterStateIsProcessed() throws Exception {
+        createRepository("test-repo", "fs");
+        createIndexWithRandomDocs("test-index", randomIntBetween(1, 42));
+        createSnapshot("test-repo", "test-snapshot", List.of("test-index"));
+
+        final MockLogAppender mockLogAppender = new MockLogAppender();
+
+        try {
+            mockLogAppender.start();
+            Loggers.addAppender(LogManager.getLogger(SnapshotsService.class), mockLogAppender);
+
+            mockLogAppender.addExpectation(
+                new MockLogAppender.UnseenEventExpectation(
+                    "[does-not-exist]",
+                    SnapshotsService.class.getName(),
+                    Level.INFO,
+                    "deleting snapshots [does-not-exist] from repository [test-repo]"
+                )
+            );
+
+            mockLogAppender.addExpectation(
+                new MockLogAppender.SeenEventExpectation(
+                    "[deleting test-snapshot]",
+                    SnapshotsService.class.getName(),
+                    Level.INFO,
+                    "deleting snapshots [test-snapshot] from repository [test-repo]"
+                )
+            );
+
+            mockLogAppender.addExpectation(
+                new MockLogAppender.SeenEventExpectation(
+                    "[test-snapshot deleted]",
+                    SnapshotsService.class.getName(),
+                    Level.INFO,
+                    "snapshots [test-snapshot/*] deleted"
+                )
+            );
+
+            final SnapshotMissingException e = expectThrows(
+                SnapshotMissingException.class,
+                () -> startDeleteSnapshot("test-repo", "does-not-exist").actionGet()
+            );
+            assertThat(e.getMessage(), containsString("[test-repo:does-not-exist] is missing"));
+            assertThat(startDeleteSnapshot("test-repo", "test-snapshot").actionGet().isAcknowledged(), is(true));
+
+            awaitNoMoreRunningOperations(); // ensure background file deletion is completed
+            mockLogAppender.assertAllExpectationsMatched();
+        } finally {
+            Loggers.removeAppender(LogManager.getLogger(SnapshotsService.class), mockLogAppender);
+            mockLogAppender.stop();
+            deleteRepository("test-repo");
+        }
+    }
+
+    public void testSnapshotDeletionFailureShouldBeLogged() throws Exception {
+        createRepository("test-repo", "mock");
+        createIndexWithRandomDocs("test-index", randomIntBetween(1, 42));
+        createSnapshot("test-repo", "test-snapshot", List.of("test-index"));
+
+        final MockLogAppender mockLogAppender = new MockLogAppender();
+
+        try {
+            mockLogAppender.start();
+            Loggers.addAppender(LogManager.getLogger(SnapshotsService.class), mockLogAppender);
+
+            mockLogAppender.addExpectation(
+                new MockLogAppender.SeenEventExpectation(
+                    "[test-snapshot]",
+                    SnapshotsService.class.getName(),
+                    Level.WARN,
+                    "failed to complete snapshot deletion for [test-snapshot] from repository [test-repo]"
+                )
+            );
+
+            if (randomBoolean()) {
+                // Failure when listing root blobs
+                final MockRepository mockRepository = getRepositoryOnMaster("test-repo");
+                mockRepository.setRandomControlIOExceptionRate(1.0);
+                final Exception e = expectThrows(Exception.class, () -> startDeleteSnapshot("test-repo", "test-snapshot").actionGet());
+                assertThat(e.getCause().getMessage(), containsString("Random IOException"));
+            } else {
+                // Failure when finalizing on index-N file
+                final ActionFuture<AcknowledgedResponse> deleteFuture;
+                blockMasterFromFinalizingSnapshotOnIndexFile("test-repo");
+                deleteFuture = startDeleteSnapshot("test-repo", "test-snapshot");
+                waitForBlock(internalCluster().getMasterName(), "test-repo");
+                unblockNode("test-repo", internalCluster().getMasterName());
+                final Exception e = expectThrows(Exception.class, deleteFuture::actionGet);
+                assertThat(e.getCause().getMessage(), containsString("exception after block"));
+            }
+
+            mockLogAppender.assertAllExpectationsMatched();
+        } finally {
+            Loggers.removeAppender(LogManager.getLogger(SnapshotsService.class), mockLogAppender);
+            mockLogAppender.stop();
+            deleteRepository("test-repo");
+        }
+    }
+}

+ 17 - 4
server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

@@ -1887,9 +1887,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
     public void deleteSnapshots(final DeleteSnapshotRequest request, final ActionListener<Void> listener) {
     public void deleteSnapshots(final DeleteSnapshotRequest request, final ActionListener<Void> listener) {
         final String repositoryName = request.repository();
         final String repositoryName = request.repository();
         final String[] snapshotNames = request.snapshots();
         final String[] snapshotNames = request.snapshots();
-        logger.info(
-            () -> format("deleting snapshots [%s] from repository [%s]", arrayToCommaDelimitedString(snapshotNames), repositoryName)
-        );
 
 
         final Repository repository = repositoriesService.repository(repositoryName);
         final Repository repository = repositoriesService.repository(repositoryName);
         executeConsistentStateUpdate(repository, repositoryData -> new ClusterStateUpdateTask(request.masterNodeTimeout()) {
         executeConsistentStateUpdate(repository, repositoryData -> new ClusterStateUpdateTask(request.masterNodeTimeout()) {
@@ -2064,6 +2061,10 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
 
 
             @Override
             @Override
             public void clusterStateProcessed(ClusterState oldState, ClusterState newState) {
             public void clusterStateProcessed(ClusterState oldState, ClusterState newState) {
+                logger.info(
+                    () -> format("deleting snapshots [%s] from repository [%s]", arrayToCommaDelimitedString(snapshotNames), repositoryName)
+                );
+
                 if (completedNoCleanup.isEmpty() == false) {
                 if (completedNoCleanup.isEmpty() == false) {
                     logger.info("snapshots {} aborted", completedNoCleanup);
                     logger.info("snapshots {} aborted", completedNoCleanup);
                 }
                 }
@@ -2355,7 +2356,19 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
 
 
                     @Override
                     @Override
                     public void onFailure(Exception e) {
                     public void onFailure(Exception e) {
-                        logger.debug(() -> "failed to complete snapshot deletion [" + deleteEntry + "]", e);
+                        logger.warn(() -> {
+                            final StringBuilder sb = new StringBuilder("failed to complete snapshot deletion for [");
+                            Strings.collectionToDelimitedStringWithLimit(
+                                deleteEntry.getSnapshots().stream().map(SnapshotId::getName).toList(),
+                                ",",
+                                "",
+                                "",
+                                1024,
+                                sb
+                            );
+                            sb.append("] from repository [").append(deleteEntry.repository()).append("]");
+                            return sb;
+                        }, e);
                         submitUnbatchedTask(
                         submitUnbatchedTask(
                             "remove snapshot deletion metadata after failed delete",
                             "remove snapshot deletion metadata after failed delete",
                             new RemoveSnapshotDeletionAndContinueTask(deleteEntry, repositoryData) {
                             new RemoveSnapshotDeletionAndContinueTask(deleteEntry, repositoryData) {