Browse Source

Improve string reps for snapshot debugging (#101295)

Various things related to snapshots appear in debug logs, but have no
useful string representation which makes it hard to follow the process.
This commit adds some missing string representations.
David Turner 2 years ago
parent
commit
5bbfe66b99

+ 7 - 3
server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStressTestsIT.java

@@ -1031,11 +1031,15 @@ public class SnapshotStressTestsIT extends AbstractSnapshotIntegTestCase {
                             final Releasable abortReleasable = abortReleasables.transfer();
 
                             abortRunnable = mustSucceed(() -> {
-                                logger.info("--> aborting/deleting snapshot [{}:{}]", trackedRepository.repositoryName, snapshotName);
+                                logger.info("--> abort/delete snapshot [{}:{}] start", trackedRepository.repositoryName, snapshotName);
                                 deleteSnapshotRequestBuilder.execute(new ActionListener<>() {
                                     @Override
                                     public void onResponse(AcknowledgedResponse acknowledgedResponse) {
-                                        logger.info("--> aborted/deleted snapshot [{}:{}]", trackedRepository.repositoryName, snapshotName);
+                                        logger.info(
+                                            "--> abort/delete snapshot [{}:{}] success",
+                                            trackedRepository.repositoryName,
+                                            snapshotName
+                                        );
                                         Releasables.close(abortReleasable);
                                         assertTrue(acknowledgedResponse.isAcknowledged());
                                     }
@@ -1046,7 +1050,7 @@ public class SnapshotStressTestsIT extends AbstractSnapshotIntegTestCase {
                                         if (ExceptionsHelper.unwrapCause(e) instanceof SnapshotMissingException) {
                                             // processed before the snapshot even started
                                             logger.info(
-                                                "--> abort/delete of [{}:{}] got snapshot missing",
+                                                "--> abort/delete snapshot [{}:{}] got snapshot missing",
                                                 trackedRepository.repositoryName,
                                                 snapshotName
                                             );

+ 9 - 1
server/src/main/java/org/elasticsearch/cluster/SnapshotDeletionsInProgress.java

@@ -11,6 +11,7 @@ package org.elasticsearch.cluster;
 import org.elasticsearch.TransportVersion;
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.cluster.ClusterState.Custom;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.collect.Iterators;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -321,7 +322,14 @@ public class SnapshotDeletionsInProgress extends AbstractNamedDiffable<Custom> i
 
         @Override
         public String toString() {
-            return "SnapshotDeletionsInProgress.Entry[[" + uuid + "][" + state + "]" + snapshots + "]";
+            return Strings.format(
+                "SnapshotDeletionsInProgress.Entry[[%s@%d][%s][%s]%s]",
+                repoName,
+                repositoryStateId,
+                uuid,
+                state,
+                snapshots
+            );
         }
     }
 

+ 27 - 2
server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

@@ -1673,7 +1673,17 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                     ),
                     repositoryStateId,
                     repositoryMetaVersion,
-                    finalizeSnapshotContext::updatedClusterState,
+                    new Function<>() {
+                        @Override
+                        public ClusterState apply(ClusterState state) {
+                            return finalizeSnapshotContext.updatedClusterState(state);
+                        }
+
+                        @Override
+                        public String toString() {
+                            return "finalizing snapshot [" + metadata.name() + "][" + snapshotId + "]";
+                        }
+                    },
                     l.map(newRepositoryData -> new RootBlobUpdateResult(existingRepositoryData, newRepositoryData))
                 );
                 // NB failure of writeIndexGen doesn't guarantee the update failed, so we cannot safely clean anything up on failure
@@ -2570,6 +2580,11 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 logger.trace("[{}] successfully set pending repository generation to [{}]", metadata.name(), newGen);
                 setPendingStep.onResponse(newGen);
             }
+
+            @Override
+            public String toString() {
+                return Strings.format("start RepositoryData update from generation [%d], stateFilter=[%s]", expectedGen, stateFilter);
+            }
         });
 
         final ListenableFuture<RepositoryData> filterRepositoryDataStep = new ListenableFuture<>();
@@ -2630,7 +2645,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
             if (ensureSafeGenerationExists(expectedGen, delegate::onFailure) == false) {
                 return;
             }
-            final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen);
+            final String indexBlob = INDEX_FILE_PREFIX + newGen;
             logger.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob);
             writeAtomic(blobContainer(), indexBlob, out -> {
                 try (XContentBuilder xContentBuilder = XContentFactory.jsonBuilder(org.elasticsearch.core.Streams.noCloseStream(out))) {
@@ -2689,6 +2704,16 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                     cacheRepositoryData(newRepositoryData, version);
                     delegate.onResponse(newRepositoryData);
                 }
+
+                @Override
+                public String toString() {
+                    return Strings.format(
+                        "complete RepositoryData update from generation [%d] to generation [%d], stateFilter=[%s]",
+                        expectedGen,
+                        newGen,
+                        stateFilter
+                    );
+                }
             });
         }));
     }

+ 24 - 3
server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

@@ -562,6 +562,11 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
                         logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry);
                     }
                 }
+
+                @Override
+                public String toString() {
+                    return Strings.format("start snapshot clone [%s] from [%s]", updatedEntry.snapshot(), updatedEntry.source());
+                }
             }, "start snapshot clone", onFailure), onFailure)
         );
     }
@@ -1445,7 +1450,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
                 );
             }, e -> handleFinalizationFailure(e, snapshot, repositoryData)));
         } catch (Exception e) {
-            assert false : new AssertionError(e);
+            logger.error(Strings.format("unexpected failure finalizing %s", snapshot), e);
+            assert false : new AssertionError("unexpected failure finalizing " + snapshot, e);
             handleFinalizationFailure(e, snapshot, repositoryData);
         }
     }
@@ -2096,6 +2102,11 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
                     }
                 }
             }
+
+            @Override
+            public String toString() {
+                return Strings.format("delete snapshot task [%s]%s", repository, Arrays.toString(snapshotNames));
+            }
         }, "delete snapshot [" + repository + "]" + Arrays.toString(snapshotNames), listener::onFailure);
     }
 
@@ -2691,6 +2702,11 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
             final boolean added = reassignments.add(shardId);
             assert added : "should only ever reassign each shard once but assigned [" + shardId + "] multiple times";
         }
+
+        @Override
+        public String toString() {
+            return "RemoveSnapshotDeletionAndContinueTask[" + deleteEntry + "]";
+        }
     }
 
     /**
@@ -3584,9 +3600,14 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
         }
 
         synchronized boolean assertNotQueued(Snapshot snapshot) {
-            assert snapshotsToFinalize.getOrDefault(snapshot.getRepository(), new LinkedList<>())
+            if (snapshotsToFinalize.getOrDefault(snapshot.getRepository(), new LinkedList<>())
                 .stream()
-                .noneMatch(entry -> entry.equals(snapshot)) : "Snapshot [" + snapshot + "] is still in finalization queue";
+                .anyMatch(entry -> entry.equals(snapshot))) {
+
+                final var assertionError = new AssertionError("[" + snapshot + "] should not be in " + snapshotsToFinalize);
+                logger.error("assertNotQueued failure", assertionError);
+                throw assertionError;
+            }
             return true;
         }