Browse Source

Execute SnapshotsService Error Callback on Generic Thread (#46277)

I couldn't find a test for this, as it seems we only get
into this error handler on a bug. Regardless, we are
executing the snapshot finalization on the master update
thread here, which shouldn't happen and will make
debugging a production issue resulting from this
trickier than it has to be (because we will probably also
get a "cluster state apply is slow" warning in addition
to the original bug).
I used the generic pool here instead of the snapshot pool
because we're resolving the user callback here as
well, and the generic pool seemed like the safer bet for
that.
Armin Braun 6 years ago
parent
commit
a0967a4b20
1 changed files with 21 additions and 19 deletions
  1. 21 19
      server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

+ 21 - 19
server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

@@ -559,26 +559,28 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
         }
 
         private void cleanupAfterError(Exception exception) {
-            if(snapshotCreated) {
-                try {
-                    repositoriesService.repository(snapshot.snapshot().getRepository())
-                                       .finalizeSnapshot(snapshot.snapshot().getSnapshotId(),
-                                                         snapshot.indices(),
-                                                         snapshot.startTime(),
-                                                         ExceptionsHelper.detailedMessage(exception),
-                                                         0,
-                                                         Collections.emptyList(),
-                                                         snapshot.getRepositoryStateId(),
-                                                         snapshot.includeGlobalState(),
-                                                         metaDataForSnapshot(snapshot, clusterService.state().metaData()),
-                                                         snapshot.userMetadata());
-                } catch (Exception inner) {
-                    inner.addSuppressed(exception);
-                    logger.warn(() -> new ParameterizedMessage("[{}] failed to close snapshot in repository",
-                        snapshot.snapshot()), inner);
+            threadPool.generic().execute(() -> {
+                if (snapshotCreated) {
+                    try {
+                        repositoriesService.repository(snapshot.snapshot().getRepository())
+                            .finalizeSnapshot(snapshot.snapshot().getSnapshotId(),
+                                snapshot.indices(),
+                                snapshot.startTime(),
+                                ExceptionsHelper.detailedMessage(exception),
+                                0,
+                                Collections.emptyList(),
+                                snapshot.getRepositoryStateId(),
+                                snapshot.includeGlobalState(),
+                                metaDataForSnapshot(snapshot, clusterService.state().metaData()),
+                                snapshot.userMetadata());
+                    } catch (Exception inner) {
+                        inner.addSuppressed(exception);
+                        logger.warn(() -> new ParameterizedMessage("[{}] failed to close snapshot in repository",
+                            snapshot.snapshot()), inner);
+                    }
                 }
-            }
-            userCreateSnapshotListener.onFailure(e);
+                userCreateSnapshotListener.onFailure(e);
+            });
         }
     }