Browse Source

Treat TransportService stopped error as node is closing (#39800)

If TransportService is stopped before a shard-failure request is sent
but after the request is registered, TransportService will notify
ReplicationOperation of a TransportException with the error message:
"transport stopped, action: internal:cluster/shard/failure".

Relates #39584
Nhat Nguyen 6 years ago
parent
commit
660c7ed343

+ 3 - 1
server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java

@@ -206,7 +206,9 @@ public class ReplicationOperation<
     private void onNoLongerPrimary(Exception failure) {
         final Throwable cause = ExceptionsHelper.unwrapCause(failure);
         final boolean nodeIsClosing = cause instanceof NodeClosedException
-            || (cause instanceof TransportException && "TransportService is closed stopped can't send request".equals(cause.getMessage()));
+            || (cause instanceof TransportException &&
+                ("TransportService is closed stopped can't send request".equals(cause.getMessage())
+                || "transport stopped, action: internal:cluster/shard/failure".equals(cause.getMessage())));
         final String message;
         if (nodeIsClosing) {
             message = String.format(Locale.ROOT,

+ 4 - 1
server/src/test/java/org/elasticsearch/action/support/replication/ReplicationOperationTests.java

@@ -205,8 +205,11 @@ public class ReplicationOperationTests extends ESTestCase {
             shardActionFailure = new NodeClosedException(new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT));
         } else if (randomBoolean()) {
             shardActionFailure = new SendRequestTransportException(
-                new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT), "internal:cluster/shard/failure",
+                new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT), ShardStateAction.SHARD_FAILED_ACTION_NAME,
                 new TransportException("TransportService is closed stopped can't send request"));
+        } else if (randomBoolean()) {
+            shardActionFailure = new TransportException(
+                "transport stopped, action: " + ShardStateAction.SHARD_FAILED_ACTION_NAME);
         } else {
             shardActionFailure = new ShardStateAction.NoLongerPrimaryShardException(failedReplica.shardId(), "the king is dead");
         }