Browse Source

Improve handshake failure messages (#44485)

Today we report an exception on a handshake failure (e.g. cluster name
mismatch) but the message does not include all the details of the mismatch. If
the mismatch is something subtle like `my-cluster` instead of `my_cluster` then
we cannot diagnose this from the message alone. This commit adds the details of
the local cluster to the message, along with the details of the remote cluster,
improving the utility of the exception message if reported in isolation.
David Turner 6 years ago
parent
commit
f95a8acddc

+ 15 - 0
server/src/main/java/org/elasticsearch/cluster/ClusterName.java

@@ -27,6 +27,7 @@ import org.elasticsearch.common.settings.Settings;
 
 import java.io.IOException;
 import java.util.Objects;
+import java.util.function.Predicate;
 
 public class ClusterName implements Writeable {
 
@@ -81,4 +82,18 @@ public class ClusterName implements Writeable {
     public String toString() {
         return "Cluster [" + value + "]";
     }
+
+    public Predicate<ClusterName> getEqualityPredicate() {
+        return new Predicate<ClusterName>() {
+            @Override
+            public boolean test(ClusterName o) {
+                return ClusterName.this.equals(o);
+            }
+
+            @Override
+            public String toString() {
+                return "local cluster name [" + ClusterName.this.value() + "]";
+            }
+        };
+    }
 }

+ 18 - 3
server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java

@@ -255,6 +255,22 @@ final class RemoteClusterConnection implements TransportConnectionListener, Clos
         return new ProxyConnection(connection, remoteClusterNode);
     }
 
+    private Predicate<ClusterName> getRemoteClusterNamePredicate() {
+        return
+            new Predicate<ClusterName>() {
+                @Override
+                public boolean test(ClusterName c) {
+                    return remoteClusterName.get() == null || c.equals(remoteClusterName.get());
+                }
+
+                @Override
+                public String toString() {
+                    return remoteClusterName.get() == null ? "any cluster name"
+                        : "expected remote cluster name [" + remoteClusterName.get().value() + "]";
+                }
+            };
+    }
+
 
     static final class ProxyConnection implements Transport.Connection {
         private final Transport.Connection proxyConnection;
@@ -454,10 +470,9 @@ final class RemoteClusterConnection implements TransportConnectionListener, Clos
                                 ConnectionProfile connectionProfile = connectionManager.getConnectionProfile();
                                 handshakeResponse = PlainActionFuture.get(fut ->
                                     transportService.handshake(connection, connectionProfile.getHandshakeTimeout().millis(),
-                                        (c) -> remoteClusterName.get() == null ? true : c.equals(remoteClusterName.get()), fut));
+                                        getRemoteClusterNamePredicate(), fut));
                             } catch (IllegalStateException ex) {
-                                logger.warn(() -> new ParameterizedMessage("seed node {} cluster name mismatch expected " +
-                                    "cluster name {}", connection.getNode(), remoteClusterName.get()), ex);
+                                logger.warn(new ParameterizedMessage("failed to connect to seed node [{}]", connection.getNode()), ex);
                                 throw ex;
                             }
 

+ 7 - 6
server/src/main/java/org/elasticsearch/transport/TransportService.java

@@ -420,7 +420,8 @@ public class TransportService extends AbstractLifecycleComponent implements Tran
         final Transport.Connection connection,
         final long handshakeTimeout,
         final ActionListener<DiscoveryNode> listener) {
-        handshake(connection, handshakeTimeout, clusterName::equals, ActionListener.map(listener, HandshakeResponse::getDiscoveryNode));
+        handshake(connection, handshakeTimeout, clusterName.getEqualityPredicate(),
+            ActionListener.map(listener, HandshakeResponse::getDiscoveryNode));
     }
 
     /**
@@ -447,12 +448,12 @@ public class TransportService extends AbstractLifecycleComponent implements Tran
                 new ActionListener<>() {
                     @Override
                     public void onResponse(HandshakeResponse response) {
-                        if (!clusterNamePredicate.test(response.clusterName)) {
-                            listener.onFailure(new IllegalStateException("handshake failed, mismatched cluster name [" +
-                                response.clusterName + "] - " + node.toString()));
+                        if (clusterNamePredicate.test(response.clusterName) == false) {
+                            listener.onFailure(new IllegalStateException("handshake with [" + node + "] failed: remote cluster name ["
+                                + response.clusterName.value() + "] does not match " + clusterNamePredicate));
                         } else if (response.version.isCompatible(localNode.getVersion()) == false) {
-                            listener.onFailure(new IllegalStateException("handshake failed, incompatible version [" +
-                                response.version + "] - " + node));
+                            listener.onFailure(new IllegalStateException("handshake with [" + node + "] failed: remote node version ["
+                                + response.version + "] is incompatible with local node version [" + localNode.getVersion() + "]"));
                         } else {
                             listener.onResponse(response);
                         }

+ 7 - 3
server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java

@@ -92,6 +92,7 @@ import java.util.stream.Collectors;
 
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.emptySet;
+import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
@@ -100,6 +101,7 @@ import static org.hamcrest.Matchers.not;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.sameInstance;
 import static org.hamcrest.Matchers.startsWith;
+import static org.hamcrest.Matchers.endsWith;
 
 public class RemoteClusterConnectionTests extends ESTestCase {
 
@@ -1106,9 +1108,11 @@ public class RemoteClusterConnectionTests extends ESTestCase {
                     assertTrue(connection.assertNoRunningConnections());
                     IllegalStateException illegalStateException = expectThrows(IllegalStateException.class, () ->
                         updateSeedNodes(connection, Arrays.asList(Tuple.tuple("other", otherClusterTransport::getLocalDiscoNode))));
-                    assertThat(illegalStateException.getMessage(),
-                        startsWith("handshake failed, mismatched cluster name [Cluster [otherCluster]]" +
-                            " - {other_cluster_discoverable_node}"));
+                    assertThat(illegalStateException.getMessage(), allOf(
+                        startsWith("handshake with [{other_cluster_discoverable_node}"),
+                        containsString(otherClusterTransport.getLocalDiscoNode().toString()),
+                        endsWith(" failed: remote cluster name [otherCluster] " +
+                            "does not match expected remote cluster name [testClusterNameIsChecked]")));
                 }
             }
         }

+ 6 - 3
server/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java

@@ -135,9 +135,10 @@ public class TransportServiceHandshakeTests extends ESTestCase {
                 PlainActionFuture.get(fut -> handleA.transportService.handshake(connection, timeout, ActionListener.map(fut, x -> null)));
             }
         });
-        assertThat(ex.getMessage(), containsString("handshake failed, mismatched cluster name [Cluster [b]]"));
+        assertThat(ex.getMessage(), containsString("handshake with [" + discoveryNode +
+            "] failed: remote cluster name [b] does not match local cluster name [a]"));
         assertFalse(handleA.transportService.nodeConnected(discoveryNode));
-}
+    }
 
     public void testIncompatibleVersions() {
         Settings settings = Settings.builder().put("cluster.name", "test").build();
@@ -156,7 +157,9 @@ public class TransportServiceHandshakeTests extends ESTestCase {
                 PlainActionFuture.get(fut -> handleA.transportService.handshake(connection, timeout, ActionListener.map(fut, x -> null)));
             }
         });
-        assertThat(ex.getMessage(), containsString("handshake failed, incompatible version"));
+        assertThat(ex.getMessage(), containsString("handshake with [" + discoveryNode +
+            "] failed: remote node version [" + handleB.discoveryNode.getVersion() + "] is incompatible with local node version [" +
+            Version.CURRENT + "]"));
         assertFalse(handleA.transportService.nodeConnected(discoveryNode));
     }