فهرست منبع

[Discovery] verify connect when sending a rejoin cluster request

When a master receives a cluster state from another node, it compares the local cluster state with the one it got. If the local one has a higher version, it sends a JoinClusterRequest to the other master to tell it step down. Because our network layer is asymmetric, we need to make sure we're connected before sending.

Closes #6779
Boaz Leskes 11 سال پیش
والد
کامیت
c9b0816b29
1فایلهای تغییر یافته به همراه17 افزوده شده و 7 حذف شده
  1. 17 7
      src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

+ 17 - 7
src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

@@ -554,7 +554,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
             return;
         }
         if (master) {
-            logger.debug("received cluster state from [{}] which is also master but with cluster name [{}]",  newClusterState.nodes().masterNode(), incomingClusterName);
+            logger.debug("received cluster state from [{}] which is also master with cluster name [{}]", newClusterState.nodes().masterNode(), incomingClusterName);
             final ClusterState newState = newClusterState;
             clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
                 @Override
@@ -564,12 +564,22 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                         return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]");
                     } else {
                         logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode());
-                        transportService.sendRequest(newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
-                            @Override
-                            public void handleException(TransportException exp) {
-                                logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode());
-                            }
-                        });
+
+                        try {
+                            // make sure we're connected to this node (connect to node does nothing if we're already connected)
+                            // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node
+                            // in the past (after a master failure, for example)
+                            transportService.connectToNode(newState.nodes().masterNode());
+                            transportService.sendRequest(newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
+                                @Override
+                                public void handleException(TransportException exp) {
+                                    logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode());
+                                }
+                            });
+                        } catch (Exception e) {
+                            logger.warn("failed to send rejoin request to [{}]", e, newState.nodes().masterNode());
+                        }
+
                         return currentState;
                     }
                 }