Przeglądaj źródła

Revert "[Discovery] immediately start Master|Node fault detection pinging"

In #6706 we changed the master validation to start pinging immediately after a new master was elected or a node joined. The idea is to have a quicker response to failures. This does, however, create a problem if the new master has not yet fully processed its election and responds to the ping with a NoLongerMasterException. This causes the source node to remove the current master and elect another, only to find out it's not a master either, and so forth. We are moving this change to the feature/improve_zen branch, where the improvements we made will cause the situation to be handled properly.

This reverts commit ae16956e072bea317ea481f65f2e110dc48fde17.
Boaz Leskes 11 lat temu
rodzic
commit
caf11ff2fb

+ 3 - 5
src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java

@@ -153,9 +153,8 @@ public class MasterFaultDetection extends AbstractComponent {
             masterPinger.stop();
         }
         this.masterPinger = new MasterPinger();
-
-        // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
-        threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
+        // start the ping process
+        threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
     }
 
     public void stop(String reason) {
@@ -199,8 +198,7 @@ public class MasterFaultDetection extends AbstractComponent {
                         masterPinger.stop();
                     }
                     this.masterPinger = new MasterPinger();
-                    // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
-                    threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
+                    threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
                 } catch (Exception e) {
                     logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
                     notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");

+ 2 - 4
src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java

@@ -119,8 +119,7 @@ public class NodesFaultDetection extends AbstractComponent {
             }
             if (!nodesFD.containsKey(newNode)) {
                 nodesFD.put(newNode, new NodeFD());
-                // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
-                threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
+                threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
             }
         }
         for (DiscoveryNode removedNode : delta.removedNodes()) {
@@ -166,8 +165,7 @@ public class NodesFaultDetection extends AbstractComponent {
             try {
                 transportService.connectToNode(node);
                 nodesFD.put(node, new NodeFD());
-                // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
-                threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
+                threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
             } catch (Exception e) {
                 logger.trace("[node  ] [{}] transport disconnected (with verified connect)", node);
                 notifyNodeFailure(node, "transport disconnected (with verified connect)");