Răsfoiți Sursa

Decrease leader and follower check timeout (#38298)

Reduces the default leader and follower check timeout from 30s to 10s, so that three failed checks now take
3 * 10 = 30s instead of 3 * 30 = 90s. 30s is still a very long time for a node to be completely unresponsive.
Yannick Welsch 6 ani în urmă
părinte
comite
ece8c659c5

+ 2 - 2
docs/reference/modules/discovery/discovery-settings.asciidoc

@@ -44,7 +44,7 @@ Discovery and cluster formation are affected by the following settings:
 `cluster.fault_detection.follower_check.timeout`::
 
     Sets how long the elected master waits for a response to a follower check
-    before considering it to have failed. Defaults to `30s`.
+    before considering it to have failed. Defaults to `10s`.
 
 `cluster.fault_detection.follower_check.retry_count`::
 
@@ -60,7 +60,7 @@ Discovery and cluster formation are affected by the following settings:
 `cluster.fault_detection.leader_check.timeout`::
 
     Sets how long each node waits for a response to a leader check from the
-    elected master before considering it to have failed. Defaults to `30s`.
+    elected master before considering it to have failed. Defaults to `10s`.
 
 `cluster.fault_detection.leader_check.retry_count`::
 

+ 1 - 1
server/src/main/java/org/elasticsearch/cluster/coordination/FollowersChecker.java

@@ -77,7 +77,7 @@ public class FollowersChecker {
     // the timeout for each check sent to each node
     public static final Setting<TimeValue> FOLLOWER_CHECK_TIMEOUT_SETTING =
         Setting.timeSetting("cluster.fault_detection.follower_check.timeout",
-            TimeValue.timeValueMillis(30000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
+            TimeValue.timeValueMillis(10000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
 
     // the number of failed checks that must happen before the follower is considered to have failed.
     public static final Setting<Integer> FOLLOWER_CHECK_RETRY_COUNT_SETTING =

+ 1 - 1
server/src/main/java/org/elasticsearch/cluster/coordination/LeaderChecker.java

@@ -71,7 +71,7 @@ public class LeaderChecker {
     // the timeout for each check sent to the leader
     public static final Setting<TimeValue> LEADER_CHECK_TIMEOUT_SETTING =
         Setting.timeSetting("cluster.fault_detection.leader_check.timeout",
-            TimeValue.timeValueMillis(30000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
+            TimeValue.timeValueMillis(10000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
 
     // the number of failed checks that must happen before the leader is considered to have failed.
     public static final Setting<Integer> LEADER_CHECK_RETRY_COUNT_SETTING =