浏览代码

Include pings from client nodes in master election

We currently have a `discovery.zen.master_election.filter_client` setting that control whether their ping responses are ignored for master election (which is the current default). With the push to treat client nodes as normal nodes (and promote the transport/rest clients for client work), this should be changed. This commit remove this setting and it's companion `discovery.zen.master_election.filter_data` setting (currently defaulting to  false) in favor of singe `discovery.zen.master_election.ignore_non_master_pings` setting with more intuitive name (defaulting to false).

Resolves #17325
Closes #17329
Boaz Leskes 9 年之前
父节点
当前提交
6dd164d0bd

+ 1 - 2
core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

@@ -344,9 +344,8 @@ public final class ClusterSettings extends AbstractScopedSettings {
                     ZenDiscovery.JOIN_RETRY_DELAY_SETTING,
                     ZenDiscovery.MAX_PINGS_FROM_ANOTHER_MASTER_SETTING,
                     ZenDiscovery.SEND_LEAVE_REQUEST_SETTING,
-                    ZenDiscovery.MASTER_ELECTION_FILTER_CLIENT_SETTING,
                     ZenDiscovery.MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING,
-                    ZenDiscovery.MASTER_ELECTION_FILTER_DATA_SETTING,
+                    ZenDiscovery.MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING,
                     UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING,
                     UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_CONCURRENT_CONNECTS_SETTING,
                     SearchService.DEFAULT_KEEPALIVE_SETTING,

+ 32 - 33
core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

@@ -71,6 +71,7 @@ import org.elasticsearch.transport.TransportService;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -78,6 +79,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
 
 import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
 
@@ -100,14 +102,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         Setting.intSetting("discovery.zen.max_pings_from_another_master", 3, 1, Property.NodeScope);
     public final static Setting<Boolean> SEND_LEAVE_REQUEST_SETTING =
         Setting.boolSetting("discovery.zen.send_leave_request", true, Property.NodeScope);
-    public final static Setting<Boolean> MASTER_ELECTION_FILTER_CLIENT_SETTING =
-        Setting.boolSetting("discovery.zen.master_election.filter_client", true, Property.NodeScope);
     public final static Setting<TimeValue> MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING =
         Setting.timeSetting("discovery.zen.master_election.wait_for_joins_timeout",
             settings -> TimeValue.timeValueMillis(JOIN_TIMEOUT_SETTING.get(settings).millis() / 2).toString(), TimeValue.timeValueMillis(0),
             Property.NodeScope);
-    public final static Setting<Boolean> MASTER_ELECTION_FILTER_DATA_SETTING =
-        Setting.boolSetting("discovery.zen.master_election.filter_data", false, Property.NodeScope);
+    public final static Setting<Boolean> MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING =
+            Setting.boolSetting("discovery.zen.master_election.ignore_non_master_pings", false, Property.NodeScope);
 
     public static final String DISCOVERY_REJOIN_ACTION_NAME = "internal:discovery/zen/rejoin";
 
@@ -138,8 +138,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
 
     private final ElectMasterService electMaster;
 
-    private final boolean masterElectionFilterClientNodes;
-    private final boolean masterElectionFilterDataNodes;
+    private final boolean masterElectionIgnoreNonMasters;
     private final TimeValue masterElectionWaitForJoinsTimeout;
 
     private final JoinThreadControl joinThreadControl;
@@ -169,11 +168,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         this.maxPingsFromAnotherMaster = MAX_PINGS_FROM_ANOTHER_MASTER_SETTING.get(settings);
         this.sendLeaveRequest = SEND_LEAVE_REQUEST_SETTING.get(settings);
 
-        this.masterElectionFilterClientNodes = MASTER_ELECTION_FILTER_CLIENT_SETTING.get(settings);
-        this.masterElectionFilterDataNodes = MASTER_ELECTION_FILTER_DATA_SETTING.get(settings);
+        this.masterElectionIgnoreNonMasters = MASTER_ELECTION_IGNORE_NON_MASTER_PINGS_SETTING.get(settings);
         this.masterElectionWaitForJoinsTimeout = MASTER_ELECTION_WAIT_FOR_JOINS_TIMEOUT_SETTING.get(settings);
 
-        logger.debug("using ping_timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", this.pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
+        logger.debug("using ping_timeout [{}], join.timeout [{}], master_election.ignore_non_master [{}]",
+                this.pingTimeout, joinTimeout, masterElectionIgnoreNonMasters);
 
         clusterSettings.addSettingsUpdateConsumer(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING, this::handleMinimumMasterNodesChanged, (value) -> {
             final ClusterState clusterState = clusterService.state();
@@ -846,30 +845,8 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         }
 
         // filter responses
-        List<ZenPing.PingResponse> pingResponses = new ArrayList<>();
-        for (ZenPing.PingResponse pingResponse : fullPingResponses) {
-            DiscoveryNode node = pingResponse.node();
-            if (masterElectionFilterClientNodes && (node.clientNode() || (!node.masterNode() && !node.dataNode()))) {
-                // filter out the client node, which is a client node, or also one that is not data and not master (effectively, client)
-            } else if (masterElectionFilterDataNodes && (!node.masterNode() && node.dataNode())) {
-                // filter out data node that is not also master
-            } else {
-                pingResponses.add(pingResponse);
-            }
-        }
-
-        if (logger.isDebugEnabled()) {
-            StringBuilder sb = new StringBuilder();
-            if (pingResponses.isEmpty()) {
-                sb.append(" {none}");
-            } else {
-                for (ZenPing.PingResponse pingResponse : pingResponses) {
-                    sb.append("\n\t--> ").append(pingResponse);
-                }
-            }
-            logger.debug("filtered ping responses: (filter_client[{}], filter_data[{}]){}", masterElectionFilterClientNodes,
-                masterElectionFilterDataNodes, sb);
-        }
+        final List<ZenPing.PingResponse> pingResponses;
+        pingResponses = filterPingResponses(fullPingResponses, masterElectionIgnoreNonMasters, logger);
 
         final DiscoveryNode localNode = clusterService.localNode();
         List<DiscoveryNode> pingMasters = new ArrayList<>();
@@ -925,6 +902,28 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         }
     }
 
+    static List<ZenPing.PingResponse> filterPingResponses(ZenPing.PingResponse[] fullPingResponses, boolean masterElectionIgnoreNonMasters, ESLogger logger) {
+        List<ZenPing.PingResponse> pingResponses;
+        if (masterElectionIgnoreNonMasters) {
+            pingResponses = Arrays.stream(fullPingResponses).filter(ping -> ping.node().isMasterNode()).collect(Collectors.toList());
+        } else {
+            pingResponses = Arrays.asList(fullPingResponses);
+        }
+
+        if (logger.isDebugEnabled()) {
+            StringBuilder sb = new StringBuilder();
+            if (pingResponses.isEmpty()) {
+                sb.append(" {none}");
+            } else {
+                for (ZenPing.PingResponse pingResponse : pingResponses) {
+                    sb.append("\n\t--> ").append(pingResponse);
+                }
+            }
+            logger.debug("filtered ping responses: (ignore_non_masters [{}]){}", masterElectionIgnoreNonMasters, sb);
+        }
+        return pingResponses;
+    }
+
     protected ClusterState rejoin(ClusterState clusterState, String reason) {
 
         // *** called from within an cluster state update task *** //

+ 39 - 0
core/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryUnitTests.java

@@ -25,10 +25,19 @@ import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.common.transport.DummyTransportAddress;
+import org.elasticsearch.discovery.zen.ping.ZenPing;
 import org.elasticsearch.test.ESTestCase;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
 import static org.elasticsearch.discovery.zen.ZenDiscovery.shouldIgnoreOrRejectNewClusterState;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 
 /**
  */
@@ -89,4 +98,34 @@ public class ZenDiscoveryUnitTests extends ESTestCase {
         }
         assertFalse("should not ignore, because current state doesn't have a master", shouldIgnoreOrRejectNewClusterState(logger, currentState.build(), newState.build()));
     }
+
+    public void testFilterNonMasterPingResponse() {
+        ArrayList<ZenPing.PingResponse> responses = new ArrayList<>();
+        ArrayList<DiscoveryNode> masterNodes = new ArrayList<>();
+        ArrayList<DiscoveryNode> allNodes = new ArrayList<>();
+        for (int i = randomIntBetween(10, 20); i >= 0; i--) {
+            Map<String, String> attrs = new HashMap<>();
+            for (String attr : randomSubsetOf(
+                    Arrays.asList(DiscoveryNode.INGEST_ATTR, DiscoveryNode.DATA_ATTR, DiscoveryNode.MASTER_ATTR))) {
+                attrs.put(attr, randomBoolean() + "");
+            }
+
+            DiscoveryNode node = new DiscoveryNode("node_" + i, "id_" + i, DummyTransportAddress.INSTANCE, attrs, Version.CURRENT);
+            responses.add(new ZenPing.PingResponse(node, randomBoolean() ? null : node, new ClusterName("test"), randomBoolean()));
+            allNodes.add(node);
+            if (node.isMasterNode()) {
+                masterNodes.add(node);
+            }
+        }
+
+        boolean ignore = randomBoolean();
+        List<ZenPing.PingResponse> filtered = ZenDiscovery.filterPingResponses(
+                responses.toArray(new ZenPing.PingResponse[responses.size()]), ignore, logger);
+        final List<DiscoveryNode> filteredNodes = filtered.stream().map(ZenPing.PingResponse::node).collect(Collectors.toList());
+        if (ignore) {
+            assertThat(filteredNodes, equalTo(masterNodes));
+        } else {
+            assertThat(filteredNodes, equalTo(allNodes));
+        }
+    }
 }

+ 6 - 1
docs/reference/migration/migrate_5_0/settings.asciidoc

@@ -82,7 +82,7 @@ The `index.analysis.analyzer.default_index` analyzer is not supported anymore.
 If you wish to change the analyzer to use for indexing, change the
 `index.analysis.analyzer.default` analyzer instead.
 
-==== Ping timeout settings
+==== Ping settings
 
 Previously, there were three settings for the ping timeout:
 `discovery.zen.initial_ping_timeout`, `discovery.zen.ping.timeout` and
@@ -90,6 +90,11 @@ Previously, there were three settings for the ping timeout:
 setting key for the ping timeout is now `discovery.zen.ping_timeout`. The
 default value for ping timeouts remains at three seconds.
 
+
+`discovery.zen.master_election.filter_client` and `discovery.zen.master_election.filter_data` have
+been removed in favor of the new `discovery.zen.master_election.ignore_non_master_pings`. This setting control how ping responses
+are interpreted during master election and should be used with care and only in extreme cases. See documentation for details.
+
 ==== Recovery settings
 
 Recovery settings deprecated in 1.x have been removed:

+ 3 - 5
docs/reference/modules/discovery/zen.asciidoc

@@ -56,11 +56,9 @@ serves as a protection against (partial) network failures where node may unjustl
 think that the master has failed. In this case the node will simply hear from
 other nodes about the currently active master.
 
-If `discovery.zen.master_election.filter_client` is `true`, pings from client nodes (nodes where `node.client` is
-`true`, or both `node.data` and `node.master` are `false`) are ignored during master election; the default value is
-`true`. If `discovery.zen.master_election.filter_data` is `true`, pings from non-master-eligible data nodes (nodes
-where `node.data` is `true` and `node.master` is `false`) are ignored during master election; the default value is
-`false`. Pings from master-eligible nodes are always observed during master election.
+If `discovery.zen.master_election.ignore_non_master_pings` is `true`, pings from nodes that are not master
+eligible (nodes where `node.master` is `false`) are ignored during master election; the default value is
+`false`.
 
 Nodes can be excluded from becoming a master by setting `node.master` to
 `false`. Note, once a node is a client node (`node.client` set to