Преглед изворног кода

Don't update nodes list when stepping down as master (#22049)

This commit simplifies the node update logic so that nodes are never removed from the cluster state when the cluster state is not published.
Yannick Welsch пре 8 година
родитељ
комит
a724f4eb61

+ 22 - 16
core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

@@ -72,6 +72,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.BiFunction;
+import java.util.function.Consumer;
 import java.util.stream.Collectors;
 
 import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
@@ -207,7 +208,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
         joinThreadControl.start();
         zenPing.start(this);
         this.nodeJoinController = new NodeJoinController(clusterService, allocationService, electMaster, discoverySettings, settings);
-        this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, electMaster, this::rejoin, logger);
+        this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, electMaster, this::submitRejoin, logger);
     }
 
     @Override
@@ -306,18 +307,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
         } catch (FailedToCommitClusterStateException t) {
             // cluster service logs a WARN message
             logger.debug("failed to publish cluster state version [{}] (not enough nodes acknowledged, min master nodes [{}])", clusterChangedEvent.state().version(), electMaster.minimumMasterNodes());
-            clusterService.submitStateUpdateTask("zen-disco-failed-to-publish", new ClusterStateUpdateTask(Priority.IMMEDIATE) {
-                @Override
-                public ClusterState execute(ClusterState currentState) {
-                    return rejoin(currentState, "failed to publish to min_master_nodes");
-                }
-
-                @Override
-                public void onFailure(String source, Exception e) {
-                    logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
-                }
-
-            });
+            submitRejoin("zen-disco-failed-to-publish");
             throw t;
         }
 
@@ -505,12 +495,27 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
         }
     }
 
+    private void submitRejoin(String source) {
+        clusterService.submitStateUpdateTask(source, new ClusterStateUpdateTask(Priority.IMMEDIATE) {
+            @Override
+            public ClusterState execute(ClusterState currentState) {
+                return rejoin(currentState, source);
+            }
+
+            @Override
+            public void onFailure(String source, Exception e) {
+                logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
+            }
+
+        });
+    }
+
     // visible for testing
     static class NodeRemovalClusterStateTaskExecutor implements ClusterStateTaskExecutor<NodeRemovalClusterStateTaskExecutor.Task>, ClusterStateTaskListener {
 
         private final AllocationService allocationService;
         private final ElectMasterService electMasterService;
-        private final BiFunction<ClusterState, String, ClusterState> rejoin;
+        private final Consumer<String> rejoin;
         private final Logger logger;
 
         static class Task {
@@ -540,7 +545,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
         NodeRemovalClusterStateTaskExecutor(
                 final AllocationService allocationService,
                 final ElectMasterService electMasterService,
-                final BiFunction<ClusterState, String, ClusterState> rejoin,
+                final Consumer<String> rejoin,
                 final Logger logger) {
             this.allocationService = allocationService;
             this.electMasterService = electMasterService;
@@ -570,7 +575,8 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
 
             final BatchResult.Builder<Task> resultBuilder = BatchResult.<Task>builder().successes(tasks);
             if (!electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes())) {
-                return resultBuilder.build(rejoin.apply(remainingNodesClusterState, "not enough master nodes"));
+                rejoin.accept("not enough master nodes");
+                return resultBuilder.build(currentState);
             } else {
                 return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks)));
             }

+ 9 - 16
core/src/test/java/org/elasticsearch/discovery/zen/NodeRemovalClusterStateTaskExecutorTests.java

@@ -33,6 +33,7 @@ import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.BiFunction;
+import java.util.function.Consumer;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
@@ -77,17 +78,12 @@ public class NodeRemovalClusterStateTaskExecutorTests extends ESTestCase {
 
         final AllocationService allocationService = mock(AllocationService.class);
 
-        final AtomicBoolean rejoined = new AtomicBoolean();
-        final AtomicReference<ClusterState> rejoinedClusterState = new AtomicReference<>();
-        final BiFunction<ClusterState, String, ClusterState> rejoin = (cs, r) -> {
-            rejoined.set(true);
-            rejoinedClusterState.set(ClusterState.builder(cs).build());
-            return rejoinedClusterState.get();
-        };
+        final AtomicBoolean rejoinCalled = new AtomicBoolean();
+        final Consumer<String> submitRejoin = source -> rejoinCalled.set(true);
 
         final AtomicReference<ClusterState> remainingNodesClusterState = new AtomicReference<>();
         final ZenDiscovery.NodeRemovalClusterStateTaskExecutor executor =
-                new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, rejoin, logger) {
+                new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, submitRejoin, logger) {
                     @Override
                     ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) {
                         remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder));
@@ -117,11 +113,11 @@ public class NodeRemovalClusterStateTaskExecutorTests extends ESTestCase {
 
         // ensure that we did not reroute
         verifyNoMoreInteractions(allocationService);
-        assertTrue(rejoined.get());
-        assertThat(result.resultingState, equalTo(rejoinedClusterState.get()));
+        assertTrue(rejoinCalled.get());
+        assertThat(result.resultingState, equalTo(clusterState));
 
         for (final ZenDiscovery.NodeRemovalClusterStateTaskExecutor.Task task : tasks) {
-            assertNull(result.resultingState.nodes().get(task.node().getId()));
+            assertNotNull(result.resultingState.nodes().get(task.node().getId()));
         }
     }
 
@@ -133,14 +129,11 @@ public class NodeRemovalClusterStateTaskExecutorTests extends ESTestCase {
         when(allocationService.deassociateDeadNodes(any(ClusterState.class), eq(true), any(String.class)))
             .thenAnswer(im -> im.getArguments()[0]);
 
-        final BiFunction<ClusterState, String, ClusterState> rejoin = (cs, r) -> {
-            fail("rejoin should not be invoked");
-            return cs;
-        };
+        final Consumer<String> submitRejoin = source -> fail("rejoin should not be invoked");
 
         final AtomicReference<ClusterState> remainingNodesClusterState = new AtomicReference<>();
         final ZenDiscovery.NodeRemovalClusterStateTaskExecutor executor =
-                new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, rejoin, logger) {
+                new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, submitRejoin, logger) {
                     @Override
                     ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) {
                         remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder));