11 years ago · 694bf287d6
--- a/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java
+++ b/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java
@@ -367,7 +367,8 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
 
				                 if (sendRefreshMapping) {
			
 
				                     nodeMappingRefreshAction.nodeMappingRefresh(event.state(),
			
 
				                             new NodeMappingRefreshAction.NodeMappingRefreshRequest(index, indexMetaData.uuid(),
			
 
				-                                    typesToRefresh.toArray(new String[typesToRefresh.size()]), event.state().nodes().localNodeId()));
			
 
				+                                    typesToRefresh.toArray(new String[typesToRefresh.size()]), event.state().nodes().localNodeId())
			
 
				+                    );
			
 
				                 }
			
 
				             }
			
 
				             // go over and remove mappings
			
@@ -489,7 +490,8 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
 
				         }
			
 
				 
			
 
				         RoutingTable routingTable = event.state().routingTable();
			
 
				-        RoutingNodes.RoutingNodeIterator routingNode = event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());;
			
 
				+        RoutingNodes.RoutingNodeIterator routingNode = event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
			
 
				+
			
 
				         if (routingNode == null) {
			
 
				             failedShards.clear();
			
 
				             return;
			
@@ -551,7 +553,8 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
 
				 
			
 
				     private void cleanFailedShards(final ClusterChangedEvent event) {
			
 
				         RoutingTable routingTable = event.state().routingTable();
			
 
				-        RoutingNodes.RoutingNodeIterator routingNode = event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());;
			
 
				+        RoutingNodes.RoutingNodeIterator routingNode = event.state().readOnlyRoutingNodes().routingNodeIter(event.state().nodes().localNodeId());
			
 
				+
			
 
				         if (routingNode == null) {
			
 
				             failedShards.clear();
			
 
				             return;
			
@@ -611,10 +614,42 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
 
				                 return;
			
 
				             } else {
			
 
				                 if (indexShard.ignoreRecoveryAttempt()) {
			
 
				+                    logger.trace("ignoring recovery instruction for an existing shard {} (shard state: [{}])", indexShard.shardId(), indexShard.state());
			
 
				                     return;
			
 
				                 }
			
 
				             }
			
 
				         }
			
 
				+
			
 
				+        // figure out where to recover from (node or disk, in which case sourceNode is null)
			
 
				+        DiscoveryNode sourceNode = null;
			
 
				+        if (!shardRouting.primary()) {
			
 
				+            IndexShardRoutingTable shardRoutingTable = routingTable.index(shardRouting.index()).shard(shardRouting.id());
			
 
				+            for (ShardRouting entry : shardRoutingTable) {
			
 
				+                if (entry.primary() && entry.started()) {
			
 
				+                    // only recover from started primary, if we can't find one, we will do it next round
			
 
				+                    sourceNode = nodes.get(entry.currentNodeId());
			
 
				+                    if (sourceNode == null) {
			
 
				+                        logger.trace("can't recover replica because primary shard {} is assigned to an unknown node. ignoring.", entry);
			
 
				+                        return;
			
 
				+                    }
			
 
				+                    break;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            if (sourceNode == null) {
			
 
				+                logger.trace("can't recover replica for {} because a primary shard can not be found. ignoring.", shardRouting.shardId());
			
 
				+                return;
			
 
				+            }
			
 
				+
			
 
				+        } else if (shardRouting.relocatingNodeId() != null) {
			
 
				+            sourceNode = nodes.get(shardRouting.relocatingNodeId());
			
 
				+            if (sourceNode == null) {
			
 
				+                logger.trace("can't recover from remote primary shard {} because it is assigned to an unknown node [{}]. ignoring.", shardRouting.shardId(), shardRouting.relocatingNodeId());
			
 
				+                return;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+
			
 
				         // if there is no shard, create it
			
 
				         if (!indexService.hasShard(shardId)) {
			
 
				             if (failedShards.containsKey(shardRouting.shardId())) {
			
@@ -650,63 +685,45 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
 
				         if (indexShard.ignoreRecoveryAttempt()) {
			
 
				             // we are already recovering (we can get to this state since the cluster event can happen several
			
 
				             // times while we recover)
			
 
				+            logger.trace("ignoring recovery instruction for shard {} (shard state: [{}])", indexShard.shardId(), indexShard.state());
			
 
				             return;
			
 
				         }
			
 
				 
			
 
				+        if (sourceNode != null) {
			
 
				+            try {
			
 
				+                // we don't mark this one as relocated at the end.
			
 
				+                // For primaries: requests in any case are routed to both when its relocating and that way we handle
			
 
				+                //    the edge case where its mark as relocated, and we might need to roll it back...
			
 
				+                // For replicas: we are recovering a backup from a primary
			
 
				+
			
 
				+                RecoveryState.Type type = shardRouting.primary() ? RecoveryState.Type.RELOCATION : RecoveryState.Type.REPLICA;
			
 
				+                final StartRecoveryRequest request = new StartRecoveryRequest(indexShard.shardId(), sourceNode, nodes.localNode(),
			
 
				+                        false, indexShard.store().list(), type);
			
 
				+                recoveryTarget.startRecovery(request, indexShard, new PeerRecoveryListener(request, shardRouting, indexService, indexMetaData));
			
 
				 
			
 
				-        if (!shardRouting.primary()) {
			
 
				-            // recovery from primary
			
 
				-            IndexShardRoutingTable shardRoutingTable = routingTable.index(shardRouting.index()).shard(shardRouting.id());
			
 
				-            for (ShardRouting entry : shardRoutingTable) {
			
 
				-                if (entry.primary() && entry.started()) {
			
 
				-                    // only recover from started primary, if we can't find one, we will do it next round
			
 
				-                    final DiscoveryNode sourceNode = nodes.get(entry.currentNodeId());
			
 
				-                    try {
			
 
				-                        // we are recovering a backup from a primary, so no need to mark it as relocated
			
 
				-                        final StartRecoveryRequest request = new StartRecoveryRequest(indexShard.shardId(), sourceNode, nodes.localNode(),
			
 
				-                                false, indexShard.store().list(), RecoveryState.Type.REPLICA);
			
 
				-                        recoveryTarget.startRecovery(request, indexShard, new PeerRecoveryListener(request, shardRouting, indexService, indexMetaData));
			
 
				-                    } catch (Throwable e) {
			
 
				-                        handleRecoveryFailure(indexService, indexMetaData, shardRouting, true, e);
			
 
				-                        break;
			
 
				-                    }
			
 
				-                    break;
			
 
				-                }
			
 
				+            } catch (Throwable e) {
			
 
				+                handleRecoveryFailure(indexService, indexMetaData, shardRouting, true, e);
			
 
				             }
			
 
				         } else {
			
 
				-            if (shardRouting.relocatingNodeId() == null) {
			
 
				-                // we are the first primary, recover from the gateway
			
 
				-                // if its post api allocation, the index should exists
			
 
				-                boolean indexShouldExists = indexShardRouting.primaryAllocatedPostApi();
			
 
				-                IndexShardGatewayService shardGatewayService = indexService.shardInjector(shardId).getInstance(IndexShardGatewayService.class);
			
 
				-                shardGatewayService.recover(indexShouldExists, new IndexShardGatewayService.RecoveryListener() {
			
 
				-                    @Override
			
 
				-                    public void onRecoveryDone() {
			
 
				-                        shardStateAction.shardStarted(shardRouting, indexMetaData.getUUID(), "after recovery from gateway");
			
 
				-                    }
			
 
				+            // we are the first primary, recover from the gateway
			
 
				+            // if its post api allocation, the index should exists
			
 
				+            boolean indexShouldExists = indexShardRouting.primaryAllocatedPostApi();
			
 
				+            IndexShardGatewayService shardGatewayService = indexService.shardInjector(shardId).getInstance(IndexShardGatewayService.class);
			
 
				+            shardGatewayService.recover(indexShouldExists, new IndexShardGatewayService.RecoveryListener() {
			
 
				+                @Override
			
 
				+                public void onRecoveryDone() {
			
 
				+                    shardStateAction.shardStarted(shardRouting, indexMetaData.getUUID(), "after recovery from gateway");
			
 
				+                }
			
 
				 
			
 
				-                    @Override
			
 
				-                    public void onIgnoreRecovery(String reason) {
			
 
				-                    }
			
 
				+                @Override
			
 
				+                public void onIgnoreRecovery(String reason) {
			
 
				+                }
			
 
				 
			
 
				-                    @Override
			
 
				-                    public void onRecoveryFailed(IndexShardGatewayRecoveryException e) {
			
 
				-                        handleRecoveryFailure(indexService, indexMetaData, shardRouting, true, e);
			
 
				-                    }
			
 
				-                });
			
 
				-            } else {
			
 
				-                // relocating primaries, recovery from the relocating shard
			
 
				-                final DiscoveryNode sourceNode = nodes.get(shardRouting.relocatingNodeId());
			
 
				-                try {
			
 
				-                    // we don't mark this one as relocated at the end, requests in any case are routed to both when its relocating
			
 
				-                    // and that way we handle the edge case where its mark as relocated, and we might need to roll it back...
			
 
				-                    final StartRecoveryRequest request = new StartRecoveryRequest(indexShard.shardId(), sourceNode, nodes.localNode(),
			
 
				-                            false, indexShard.store().list(), RecoveryState.Type.RELOCATION);
			
 
				-                    recoveryTarget.startRecovery(request, indexShard, new PeerRecoveryListener(request, shardRouting, indexService, indexMetaData));
			
 
				-                } catch (Throwable e) {
			
 
				+                @Override
			
 
				+                public void onRecoveryFailed(IndexShardGatewayRecoveryException e) {
			
 
				                     handleRecoveryFailure(indexService, indexMetaData, shardRouting, true, e);
			
 
				                 }
			
 
				-            }
			
 
				+            });
			
 
				         }
			
 
				     }
			
 
				 
			
--- a/src/main/java/org/elasticsearch/indices/recovery/RecoveryTarget.java
+++ b/src/main/java/org/elasticsearch/indices/recovery/RecoveryTarget.java
@@ -187,10 +187,9 @@ public class RecoveryTarget extends AbstractComponent {
 
				     }
			
 
				 
			
 
				     private void doRecovery(final StartRecoveryRequest request, final RecoveryStatus recoveryStatus, final RecoveryListener listener) {
			
 
				-        if (request.sourceNode() == null) {
			
 
				-            listener.onIgnoreRecovery(false, "No node to recover from, retry on next cluster state update");
			
 
				-            return;
			
 
				-        }
			
 
				+
			
 
				+        assert request.sourceNode() != null : "can't do a recovery without a source node";
			
 
				+
			
 
				         final InternalIndexShard shard = recoveryStatus.indexShard;
			
 
				         if (shard == null) {
			
 
				             listener.onIgnoreRecovery(false, "shard missing locally, stop recovery");
			
--- a/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java
+++ b/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java
@@ -161,7 +161,7 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
 
				     }
			
 
				 
			
 
				     @Test
			
 
				-    @TestLogging("cluster.service:TRACE,discovery:TRACE")
			
 
				+    @TestLogging("cluster.service:TRACE,discovery:TRACE,indices.cluster:TRACE")
			
 
				     public void multipleNodesShutdownNonMasterNodes() throws Exception {
			
 
				         Settings settings = settingsBuilder()
			
 
				                 .put("discovery.type", "zen")