|
@@ -25,6 +25,7 @@ import org.elasticsearch.cluster.ClusterState;
|
|
|
import org.elasticsearch.cluster.RestoreInProgress;
|
|
|
import org.elasticsearch.cluster.health.ClusterHealthStatus;
|
|
|
import org.elasticsearch.cluster.health.ClusterStateHealth;
|
|
|
+import org.elasticsearch.cluster.metadata.AutoExpandReplicas;
|
|
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
|
|
import org.elasticsearch.cluster.metadata.MetaData;
|
|
|
import org.elasticsearch.cluster.routing.RoutingNode;
|
|
@@ -46,6 +47,7 @@ import java.util.Collections;
|
|
|
import java.util.Comparator;
|
|
|
import java.util.Iterator;
|
|
|
import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
import java.util.function.Function;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
@@ -206,11 +208,12 @@ public class AllocationService extends AbstractComponent {
|
|
|
* unassigned an shards that are associated with nodes that are no longer part of the cluster, potentially promoting replicas
|
|
|
* if needed.
|
|
|
*/
|
|
|
- public ClusterState deassociateDeadNodes(final ClusterState clusterState, boolean reroute, String reason) {
|
|
|
- RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
|
|
|
+ public ClusterState deassociateDeadNodes(ClusterState clusterState, boolean reroute, String reason) {
|
|
|
+ ClusterState fixedClusterState = adaptAutoExpandReplicas(clusterState);
|
|
|
+ RoutingNodes routingNodes = getMutableRoutingNodes(fixedClusterState);
|
|
|
// shuffle the unassigned nodes, just so we won't have things like poison failed shards
|
|
|
routingNodes.unassigned().shuffle();
|
|
|
- RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState,
|
|
|
+ RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, fixedClusterState,
|
|
|
clusterInfoService.getClusterInfo(), currentNanoTime());
|
|
|
|
|
|
// first, clear from the shards any node id they used to belong to that is now dead
|
|
@@ -220,12 +223,40 @@ public class AllocationService extends AbstractComponent {
|
|
|
reroute(allocation);
|
|
|
}
|
|
|
|
|
|
- if (allocation.routingNodesChanged() == false) {
|
|
|
+ if (fixedClusterState == clusterState && allocation.routingNodesChanged() == false) {
|
|
|
return clusterState;
|
|
|
}
|
|
|
return buildResultAndLogHealthChange(clusterState, allocation, reason);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Checks if the are replicas with the auto-expand feature that need to be adapted.
|
|
|
+ * Returns an updated cluster state if changes were necessary, or the identical cluster if no changes were required.
|
|
|
+ */
|
|
|
+ private ClusterState adaptAutoExpandReplicas(ClusterState clusterState) {
|
|
|
+ final Map<Integer, List<String>> autoExpandReplicaChanges =
|
|
|
+ AutoExpandReplicas.getAutoExpandReplicaChanges(clusterState.metaData(), clusterState.nodes());
|
|
|
+ if (autoExpandReplicaChanges.isEmpty()) {
|
|
|
+ return clusterState;
|
|
|
+ } else {
|
|
|
+ final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(clusterState.routingTable());
|
|
|
+ final MetaData.Builder metaDataBuilder = MetaData.builder(clusterState.metaData());
|
|
|
+ for (Map.Entry<Integer, List<String>> entry : autoExpandReplicaChanges.entrySet()) {
|
|
|
+ final int numberOfReplicas = entry.getKey();
|
|
|
+ final String[] indices = entry.getValue().toArray(new String[entry.getValue().size()]);
|
|
|
+ // we do *not* update the in sync allocation ids as they will be removed upon the first index
|
|
|
+ // operation which make these copies stale
|
|
|
+ routingTableBuilder.updateNumberOfReplicas(numberOfReplicas, indices);
|
|
|
+ metaDataBuilder.updateNumberOfReplicas(numberOfReplicas, indices);
|
|
|
+ logger.info("updating number_of_replicas to [{}] for indices {}", numberOfReplicas, indices);
|
|
|
+ }
|
|
|
+ final ClusterState fixedState = ClusterState.builder(clusterState).routingTable(routingTableBuilder.build())
|
|
|
+ .metaData(metaDataBuilder).build();
|
|
|
+ assert AutoExpandReplicas.getAutoExpandReplicaChanges(fixedState.metaData(), fixedState.nodes()).isEmpty();
|
|
|
+ return fixedState;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Removes delay markers from unassigned shards based on current time stamp.
|
|
|
*/
|
|
@@ -301,6 +332,7 @@ public class AllocationService extends AbstractComponent {
|
|
|
if (retryFailed) {
|
|
|
resetFailedAllocationCounter(allocation);
|
|
|
}
|
|
|
+
|
|
|
reroute(allocation);
|
|
|
return new CommandsResult(explanations, buildResultAndLogHealthChange(clusterState, allocation, "reroute commands"));
|
|
|
}
|
|
@@ -320,15 +352,17 @@ public class AllocationService extends AbstractComponent {
|
|
|
* <p>
|
|
|
* If the same instance of ClusterState is returned, then no change has been made.
|
|
|
*/
|
|
|
- protected ClusterState reroute(final ClusterState clusterState, String reason, boolean debug) {
|
|
|
- RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
|
|
|
+ protected ClusterState reroute(ClusterState clusterState, String reason, boolean debug) {
|
|
|
+ ClusterState fixedClusterState = adaptAutoExpandReplicas(clusterState);
|
|
|
+
|
|
|
+ RoutingNodes routingNodes = getMutableRoutingNodes(fixedClusterState);
|
|
|
// shuffle the unassigned nodes, just so we won't have things like poison failed shards
|
|
|
routingNodes.unassigned().shuffle();
|
|
|
- RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState,
|
|
|
+ RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, fixedClusterState,
|
|
|
clusterInfoService.getClusterInfo(), currentNanoTime());
|
|
|
allocation.debugDecision(debug);
|
|
|
reroute(allocation);
|
|
|
- if (allocation.routingNodesChanged() == false) {
|
|
|
+ if (fixedClusterState == clusterState && allocation.routingNodesChanged() == false) {
|
|
|
return clusterState;
|
|
|
}
|
|
|
return buildResultAndLogHealthChange(clusterState, allocation, reason);
|
|
@@ -353,6 +387,8 @@ public class AllocationService extends AbstractComponent {
|
|
|
|
|
|
private void reroute(RoutingAllocation allocation) {
|
|
|
assert hasDeadNodes(allocation) == false : "dead nodes should be explicitly cleaned up. See deassociateDeadNodes";
|
|
|
+ assert AutoExpandReplicas.getAutoExpandReplicaChanges(allocation.metaData(), allocation.nodes()).isEmpty() :
|
|
|
+ "auto-expand replicas out of sync with number of nodes in the cluster";
|
|
|
|
|
|
// now allocate all the unassigned to available nodes
|
|
|
if (allocation.routingNodes().unassigned().size() > 0) {
|