|
@@ -24,6 +24,7 @@ import org.elasticsearch.cluster.routing.RoutingTable;
|
|
import org.elasticsearch.cluster.routing.ShardRouting;
|
|
import org.elasticsearch.cluster.routing.ShardRouting;
|
|
import org.elasticsearch.cluster.routing.ShardRoutingState;
|
|
import org.elasticsearch.cluster.routing.ShardRoutingState;
|
|
import org.elasticsearch.cluster.routing.TestShardRouting;
|
|
import org.elasticsearch.cluster.routing.TestShardRouting;
|
|
|
|
+import org.elasticsearch.cluster.routing.UnassignedInfo;
|
|
import org.elasticsearch.cluster.routing.allocation.AllocationService;
|
|
import org.elasticsearch.cluster.routing.allocation.AllocationService;
|
|
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
|
|
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
|
|
import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator;
|
|
import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator;
|
|
@@ -34,12 +35,14 @@ import org.elasticsearch.cluster.routing.allocation.decider.NodeReplacementAlloc
|
|
import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider;
|
|
import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider;
|
|
import org.elasticsearch.common.settings.Settings;
|
|
import org.elasticsearch.common.settings.Settings;
|
|
import org.elasticsearch.common.transport.TransportAddress;
|
|
import org.elasticsearch.common.transport.TransportAddress;
|
|
|
|
+import org.elasticsearch.gateway.GatewayAllocator;
|
|
import org.elasticsearch.index.Index;
|
|
import org.elasticsearch.index.Index;
|
|
import org.elasticsearch.index.shard.ShardId;
|
|
import org.elasticsearch.index.shard.ShardId;
|
|
import org.elasticsearch.node.Node;
|
|
import org.elasticsearch.node.Node;
|
|
import org.elasticsearch.snapshots.SnapshotShardSizeInfo;
|
|
import org.elasticsearch.snapshots.SnapshotShardSizeInfo;
|
|
import org.elasticsearch.snapshots.SnapshotsInfoService;
|
|
import org.elasticsearch.snapshots.SnapshotsInfoService;
|
|
import org.elasticsearch.test.ESTestCase;
|
|
import org.elasticsearch.test.ESTestCase;
|
|
|
|
+import org.elasticsearch.test.gateway.TestGatewayAllocator;
|
|
import org.hamcrest.Matcher;
|
|
import org.hamcrest.Matcher;
|
|
import org.junit.Before;
|
|
import org.junit.Before;
|
|
|
|
|
|
@@ -47,6 +50,7 @@ import java.util.Collections;
|
|
import java.util.HashMap;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
|
|
+import java.util.Set;
|
|
import java.util.concurrent.atomic.AtomicReference;
|
|
import java.util.concurrent.atomic.AtomicReference;
|
|
|
|
|
|
import static org.hamcrest.Matchers.allOf;
|
|
import static org.hamcrest.Matchers.allOf;
|
|
@@ -54,6 +58,9 @@ import static org.hamcrest.Matchers.containsString;
|
|
import static org.hamcrest.Matchers.equalTo;
|
|
import static org.hamcrest.Matchers.equalTo;
|
|
import static org.hamcrest.Matchers.is;
|
|
import static org.hamcrest.Matchers.is;
|
|
import static org.hamcrest.Matchers.nullValue;
|
|
import static org.hamcrest.Matchers.nullValue;
|
|
|
|
+import static org.mockito.ArgumentMatchers.anyInt;
|
|
|
|
+import static org.mockito.Mockito.doAnswer;
|
|
|
|
+import static org.mockito.Mockito.spy;
|
|
|
|
|
|
public class TransportGetShutdownStatusActionTests extends ESTestCase {
|
|
public class TransportGetShutdownStatusActionTests extends ESTestCase {
|
|
public static final String SHUTTING_DOWN_NODE_ID = "node1";
|
|
public static final String SHUTTING_DOWN_NODE_ID = "node1";
|
|
@@ -122,6 +129,7 @@ public class TransportGetShutdownStatusActionTests extends ESTestCase {
|
|
clusterInfoService,
|
|
clusterInfoService,
|
|
snapshotsInfoService
|
|
snapshotsInfoService
|
|
);
|
|
);
|
|
|
|
+ allocationService.setExistingShardsAllocators(Map.of(GatewayAllocator.ALLOCATOR_NAME, new TestGatewayAllocator()));
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -349,6 +357,60 @@ public class TransportGetShutdownStatusActionTests extends ESTestCase {
|
|
);
|
|
);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
+ * Ensure we can detect stalled migrations when we have unassigned shards that had the shutting down node as their last known
|
|
|
|
+ * node id
|
|
|
|
+ */
|
|
|
|
+ public void testStalledUnassigned() {
|
|
|
|
+ Index index = new Index(randomAlphaOfLength(5), randomAlphaOfLengthBetween(1, 20));
|
|
|
|
+ IndexMetadata imd = spy(generateIndexMetadata(index, 3, 0));
|
|
|
|
+ // make sure the TestGatewayAllocator stays in sync always, avoid flaky tests
|
|
|
|
+ doAnswer(i -> {
|
|
|
|
+ if ((Integer) i.getArgument(0) < 2) {
|
|
|
|
+ return Set.of(LIVE_NODE_ID);
|
|
|
|
+ }
|
|
|
|
+ return Set.of(SHUTTING_DOWN_NODE_ID);
|
|
|
|
+ }).when(imd).inSyncAllocationIds(anyInt());
|
|
|
|
+
|
|
|
|
+ var shard0 = TestShardRouting.newShardRouting(new ShardId(index, 0), LIVE_NODE_ID, true, ShardRoutingState.STARTED);
|
|
|
|
+ var shard1 = TestShardRouting.newShardRouting(new ShardId(index, 1), LIVE_NODE_ID, true, ShardRoutingState.STARTED);
|
|
|
|
+
|
|
|
|
+ // we should stall the node if we find an unassigned shard with lastAllocatedNodeId matching the shutting down node
|
|
|
|
+ var unassigned = makeUnassignedShard(index, 2, SHUTTING_DOWN_NODE_ID, true);
|
|
|
|
+
|
|
|
|
+ assertShardMigration(
|
|
|
|
+ getUnassignedShutdownStatus(index, imd, shard0, shard1, unassigned),
|
|
|
|
+ SingleNodeShutdownMetadata.Status.STALLED,
|
|
|
|
+ 1,
|
|
|
|
+ allOf(containsString(index.getName()), containsString("[2] [primary]"))
|
|
|
|
+ );
|
|
|
|
+
|
|
|
|
+ // if the shard is unassigned, but it's not a primary on this node, we shouldn't stall
|
|
|
|
+ var shard2 = TestShardRouting.newShardRouting(new ShardId(index, 2), LIVE_NODE_ID, true, ShardRoutingState.STARTED);
|
|
|
|
+ var unassignedReplica = makeUnassignedShard(index, 2, SHUTTING_DOWN_NODE_ID, false);
|
|
|
|
+
|
|
|
|
+ var s = getUnassignedShutdownStatus(index, imd, shard0, shard1, shard2, unassignedReplica);
|
|
|
|
+ assertShardMigration(s, SingleNodeShutdownMetadata.Status.COMPLETE, 0, nullValue());
|
|
|
|
+
|
|
|
|
+ // check if we correctly count all of the unassigned shards
|
|
|
|
+ var unassigned3 = makeUnassignedShard(index, 3, SHUTTING_DOWN_NODE_ID, true);
|
|
|
|
+
|
|
|
|
+ assertShardMigration(
|
|
|
|
+ getUnassignedShutdownStatus(index, imd, shard0, shard1, unassigned3, unassigned),
|
|
|
|
+ SingleNodeShutdownMetadata.Status.STALLED,
|
|
|
|
+ 2,
|
|
|
|
+ allOf(containsString(index.getName()), containsString("[2] [primary]"))
|
|
|
|
+ );
|
|
|
|
+
|
|
|
|
+ // check if we correctly walk all of the unassigned shards, shard 2 replica, shard 3 primary
|
|
|
|
+ assertShardMigration(
|
|
|
|
+ getUnassignedShutdownStatus(index, imd, shard0, shard1, shard2, unassignedReplica, unassigned3),
|
|
|
|
+ SingleNodeShutdownMetadata.Status.STALLED,
|
|
|
|
+ 1,
|
|
|
|
+ allOf(containsString(index.getName()), containsString("[3] [primary]"))
|
|
|
|
+ );
|
|
|
|
+ }
|
|
|
|
+
|
|
public void testNotStalledIfAllShardsHaveACopyOnAnotherNode() {
|
|
public void testNotStalledIfAllShardsHaveACopyOnAnotherNode() {
|
|
Index index = new Index(randomAlphaOfLength(5), randomAlphaOfLengthBetween(1, 20));
|
|
Index index = new Index(randomAlphaOfLength(5), randomAlphaOfLengthBetween(1, 20));
|
|
IndexMetadata imd = generateIndexMetadata(index, 3, 0);
|
|
IndexMetadata imd = generateIndexMetadata(index, 3, 0);
|
|
@@ -571,4 +633,62 @@ public class TransportGetShutdownStatusActionTests extends ESTestCase {
|
|
.routingTable(indexRoutingTable)
|
|
.routingTable(indexRoutingTable)
|
|
.build();
|
|
.build();
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
    /**
     * Builds an {@link UnassignedInfo} that records {@code nodeId} as the shard's last allocated
     * node, letting tests simulate shards that previously lived on the shutting-down node.
     *
     * @param nodeId the node id to record as the shard's last known allocation
     */
    private UnassignedInfo makeUnassignedInfo(String nodeId) {
        return new UnassignedInfo(
            UnassignedInfo.Reason.ALLOCATION_FAILED,
            "testing",
            null,
            1,
            System.nanoTime(), // unassigned time in nanos; wall-clock variant follows
            System.currentTimeMillis(),
            false,
            UnassignedInfo.AllocationStatus.NO_ATTEMPT,
            Collections.emptySet(),
            nodeId // last allocated node id — the value the shutdown-status logic inspects
        );
    }
|
|
|
|
+
|
|
|
|
+ private ShardRouting makeUnassignedShard(Index index, int shardId, String nodeId, boolean primary) {
|
|
|
|
+ var unsignedInfo = makeUnassignedInfo(nodeId);
|
|
|
|
+
|
|
|
|
+ return TestShardRouting.newShardRouting(
|
|
|
|
+ new ShardId(index, shardId),
|
|
|
|
+ null,
|
|
|
|
+ null,
|
|
|
|
+ primary,
|
|
|
|
+ ShardRoutingState.UNASSIGNED,
|
|
|
|
+ unsignedInfo
|
|
|
|
+ );
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
    /**
     * Builds a cluster state containing the given shard routings for {@code index}, rigs the
     * allocation deciders so nothing can be (re)allocated but everything must leave the
     * shutting-down node, and returns the computed shutdown shard-migration status.
     *
     * @param index  the index under test
     * @param imd    metadata for {@code index} (may be a spy)
     * @param shards every shard routing to place in the index routing table
     */
    private ShutdownShardMigrationStatus getUnassignedShutdownStatus(Index index, IndexMetadata imd, ShardRouting... shards) {
        var indexRoutingTableBuilder = IndexRoutingTable.builder(index);

        for (var routing : shards) {
            indexRoutingTableBuilder.addShard(routing);
        }

        var indexRoutingTable = indexRoutingTableBuilder.build();

        // Force a decision of NO for all moves and new allocations, simulating a decider that's stuck
        canAllocate.set((r, n, a) -> Decision.NO);
        // And the remain decider simulates NodeShutdownAllocationDecider
        canRemain.set((r, n, a) -> n.nodeId().equals(SHUTTING_DOWN_NODE_ID) ? Decision.NO : Decision.YES);

        RoutingTable.Builder routingTable = RoutingTable.builder();
        routingTable.add(indexRoutingTable);
        ClusterState state = createTestClusterState(routingTable.build(), List.of(imd), SingleNodeShutdownMetadata.Type.REMOVE);

        return TransportGetShutdownStatusAction.shardMigrationStatus(
            state,
            SHUTTING_DOWN_NODE_ID,
            SingleNodeShutdownMetadata.Type.REMOVE,
            true, // NOTE(review): flag's meaning isn't visible here — confirm against shardMigrationStatus's signature
            clusterInfoService,
            snapshotsInfoService,
            allocationService,
            allocationDeciders
        );
    }
|
|
}
|
|
}
|