Browse Source

Don't break allocation if resize source index is missing (#29311)

DiskThresholdDecider currently assumes that the source index of a resize operation (e.g. shrink)
is available, and throws an IndexNotFoundException otherwise, thereby breaking any kind of shard
allocation. This can be quite harmful if the source index is deleted during a shrink, or if the source
index is unavailable during state recovery.

While this behavior has been partly fixed in 6.1 and above (due to #26931), that fix relies on the
order in which AllocationDeciders are executed: ResizeAllocationDecider has to return NO first, so
that DiskThresholdDecider does not run at all. This ordering assumption does not hold everywhere;
for example, it does not hold for the allocation explain API.

This change adds a more complete fix by handling a missing source index in DiskThresholdDecider itself, and also fixes the problem for 5.6.
Yannick Welsch 7 years ago
parent
commit
2dc546ccec

+ 8 - 5
server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

@@ -409,11 +409,14 @@ public class DiskThresholdDecider extends AllocationDecider {
             // the worst case
             long targetShardSize = 0;
             final Index mergeSourceIndex = metaData.getResizeSourceIndex();
-            final IndexMetaData sourceIndexMeta = allocation.metaData().getIndexSafe(mergeSourceIndex);
-            final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(), sourceIndexMeta, metaData.getNumberOfShards());
-            for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
-                if (shardIds.contains(shardRoutingTable.shardId())) {
-                    targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
+            final IndexMetaData sourceIndexMeta = allocation.metaData().index(mergeSourceIndex);
+            if (sourceIndexMeta != null) {
+                final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(),
+                    sourceIndexMeta, metaData.getNumberOfShards());
+                for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
+                    if (shardIds.contains(shardRoutingTable.shardId())) {
+                        targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
+                    }
                 }
             }
             return targetShardSize == 0 ? defaultValue : targetShardSize;

+ 14 - 0
server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java

@@ -342,6 +342,20 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase {
         target2 = ShardRouting.newUnassigned(new ShardId(new Index("target2", "9101112"), 1),
             true, LocalShardsRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
         assertEquals(1000L, DiskThresholdDecider.getExpectedShardSize(target2, allocation, 0));
+
+        // check that the DiskThresholdDecider still works even if the source index has been deleted
+        ClusterState clusterStateWithMissingSourceIndex = ClusterState.builder(clusterState)
+            .metaData(MetaData.builder(metaData).remove("test"))
+            .routingTable(RoutingTable.builder(clusterState.routingTable()).remove("test").build())
+            .build();
+
+        allocationService.reroute(clusterState, "foo");
+
+        RoutingAllocation allocationWithMissingSourceIndex = new RoutingAllocation(null,
+            clusterStateWithMissingSourceIndex.getRoutingNodes(), clusterStateWithMissingSourceIndex, info, 0);
+
+        assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target, allocationWithMissingSourceIndex, 42L));
+        assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target2, allocationWithMissingSourceIndex, 42L));
     }
 
 }