Przeglądaj źródła

IndexRoutingTable.initializeEmpty shouldn't override supplied primary RecoverySource (#20638)

When initializing a new index routing table, we make a decision where the primary shards should be recovered from. This can be an empty folder for new indices, a set of specific allocation ids for old indices or a snapshot. We currently allow callers of `IndexRoutingTable.initializeEmpty` to supply the source but also set it automatically if null is given. Sadly the current logic is reusing the supplied parameter to store the result of the automatic decision. This is flawed if some of the decision should be *different* between the different index shard (as the first decision that is maid sticks).

 This commit fixes this but also simplifies the API to always make an automatic decision.

This was discovered while working on #20637 which strengthens the testing infra and caused this to bubble up. I put it as a separate commit to make sure it is not lost as part of a bigger test only PR.
Boaz Leskes 9 lat temu
rodzic
commit
65356697ac

+ 23 - 19
core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java

@@ -31,7 +31,6 @@ import org.elasticsearch.cluster.routing.RecoverySource.LocalShardsRecoverySourc
 import org.elasticsearch.cluster.routing.RecoverySource.PeerRecoverySource;
 import org.elasticsearch.cluster.routing.RecoverySource.SnapshotRecoverySource;
 import org.elasticsearch.cluster.routing.RecoverySource.StoreRecoverySource;
-import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Randomness;
 import org.elasticsearch.common.collect.ImmutableOpenIntMap;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -359,31 +358,28 @@ public class IndexRoutingTable extends AbstractDiffable<IndexRoutingTable> imple
          * Initializes a new empty index, as if it was created from an API.
          */
         public Builder initializeAsNew(IndexMetaData indexMetaData) {
-            RecoverySource primaryRecoverySource = indexMetaData.getMergeSourceIndex() != null ?
-                LocalShardsRecoverySource.INSTANCE :
-                StoreRecoverySource.EMPTY_STORE_INSTANCE;
-            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null), primaryRecoverySource);
+            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null));
         }
 
         /**
          * Initializes an existing index.
          */
         public Builder initializeAsRecovery(IndexMetaData indexMetaData) {
-            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null), null);
+            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null));
         }
 
         /**
          * Initializes a new index caused by dangling index imported.
          */
         public Builder initializeAsFromDangling(IndexMetaData indexMetaData) {
-            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null), null);
+            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null));
         }
 
         /**
          * Initializes a new empty index, as as a result of opening a closed index.
          */
         public Builder initializeAsFromCloseToOpen(IndexMetaData indexMetaData) {
-            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null), null);
+            return initializeEmpty(indexMetaData, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null));
         }
 
         /**
@@ -435,28 +431,36 @@ public class IndexRoutingTable extends AbstractDiffable<IndexRoutingTable> imple
 
         /**
          * Initializes a new empty index, with an option to control if its from an API or not.
-         *
-         * @param primaryRecoverySource recovery source for primary shards. If null, it is automatically determined based on active
-         *                              allocation ids
          */
-        private Builder initializeEmpty(IndexMetaData indexMetaData, UnassignedInfo unassignedInfo, @Nullable RecoverySource primaryRecoverySource) {
+        private Builder initializeEmpty(IndexMetaData indexMetaData, UnassignedInfo unassignedInfo) {
             assert indexMetaData.getIndex().equals(index);
             if (!shards.isEmpty()) {
                 throw new IllegalStateException("trying to initialize an index with fresh shards, but already has shards created");
             }
             for (int shardNumber = 0; shardNumber < indexMetaData.getNumberOfShards(); shardNumber++) {
                 ShardId shardId = new ShardId(index, shardNumber);
-                if (primaryRecoverySource == null) {
-                    if (indexMetaData.inSyncAllocationIds(shardNumber).isEmpty() && indexMetaData.getCreationVersion().onOrAfter(Version.V_5_0_0_alpha1)) {
-                        primaryRecoverySource = indexMetaData.getMergeSourceIndex() != null ? LocalShardsRecoverySource.INSTANCE : StoreRecoverySource.EMPTY_STORE_INSTANCE;
-                    } else {
-                        primaryRecoverySource = StoreRecoverySource.EXISTING_STORE_INSTANCE;
-                    }
+                final RecoverySource primaryRecoverySource;
+                if (indexMetaData.inSyncAllocationIds(shardNumber).isEmpty() == false) {
+                    // we have previous valid copies for this shard. use them for recovery
+                    primaryRecoverySource = StoreRecoverySource.EXISTING_STORE_INSTANCE;
+                } else if (indexMetaData.getCreationVersion().before(Version.V_5_0_0_alpha1) &&
+                    unassignedInfo.getReason() != UnassignedInfo.Reason.INDEX_CREATED // tests can create old indices
+                    ) {
+                    // the index is old and didn't maintain inSyncAllocationIds. Fall back to old behavior and require
+                    // finding existing copies
+                    primaryRecoverySource = StoreRecoverySource.EXISTING_STORE_INSTANCE;
+                } else if (indexMetaData.getMergeSourceIndex() != null) {
+                    // this is a new index but the initial shards should merged from another index
+                    primaryRecoverySource = LocalShardsRecoverySource.INSTANCE;
+                } else {
+                    // a freshly created index with no restriction
+                    primaryRecoverySource = StoreRecoverySource.EMPTY_STORE_INSTANCE;
                 }
                 IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId);
                 for (int i = 0; i <= indexMetaData.getNumberOfReplicas(); i++) {
                     boolean primary = i == 0;
-                    indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, primary, primary ? primaryRecoverySource : PeerRecoverySource.INSTANCE, unassignedInfo));
+                    indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, primary,
+                        primary ? primaryRecoverySource : PeerRecoverySource.INSTANCE, unassignedInfo));
                 }
                 shards.put(shardNumber, indexShardRoutingBuilder.build());
             }

+ 8 - 2
core/src/test/java/org/elasticsearch/cluster/health/ClusterStateHealthTests.java

@@ -277,9 +277,9 @@ public class ClusterStateHealthTests extends ESTestCase {
             // if the inactive primaries are due solely to recovery (not failed allocation or previously being allocated)
             // then cluster health is YELLOW, otherwise RED
             if (primaryInactiveDueToRecovery(indexName, clusterState)) {
-                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
+                assertThat("clusterState is:\n" + clusterState.prettyPrint(), health.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
             } else {
-                assertThat(health.getStatus(), equalTo(ClusterHealthStatus.RED));
+                assertThat("clusterState is:\n" + clusterState.prettyPrint(), health.getStatus(), equalTo(ClusterHealthStatus.RED));
             }
         }
     }
@@ -532,6 +532,12 @@ public class ClusterStateHealthTests extends ESTestCase {
                     primaryShard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE) {
                     return false;
                 }
+                if (primaryShard.unassignedInfo().getNumFailedAllocations() > 0) {
+                    return false;
+                }
+                if (primaryShard.unassignedInfo().getLastAllocationStatus() == UnassignedInfo.AllocationStatus.DECIDERS_NO) {
+                    return false;
+                }
             }
         }
         return true;