
Fix SearchableSnapshotAllocator for Replica Shards (#67135)

The assertions in the existing `SnapshotShardSizeInfo` logic tripped for non-primary shards.
This commit adds a path to the size data that avoids those assertions, and a test verifying
that the allocator works correctly for replica shards.
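
As a minimal sketch (not part of this diff), the difference between the two size-lookup paths looks roughly like the following. The class and method names are illustrative only; the primary-only assertion on the old path is as described above rather than shown in the diff.

import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider;

final class ExpectedShardSizeSketch {

    // Old path: read the size straight from the snapshot shard size map. Per the
    // commit message, this lookup carries assertions that trip for replica shards.
    static long directFromSnapshotShardSizeInfo(ShardRouting shardRouting, RoutingAllocation allocation) {
        return allocation.snapshotShardSizeInfo()
            .getShardSize(shardRouting, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE);
    }

    // New path (what this commit switches to): DiskThresholdDecider.getExpectedShardSize
    // is handed the ClusterInfo, the snapshot size info, the index metadata and the
    // routing table, so it can resolve an expected size for any routing entry,
    // replicas included, before falling back to the default value.
    static long viaDiskThresholdDecider(ShardRouting shardRouting, RoutingAllocation allocation) {
        return DiskThresholdDecider.getExpectedShardSize(
            shardRouting,
            ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE,
            allocation.clusterInfo(),
            allocation.snapshotShardSizeInfo(),
            allocation.metadata(),
            allocation.routingTable()
        );
    }
}
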
Armin Braun 4 years ago
parent
commit
140a481aaf

+ 44 - 0
x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotAllocationIntegTests.java

@@ -7,6 +7,7 @@
 package org.elasticsearch.xpack.searchablesnapshots;
 
 import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
@@ -15,9 +16,12 @@ import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.xpack.searchablesnapshots.cache.CacheService;
 
 import java.util.List;
+import java.util.Set;
 
 import static org.elasticsearch.index.IndexSettings.INDEX_SOFT_DELETES_SETTING;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.hamcrest.Matchers.in;
+import static org.hamcrest.Matchers.is;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
 public class SearchableSnapshotAllocationIntegTests extends BaseSearchableSnapshotsIntegTestCase {
@@ -62,6 +66,46 @@ public class SearchableSnapshotAllocationIntegTests extends BaseSearchableSnapsh
         );
     }
 
+    public void testAllocatesReplicaToBestAvailableNodeOnRestart() throws Exception {
+        internalCluster().startMasterOnlyNode();
+        final String firstDataNode = internalCluster().startDataOnlyNode();
+        final String secondDataNode = internalCluster().startDataOnlyNode();
+        final String index = "test-idx";
+        createIndexWithContent(index, indexSettingsNoReplicas(1).put(INDEX_SOFT_DELETES_SETTING.getKey(), true).build());
+        final String repoName = "test-repo";
+        createRepository(repoName, "fs");
+        final String snapshotName = "test-snapshot";
+        createSnapshot(repoName, snapshotName, List.of(index));
+        assertAcked(client().admin().indices().prepareDelete(index));
+        final String restoredIndex = mountSnapshot(
+            repoName,
+            snapshotName,
+            index,
+            Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build()
+        );
+        ensureGreen(restoredIndex);
+        internalCluster().startDataOnlyNodes(randomIntBetween(1, 4));
+
+        setAllocation(EnableAllocationDecider.Allocation.NONE);
+
+        internalCluster().getInstance(CacheService.class, firstDataNode).synchronizeCache();
+        internalCluster().getInstance(CacheService.class, secondDataNode).synchronizeCache();
+        internalCluster().restartNode(firstDataNode);
+        internalCluster().restartNode(secondDataNode);
+        ensureStableCluster(internalCluster().numDataAndMasterNodes());
+
+        setAllocation(EnableAllocationDecider.Allocation.ALL);
+        ensureGreen(restoredIndex);
+
+        final ClusterState state = client().admin().cluster().prepareState().get().getState();
+        final Set<String> nodesWithCache = Set.of(
+            state.nodes().resolveNode(firstDataNode).getId(),
+            state.nodes().resolveNode(secondDataNode).getId()
+        );
+        assertThat(state.routingTable().index(restoredIndex).shard(0).primaryShard().currentNodeId(), is(in(nodesWithCache)));
+        assertThat(state.routingTable().index(restoredIndex).shard(0).replicaShards().get(0).currentNodeId(), is(in(nodesWithCache)));
+    }
+
     private void setAllocation(EnableAllocationDecider.Allocation allocation) {
         logger.info("--> setting allocation to [{}]", allocation);
         assertAcked(

+ 9 - 2
x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotAllocator.java

@@ -26,6 +26,7 @@ import org.elasticsearch.cluster.routing.allocation.FailedShard;
 import org.elasticsearch.cluster.routing.allocation.NodeAllocationResult;
 import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
+import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.collect.Tuple;
@@ -104,7 +105,6 @@ public class SearchableSnapshotAllocator implements ExistingShardsAllocator {
             && (shardRouting.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE
                 || shardRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE)) {
             // we always force snapshot recovery source to use the snapshot-based recovery process on the node
-
             final Settings indexSettings = allocation.metadata().index(shardRouting.index()).getSettings();
             final IndexId indexId = new IndexId(
                 SNAPSHOT_INDEX_NAME_SETTING.get(indexSettings),
@@ -136,7 +136,14 @@ public class SearchableSnapshotAllocator implements ExistingShardsAllocator {
                 unassignedAllocationHandler.initialize(
                     allocateUnassignedDecision.getTargetNode().getId(),
                     allocateUnassignedDecision.getAllocationId(),
-                    allocation.snapshotShardSizeInfo().getShardSize(shardRouting, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE),
+                    DiskThresholdDecider.getExpectedShardSize(
+                        shardRouting,
+                        ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE,
+                        allocation.clusterInfo(),
+                        allocation.snapshotShardSizeInfo(),
+                        allocation.metadata(),
+                        allocation.routingTable()
+                    ),
                     allocation.changes()
                 );
             } else {