Browse Source

Take initializing shards into consideration during awareness allocation

This makes the allocation decision consistent.
Fixes #12522
Masaru Hasegawa 10 years ago
parent
commit
5ae00a6129

+ 1 - 1
core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java

@@ -188,7 +188,7 @@ public class AwarenessAllocationDecider extends AllocationDecider {
                 if (assignedShard.relocating()) {
                     RoutingNode relocationNode = allocation.routingNodes().node(assignedShard.relocatingNodeId());
                     shardPerAttribute.addTo(relocationNode.node().attributes().get(awarenessAttribute), 1);
-                } else if (assignedShard.started()) {
+                } else if (assignedShard.started() || assignedShard.initializing()) {
                     RoutingNode routingNode = allocation.routingNodes().node(assignedShard.currentNodeId());
                     shardPerAttribute.addTo(routingNode.node().attributes().get(awarenessAttribute), 1);
                 }

+ 42 - 1
core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java

@@ -785,7 +785,7 @@ public class AwarenessAllocationTests extends ESAllocationTestCase {
 
         ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT).metaData(metaData).routingTable(routingTable).build();
 
-        logger.info("--> adding two nodes on same rack and do rerouting");
+        logger.info("--> adding two nodes in different zones and do rerouting");
         clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
                 .put(newNode("A-0", ImmutableMap.of("zone", "a")))
                 .put(newNode("B-0", ImmutableMap.of("zone", "b")))
@@ -826,4 +826,45 @@ public class AwarenessAllocationTests extends ESAllocationTestCase {
         assertThat(clusterState.getRoutingNodes().node("A-0").size(), equalTo(3));
         assertThat(clusterState.getRoutingNodes().node("B-0").size(), equalTo(5));
     }
+
+    @Test
+    public void testUnassignedShardsWithUnbalancedZones() {
+        AllocationService strategy = createAllocationService(settingsBuilder()
+                .put("cluster.routing.allocation.concurrent_recoveries", 10)
+                .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, "always")
+                .put("cluster.routing.allocation.awareness.attributes", "zone")
+                .build());
+
+        logger.info("Building initial routing table for 'testUnassignedShardsWithUnbalancedZones'");
+
+        MetaData metaData = MetaData.builder()
+                .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(4))
+                .build();
+
+        RoutingTable routingTable = RoutingTable.builder()
+                .addAsNew(metaData.index("test"))
+                .build();
+
+        ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT).metaData(metaData).routingTable(routingTable).build();
+
+        logger.info("--> adding 5 nodes in different zones and do rerouting");
+        clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder()
+                        .put(newNode("A-0", ImmutableMap.of("zone", "a")))
+                        .put(newNode("A-1", ImmutableMap.of("zone", "a")))
+                        .put(newNode("A-2", ImmutableMap.of("zone", "a")))
+                        .put(newNode("A-3", ImmutableMap.of("zone", "a")))
+                        .put(newNode("B-0", ImmutableMap.of("zone", "b")))
+        ).build();
+        routingTable = strategy.reroute(clusterState).routingTable();
+        clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
+        assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0));
+        assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
+
+        logger.info("--> start the shard (primary)");
+        routingTable = strategy.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)).routingTable();
+        clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
+        assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
+        assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(3));
+        assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1)); // Unassigned shard is expected.
+    }
 }

+ 5 - 0
docs/reference/modules/cluster/allocation_awareness.asciidoc

@@ -69,6 +69,11 @@ cluster.routing.allocation.awareness.attributes: rack_id,zone
 NOTE: When using awareness attributes, shards will not be allocated to
 nodes that don't have values set for those attributes.
 
+NOTE: The number of primary/replica copies of a shard allocated to a group
+of nodes with the same awareness attribute value is determined by the number
+of attribute values. When the number of nodes per group is unbalanced and
+there are many replicas, some replica shards may be left unassigned.
+
 [float]
 [[forced-awareness]]
 === Forced Awareness