Browse Source

Clarify allocation explain if random shard chosen (#75670)

Today we often encounter users that are confused by the behaviour of
calling `GET _cluster/allocation/explain` without a body: it _seems_ to
work, but it explains a random shard, and if this isn't the shard
they're thinking of then it's unclear how to proceed.

With this commit we add a note to the response when a shard was randomly
chosen indicating that it is possible, and possibly useful, to explain a
different shard. We also adjust the exception message in the case when
all shards are assigned to indicate why it's an invalid request and what
to do to make it valid.
David Turner 4 years ago
parent
commit
95edc6deb2

+ 2 - 0
docs/reference/cluster/allocation-explain.asciidoc

@@ -25,6 +25,8 @@ GET _cluster/allocation/explain
 
 `GET _cluster/allocation/explain`
 
+`POST _cluster/allocation/explain`
+
 [[cluster-allocation-explain-api-prereqs]]
 ==== {api-prereq-title}
 

+ 1 - 1
rest-api-spec/src/main/resources/rest-api-spec/api/cluster.allocation_explain.json

@@ -32,7 +32,7 @@
       }
     },
     "body":{
-      "description":"The index, shard, and primary flag to explain. Empty means 'explain the first unassigned shard'"
+      "description":"The index, shard, and primary flag to explain. Empty means 'explain a randomly-chosen unassigned shard'"
     }
   }
 }

+ 31 - 3
server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.action.admin.cluster.allocation;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.cluster.ClusterInfo;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.routing.ShardRouting;
@@ -36,15 +37,27 @@ import static org.elasticsearch.cluster.routing.allocation.AbstractAllocationDec
  */
 public final class ClusterAllocationExplanation implements ToXContentObject, Writeable {
 
+    static final String NO_SHARD_SPECIFIED_MESSAGE = "No shard was specified in the explain API request, so this response " +
+        "explains a randomly chosen unassigned shard. There may be other unassigned shards in this cluster which cannot be assigned for " +
+        "different reasons. It may not be possible to assign this shard until one of the other shards is assigned correctly. To explain " +
+        "the allocation of other shards (whether assigned or unassigned) you must specify the target shard in the request to this API.";
+
+    private final boolean specificShard;
     private final ShardRouting shardRouting;
     private final DiscoveryNode currentNode;
     private final DiscoveryNode relocationTargetNode;
     private final ClusterInfo clusterInfo;
     private final ShardAllocationDecision shardAllocationDecision;
 
-    public ClusterAllocationExplanation(ShardRouting shardRouting, @Nullable DiscoveryNode currentNode,
-                                        @Nullable DiscoveryNode relocationTargetNode, @Nullable ClusterInfo clusterInfo,
-                                        ShardAllocationDecision shardAllocationDecision) {
+    public ClusterAllocationExplanation(
+        boolean specificShard,
+        ShardRouting shardRouting,
+        @Nullable DiscoveryNode currentNode,
+        @Nullable DiscoveryNode relocationTargetNode,
+        @Nullable ClusterInfo clusterInfo,
+        ShardAllocationDecision shardAllocationDecision) {
+
+        this.specificShard = specificShard;
         this.shardRouting = shardRouting;
         this.currentNode = currentNode;
         this.relocationTargetNode = relocationTargetNode;
@@ -53,6 +66,11 @@ public final class ClusterAllocationExplanation implements ToXContentObject, Wri
     }
 
     public ClusterAllocationExplanation(StreamInput in) throws IOException {
+        if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
+            this.specificShard = in.readBoolean();
+        } else {
+            this.specificShard = true; // suppress "this is a random shard" warning in BwC situations
+        }
         this.shardRouting = new ShardRouting(in);
         this.currentNode = in.readOptionalWriteable(DiscoveryNode::new);
         this.relocationTargetNode = in.readOptionalWriteable(DiscoveryNode::new);
@@ -62,6 +80,9 @@ public final class ClusterAllocationExplanation implements ToXContentObject, Wri
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
+        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
+            out.writeBoolean(specificShard);
+        } // else suppress "this is a random shard" warning in BwC situations
         shardRouting.writeTo(out);
         out.writeOptionalWriteable(currentNode);
         out.writeOptionalWriteable(relocationTargetNode);
@@ -69,6 +90,10 @@ public final class ClusterAllocationExplanation implements ToXContentObject, Wri
         shardAllocationDecision.writeTo(out);
     }
 
+    public boolean isSpecificShard() {
+        return specificShard;
+    }
+
     /**
      * Returns the shard that the explanation is about.
      */
@@ -131,6 +156,9 @@ public final class ClusterAllocationExplanation implements ToXContentObject, Wri
 
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(); {
+            if (isSpecificShard() == false) {
+                builder.field("note", NO_SHARD_SPECIFIED_MESSAGE);
+            }
             builder.field("index", shardRouting.getIndexName());
             builder.field("shard", shardRouting.getId());
             builder.field("primary", shardRouting.primary());

+ 23 - 8
server/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java

@@ -81,15 +81,25 @@ public class TransportClusterAllocationExplainAction
         ShardRouting shardRouting = findShardToExplain(request, allocation);
         logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting);
 
-        ClusterAllocationExplanation cae = explainShard(shardRouting, allocation,
-            request.includeDiskInfo() ? clusterInfo : null, request.includeYesDecisions(), allocationService);
+        ClusterAllocationExplanation cae = explainShard(
+            shardRouting,
+            allocation,
+            request.includeDiskInfo() ? clusterInfo : null,
+            request.includeYesDecisions(),
+            request.useAnyUnassignedShard() == false,
+            allocationService);
         listener.onResponse(new ClusterAllocationExplainResponse(cae));
     }
 
     // public for testing
-    public static ClusterAllocationExplanation explainShard(ShardRouting shardRouting, RoutingAllocation allocation,
-                                                            ClusterInfo clusterInfo, boolean includeYesDecisions,
-                                                            AllocationService allocationService) {
+    public static ClusterAllocationExplanation explainShard(
+        ShardRouting shardRouting,
+        RoutingAllocation allocation,
+        ClusterInfo clusterInfo,
+        boolean includeYesDecisions,
+        boolean isSpecificShard,
+        AllocationService allocationService) {
+
         allocation.setDebugMode(includeYesDecisions ? DebugMode.ON : DebugMode.EXCLUDE_YES_DECISIONS);
 
         ShardAllocationDecision shardDecision;
@@ -99,10 +109,13 @@ public class TransportClusterAllocationExplainAction
             shardDecision = allocationService.explainShardAllocation(shardRouting, allocation);
         }
 
-        return new ClusterAllocationExplanation(shardRouting,
+        return new ClusterAllocationExplanation(
+            isSpecificShard,
+            shardRouting,
             shardRouting.currentNodeId() != null ? allocation.nodes().get(shardRouting.currentNodeId()) : null,
             shardRouting.relocatingNodeId() != null ? allocation.nodes().get(shardRouting.relocatingNodeId()) : null,
-            clusterInfo, shardDecision);
+            clusterInfo,
+            shardDecision);
     }
 
     // public for testing
@@ -115,7 +128,9 @@ public class TransportClusterAllocationExplainAction
                 foundShard = ui.next();
             }
             if (foundShard == null) {
-                throw new IllegalArgumentException("unable to find any unassigned shards to explain [" + request + "]");
+                throw new IllegalArgumentException("No shard was specified in the request which means the response should explain a " +
+                    "randomly-chosen unassigned shard, but there are no unassigned shards in this cluster. To explain the allocation of " +
+                    "an assigned shard you must specify the target shard in the request.");
             }
         } else {
             String index = request.getIndex();

+ 16 - 3
server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainActionTests.java

@@ -32,6 +32,8 @@ import java.util.Collections;
 import java.util.Locale;
 
 import static org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction.findShardToExplain;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.containsString;
 
 /**
  * Tests for the {@link TransportClusterAllocationExplainAction} class.
@@ -46,7 +48,12 @@ public class ClusterAllocationExplainActionTests extends ESTestCase {
         ShardRouting shard = clusterState.getRoutingTable().index("idx").shard(0).primaryShard();
         RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Collections.emptyList()),
             clusterState.getRoutingNodes(), clusterState, null, null, System.nanoTime());
-        ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(shard, allocation, null, randomBoolean(),
+        ClusterAllocationExplanation cae = TransportClusterAllocationExplainAction.explainShard(
+            shard,
+            allocation,
+            null,
+            randomBoolean(),
+            true,
             new AllocationService(null, new TestGatewayAllocator(), new ShardsAllocator() {
                 @Override
                 public void allocate(RoutingAllocation allocation) {
@@ -64,6 +71,7 @@ public class ClusterAllocationExplainActionTests extends ESTestCase {
             }, null, null));
 
         assertEquals(shard.currentNodeId(), cae.getCurrentNode().getId());
+        assertTrue(cae.isSpecificShard());
         assertFalse(cae.getShardAllocationDecision().isDecisionTaken());
         assertFalse(cae.getShardAllocationDecision().getAllocateDecision().isDecisionTaken());
         assertFalse(cae.getShardAllocationDecision().getMoveDecision().isDecisionTaken());
@@ -110,8 +118,13 @@ public class ClusterAllocationExplainActionTests extends ESTestCase {
         final ClusterState allStartedClusterState = ClusterStateCreationUtils.state("idx", randomBoolean(),
             ShardRoutingState.STARTED, ShardRoutingState.STARTED);
         final ClusterAllocationExplainRequest anyUnassignedShardsRequest = new ClusterAllocationExplainRequest();
-        expectThrows(IllegalArgumentException.class, () ->
-            findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState)));
+        assertThat(expectThrows(
+            IllegalArgumentException.class,
+            () -> findShardToExplain(anyUnassignedShardsRequest, routingAllocation(allStartedClusterState))).getMessage(),
+            allOf(
+                // no point in asserting the precise wording of the message in this test, but we care that it contains these bits:
+                containsString("No shard was specified in the request"),
+                containsString("specify the target shard in the request")));
     }
 
     public void testFindPrimaryShardToExplain() {

+ 26 - 4
server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplanationTests.java

@@ -31,6 +31,9 @@ import org.elasticsearch.test.ESTestCase;
 
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.emptySet;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 
 /**
  * Tests for the cluster allocation explanation
@@ -50,11 +53,12 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
     }
 
     public void testExplanationSerialization() throws Exception {
-        ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean());
+        ClusterAllocationExplanation cae = randomClusterAllocationExplanation(randomBoolean(), randomBoolean());
         BytesStreamOutput out = new BytesStreamOutput();
         cae.writeTo(out);
         StreamInput in = out.bytes().streamInput();
         ClusterAllocationExplanation cae2 = new ClusterAllocationExplanation(in);
+        assertEquals(cae.isSpecificShard(), cae2.isSpecificShard());
         assertEquals(cae.getShard(), cae2.getShard());
         assertEquals(cae.isPrimary(), cae2.isPrimary());
         assertTrue(cae2.isPrimary());
@@ -73,7 +77,7 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
     }
 
     public void testExplanationToXContent() throws Exception {
-        ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true);
+        ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, true);
         XContentBuilder builder = XContentFactory.jsonBuilder();
         cae.toXContent(builder, ToXContent.EMPTY_PARAMS);
         assertEquals("{\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" +
@@ -83,7 +87,25 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
                          "that can both allocate this shard and improve the cluster balance\"}", Strings.toString(builder));
     }
 
-    private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard) {
+    public void testRandomShardExplanationToXContent() throws Exception {
+        ClusterAllocationExplanation cae = randomClusterAllocationExplanation(true, false);
+        XContentBuilder builder = XContentFactory.jsonBuilder();
+        cae.toXContent(builder, ToXContent.EMPTY_PARAMS);
+        final String actual = Strings.toString(builder);
+        assertThat(actual, allOf(
+            equalTo("{\"note\":\"" + ClusterAllocationExplanation.NO_SHARD_SPECIFIED_MESSAGE +
+                "\",\"index\":\"idx\",\"shard\":0,\"primary\":true,\"current_state\":\"started\",\"current_node\":" +
+                "{\"id\":\"node-0\",\"name\":\"\",\"transport_address\":\"" + cae.getCurrentNode().getAddress() +
+                "\",\"weight_ranking\":3},\"can_remain_on_current_node\":\"yes\",\"can_rebalance_cluster\":\"yes\"," +
+                "\"can_rebalance_to_other_node\":\"no\",\"rebalance_explanation\":\"cannot rebalance as no target node exists " +
+                "that can both allocate this shard and improve the cluster balance\"}"),
+            // no point in asserting the precise wording of the message in this test, but we care that the note contains these bits:
+            containsString("No shard was specified in the explain API request"),
+            containsString("specify the target shard in the request")
+        ));
+    }
+
+    private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard, boolean specificShard) {
         ShardRouting shardRouting = TestShardRouting.newShardRouting(new ShardId(new Index("idx", "123"), 0),
             assignedShard ? "node-0" : null, true, assignedShard ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED);
         DiscoveryNode node = assignedShard ? new DiscoveryNode("node-0", buildNewFakeTransportAddress(), emptyMap(), emptySet(),
@@ -97,6 +119,6 @@ public final class ClusterAllocationExplanationTests extends ESTestCase {
             AllocateUnassignedDecision allocateDecision = AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.DECIDERS_NO, null);
             shardAllocationDecision = new ShardAllocationDecision(allocateDecision, MoveDecision.NOT_TAKEN);
         }
-        return new ClusterAllocationExplanation(shardRouting, node, null, null, shardAllocationDecision);
+        return new ClusterAllocationExplanation(specificShard, shardRouting, node, null, null, shardAllocationDecision);
     }
 }