|
@@ -1,203 +1,289 @@
|
|
|
[[cluster-allocation-explain]]
|
|
|
== Cluster Allocation Explain API
|
|
|
|
|
|
-The cluster allocation explanation API is designed to assist in answering the
|
|
|
-question "why is this shard unassigned?". To explain the allocation (on
|
|
|
-unassigned state) of a shard, issue a request like:
|
|
|
+The purpose of the cluster allocation explain API is to provide
|
|
|
+explanations for shard allocations in the cluster. For unassigned shards,
|
|
|
+the explain API provides an explanation for why the shard is unassigned.
|
|
|
+For assigned shards, the explain API provides an explanation for why the
|
|
|
+shard is remaining on its current moved and has not moved or rebalanced to
|
|
|
+another node. This API can be very useful when attempting to diagnose why
|
|
|
+a shard is unassigned or why a shard continues to remain on its current node
|
|
|
+when you might expect otherwise.
|
|
|
|
|
|
-experimental[The cluster allocation explain API is new and should still be considered experimental. The API may change in ways that are not backwards compatible]
|
|
|
+=== Explain API Request
|
|
|
+
|
|
|
+To explain the allocation of a shard, issue a request:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain' -d'{
|
|
|
+$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain' -d '{
|
|
|
"index": "myindex",
|
|
|
"shard": 0,
|
|
|
- "primary": false
|
|
|
+ "primary": true
|
|
|
}'
|
|
|
--------------------------------------------------
|
|
|
|
|
|
Specify the `index` and `shard` id of the shard you would like an explanation
|
|
|
-for, as well as the `primary` flag to indicate whether to explain a primary or
|
|
|
-replica shard.
|
|
|
+for, as well as the `primary` flag to indicate whether to explain the primary
|
|
|
+shard for the given shard id or one of its replica shards. These three request
|
|
|
+parameters are required.
|
|
|
+
|
|
|
+You may also specify an optional `current_node` request parameter to only explain
|
|
|
+a shard that is currently located on `current_node`. The `current_node` can be
|
|
|
+specified as either the node id or node name.
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain' -d '{
|
|
|
+ "index": "myindex",
|
|
|
+ "shard": 0,
|
|
|
+ "primary": false,
|
|
|
+ "current_node": "nodeA" <1>
|
|
|
+}'
|
|
|
+--------------------------------------------------
|
|
|
+<1> The node where shard 0 currently has a replica on
|
|
|
+
|
|
|
+You can also have Elasticsearch explain the allocation of the first unassigned
|
|
|
+shard that it finds by sending an empty body for the request:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain'
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+=== Explain API Response
|
|
|
|
|
|
-The response looks like:
|
|
|
+This section includes examples of the cluster allocation explain API response output
|
|
|
+under various scenarios.
|
|
|
+
|
|
|
+The API response for an unassigned shard:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "shard" : {
|
|
|
- "index" : "myindex",
|
|
|
- "index_uuid" : "KnW0-zELRs6PK84l0r38ZA",
|
|
|
- "id" : 0,
|
|
|
- "primary" : false
|
|
|
- },
|
|
|
- "assigned" : false, <1>
|
|
|
- "shard_state_fetch_pending": false, <2>
|
|
|
+ "index" : "idx",
|
|
|
+ "shard" : 0,
|
|
|
+ "primary" : true,
|
|
|
+ "current_state" : "unassigned", <1>
|
|
|
"unassigned_info" : {
|
|
|
- "reason" : "INDEX_CREATED", <3>
|
|
|
- "at" : "2016-03-22T20:04:23.620Z"
|
|
|
+ "reason" : "INDEX_CREATED", <2>
|
|
|
+ "at" : "2017-01-04T18:08:16.600Z",
|
|
|
+ "last_allocation_status" : "no"
|
|
|
},
|
|
|
- "allocation_delay_ms" : 0, <4>
|
|
|
- "remaining_delay_ms" : 0, <5>
|
|
|
- "nodes" : {
|
|
|
- "V-Spi0AyRZ6ZvKbaI3691w" : {
|
|
|
- "node_name" : "H5dfFeA",
|
|
|
- "node_attributes" : { <6>
|
|
|
- "bar" : "baz"
|
|
|
- },
|
|
|
- "store" : {
|
|
|
- "shard_copy" : "NONE" <7>
|
|
|
- },
|
|
|
- "final_decision" : "NO", <8>
|
|
|
- "final_explanation" : "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
|
|
|
- "weight" : 0.06666675, <9>
|
|
|
- "decisions" : [ { <10>
|
|
|
- "decider" : "filter",
|
|
|
- "decision" : "NO",
|
|
|
- "explanation" : "node does not match index include filters [foo:\"bar\"]"
|
|
|
- } ]
|
|
|
+ "can_allocate" : "no", <3>
|
|
|
+ "allocate_explanation" : "cannot allocate because allocation is not permitted to any of the nodes",
|
|
|
+ "node_allocation_decisions" : [
|
|
|
+ {
|
|
|
+ "node_id" : "8qt2rY-pT6KNZB3-hGfLnw",
|
|
|
+ "node_name" : "node_t1",
|
|
|
+ "transport_address" : "127.0.0.1:9401",
|
|
|
+ "node_decision" : "no", <4>
|
|
|
+ "weight_ranking" : 1,
|
|
|
+ "deciders" : [
|
|
|
+ {
|
|
|
+ "decider" : "filter", <5>
|
|
|
+ "decision" : "NO",
|
|
|
+ "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"non_existent_node\"]" <6>
|
|
|
+ }
|
|
|
+ ]
|
|
|
},
|
|
|
- "Qc6VL8c5RWaw1qXZ0Rg57g" : {
|
|
|
- "node_name" : "bGG90GE",
|
|
|
- "node_attributes" : {
|
|
|
- "bar" : "baz",
|
|
|
- "foo" : "bar"
|
|
|
- },
|
|
|
- "store" : {
|
|
|
- "shard_copy" : "AVAILABLE"
|
|
|
- },
|
|
|
- "final_decision" : "NO",
|
|
|
- "final_explanation" : "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
|
|
|
- "weight" : -1.3833332,
|
|
|
- "decisions" : [ {
|
|
|
- "decider" : "same_shard",
|
|
|
- "decision" : "NO",
|
|
|
- "explanation" : "the shard cannot be allocated on the same node id [Qc6VL8c5RWaw1qXZ0Rg57g] on which it already exists"
|
|
|
- } ]
|
|
|
- },
|
|
|
- "PzdyMZGXQdGhqTJHF_hGgA" : {
|
|
|
- "node_name" : "DKDM97B",
|
|
|
- "node_attributes" : { },
|
|
|
- "store" : {
|
|
|
- "shard_copy" : "NONE"
|
|
|
- },
|
|
|
- "final_decision" : "NO",
|
|
|
- "final_explanation" : "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
|
|
|
- "weight" : 2.3166666,
|
|
|
- "decisions" : [ {
|
|
|
- "decider" : "filter",
|
|
|
- "decision" : "NO",
|
|
|
- "explanation" : "node does not match index include filters [foo:\"bar\"]"
|
|
|
- } ]
|
|
|
+ {
|
|
|
+ "node_id" : "7Wr-QxLXRLKDxhzNm50pFA",
|
|
|
+ "node_name" : "node_t0",
|
|
|
+ "transport_address" : "127.0.0.1:9400",
|
|
|
+ "node_decision" : "no",
|
|
|
+ "weight_ranking" : 2,
|
|
|
+ "deciders" : [
|
|
|
+ {
|
|
|
+ "decider" : "filter",
|
|
|
+ "decision" : "NO",
|
|
|
+ "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"non_existent_node\"]"
|
|
|
+ }
|
|
|
+ ]
|
|
|
}
|
|
|
- }
|
|
|
+ ]
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
-<1> Whether the shard is assigned or unassigned
|
|
|
-<2> Whether information about the shard is still being fetched
|
|
|
-<3> Reason for the shard originally becoming unassigned
|
|
|
-<4> Configured delay before the shard can be allocated
|
|
|
-<5> Remaining delay before the shard can be allocated
|
|
|
-<6> User-added attributes the node has
|
|
|
-<7> The shard copy information for this node and error (if applicable)
|
|
|
-<8> Final decision and explanation of whether the shard can be allocated to this node
|
|
|
-<9> Weight for how much the allocator would like to allocate the shard to this node
|
|
|
-<10> List of node decisions factoring into final decision about the shard
|
|
|
+<1> The current state of the shard
|
|
|
+<2> The reason for the shard originally becoming unassigned
|
|
|
+<3> Whether to allocate the shard
|
|
|
+<4> Whether to allocate the shard to the particular node
|
|
|
+<5> The decider which led to the `no` decision for the node
|
|
|
+<6> An explanation as to why the decider returned a `no` decision, with a helpful hint pointing to the setting that led to the decision
|
|
|
|
|
|
-For a shard that is already assigned, the output looks similar to:
|
|
|
+You can return information gathered by the cluster info service about disk usage
|
|
|
+and shard sizes by setting the `include_disk_info` parameter to `true`:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain?include_disk_info=true'
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+Additionally, if you would like to include all decisions that were factored into the final
|
|
|
+decision, the `include_yes_decisions` parameter will return all decisions for each node:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+--------------------------------------------------
|
|
|
+$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain?include_yes_decisions=true'
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+The default value for `include_yes_decisions` is `false`, which will only
|
|
|
+include the `no` decisions in the response. This is generally what you would
|
|
|
+want, as the `no` decisions indicate why a shard is unassigned or cannot be moved,
|
|
|
+and including all decisions include the `yes` ones adds a lot of verbosity to the
|
|
|
+API's response output.
|
|
|
+
|
|
|
+The API response output for an unassigned primary shard that had previously been
|
|
|
+allocated to a node in the cluster:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
{
|
|
|
- "shard" : {
|
|
|
- "index" : "only-foo",
|
|
|
- "index_uuid" : "KnW0-zELRs6PK84l0r38ZA",
|
|
|
- "id" : 0,
|
|
|
- "primary" : true
|
|
|
+ "index" : "idx",
|
|
|
+ "shard" : 0,
|
|
|
+ "primary" : true,
|
|
|
+ "current_state" : "unassigned",
|
|
|
+ "unassigned_info" : {
|
|
|
+ "reason" : "NODE_LEFT",
|
|
|
+ "at" : "2017-01-04T18:03:28.464Z",
|
|
|
+ "details" : "node_left[OIWe8UhhThCK0V5XfmdrmQ]",
|
|
|
+ "last_allocation_status" : "no_valid_shard_copy"
|
|
|
},
|
|
|
- "assigned" : true,
|
|
|
- "assigned_node_id" : "Qc6VL8c5RWaw1qXZ0Rg57g", <1>
|
|
|
- "shard_state_fetch_pending": false,
|
|
|
- "allocation_delay_ms" : 0,
|
|
|
- "remaining_delay_ms" : 0,
|
|
|
- "nodes" : {
|
|
|
- "V-Spi0AyRZ6ZvKbaI3691w" : {
|
|
|
- "node_name" : "bGG90GE",
|
|
|
- "node_attributes" : {
|
|
|
- "bar" : "baz"
|
|
|
- },
|
|
|
- "store" : {
|
|
|
- "shard_copy" : "NONE"
|
|
|
- },
|
|
|
- "final_decision" : "NO",
|
|
|
- "final_explanation" : "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
|
|
|
- "weight" : 1.4499999,
|
|
|
- "decisions" : [ {
|
|
|
- "decider" : "filter",
|
|
|
- "decision" : "NO",
|
|
|
- "explanation" : "node does not match index include filters [foo:\"bar\"]"
|
|
|
- } ]
|
|
|
- },
|
|
|
- "Qc6VL8c5RWaw1qXZ0Rg57g" : {
|
|
|
- "node_name" : "I8hydUG",
|
|
|
- "node_attributes" : {
|
|
|
- "bar" : "baz",
|
|
|
- "foo" : "bar"
|
|
|
- },
|
|
|
- "store" : {
|
|
|
- "shard_copy" : "AVAILABLE"
|
|
|
- },
|
|
|
- "final_decision" : "ALREADY_ASSIGNED", <2>
|
|
|
- "final_explanation" : "the shard is already assigned to this node",
|
|
|
- "weight" : 0.0,
|
|
|
- "decisions" : [ {
|
|
|
- "decider" : "same_shard",
|
|
|
- "decision" : "NO",
|
|
|
- "explanation" : "the shard cannot be allocated on the same node id [Qc6VL8c5RWaw1qXZ0Rg57g] on which it already exists"
|
|
|
- } ]
|
|
|
- },
|
|
|
- "PzdyMZGXQdGhqTJHF_hGgA" : {
|
|
|
- "node_name" : "H5dfFeA",
|
|
|
- "node_attributes" : { },
|
|
|
- "store" : {
|
|
|
- "shard_copy" : "NONE"
|
|
|
- },
|
|
|
- "final_decision" : "NO",
|
|
|
- "final_explanation" : "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision",
|
|
|
- "weight" : 3.6999998,
|
|
|
- "decisions" : [ {
|
|
|
- "decider" : "filter",
|
|
|
- "decision" : "NO",
|
|
|
- "explanation" : "node does not match index include filters [foo:\"bar\"]"
|
|
|
- } ]
|
|
|
- }
|
|
|
- }
|
|
|
+ "can_allocate" : "no_valid_shard_copy",
|
|
|
+ "allocate_explanation" : "cannot allocate because a previous copy of the primary shard existed but can no longer be found on the nodes in the cluster"
|
|
|
}
|
|
|
--------------------------------------------------
|
|
|
-<1> Node the shard is currently assigned to
|
|
|
-<2> The decision is "ALREADY_ASSIGNED" because the shard is currently assigned to this node
|
|
|
|
|
|
-You can also have Elasticsearch explain the allocation of the first unassigned
|
|
|
-shard it finds by sending an empty body, such as:
|
|
|
+The API response output for a replica that is unassigned due to delayed allocation:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain'
|
|
|
+{
|
|
|
+ "index" : "idx",
|
|
|
+ "shard" : 0,
|
|
|
+ "primary" : false,
|
|
|
+ "current_state" : "unassigned",
|
|
|
+ "unassigned_info" : {
|
|
|
+ "reason" : "NODE_LEFT",
|
|
|
+ "at" : "2017-01-04T18:53:59.498Z",
|
|
|
+ "details" : "node_left[G92ZwuuaRY-9n8_tc-IzEg]",
|
|
|
+ "last_allocation_status" : "no_attempt"
|
|
|
+ },
|
|
|
+ "can_allocate" : "allocation_delayed",
|
|
|
+ "allocate_explanation" : "cannot allocate because the cluster is still waiting 59.8s for the departed node holding a replica to rejoin, despite being allowed to allocate the shard to at least one other node",
|
|
|
+ "configured_delay" : "1m", <1>
|
|
|
+ "configured_delay_in_millis" : 60000,
|
|
|
+ "remaining_delay" : "59.8s", <2>
|
|
|
+ "remaining_delay_in_millis" : 59824,
|
|
|
+ "node_allocation_decisions" : [
|
|
|
+ {
|
|
|
+ "node_id" : "pmnHu_ooQWCPEFobZGbpWw",
|
|
|
+ "node_name" : "node_t2",
|
|
|
+ "transport_address" : "127.0.0.1:9402",
|
|
|
+ "node_decision" : "yes"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "node_id" : "3sULLVJrRneSg0EfBB-2Ew",
|
|
|
+ "node_name" : "node_t0",
|
|
|
+ "transport_address" : "127.0.0.1:9400",
|
|
|
+ "node_decision" : "no",
|
|
|
+ "store" : { <3>
|
|
|
+ "matching_size" : "4.2kb",
|
|
|
+ "matching_size_in_bytes" : 4325
|
|
|
+ },
|
|
|
+ "deciders" : [
|
|
|
+ {
|
|
|
+ "decider" : "same_shard",
|
|
|
+ "decision" : "NO",
|
|
|
+ "explanation" : "the shard cannot be allocated to the same node on which a copy of the shard already exists [[idx][0], node[3sULLVJrRneSg0EfBB-2Ew], [P], s[STARTED], a[id=eV9P8BN1QPqRc3B4PLx6cg]]"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
+<1> The configured delay before allocating a replica shard that does not exist due to the node holding it leaving the cluster
|
|
|
+<2> The remaining delay before allocating the replica shard
|
|
|
+<3> Information about the shard data found on a node
|
|
|
|
|
|
-If you would like to include all decisions that were factored into the final
|
|
|
-decision, the `include_yes_decisions` parameter will return all decisions:
|
|
|
+The API response output for an assigned shard that is not allowed to
|
|
|
+remain on its current node and is required to move:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain?include_yes_decisions=true'
|
|
|
+{
|
|
|
+ "index" : "idx",
|
|
|
+ "shard" : 0,
|
|
|
+ "primary" : true,
|
|
|
+ "current_state" : "started",
|
|
|
+ "current_node" : {
|
|
|
+ "id" : "8lWJeJ7tSoui0bxrwuNhTA",
|
|
|
+ "name" : "node_t1",
|
|
|
+ "transport_address" : "127.0.0.1:9401"
|
|
|
+ },
|
|
|
+ "can_remain_on_current_node" : "no", <1>
|
|
|
+ "can_remain_decisions" : [ <2>
|
|
|
+ {
|
|
|
+ "decider" : "filter",
|
|
|
+ "decision" : "NO",
|
|
|
+ "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"non_existent_node\"]"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "can_move_to_other_node" : "no", <3>
|
|
|
+ "move_explanation" : "cannot move shard to another node, even though it is not allowed to remain on its current node",
|
|
|
+ "node_allocation_decisions" : [
|
|
|
+ {
|
|
|
+ "node_id" : "_P8olZS8Twax9u6ioN-GGA",
|
|
|
+ "node_name" : "node_t0",
|
|
|
+ "transport_address" : "127.0.0.1:9400",
|
|
|
+ "node_decision" : "no",
|
|
|
+ "weight_ranking" : 1,
|
|
|
+ "deciders" : [
|
|
|
+ {
|
|
|
+ "decider" : "filter",
|
|
|
+ "decision" : "NO",
|
|
|
+ "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"non_existent_node\"]"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
+<1> Whether the shard is allowed to remain on its current node
|
|
|
+<2> The deciders that factored into the decision of why the shard is not allowed to remain on its current node
|
|
|
+<3> Whether the shard is allowed to be allocated to another node
|
|
|
|
|
|
-Additionally, you can return information gathered by the cluster info service
|
|
|
-about disk usage and shard sizes by setting the `include_disk_info` parameter to
|
|
|
-`true`:
|
|
|
+The API response output for an assigned shard that remains on its current node
|
|
|
+because moving the shard to another node does not form a better cluster balance:
|
|
|
|
|
|
[source,js]
|
|
|
--------------------------------------------------
|
|
|
-$ curl -XGET 'http://localhost:9200/_cluster/allocation/explain?include_disk_info=true'
|
|
|
+{
|
|
|
+ "index" : "idx",
|
|
|
+ "shard" : 0,
|
|
|
+ "primary" : true,
|
|
|
+ "current_state" : "started",
|
|
|
+ "current_node" : {
|
|
|
+ "id" : "wLzJm4N4RymDkBYxwWoJsg",
|
|
|
+ "name" : "node_t0",
|
|
|
+ "transport_address" : "127.0.0.1:9400",
|
|
|
+ "weight_ranking" : 1
|
|
|
+ },
|
|
|
+ "can_remain_on_current_node" : "yes",
|
|
|
+ "can_rebalance_cluster" : "yes", <1>
|
|
|
+ "can_rebalance_to_other_node" : "no", <2>
|
|
|
+ "rebalance_explanation" : "cannot rebalance as no target node exists that can both allocate this shard and improve the cluster balance",
|
|
|
+ "node_allocation_decisions" : [
|
|
|
+ {
|
|
|
+ "node_id" : "oE3EGFc8QN-Tdi5FFEprIA",
|
|
|
+ "node_name" : "node_t1",
|
|
|
+ "transport_address" : "127.0.0.1:9401",
|
|
|
+ "node_decision" : "worse_balance", <3>
|
|
|
+ "weight_ranking" : 1
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
--------------------------------------------------
|
|
|
+<1> Whether rebalancing is allowed on the cluster
|
|
|
+<2> Whether the shard can be rebalanced to another node
|
|
|
+<3> The reason the shard cannot be rebalanced to the node, in this case indicating that it offers no better balance than the current node
|