|
|
@@ -41,9 +41,40 @@ import java.util.Set;
|
|
|
import static org.elasticsearch.cluster.routing.allocation.ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_SETTING;
|
|
|
|
|
|
/**
|
|
|
- * Holds additional information as to why the shard is in unassigned state.
|
|
|
+ * Holds additional information as to why the shard is in an unassigned state.
|
|
|
+ *
|
|
|
+ * @param reason why the shard is unassigned.
|
|
|
+ * @param message optional details explaining the reasons.
|
|
|
+ * @param failure additional failure exception details if exists.
|
|
|
+ * @param failedAllocations number of previously failed allocations of this shard.
|
|
|
+ * @param delayed true if allocation of this shard is delayed due to {@link #INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING}.
|
|
|
+ * @param unassignedTimeMillis The timestamp in milliseconds when the shard became unassigned, based on System.currentTimeMillis().
|
|
|
+ * Note, we use timestamp here since we want to make sure its preserved across node serializations.
|
|
|
+ * @param unassignedTimeNanos The timestamp in nanoseconds when the shard became unassigned, based on System.nanoTime().
|
|
|
+ * Used to calculate the delay for delayed shard allocation.
|
|
|
+ * ONLY EXPOSED FOR TESTS!
|
|
|
+ * @param lastAllocationStatus status for the last allocation attempt for this shard.
|
|
|
+ * @param failedNodeIds A set of nodeIds that failed to complete allocations for this shard.
|
|
|
+ * {@link org.elasticsearch.gateway.ReplicaShardAllocator} uses this bset to avoid repeatedly canceling ongoing
|
|
|
+ * recoveries for copies on those nodes, although they can perform noop recoveries. This set will be discarded when a
|
|
|
+ * shard moves to started. And if a shard is failed while started (i.e., from started to unassigned), the currently
|
|
|
+ * assigned node won't be added to this set.
|
|
|
+ * @see org.elasticsearch.gateway.ReplicaShardAllocator#processExistingRecoveries
|
|
|
+ * @see org.elasticsearch.cluster.routing.allocation.AllocationService#applyFailedShards(ClusterState, List, List)
|
|
|
+ * @param lastAllocatedNodeId ID of the node this shard was last allocated to, or null if unavailable.
|
|
|
*/
|
|
|
-public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
+public record UnassignedInfo(
|
|
|
+ Reason reason,
|
|
|
+ @Nullable String message,
|
|
|
+ @Nullable Exception failure,
|
|
|
+ int failedAllocations,
|
|
|
+ long unassignedTimeNanos,
|
|
|
+ long unassignedTimeMillis,
|
|
|
+ boolean delayed,
|
|
|
+ AllocationStatus lastAllocationStatus,
|
|
|
+ Set<String> failedNodeIds,
|
|
|
+ @Nullable String lastAllocatedNodeId
|
|
|
+) implements ToXContentFragment, Writeable {
|
|
|
|
|
|
/**
|
|
|
* The version that the {@code lastAllocatedNode} field was added in. Used to adapt streaming of this class as appropriate for the
|
|
|
@@ -218,17 +249,6 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private final Reason reason;
|
|
|
- private final long unassignedTimeMillis; // used for display and log messages, in milliseconds
|
|
|
- private final long unassignedTimeNanos; // in nanoseconds, used to calculate delay for delayed shard allocation
|
|
|
- private final boolean delayed; // if allocation of this shard is delayed due to INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING
|
|
|
- private final String message;
|
|
|
- private final Exception failure;
|
|
|
- private final int failedAllocations;
|
|
|
- private final Set<String> failedNodeIds;
|
|
|
- private final AllocationStatus lastAllocationStatus; // result of the last allocation attempt for this shard
|
|
|
- private final String lastAllocatedNodeId;
|
|
|
-
|
|
|
/**
|
|
|
* creates an UnassignedInfo object based on **current** time
|
|
|
*
|
|
|
@@ -261,28 +281,10 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
* @param failedNodeIds a set of nodeIds that failed to complete allocations for this shard
|
|
|
* @param lastAllocatedNodeId the ID of the node this shard was last allocated to
|
|
|
*/
|
|
|
- public UnassignedInfo(
|
|
|
- Reason reason,
|
|
|
- @Nullable String message,
|
|
|
- @Nullable Exception failure,
|
|
|
- int failedAllocations,
|
|
|
- long unassignedTimeNanos,
|
|
|
- long unassignedTimeMillis,
|
|
|
- boolean delayed,
|
|
|
- AllocationStatus lastAllocationStatus,
|
|
|
- Set<String> failedNodeIds,
|
|
|
- @Nullable String lastAllocatedNodeId
|
|
|
- ) {
|
|
|
- this.reason = Objects.requireNonNull(reason);
|
|
|
- this.unassignedTimeMillis = unassignedTimeMillis;
|
|
|
- this.unassignedTimeNanos = unassignedTimeNanos;
|
|
|
- this.delayed = delayed;
|
|
|
- this.message = message;
|
|
|
- this.failure = failure;
|
|
|
- this.failedAllocations = failedAllocations;
|
|
|
- this.lastAllocationStatus = Objects.requireNonNull(lastAllocationStatus);
|
|
|
- this.failedNodeIds = Set.copyOf(failedNodeIds);
|
|
|
- this.lastAllocatedNodeId = lastAllocatedNodeId;
|
|
|
+ public UnassignedInfo {
|
|
|
+ Objects.requireNonNull(reason);
|
|
|
+ Objects.requireNonNull(lastAllocationStatus);
|
|
|
+ failedNodeIds = Set.copyOf(failedNodeIds);
|
|
|
assert (failedAllocations > 0) == (reason == Reason.ALLOCATION_FAILED)
|
|
|
: "failedAllocations: " + failedAllocations + " for reason " + reason;
|
|
|
assert (message == null && failure != null) == false : "provide a message if a failure exception is provided";
|
|
|
@@ -294,24 +296,37 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
: "last allocated node ID must be set if the shard is unassigned due to a node restarting";
|
|
|
}
|
|
|
|
|
|
- public UnassignedInfo(StreamInput in) throws IOException {
|
|
|
+ public static UnassignedInfo fromStreamInput(StreamInput in) throws IOException {
|
|
|
// Because Reason.NODE_RESTARTING is new and can't be sent by older versions, there's no need to vary the deserialization behavior
|
|
|
- this.reason = Reason.values()[(int) in.readByte()];
|
|
|
- this.unassignedTimeMillis = in.readLong();
|
|
|
+ var reason = Reason.values()[(int) in.readByte()];
|
|
|
+ var unassignedTimeMillis = in.readLong();
|
|
|
// As System.nanoTime() cannot be compared across different JVMs, reset it to now.
|
|
|
// This means that in master fail-over situations, elapsed delay time is forgotten.
|
|
|
- this.unassignedTimeNanos = System.nanoTime();
|
|
|
- this.delayed = in.readBoolean();
|
|
|
- this.message = in.readOptionalString();
|
|
|
- this.failure = in.readException();
|
|
|
- this.failedAllocations = in.readVInt();
|
|
|
- this.lastAllocationStatus = AllocationStatus.readFrom(in);
|
|
|
- this.failedNodeIds = in.readCollectionAsImmutableSet(StreamInput::readString);
|
|
|
+ var unassignedTimeNanos = System.nanoTime();
|
|
|
+ var delayed = in.readBoolean();
|
|
|
+ var message = in.readOptionalString();
|
|
|
+ var failure = in.readException();
|
|
|
+ var failedAllocations = in.readVInt();
|
|
|
+ var lastAllocationStatus = AllocationStatus.readFrom(in);
|
|
|
+ var failedNodeIds = in.readCollectionAsImmutableSet(StreamInput::readString);
|
|
|
+ String lastAllocatedNodeId;
|
|
|
if (in.getTransportVersion().onOrAfter(VERSION_LAST_ALLOCATED_NODE_ADDED)) {
|
|
|
- this.lastAllocatedNodeId = in.readOptionalString();
|
|
|
+ lastAllocatedNodeId = in.readOptionalString();
|
|
|
} else {
|
|
|
- this.lastAllocatedNodeId = null;
|
|
|
+ lastAllocatedNodeId = null;
|
|
|
}
|
|
|
+ return new UnassignedInfo(
|
|
|
+ reason,
|
|
|
+ message,
|
|
|
+ failure,
|
|
|
+ failedAllocations,
|
|
|
+ unassignedTimeNanos,
|
|
|
+ unassignedTimeMillis,
|
|
|
+ delayed,
|
|
|
+ lastAllocationStatus,
|
|
|
+ failedNodeIds,
|
|
|
+ lastAllocatedNodeId
|
|
|
+ );
|
|
|
}
|
|
|
|
|
|
public void writeTo(StreamOutput out) throws IOException {
|
|
|
@@ -335,107 +350,25 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * Returns the number of previously failed allocations of this shard.
|
|
|
- */
|
|
|
- public int getNumFailedAllocations() {
|
|
|
- return failedAllocations;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Returns true if allocation of this shard is delayed due to {@link #INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING}
|
|
|
- */
|
|
|
- public boolean isDelayed() {
|
|
|
- return delayed;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * The reason why the shard is unassigned.
|
|
|
- */
|
|
|
- public Reason getReason() {
|
|
|
- return this.reason;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * The timestamp in milliseconds when the shard became unassigned, based on System.currentTimeMillis().
|
|
|
- * Note, we use timestamp here since we want to make sure its preserved across node serializations.
|
|
|
- */
|
|
|
- public long getUnassignedTimeInMillis() {
|
|
|
- return this.unassignedTimeMillis;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * The timestamp in nanoseconds when the shard became unassigned, based on System.nanoTime().
|
|
|
- * Used to calculate the delay for delayed shard allocation.
|
|
|
- * ONLY EXPOSED FOR TESTS!
|
|
|
- */
|
|
|
- public long getUnassignedTimeInNanos() {
|
|
|
- return this.unassignedTimeNanos;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Returns optional details explaining the reasons.
|
|
|
- */
|
|
|
- @Nullable
|
|
|
- public String getMessage() {
|
|
|
- return this.message;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Returns additional failure exception details if exists.
|
|
|
- */
|
|
|
- @Nullable
|
|
|
- public Exception getFailure() {
|
|
|
- return failure;
|
|
|
- }
|
|
|
-
|
|
|
/**
|
|
|
* Builds a string representation of the message and the failure if exists.
|
|
|
*/
|
|
|
@Nullable
|
|
|
- public String getDetails() {
|
|
|
+ public String details() {
|
|
|
if (message == null) {
|
|
|
return null;
|
|
|
}
|
|
|
return message + (failure == null ? "" : ", failure " + ExceptionsHelper.stackTrace(failure));
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * Gets the ID of the node this shard was last allocated to, or null if unavailable.
|
|
|
- */
|
|
|
- @Nullable
|
|
|
- public String getLastAllocatedNodeId() {
|
|
|
- return lastAllocatedNodeId;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Get the status for the last allocation attempt for this shard.
|
|
|
- */
|
|
|
- public AllocationStatus getLastAllocationStatus() {
|
|
|
- return lastAllocationStatus;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * A set of nodeIds that failed to complete allocations for this shard. {@link org.elasticsearch.gateway.ReplicaShardAllocator}
|
|
|
- * uses this set to avoid repeatedly canceling ongoing recoveries for copies on those nodes although they can perform noop recoveries.
|
|
|
- * This set will be discarded when a shard moves to started. And if a shard is failed while started (i.e., from started to unassigned),
|
|
|
- * the currently assigned node won't be added to this set.
|
|
|
- *
|
|
|
- * @see org.elasticsearch.gateway.ReplicaShardAllocator#processExistingRecoveries
|
|
|
- * @see org.elasticsearch.cluster.routing.allocation.AllocationService#applyFailedShards(ClusterState, List, List)
|
|
|
- */
|
|
|
- public Set<String> getFailedNodeIds() {
|
|
|
- return failedNodeIds;
|
|
|
- }
|
|
|
-
|
|
|
/**
|
|
|
* Calculates the delay left based on current time (in nanoseconds) and the delay defined by the index settings.
|
|
|
- * Only relevant if shard is effectively delayed (see {@link #isDelayed()})
|
|
|
+ * Only relevant if shard is effectively delayed (see {@link #delayed()})
|
|
|
* Returns 0 if delay is negative
|
|
|
*
|
|
|
* @return calculated delay in nanoseconds
|
|
|
*/
|
|
|
- public long getRemainingDelay(final long nanoTimeNow, final Settings indexSettings, final NodesShutdownMetadata nodesShutdownMetadata) {
|
|
|
+ public long remainingDelay(final long nanoTimeNow, final Settings indexSettings, final NodesShutdownMetadata nodesShutdownMetadata) {
|
|
|
final long indexLevelDelay = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexSettings).nanos();
|
|
|
long delayTimeoutNanos = Optional.ofNullable(lastAllocatedNodeId)
|
|
|
// If the node wasn't restarting when this became unassigned, use default delay
|
|
|
@@ -455,7 +388,7 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
public static int getNumberOfDelayedUnassigned(ClusterState state) {
|
|
|
int count = 0;
|
|
|
for (ShardRouting shard : state.getRoutingNodes().unassigned()) {
|
|
|
- if (shard.unassignedInfo().isDelayed()) {
|
|
|
+ if (shard.unassignedInfo().delayed()) {
|
|
|
count++;
|
|
|
}
|
|
|
}
|
|
|
@@ -472,10 +405,10 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
long nextDelayNanos = Long.MAX_VALUE;
|
|
|
for (ShardRouting shard : state.getRoutingNodes().unassigned()) {
|
|
|
UnassignedInfo unassignedInfo = shard.unassignedInfo();
|
|
|
- if (unassignedInfo.isDelayed()) {
|
|
|
+ if (unassignedInfo.delayed()) {
|
|
|
Settings indexSettings = metadata.index(shard.index()).getSettings();
|
|
|
// calculate next time to schedule
|
|
|
- final long newComputedLeftDelayNanos = unassignedInfo.getRemainingDelay(
|
|
|
+ final long newComputedLeftDelayNanos = unassignedInfo.remainingDelay(
|
|
|
currentNanoTime,
|
|
|
indexSettings,
|
|
|
metadata.nodeShutdowns()
|
|
|
@@ -502,7 +435,7 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
if (lastAllocatedNodeId != null) {
|
|
|
sb.append(", last_node[").append(lastAllocatedNodeId).append("]");
|
|
|
}
|
|
|
- String details = getDetails();
|
|
|
+ String details = details();
|
|
|
if (details != null) {
|
|
|
sb.append(", details[").append(details).append("]");
|
|
|
}
|
|
|
@@ -530,7 +463,7 @@ public final class UnassignedInfo implements ToXContentFragment, Writeable {
|
|
|
if (lastAllocatedNodeId != null) {
|
|
|
builder.field("last_node", lastAllocatedNodeId);
|
|
|
}
|
|
|
- String details = getDetails();
|
|
|
+ String details = details();
|
|
|
if (details != null) {
|
|
|
builder.field("details", details);
|
|
|
}
|