1
0
Эх сурвалжийг харах

Add more detailed shard status on shutdown (#102271)

When the shutdown API is polled, shard migrations are reported as a
single total remaining. However, that number is made up of more detailed
counts within cluster state. This commit expands the counts reported for
migration status to include started, relocating and initializing shard
counts.
Ryan Ernst 1 жил өмнө
parent
commit
ac649f6289

+ 1 - 0
server/src/main/java/org/elasticsearch/TransportVersions.java

@@ -171,6 +171,7 @@ public class TransportVersions {
     public static final TransportVersion ML_STATE_CHANGE_TIMESTAMPS = def(8_540_00_0);
     public static final TransportVersion DATA_STREAM_FAILURE_STORE_ADDED = def(8_541_00_0);
     public static final TransportVersion ML_INFERENCE_OPENAI_ADDED = def(8_542_00_0);
+    public static final TransportVersion SHUTDOWN_MIGRATION_STATUS_INCLUDE_COUNTS = def(8_543_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,

+ 79 - 9
server/src/main/java/org/elasticsearch/cluster/metadata/ShutdownShardMigrationStatus.java

@@ -36,27 +36,72 @@ public class ShutdownShardMigrationStatus implements Writeable, ChunkedToXConten
     public static final String NODE_ALLOCATION_DECISION_KEY = "node_allocation_decision";
 
     private final SingleNodeShutdownMetadata.Status status;
+    private final long startedShards;
+    private final long relocatingShards;
+    private final long initializingShards;
     private final long shardsRemaining;
     @Nullable
     private final String explanation;
     @Nullable
     private final ShardAllocationDecision allocationDecision;
 
-    public ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status status, long shardsRemaining) {
-        this(status, shardsRemaining, null, null);
+    public ShutdownShardMigrationStatus(
+        SingleNodeShutdownMetadata.Status status,
+        long shardsRemaining,
+        @Nullable String explanation,
+        @Nullable ShardAllocationDecision allocationDecision
+    ) {
+        this(status, -1, -1, -1, shardsRemaining, explanation, allocationDecision); // -1: per-state counts not provided
     }
 
-    public ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status status, long shardsRemaining, @Nullable String explanation) {
-        this(status, shardsRemaining, explanation, null);
+    public ShutdownShardMigrationStatus(
+        SingleNodeShutdownMetadata.Status status,
+        long startedShards,
+        long relocatingShards,
+        long initializingShards
+    ) {
+        this(
+            status,
+            startedShards,
+            relocatingShards,
+            initializingShards,
+            startedShards + relocatingShards + initializingShards, // shardsRemaining: sum of the three counts
+            null, // explanation
+            null // allocationDecision
+        );
     }
 
     public ShutdownShardMigrationStatus(
         SingleNodeShutdownMetadata.Status status,
+        long startedShards,
+        long relocatingShards,
+        long initializingShards,
+        @Nullable String explanation
+    ) {
+        this(
+            status,
+            startedShards,
+            relocatingShards,
+            initializingShards,
+            startedShards + relocatingShards + initializingShards, // shardsRemaining: sum of the three counts
+            explanation,
+            null // allocationDecision
+        );
+    }
+
+    private ShutdownShardMigrationStatus(
+        SingleNodeShutdownMetadata.Status status,
+        long startedShards,
+        long relocatingShards,
+        long initializingShards,
         long shardsRemaining,
         @Nullable String explanation,
         @Nullable ShardAllocationDecision allocationDecision
     ) {
         this.status = Objects.requireNonNull(status, "status must not be null");
+        this.startedShards = startedShards;
+        this.relocatingShards = relocatingShards;
+        this.initializingShards = initializingShards;
         this.shardsRemaining = shardsRemaining;
         this.explanation = explanation;
         this.allocationDecision = allocationDecision;
@@ -64,7 +109,17 @@ public class ShutdownShardMigrationStatus implements Writeable, ChunkedToXConten
 
     public ShutdownShardMigrationStatus(StreamInput in) throws IOException {
         this.status = in.readEnum(SingleNodeShutdownMetadata.Status.class);
-        this.shardsRemaining = in.readLong();
+        if (in.getTransportVersion().onOrAfter(TransportVersions.SHUTDOWN_MIGRATION_STATUS_INCLUDE_COUNTS)) {
+            this.startedShards = in.readZLong();
+            this.relocatingShards = in.readZLong();
+            this.initializingShards = in.readZLong();
+            this.shardsRemaining = in.readZLong();
+        } else {
+            this.startedShards = -1;
+            this.relocatingShards = -1;
+            this.initializingShards = -1;
+            this.shardsRemaining = in.readLong();
+        }
         this.explanation = in.readOptionalString();
         if (in.getTransportVersion().onOrAfter(ALLOCATION_DECISION_ADDED_VERSION)) {
             this.allocationDecision = in.readOptionalWriteable(ShardAllocationDecision::new);
@@ -99,6 +154,11 @@ public class ShutdownShardMigrationStatus implements Writeable, ChunkedToXConten
 
     private XContentBuilder buildHeader(XContentBuilder builder) throws IOException {
         builder.field("status", status);
+        if (startedShards != -1) {
+            builder.field("started_shards", startedShards);
+            builder.field("relocating_shards", relocatingShards);
+            builder.field("initializing_shards", initializingShards);
+        }
         builder.field("shard_migrations_remaining", shardsRemaining);
         if (Objects.nonNull(explanation)) {
             builder.field("explanation", explanation);
@@ -109,7 +169,14 @@ public class ShutdownShardMigrationStatus implements Writeable, ChunkedToXConten
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeEnum(status);
-        out.writeLong(shardsRemaining);
+        if (out.getTransportVersion().onOrAfter(TransportVersions.SHUTDOWN_MIGRATION_STATUS_INCLUDE_COUNTS)) {
+            out.writeZLong(startedShards);
+            out.writeZLong(relocatingShards);
+            out.writeZLong(initializingShards);
+            out.writeZLong(shardsRemaining);
+        } else {
+            out.writeLong(shardsRemaining);
+        }
         out.writeOptionalString(explanation);
         if (out.getTransportVersion().onOrAfter(ALLOCATION_DECISION_ADDED_VERSION)) {
             out.writeOptionalWriteable(allocationDecision);
@@ -119,9 +186,12 @@ public class ShutdownShardMigrationStatus implements Writeable, ChunkedToXConten
     @Override
     public boolean equals(Object o) {
         if (this == o) return true;
-        if ((o instanceof ShutdownShardMigrationStatus) == false) return false;
+        if (o == null || getClass() != o.getClass()) return false;
         ShutdownShardMigrationStatus that = (ShutdownShardMigrationStatus) o;
-        return shardsRemaining == that.shardsRemaining
+        return startedShards == that.startedShards
+            && relocatingShards == that.relocatingShards
+            && initializingShards == that.initializingShards
+            && shardsRemaining == that.shardsRemaining
             && status == that.status
             && Objects.equals(explanation, that.explanation)
             && Objects.equals(allocationDecision, that.allocationDecision);
@@ -129,7 +199,7 @@ public class ShutdownShardMigrationStatus implements Writeable, ChunkedToXConten
 
     @Override
     public int hashCode() {
-        return Objects.hash(status, shardsRemaining, explanation, allocationDecision);
+        return Objects.hash(status, startedShards, relocatingShards, initializingShards, shardsRemaining, explanation, allocationDecision);
     }
 
     @Override

+ 17 - 7
x-pack/plugin/shutdown/src/main/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusAction.java

@@ -187,7 +187,8 @@ public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
             return new ShutdownShardMigrationStatus(
                 SingleNodeShutdownMetadata.Status.COMPLETE,
                 0,
-                "no shard relocation is necessary for a node restart"
+                "no shard relocation is necessary for a node restart",
+                null
             );
         }
 
@@ -196,7 +197,8 @@ public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
             return new ShutdownShardMigrationStatus(
                 SingleNodeShutdownMetadata.Status.NOT_STARTED,
                 0,
-                "node is not currently part of the cluster"
+                "node is not currently part of the cluster",
+                null
             );
         }
 
@@ -242,7 +244,7 @@ public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
         // The node is in `DiscoveryNodes`, but not `RoutingNodes` - so there are no shards assigned to it. We're done.
         if (currentState.getRoutingNodes().node(nodeId) == null) {
             // We don't know about that node
-            return new ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status.COMPLETE, 0);
+            return new ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status.COMPLETE, 0, 0, 0);
         }
 
         // Check if there are any shards currently on this node, and if there are any relocating shards
@@ -256,12 +258,14 @@ public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
             SingleNodeShutdownMetadata.Status shardStatus = totalRemainingShards == 0
                 ? SingleNodeShutdownMetadata.Status.COMPLETE
                 : SingleNodeShutdownMetadata.Status.IN_PROGRESS;
-            return new ShutdownShardMigrationStatus(shardStatus, totalRemainingShards);
+            return new ShutdownShardMigrationStatus(shardStatus, startedShards, relocatingShards, initializingShards);
         } else if (initializingShards > 0 && relocatingShards == 0 && startedShards == 0) {
             // If there's only initializing shards left, return now with a note that only initializing shards are left
             return new ShutdownShardMigrationStatus(
                 SingleNodeShutdownMetadata.Status.IN_PROGRESS,
-                totalRemainingShards,
+                startedShards,
+                relocatingShards,
+                initializingShards,
                 "all remaining shards are currently INITIALIZING and must finish before they can be moved off this node"
             );
         }
@@ -314,7 +318,8 @@ public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
                 0,
                 "["
                     + shardsToIgnoreForFinalStatus.get()
-                    + "] shards cannot be moved away from this node but have at least one copy on another node in the cluster"
+                    + "] shards cannot be moved away from this node but have at least one copy on another node in the cluster",
+                null
             );
         } else if (unmovableShard.isPresent()) {
             // We found a shard that can't be moved, so shard relocation is stalled. Blame the unmovable shard.
@@ -334,7 +339,12 @@ public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
                 decision
             );
         } else {
-            return new ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status.IN_PROGRESS, totalRemainingShards);
+            return new ShutdownShardMigrationStatus(
+                SingleNodeShutdownMetadata.Status.IN_PROGRESS,
+                startedShards,
+                relocatingShards,
+                initializingShards
+            );
         }
     }
 

+ 1 - 1
x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/GetShutdownStatusResponseTests.java

@@ -68,7 +68,7 @@ public class GetShutdownStatusResponseTests extends AbstractWireSerializingTestC
     public static SingleNodeShutdownStatus randomNodeShutdownStatus() {
         return new SingleNodeShutdownStatus(
             randomNodeShutdownMetadata(),
-            new ShutdownShardMigrationStatus(randomStatus(), randomNonNegativeLong()),
+            new ShutdownShardMigrationStatus(randomStatus(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()),
             new ShutdownPersistentTasksStatus(),
             new ShutdownPluginsStatus(randomBoolean())
         );