|
@@ -31,6 +31,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
private final Double avgInferenceTime;
|
|
|
private final Instant lastAccess;
|
|
|
private final Integer pendingCount;
|
|
|
+ private final int errorCount;
|
|
|
+ private final int rejectedExecutionCount;
|
|
|
+ private final int timeoutCount;
|
|
|
private final RoutingStateAndReason routingState;
|
|
|
private final Instant startTime;
|
|
|
private final Integer inferenceThreads;
|
|
@@ -41,6 +44,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
long inferenceCount,
|
|
|
Double avgInferenceTime,
|
|
|
int pendingCount,
|
|
|
+ int errorCount,
|
|
|
+ int rejectedExecutionCount,
|
|
|
+ int timeoutCount,
|
|
|
Instant lastAccess,
|
|
|
Instant startTime,
|
|
|
Integer inferenceThreads,
|
|
@@ -52,6 +58,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
avgInferenceTime,
|
|
|
lastAccess,
|
|
|
pendingCount,
|
|
|
+ errorCount,
|
|
|
+ rejectedExecutionCount,
|
|
|
+ timeoutCount,
|
|
|
new RoutingStateAndReason(RoutingState.STARTED, null),
|
|
|
Objects.requireNonNull(startTime),
|
|
|
inferenceThreads,
|
|
@@ -60,7 +69,20 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
}
|
|
|
|
|
|
public static AllocationStats.NodeStats forNotStartedState(DiscoveryNode node, RoutingState state, String reason) {
|
|
|
- return new AllocationStats.NodeStats(node, null, null, null, null, new RoutingStateAndReason(state, reason), null, null, null);
|
|
|
+ return new AllocationStats.NodeStats(
|
|
|
+ node,
|
|
|
+ null,
|
|
|
+ null,
|
|
|
+ null,
|
|
|
+ null,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ 0,
|
|
|
+ new RoutingStateAndReason(state, reason),
|
|
|
+ null,
|
|
|
+ null,
|
|
|
+ null
|
|
|
+ );
|
|
|
}
|
|
|
|
|
|
public NodeStats(
|
|
@@ -69,6 +91,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
Double avgInferenceTime,
|
|
|
Instant lastAccess,
|
|
|
Integer pendingCount,
|
|
|
+ int errorCount,
|
|
|
+ int rejectedExecutionCount,
|
|
|
+ int timeoutCount,
|
|
|
RoutingStateAndReason routingState,
|
|
|
@Nullable Instant startTime,
|
|
|
@Nullable Integer inferenceThreads,
|
|
@@ -79,6 +104,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
this.avgInferenceTime = avgInferenceTime;
|
|
|
this.lastAccess = lastAccess;
|
|
|
this.pendingCount = pendingCount;
|
|
|
+ this.errorCount = errorCount;
|
|
|
+ this.rejectedExecutionCount = rejectedExecutionCount;
|
|
|
+ this.timeoutCount = timeoutCount;
|
|
|
this.routingState = routingState;
|
|
|
this.startTime = startTime;
|
|
|
this.inferenceThreads = inferenceThreads;
|
|
@@ -96,13 +124,18 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
this.pendingCount = in.readOptionalVInt();
|
|
|
this.routingState = in.readOptionalWriteable(RoutingStateAndReason::new);
|
|
|
this.startTime = in.readOptionalInstant();
|
|
|
-
|
|
|
if (in.getVersion().onOrAfter(Version.V_8_1_0)) {
|
|
|
this.inferenceThreads = in.readOptionalVInt();
|
|
|
this.modelThreads = in.readOptionalVInt();
|
|
|
+ this.errorCount = in.readVInt();
|
|
|
+ this.rejectedExecutionCount = in.readVInt();
|
|
|
+ this.timeoutCount = in.readVInt();
|
|
|
} else {
|
|
|
this.inferenceThreads = null;
|
|
|
this.modelThreads = null;
|
|
|
+ this.errorCount = 0;
|
|
|
+ this.rejectedExecutionCount = 0;
|
|
|
+ this.timeoutCount = 0;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -130,6 +163,18 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
return pendingCount;
|
|
|
}
|
|
|
|
|
|
+ public int getErrorCount() {
|
|
|
+ return errorCount;
|
|
|
+ }
|
|
|
+
|
|
|
+ public int getRejectedExecutionCount() {
|
|
|
+ return rejectedExecutionCount;
|
|
|
+ }
|
|
|
+
|
|
|
+ public int getTimeoutCount() {
|
|
|
+ return timeoutCount;
|
|
|
+ }
|
|
|
+
|
|
|
public Instant getStartTime() {
|
|
|
return startTime;
|
|
|
}
|
|
@@ -146,7 +191,8 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
if (inferenceCount != null) {
|
|
|
builder.field("inference_count", inferenceCount);
|
|
|
}
|
|
|
- if (avgInferenceTime != null) {
|
|
|
+ // avoid reporting the average time as 0 if count < 1
|
|
|
+ if (avgInferenceTime != null && (inferenceCount != null && inferenceCount > 0)) {
|
|
|
builder.field("average_inference_time_ms", avgInferenceTime);
|
|
|
}
|
|
|
if (lastAccess != null) {
|
|
@@ -155,6 +201,15 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
if (pendingCount != null) {
|
|
|
builder.field("number_of_pending_requests", pendingCount);
|
|
|
}
|
|
|
+ if (errorCount > 0) {
|
|
|
+ builder.field("error_count", errorCount);
|
|
|
+ }
|
|
|
+ if (rejectedExecutionCount > 0) {
|
|
|
+ builder.field("rejected_execution_count", rejectedExecutionCount);
|
|
|
+ }
|
|
|
+ if (timeoutCount > 0) {
|
|
|
+ builder.field("timeout_count", timeoutCount);
|
|
|
+ }
|
|
|
if (startTime != null) {
|
|
|
builder.timeField("start_time", "start_time_string", startTime.toEpochMilli());
|
|
|
}
|
|
@@ -180,6 +235,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
if (out.getVersion().onOrAfter(Version.V_8_1_0)) {
|
|
|
out.writeOptionalVInt(inferenceThreads);
|
|
|
out.writeOptionalVInt(modelThreads);
|
|
|
+ out.writeVInt(errorCount);
|
|
|
+ out.writeVInt(rejectedExecutionCount);
|
|
|
+ out.writeVInt(timeoutCount);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -193,6 +251,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
&& Objects.equals(node, that.node)
|
|
|
&& Objects.equals(lastAccess, that.lastAccess)
|
|
|
&& Objects.equals(pendingCount, that.pendingCount)
|
|
|
+ && Objects.equals(errorCount, that.errorCount)
|
|
|
+ && Objects.equals(rejectedExecutionCount, that.rejectedExecutionCount)
|
|
|
+ && Objects.equals(timeoutCount, that.timeoutCount)
|
|
|
&& Objects.equals(routingState, that.routingState)
|
|
|
&& Objects.equals(startTime, that.startTime)
|
|
|
&& Objects.equals(inferenceThreads, that.inferenceThreads)
|
|
@@ -207,6 +268,9 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
avgInferenceTime,
|
|
|
lastAccess,
|
|
|
pendingCount,
|
|
|
+ errorCount,
|
|
|
+ rejectedExecutionCount,
|
|
|
+ timeoutCount,
|
|
|
routingState,
|
|
|
startTime,
|
|
|
inferenceThreads,
|
|
@@ -331,6 +395,28 @@ public class AllocationStats implements ToXContentObject, Writeable {
|
|
|
builder.field("allocation_status", allocationStatus);
|
|
|
}
|
|
|
builder.timeField("start_time", "start_time_string", startTime.toEpochMilli());
|
|
|
+
|
|
|
+ int totalErrorCount = nodeStats.stream().mapToInt(NodeStats::getErrorCount).sum();
|
|
|
+ int totalRejectedExecutionCount = nodeStats.stream().mapToInt(NodeStats::getRejectedExecutionCount).sum();
|
|
|
+ int totalTimeoutCount = nodeStats.stream().mapToInt(NodeStats::getTimeoutCount).sum();
|
|
|
+ long totalInferenceCount = nodeStats.stream()
|
|
|
+ .filter(n -> n.getInferenceCount().isPresent())
|
|
|
+ .mapToLong(n -> n.getInferenceCount().get())
|
|
|
+ .sum();
|
|
|
+
|
|
|
+ if (totalErrorCount > 0) {
|
|
|
+ builder.field("error_count", totalErrorCount);
|
|
|
+ }
|
|
|
+ if (totalRejectedExecutionCount > 0) {
|
|
|
+ builder.field("rejected_execution_count", totalRejectedExecutionCount);
|
|
|
+ }
|
|
|
+ if (totalTimeoutCount > 0) {
|
|
|
+ builder.field("timeout_count", totalTimeoutCount);
|
|
|
+ }
|
|
|
+ if (totalInferenceCount > 0) {
|
|
|
+ builder.field("inference_count", totalInferenceCount);
|
|
|
+ }
|
|
|
+
|
|
|
builder.startArray("nodes");
|
|
|
for (AllocationStats.NodeStats nodeStat : nodeStats) {
|
|
|
nodeStat.toXContent(builder, params);
|