|
@@ -0,0 +1,299 @@
|
|
|
+[role="xpack"]
|
|
|
+[testenv="basic"]
|
|
|
+[[get-trained-model-deployment-stats]]
|
|
|
+= Get trained model deployment statistics API
|
|
|
+[subs="attributes"]
|
|
|
+++++
|
|
|
+<titleabbrev>Get trained model deployment stats</titleabbrev>
|
|
|
+++++
|
|
|
+
|
|
|
+Retrieves usage information for trained model deployments.
|
|
|
+
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-request]]
|
|
|
+== {api-request-title}
|
|
|
+
|
|
|
+`GET _ml/trained_models/<model_id>/deployment/_stats` +
|
|
|
+
|
|
|
+`GET _ml/trained_models/<model_id>,<model_id_2>/deployment/_stats` +
|
|
|
+
|
|
|
+`GET _ml/trained_models/<model_id_pattern*>,<model_id_2>/deployment/_stats`
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-prereq]]
|
|
|
+== {api-prereq-title}
|
|
|
+
|
|
|
+Requires the `monitor_ml` cluster privilege. This privilege is included in the
|
|
|
+`machine_learning_user` built-in role.
|
|
|
+
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-desc]]
|
|
|
+== {api-description-title}
|
|
|
+
|
|
|
+You can get deployment information for multiple trained models in a single API
|
|
|
+request by using a comma-separated list of model IDs or a wildcard expression.
|
|
|
+
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-path-params]]
|
|
|
+== {api-path-parms-title}
|
|
|
+
|
|
|
+`<model_id>`::
|
|
|
+(Optional, string)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
|
|
|
+
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-query-params]]
|
|
|
+== {api-query-parms-title}
|
|
|
+
|
|
|
+`allow_no_match`::
|
|
|
+(Optional, Boolean)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-models]
|
|
|
+
|
|
|
+[role="child_attributes"]
|
|
|
+[[ml-get-trained-model-deployment-stats-results]]
|
|
|
+== {api-response-body-title}
|
|
|
+
|
|
|
+`count`::
|
|
|
+(integer)
|
|
|
+The total number of deployment statistics that matched the requested ID
|
|
|
+patterns.
|
|
|
+
|
|
|
+`deployment_stats`::
|
|
|
+(array)
|
|
|
+An array of trained model deployment statistics, which are sorted by the `model_id` value
|
|
|
+in ascending order.
|
|
|
++
|
|
|
+.Properties of trained model deployment stats
|
|
|
+[%collapsible%open]
|
|
|
+====
|
|
|
+`model_id`:::
|
|
|
+(string)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
|
|
|
+
|
|
|
+`model_size`:::
|
|
|
+(<<byte-units,byte value>>)
|
|
|
+The size of the loaded model in bytes.
|
|
|
+
|
|
|
+`state`:::
|
|
|
+(string)
|
|
|
+The overall state of the deployment. The values may be:
|
|
|
++
|
|
|
+--
|
|
|
+* `starting`: The deployment has recently started but is not yet usable as the model is not allocated on any nodes.
|
|
|
+* `started`: The deployment is usable as at least one node has the model allocated.
|
|
|
+* `stopping`: The deployment is preparing to stop and un-allocate the model from the relevant nodes.
|
|
|
+--
|
|
|
+
|
|
|
+`allocation_status`:::
|
|
|
+(object)
|
|
|
+The detailed allocation status given the deployment configuration.
|
|
|
++
|
|
|
+.Properties of allocation stats
|
|
|
+[%collapsible%open]
|
|
|
+=====
|
|
|
+`allocation_count`:::
|
|
|
+(integer)
|
|
|
+The current number of nodes where the model is allocated.
|
|
|
+
|
|
|
+`target_allocation_count`:::
|
|
|
+(integer)
|
|
|
+The desired number of nodes for model allocation.
|
|
|
+
|
|
|
+`state`:::
|
|
|
+(string)
|
|
|
+The detailed allocation state related to the nodes.
|
|
|
++
|
|
|
+--
|
|
|
+* `starting`: Allocations are being attempted but no node currently has the model allocated.
|
|
|
+* `started`: At least one node has the model allocated.
|
|
|
+* `fully_allocated`: The deployment is fully allocated and satisfies the `target_allocation_count`.
|
|
|
+--
|
|
|
+=====
|
|
|
+
|
|
|
+`nodes`:::
|
|
|
+(array of objects)
|
|
|
+The deployment stats for each node that currently has the model allocated.
|
|
|
++
|
|
|
+.Properties of node stats
|
|
|
+[%collapsible%open]
|
|
|
+=====
|
|
|
+`average_inference_time_ms`:::
|
|
|
+(double)
|
|
|
+The average time for each inference call to complete on this node.
|
|
|
+
|
|
|
+`inference_count`:::
|
|
|
+(integer)
|
|
|
+The total number of inference calls made against this node for this model.
|
|
|
+
|
|
|
+`last_access`:::
|
|
|
+(long)
|
|
|
+The epoch time stamp of the last inference call for the model on this node.
|
|
|
+
|
|
|
+`node`:::
|
|
|
+(object)
|
|
|
+Information pertaining to the node.
|
|
|
++
|
|
|
+.Properties of node
|
|
|
+[%collapsible%open]
|
|
|
+======
|
|
|
+`attributes`:::
|
|
|
+(object)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-attributes]
|
|
|
+
|
|
|
+`ephemeral_id`:::
|
|
|
+(string)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
|
|
|
+
|
|
|
+`id`:::
|
|
|
+(string)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-id]
|
|
|
+
|
|
|
+`name`:::
|
|
|
+(string) The node name.
|
|
|
+
|
|
|
+`transport_address`:::
|
|
|
+(string)
|
|
|
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-transport-address]
|
|
|
+======
|
|
|
+
|
|
|
+`routing_state`:::
|
|
|
+(object)
|
|
|
+The current routing state and reason for the current routing state for this allocation.
|
|
|
++
|
|
|
+--
|
|
|
+* `starting`: The model is attempting to allocate on this model, inference calls are not yet accepted.
|
|
|
+* `started`: The model is allocated and ready to accept inference requests.
|
|
|
+* `stopping`: The model is being de-allocated from this node.
|
|
|
+* `stopped`: The model is fully de-allocated from this node.
|
|
|
+* `failed`: The allocation attempt failed, see `reason` field for the potential cause.
|
|
|
+--
|
|
|
+
|
|
|
+`reason`:::
|
|
|
+(string)
|
|
|
+The reason for the current state. Usually only populated when the `routing_state` is `failed`.
|
|
|
+
|
|
|
+=====
|
|
|
+====
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-response-codes]]
|
|
|
+== {api-response-codes-title}
|
|
|
+
|
|
|
+`404` (Missing resources)::
|
|
|
+ If `allow_no_match` is `false`, this code indicates that there are no
|
|
|
+ resources that match the request or only partial matches for the request.
|
|
|
+
|
|
|
+[[ml-get-trained-model-deployment-stats-example]]
|
|
|
+== {api-examples-title}
|
|
|
+
|
|
|
+The following example gets deployment information for all currently started model deployments:
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+GET _ml/trained_models/*/deployment/_stats
|
|
|
+--------------------------------------------------
|
|
|
+// TEST[skip:TBD]
|
|
|
+
|
|
|
+
|
|
|
+The API returns the following results:
|
|
|
+
|
|
|
+[source,console-result]
|
|
|
+----
|
|
|
+{
|
|
|
+ "count": 2,
|
|
|
+ "deployment_stats": [
|
|
|
+ {
|
|
|
+ "model_id": "elastic__distilbert-base-uncased-finetuned-conll03-english",
|
|
|
+ "model_size": "253.3mb",
|
|
|
+ "state": "started",
|
|
|
+ "allocation_status": {
|
|
|
+ "allocation_count": 1,
|
|
|
+ "target_allocation_count": 1,
|
|
|
+ "state": "fully_allocated"
|
|
|
+ },
|
|
|
+ "nodes": [
|
|
|
+ {
|
|
|
+ "node": {
|
|
|
+ "6pzZQ9OmQUWAaswMlwVEwg": {
|
|
|
+ "name": "runTask-0",
|
|
|
+ "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
|
|
|
+ "transport_address": "127.0.0.1:9300",
|
|
|
+ "attributes": {
|
|
|
+ "ml.machine_memory": "68719476736",
|
|
|
+ "xpack.installed": "true",
|
|
|
+ "testattr": "test",
|
|
|
+ "ml.max_open_jobs": "512",
|
|
|
+ "ml.max_jvm_size": "4181590016"
|
|
|
+ },
|
|
|
+ "roles": [
|
|
|
+ "data",
|
|
|
+ "data_cold",
|
|
|
+ "data_content",
|
|
|
+ "data_frozen",
|
|
|
+ "data_hot",
|
|
|
+ "data_warm",
|
|
|
+ "ingest",
|
|
|
+ "master",
|
|
|
+ "ml",
|
|
|
+ "remote_cluster_client",
|
|
|
+ "transform"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "routing_state": {
|
|
|
+ "routing_state": "started"
|
|
|
+ },
|
|
|
+ "inference_count": 9,
|
|
|
+ "average_inference_time_ms": 51,
|
|
|
+ "last_access": 1632855681069
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "model_id": "typeform__distilbert-base-uncased-mnli",
|
|
|
+ "model_size": "255.5mb",
|
|
|
+ "state": "started",
|
|
|
+ "allocation_status": {
|
|
|
+ "allocation_count": 1,
|
|
|
+ "target_allocation_count": 1,
|
|
|
+ "state": "fully_allocated"
|
|
|
+ },
|
|
|
+ "nodes": [
|
|
|
+ {
|
|
|
+ "node": {
|
|
|
+ "6pzZQ9OmQUWAaswMlwVEwg": {
|
|
|
+ "name": "runTask-0",
|
|
|
+ "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
|
|
|
+ "transport_address": "127.0.0.1:9300",
|
|
|
+ "attributes": {
|
|
|
+ "ml.machine_memory": "68719476736",
|
|
|
+ "xpack.installed": "true",
|
|
|
+ "testattr": "test",
|
|
|
+ "ml.max_open_jobs": "512",
|
|
|
+ "ml.max_jvm_size": "4181590016"
|
|
|
+ },
|
|
|
+ "roles": [
|
|
|
+ "data",
|
|
|
+ "data_cold",
|
|
|
+ "data_content",
|
|
|
+ "data_frozen",
|
|
|
+ "data_hot",
|
|
|
+ "data_warm",
|
|
|
+ "ingest",
|
|
|
+ "master",
|
|
|
+ "ml",
|
|
|
+ "remote_cluster_client",
|
|
|
+ "transform"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "routing_state": {
|
|
|
+ "routing_state": "started"
|
|
|
+ },
|
|
|
+ "inference_count": 0,
|
|
|
+ "average_inference_time_ms": 0
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+----
|
|
|
+// NOTCONSOLE
|