
[ML] add documentation for get deployment stats API (#78412)

* [ML] add documentation for get deployment stats API

* Apply suggestions from code review

Co-authored-by: István Zoltán Szabó <istvan.szabo@elastic.co>

Benjamin Trent 4 years ago
parent
commit
b96d929af3

+ 299 - 0
docs/reference/ml/df-analytics/apis/get-trained-model-deployment-stats.asciidoc

@@ -0,0 +1,299 @@
+[role="xpack"]
+[testenv="basic"]
+[[get-trained-model-deployment-stats]]
+= Get trained model deployment statistics API
+[subs="attributes"]
+++++
+<titleabbrev>Get trained model deployment stats</titleabbrev>
+++++
+
+Retrieves usage information for trained model deployments.
+
+
+[[ml-get-trained-model-deployment-stats-request]]
+== {api-request-title}
+
+`GET _ml/trained_models/<model_id>/deployment/_stats` +
+
+`GET _ml/trained_models/<model_id>,<model_id_2>/deployment/_stats` +
+
+`GET _ml/trained_models/<model_id_pattern*>,<model_id_2>/deployment/_stats`
+
+[[ml-get-trained-model-deployment-stats-prereq]]
+== {api-prereq-title}
+
+Requires the `monitor_ml` cluster privilege. This privilege is included in the
+`machine_learning_user` built-in role.
+
+
+[[ml-get-trained-model-deployment-stats-desc]]
+== {api-description-title}
+
+You can get deployment information for multiple trained models in a single API
+request by using a comma-separated list of model IDs or a wildcard expression.
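+
+For example, the following request is a minimal sketch of retrieving
+deployment statistics for two deployments at once, one of them addressed by a
+wildcard pattern. The model IDs are placeholders, not real model names:
+
+[source,console]
+--------------------------------------------------
+GET _ml/trained_models/my-ner-model*,my-other-model/deployment/_stats
+--------------------------------------------------
+// TEST[skip:placeholder model IDs]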
+
+
+[[ml-get-trained-model-deployment-stats-path-params]]
+== {api-path-parms-title}
+
+`<model_id>`::
+(Optional, string)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
+
+
+[[ml-get-trained-model-deployment-stats-query-params]]
+== {api-query-parms-title}
+
+`allow_no_match`::
+(Optional, Boolean)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-models]
+
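+As a sketch of how this parameter can be used, the request below sets
+`allow_no_match` to `false`, so a wildcard expression that matches nothing
+returns a `404` error (see the response codes section below) instead of an
+empty result. The model ID pattern is a placeholder:
+
+[source,console]
+--------------------------------------------------
+GET _ml/trained_models/some-model-pattern*/deployment/_stats?allow_no_match=false
+--------------------------------------------------
+// TEST[skip:placeholder model ID pattern]
+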
+[role="child_attributes"]
+[[ml-get-trained-model-deployment-stats-results]]
+== {api-response-body-title}
+
+`count`::
+(integer)
+The total number of deployment statistics that matched the requested ID
+patterns.
+
+`deployment_stats`::
+(array)
+An array of trained model deployment statistics, sorted by the `model_id` value
+in ascending order.
++
+.Properties of trained model deployment stats
+[%collapsible%open]
+====
+`model_id`:::
+(string)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
+
+`model_size`:::
+(<<byte-units,byte value>>)
+The size of the loaded model in bytes.
+
+`state`:::
+(string)
+The overall state of the deployment. The values may be:
++
+--
+* `starting`: The deployment has recently started but is not yet usable as the model is not allocated on any nodes.
+* `started`: The deployment is usable as at least one node has the model allocated.
+* `stopping`: The deployment is preparing to stop and un-allocate the model from the relevant nodes.
+--
+
+`allocation_status`:::
+(object)
+The detailed allocation status given the deployment configuration.
++
+.Properties of allocation stats
+[%collapsible%open]
+=====
+`allocation_count`:::
+(integer)
+The current number of nodes where the model is allocated.
+
+`target_allocation_count`:::
+(integer)
+The desired number of nodes for model allocation.
+
+`state`:::
+(string)
+The detailed allocation state related to the nodes.
++
+--
+* `starting`: Allocations are being attempted but no node currently has the model allocated.
+* `started`: At least one node has the model allocated.
+* `fully_allocated`: The deployment is fully allocated and satisfies the `target_allocation_count`.
+--
+=====
+
+`nodes`:::
+(array of objects)
+The deployment stats for each node that currently has the model allocated.
++
+.Properties of node stats
+[%collapsible%open]
+=====
+`average_inference_time_ms`:::
+(double)
+The average time, in milliseconds, for each inference call to complete on this node.
+
+`inference_count`:::
+(integer)
+The total number of inference calls made against this node for this model.
+
+`last_access`:::
+(long)
+The epoch timestamp of the last inference call for the model on this node.
+
+`node`:::
+(object)
+Information pertaining to the node.
++
+.Properties of node
+[%collapsible%open]
+======
+`attributes`:::
+(object)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-attributes]
+
+`ephemeral_id`:::
+(string)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
+
+`id`:::
+(string)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-id]
+
+`name`:::
+(string) The node name.
+
+`transport_address`:::
+(string)
+include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-transport-address]
+======
+
+`routing_state`:::
+(object)
+The current routing state for this allocation and the reason for that state.
++
+--
+* `starting`: The model is attempting to allocate on this node; inference calls are not yet accepted.
+* `started`: The model is allocated and ready to accept inference requests.
+* `stopping`: The model is being de-allocated from this node.
+* `stopped`: The model is fully de-allocated from this node.
+* `failed`: The allocation attempt failed; see the `reason` field for the potential cause.
+--
+
+`reason`:::
+(string)
+The reason for the current state. Usually only populated when the `routing_state` is `failed`.
+
+=====
+====
+
+[[ml-get-trained-model-deployment-stats-response-codes]]
+== {api-response-codes-title}
+
+`404` (Missing resources)::
+  If `allow_no_match` is `false`, this code indicates that there are no
+  resources that match the request, or that there are only partial matches for
+  the request.
+
+[[ml-get-trained-model-deployment-stats-example]]
+== {api-examples-title}
+
+The following example gets deployment information for all currently started model deployments:
+
+[source,console]
+--------------------------------------------------
+GET _ml/trained_models/*/deployment/_stats
+--------------------------------------------------
+// TEST[skip:TBD]
+
+
+The API returns the following results:
+
+[source,console-result]
+----
+{
+    "count": 2,
+    "deployment_stats": [
+        {
+            "model_id": "elastic__distilbert-base-uncased-finetuned-conll03-english",
+            "model_size": "253.3mb",
+            "state": "started",
+            "allocation_status": {
+                "allocation_count": 1,
+                "target_allocation_count": 1,
+                "state": "fully_allocated"
+            },
+            "nodes": [
+                {
+                    "node": {
+                        "6pzZQ9OmQUWAaswMlwVEwg": {
+                            "name": "runTask-0",
+                            "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
+                            "transport_address": "127.0.0.1:9300",
+                            "attributes": {
+                                "ml.machine_memory": "68719476736",
+                                "xpack.installed": "true",
+                                "testattr": "test",
+                                "ml.max_open_jobs": "512",
+                                "ml.max_jvm_size": "4181590016"
+                            },
+                            "roles": [
+                                "data",
+                                "data_cold",
+                                "data_content",
+                                "data_frozen",
+                                "data_hot",
+                                "data_warm",
+                                "ingest",
+                                "master",
+                                "ml",
+                                "remote_cluster_client",
+                                "transform"
+                            ]
+                        }
+                    },
+                    "routing_state": {
+                        "routing_state": "started"
+                    },
+                    "inference_count": 9,
+                    "average_inference_time_ms": 51,
+                    "last_access": 1632855681069
+                }
+            ]
+        },
+        {
+            "model_id": "typeform__distilbert-base-uncased-mnli",
+            "model_size": "255.5mb",
+            "state": "started",
+            "allocation_status": {
+                "allocation_count": 1,
+                "target_allocation_count": 1,
+                "state": "fully_allocated"
+            },
+            "nodes": [
+                {
+                    "node": {
+                        "6pzZQ9OmQUWAaswMlwVEwg": {
+                            "name": "runTask-0",
+                            "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
+                            "transport_address": "127.0.0.1:9300",
+                            "attributes": {
+                                "ml.machine_memory": "68719476736",
+                                "xpack.installed": "true",
+                                "testattr": "test",
+                                "ml.max_open_jobs": "512",
+                                "ml.max_jvm_size": "4181590016"
+                            },
+                            "roles": [
+                                "data",
+                                "data_cold",
+                                "data_content",
+                                "data_frozen",
+                                "data_hot",
+                                "data_warm",
+                                "ingest",
+                                "master",
+                                "ml",
+                                "remote_cluster_client",
+                                "transform"
+                            ]
+                        }
+                    },
+                    "routing_state": {
+                        "routing_state": "started"
+                    },
+                    "inference_count": 0,
+                    "average_inference_time_ms": 0
+                }
+            ]
+        }
+    ]
+}
+----
+// NOTCONSOLE
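+
+You can also scope the statistics to a single deployment by specifying one
+model ID. The following sketch reuses a model ID from the response above and
+assumes that model is still deployed:
+
+[source,console]
+--------------------------------------------------
+GET _ml/trained_models/elastic__distilbert-base-uncased-finetuned-conll03-english/deployment/_stats
+--------------------------------------------------
+// TEST[skip:model may not be deployed]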

+ 1 - 0
docs/reference/ml/df-analytics/apis/index.asciidoc

@@ -19,6 +19,7 @@ include::explain-dfanalytics.asciidoc[leveloffset=+2]
 include::get-dfanalytics.asciidoc[leveloffset=+2]
 include::get-dfanalytics-stats.asciidoc[leveloffset=+2]
 include::get-trained-models.asciidoc[leveloffset=+2]
+include::get-trained-model-deployment-stats.asciidoc[leveloffset=+2]
 include::get-trained-models-stats.asciidoc[leveloffset=+2]
 //INFER
 include::infer-trained-model-deployment.asciidoc[leveloffset=+2]

+ 1 - 0
docs/reference/ml/df-analytics/apis/ml-df-analytics-apis.asciidoc

@@ -26,6 +26,7 @@ You can use the following APIs to perform {infer} operations:
 * <<delete-trained-models-aliases>>
 * <<get-trained-models>>
 * <<get-trained-models-stats>>
+* <<get-trained-model-deployment-stats>>
 
 You can deploy a trained model to make predictions in an ingest pipeline or in
 an aggregation. Refer to the following documentation to learn more: