get-trained-model-deployment-stats.asciidoc 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. [role="xpack"]
  2. [testenv="basic"]
  3. [[get-trained-model-deployment-stats]]
  4. = Get trained model deployment statistics API
  5. [subs="attributes"]
  6. ++++
  7. <titleabbrev>Get trained model deployment stats</titleabbrev>
  8. ++++
  9. Retrieves usage information for trained model deployments.
  10. [[ml-get-trained-model-deployment-stats-request]]
  11. == {api-request-title}
  12. `GET _ml/trained_models/<model_id>/deployment/_stats` +
  13. `GET _ml/trained_models/<model_id>,<model_id_2>/deployment/_stats` +
  14. `GET _ml/trained_models/<model_id_pattern*>,<model_id_2>/deployment/_stats`
  15. [[ml-get-trained-model-deployment-stats-prereq]]
  16. == {api-prereq-title}
  17. Requires the `monitor_ml` cluster privilege. This privilege is included in the
  18. `machine_learning_user` built-in role.
  19. [[ml-get-trained-model-deployment-stats-desc]]
  20. == {api-description-title}
  21. You can get deployment information for multiple trained models in a single API
  22. request by using a comma-separated list of model IDs or a wildcard expression.
  23. [[ml-get-trained-model-deployment-stats-path-params]]
  24. == {api-path-parms-title}
  25. `<model_id>`::
  26. (Optional, string)
  27. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
  28. [[ml-get-trained-model-deployment-stats-query-params]]
  29. == {api-query-parms-title}
  30. `allow_no_match`::
  31. (Optional, Boolean)
  32. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-models]
  33. [role="child_attributes"]
  34. [[ml-get-trained-model-deployment-stats-results]]
  35. == {api-response-body-title}
  36. `count`::
  37. (integer)
  38. The total number of deployment statistics that matched the requested ID
  39. patterns.
  40. `deployment_stats`::
  41. (array)
  42. An array of trained model deployment statistics, which are sorted by the `model_id` value
  43. in ascending order.
  44. +
  45. .Properties of trained model deployment stats
  46. [%collapsible%open]
  47. ====
  48. `model_id`:::
  49. (string)
  50. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
  51. `model_size`:::
  52. (<<byte-units,byte value>>)
  53. The size of the loaded model in bytes.
  54. `start_time`:::
  55. (long)
  56. The epoch timestamp when the deployment started.
  57. `state`:::
  58. (string)
  59. The overall state of the deployment. The values may be:
  60. +
  61. --
  62. * `starting`: The deployment has recently started but is not yet usable as the model is not allocated on any nodes.
  63. * `started`: The deployment is usable as at least one node has the model allocated.
  64. * `stopping`: The deployment is preparing to stop and de-allocate the model from the relevant nodes.
  65. --
  66. `allocation_status`:::
  67. (object)
  68. The detailed allocation status given the deployment configuration.
  69. +
  70. .Properties of allocation status
  71. [%collapsible%open]
  72. =====
  73. `allocation_count`:::
  74. (integer)
  75. The current number of nodes where the model is allocated.
  76. `target_allocation_count`:::
  77. (integer)
  78. The desired number of nodes for model allocation.
  79. `state`:::
  80. (string)
  81. The detailed allocation state related to the nodes.
  82. +
  83. --
  84. * `starting`: Allocations are being attempted but no node currently has the model allocated.
  85. * `started`: At least one node has the model allocated.
  86. * `fully_allocated`: The deployment is fully allocated and satisfies the `target_allocation_count`.
  87. --
  88. =====
  89. `nodes`:::
  90. (array of objects)
  91. The deployment stats for each node that currently has the model allocated.
  92. +
  93. .Properties of node stats
  94. [%collapsible%open]
  95. =====
  96. `average_inference_time_ms`:::
  97. (double)
  98. The average time for each inference call to complete on this node.
  99. `inference_count`:::
  100. (integer)
  101. The total number of inference calls made against this node for this model.
  102. `last_access`:::
  103. (long)
  104. The epoch timestamp of the last inference call for the model on this node.
  105. `node`:::
  106. (object)
  107. Information pertaining to the node.
  108. +
  109. .Properties of node
  110. [%collapsible%open]
  111. ======
  112. `attributes`:::
  113. (object)
  114. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-attributes]
  115. `ephemeral_id`:::
  116. (string)
  117. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
  118. `id`:::
  119. (string)
  120. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-id]
  121. `name`:::
  122. (string) The node name.
  123. `transport_address`:::
  124. (string)
  125. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-transport-address]
  126. ======
  127. `routing_state`:::
  128. (object)
  129. The current routing state and reason for the current routing state for this allocation.
  130. +
  131. --
  132. * `starting`: The model is attempting to allocate on this node; inference calls are not yet accepted.
  133. * `started`: The model is allocated and ready to accept inference requests.
  134. * `stopping`: The model is being de-allocated from this node.
  135. * `stopped`: The model is fully de-allocated from this node.
  136. * `failed`: The allocation attempt failed; see the `reason` field for the potential cause.
  137. --
  138. `reason`:::
  139. (string)
  140. The reason for the current state. Usually only populated when the `routing_state` is `failed`.
  141. `start_time`:::
  142. (long)
  143. The epoch timestamp when the allocation started.
  144. =====
  145. ====
  146. [[ml-get-trained-model-deployment-stats-response-codes]]
  147. == {api-response-codes-title}
  148. `404` (Missing resources)::
  149. If `allow_no_match` is `false`, this code indicates that there are no
  150. resources that match the request or only partial matches for the request.
  151. [[ml-get-trained-model-deployment-stats-example]]
  152. == {api-examples-title}
  153. The following example gets deployment information for all currently started model deployments:
  154. [source,console]
  155. --------------------------------------------------
  156. GET _ml/trained_models/*/deployment/_stats
  157. --------------------------------------------------
  158. // TEST[skip:TBD]
  159. The API returns the following results:
  160. [source,console-result]
  161. ----
  162. {
  163. "count": 2,
  164. "deployment_stats": [
  165. {
  166. "model_id": "elastic__distilbert-base-uncased-finetuned-conll03-english",
  167. "model_size": "253.3mb",
  168. "state": "started",
  169. "allocation_status": {
  170. "allocation_count": 1,
  171. "target_allocation_count": 1,
  172. "state": "fully_allocated"
  173. },
  174. "nodes": [
  175. {
  176. "node": {
  177. "6pzZQ9OmQUWAaswMlwVEwg": {
  178. "name": "runTask-0",
  179. "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
  180. "transport_address": "127.0.0.1:9300",
  181. "attributes": {
  182. "ml.machine_memory": "68719476736",
  183. "xpack.installed": "true",
  184. "testattr": "test",
  185. "ml.max_open_jobs": "512",
  186. "ml.max_jvm_size": "4181590016"
  187. },
  188. "roles": [
  189. "data",
  190. "data_cold",
  191. "data_content",
  192. "data_frozen",
  193. "data_hot",
  194. "data_warm",
  195. "ingest",
  196. "master",
  197. "ml",
  198. "remote_cluster_client",
  199. "transform"
  200. ]
  201. }
  202. },
  203. "routing_state": {
  204. "routing_state": "started"
  205. },
  206. "inference_count": 9,
  207. "average_inference_time_ms": 51,
  208. "last_access": 1632855681069
  209. }
  210. ]
  211. },
  212. {
  213. "model_id": "typeform__distilbert-base-uncased-mnli",
  214. "model_size": "255.5mb",
  215. "state": "started",
  216. "allocation_status": {
  217. "allocation_count": 1,
  218. "target_allocation_count": 1,
  219. "state": "fully_allocated"
  220. },
  221. "nodes": [
  222. {
  223. "node": {
  224. "6pzZQ9OmQUWAaswMlwVEwg": {
  225. "name": "runTask-0",
  226. "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
  227. "transport_address": "127.0.0.1:9300",
  228. "attributes": {
  229. "ml.machine_memory": "68719476736",
  230. "xpack.installed": "true",
  231. "testattr": "test",
  232. "ml.max_open_jobs": "512",
  233. "ml.max_jvm_size": "4181590016"
  234. },
  235. "roles": [
  236. "data",
  237. "data_cold",
  238. "data_content",
  239. "data_frozen",
  240. "data_hot",
  241. "data_warm",
  242. "ingest",
  243. "master",
  244. "ml",
  245. "remote_cluster_client",
  246. "transform"
  247. ]
  248. }
  249. },
  250. "routing_state": {
  251. "routing_state": "started"
  252. },
  253. "inference_count": 0,
  254. "average_inference_time_ms": 0
  255. }
  256. ]
  257. }
  258. ]
  259. }
  260. ----
  261. // NOTCONSOLE