get-trained-model-deployment-stats.asciidoc 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. [role="xpack"]
  2. [[get-trained-model-deployment-stats]]
  3. = Get trained model deployment statistics API
  4. [subs="attributes"]
  5. ++++
  6. <titleabbrev>Get trained model deployment stats</titleabbrev>
  7. ++++
  8. Retrieves usage information for trained model deployments.
  9. [[ml-get-trained-model-deployment-stats-request]]
  10. == {api-request-title}
  11. `GET _ml/trained_models/<model_id>/deployment/_stats` +
  12. `GET _ml/trained_models/<model_id>,<model_id_2>/deployment/_stats` +
  13. `GET _ml/trained_models/<model_id_pattern*>,<model_id_2>/deployment/_stats`
  14. [[ml-get-trained-model-deployment-stats-prereq]]
  15. == {api-prereq-title}
  16. Requires the `monitor_ml` cluster privilege. This privilege is included in the
  17. `machine_learning_user` built-in role.
  18. [[ml-get-trained-model-deployment-stats-desc]]
  19. == {api-description-title}
  20. You can get deployment information for multiple trained models in a single API
  21. request by using a comma-separated list of model IDs or a wildcard expression.
  22. [[ml-get-trained-model-deployment-stats-path-params]]
  23. == {api-path-parms-title}
  24. `<model_id>`::
  25. (Optional, string)
  26. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
  27. [[ml-get-trained-model-deployment-stats-query-params]]
  28. == {api-query-parms-title}
  29. `allow_no_match`::
  30. (Optional, Boolean)
  31. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-models]
  32. [role="child_attributes"]
  33. [[ml-get-trained-model-deployment-stats-results]]
  34. == {api-response-body-title}
  35. `count`::
  36. (integer)
  37. The total number of deployment statistics that matched the requested ID
  38. patterns.
  39. `deployment_stats`::
  40. (array)
  41. An array of trained model deployment statistics, which are sorted by the `model_id` value
  42. in ascending order.
  43. +
  44. .Properties of trained model deployment stats
  45. [%collapsible%open]
  46. ====
  47. `model_id`:::
  48. (string)
  49. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
  50. `model_size`:::
  51. (<<byte-units,byte value>>)
  52. The size of the loaded model in bytes.
  53. `start_time`:::
  54. (long)
  55. The epoch timestamp when the deployment started.
  56. `state`:::
  57. (string)
  58. The overall state of the deployment. The values may be:
  59. +
  60. --
  61. * `starting`: The deployment has recently started but is not yet usable as the model is not allocated on any nodes.
  62. * `started`: The deployment is usable as at least one node has the model allocated.
  63. * `stopping`: The deployment is preparing to stop and de-allocate the model from the relevant nodes.
  64. --
  65. `allocation_status`:::
  66. (object)
  67. The detailed allocation status given the deployment configuration.
  68. +
  69. .Properties of allocation status
  70. [%collapsible%open]
  71. =====
  72. `allocation_count`:::
  73. (integer)
  74. The current number of nodes where the model is allocated.
  75. `target_allocation_count`:::
  76. (integer)
  77. The desired number of nodes for model allocation.
  78. `state`:::
  79. (string)
  80. The detailed allocation state related to the nodes.
  81. +
  82. --
  83. * `starting`: Allocations are being attempted but no node currently has the model allocated.
  84. * `started`: At least one node has the model allocated.
  85. * `fully_allocated`: The deployment is fully allocated and satisfies the `target_allocation_count`.
  86. --
  87. =====
  88. `nodes`:::
  89. (array of objects)
  90. The deployment stats for each node that currently has the model allocated.
  91. +
  92. .Properties of node stats
  93. [%collapsible%open]
  94. =====
  95. `average_inference_time_ms`:::
  96. (double)
  97. The average time, in milliseconds, for each inference call to complete on this node.
  98. `inference_count`:::
  99. (integer)
  100. The total number of inference calls made against this node for this model.
  101. `last_access`:::
  102. (long)
  103. The epoch timestamp of the last inference call for the model on this node.
  104. `node`:::
  105. (object)
  106. Information pertaining to the node.
  107. +
  108. .Properties of node
  109. [%collapsible%open]
  110. ======
  111. `attributes`:::
  112. (object)
  113. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-attributes]
  114. `ephemeral_id`:::
  115. (string)
  116. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
  117. `id`:::
  118. (string)
  119. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-id]
  120. `name`:::
  121. (string) The node name.
  122. `transport_address`:::
  123. (string)
  124. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-transport-address]
  125. ======
  126. `routing_state`:::
  127. (object)
  128. The current routing state for this allocation and the reason for that state.
  129. +
  130. --
  131. * `starting`: The model is attempting to allocate on this node; inference calls are not yet accepted.
  132. * `started`: The model is allocated and ready to accept inference requests.
  133. * `stopping`: The model is being de-allocated from this node.
  134. * `stopped`: The model is fully de-allocated from this node.
  135. * `failed`: The allocation attempt failed; see the `reason` field for the potential cause.
  136. --
  137. `reason`:::
  138. (string)
  139. The reason for the current state. Usually only populated when the `routing_state` is `failed`.
  140. `start_time`:::
  141. (long)
  142. The epoch timestamp when the allocation started.
  143. =====
  144. ====
  145. [[ml-get-trained-model-deployment-stats-response-codes]]
  146. == {api-response-codes-title}
  147. `404` (Missing resources)::
  148. If `allow_no_match` is `false`, this code indicates that there are no
  149. resources that match the request or only partial matches for the request.
  150. [[ml-get-trained-model-deployment-stats-example]]
  151. == {api-examples-title}
  152. The following example gets deployment information for all currently started model deployments:
  153. [source,console]
  154. --------------------------------------------------
  155. GET _ml/trained_models/*/deployment/_stats
  156. --------------------------------------------------
  157. // TEST[skip:TBD]
  158. The API returns the following results:
  159. [source,console-result]
  160. ----
  161. {
  162. "count": 2,
  163. "deployment_stats": [
  164. {
  165. "model_id": "elastic__distilbert-base-uncased-finetuned-conll03-english",
  166. "model_size": "253.3mb",
  167. "state": "started",
  168. "allocation_status": {
  169. "allocation_count": 1,
  170. "target_allocation_count": 1,
  171. "state": "fully_allocated"
  172. },
  173. "nodes": [
  174. {
  175. "node": {
  176. "6pzZQ9OmQUWAaswMlwVEwg": {
  177. "name": "runTask-0",
  178. "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
  179. "transport_address": "127.0.0.1:9300",
  180. "attributes": {
  181. "ml.machine_memory": "68719476736",
  182. "xpack.installed": "true",
  183. "testattr": "test",
  184. "ml.max_open_jobs": "512",
  185. "ml.max_jvm_size": "4181590016"
  186. },
  187. "roles": [
  188. "data",
  189. "data_cold",
  190. "data_content",
  191. "data_frozen",
  192. "data_hot",
  193. "data_warm",
  194. "ingest",
  195. "master",
  196. "ml",
  197. "remote_cluster_client",
  198. "transform"
  199. ]
  200. }
  201. },
  202. "routing_state": {
  203. "routing_state": "started"
  204. },
  205. "inference_count": 9,
  206. "average_inference_time_ms": 51,
  207. "last_access": 1632855681069
  208. }
  209. ]
  210. },
  211. {
  212. "model_id": "typeform__distilbert-base-uncased-mnli",
  213. "model_size": "255.5mb",
  214. "state": "started",
  215. "allocation_status": {
  216. "allocation_count": 1,
  217. "target_allocation_count": 1,
  218. "state": "fully_allocated"
  219. },
  220. "nodes": [
  221. {
  222. "node": {
  223. "6pzZQ9OmQUWAaswMlwVEwg": {
  224. "name": "runTask-0",
  225. "ephemeral_id": "aI1OwkPMRCiAJ_1XkEAqdw",
  226. "transport_address": "127.0.0.1:9300",
  227. "attributes": {
  228. "ml.machine_memory": "68719476736",
  229. "xpack.installed": "true",
  230. "testattr": "test",
  231. "ml.max_open_jobs": "512",
  232. "ml.max_jvm_size": "4181590016"
  233. },
  234. "roles": [
  235. "data",
  236. "data_cold",
  237. "data_content",
  238. "data_frozen",
  239. "data_hot",
  240. "data_warm",
  241. "ingest",
  242. "master",
  243. "ml",
  244. "remote_cluster_client",
  245. "transform"
  246. ]
  247. }
  248. },
  249. "routing_state": {
  250. "routing_state": "started"
  251. },
  252. "inference_count": 0,
  253. "average_inference_time_ms": 0
  254. }
  255. ]
  256. }
  257. ]
  258. }
  259. ----
  260. // NOTCONSOLE