Updates the REST spec for the start deployment API. In particular, it adds the params `number_of_allocations`, `threads_per_allocation`, and `queue_capacity`.
@@ -28,6 +28,24 @@
]
},
"params":{
+ "number_of_allocations":{
+ "type":"int",
+ "description": "The number of model allocations on each node where the model is deployed.",
+ "required": false,
+ "default": 1
+ },
+ "threads_per_allocation":{
+ "type":"int",
+ "description": "The number of threads used by each model allocation during inference.",
+ "required": false,
+ "default": 1
+ },
+ "queue_capacity":{
+ "type":"int",
+ "description": "Controls how many inference requests are allowed in the queue at a time.",
+ "required": false,
+ "default": 1024
+ },
"timeout":{
"type":"time",
"required":false,