put-trained-models.asciidoc 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503
  1. [role="xpack"]
  2. [[put-trained-models]]
  3. = Create trained models API
  4. [subs="attributes"]
  5. ++++
  6. <titleabbrev>Create trained models</titleabbrev>
  7. ++++
  8. Creates a trained model.
  9. WARNING: Models created in version 7.8.0 are not backwards compatible
  10. with older node versions. If in a mixed cluster environment,
  11. all nodes must be at least 7.8.0 to use a model stored by
  12. a 7.8.0 node.
  13. [[ml-put-trained-models-request]]
  14. == {api-request-title}
  15. `PUT _ml/trained_models/<model_id>`
  16. [[ml-put-trained-models-prereq]]
  17. == {api-prereq-title}
  18. Requires the `manage_ml` cluster privilege. This privilege is included in the
  19. `machine_learning_admin` built-in role.
  20. [[ml-put-trained-models-desc]]
  21. == {api-description-title}
  22. The create trained model API enables you to supply a trained model that is not
  23. created by {dfanalytics}.
  24. [[ml-put-trained-models-path-params]]
  25. == {api-path-parms-title}
  26. `<model_id>`::
  27. (Required, string)
  28. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-id]
  29. [[ml-put-trained-models-query-params]]
  30. == {api-query-parms-title}
  31. `defer_definition_decompression`::
  32. (Optional, boolean)
  33. If set to `true` and a `compressed_definition` is provided, the request defers
  34. definition decompression and skips relevant validations.
  35. This deferral is useful for systems or users that know a good byte size estimate for their
  36. model and know that their model is valid and likely won't fail during inference.
  37. [role="child_attributes"]
  38. [[ml-put-trained-models-request-body]]
  39. == {api-request-body-title}
  40. `compressed_definition`::
  41. (Required, string)
  42. The compressed (GZipped and Base64 encoded) {infer} definition of the model.
  43. If `compressed_definition` is specified, then `definition` cannot be specified.
  44. //Begin definition
  45. `definition`::
  46. (Required, object)
  47. The {infer} definition for the model. If `definition` is specified, then
  48. `compressed_definition` cannot be specified.
  49. +
  50. .Properties of `definition`
  51. [%collapsible%open]
  52. ====
  53. //Begin preprocessors
  54. `preprocessors`::
  55. (Optional, object)
  56. Collection of preprocessors. See <<ml-put-trained-models-preprocessor-example>>.
  57. +
  58. .Properties of `preprocessors`
  59. [%collapsible%open]
  60. =====
  61. //Begin frequency encoding
  62. `frequency_encoding`::
  63. (Required, object)
  64. Defines a frequency encoding for a field.
  65. +
  66. .Properties of `frequency_encoding`
  67. [%collapsible%open]
  68. ======
  69. `feature_name`::
  70. (Required, string)
  71. The name of the resulting feature.
  72. `field`::
  73. (Required, string)
  74. The field name to encode.
  75. `frequency_map`::
  76. (Required, object map of string:double)
  77. Object that maps the field value to the frequency encoded value.
  78. `custom`::
  79. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=custom-preprocessor]
  80. ======
  81. //End frequency encoding
  82. //Begin one hot encoding
  83. `one_hot_encoding`::
  84. (Required, object)
  85. Defines a one hot encoding map for a field.
  86. +
  87. .Properties of `one_hot_encoding`
  88. [%collapsible%open]
  89. ======
  90. `field`::
  91. (Required, string)
  92. The field name to encode.
  93. `hot_map`::
  94. (Required, object map of strings)
  95. String map of "field_value: one_hot_column_name".
  96. `custom`::
  97. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=custom-preprocessor]
  98. ======
  99. //End one hot encoding
  100. //Begin target mean encoding
  101. `target_mean_encoding`::
  102. (Required, object)
  103. Defines a target mean encoding for a field.
  104. +
  105. .Properties of `target_mean_encoding`
  106. [%collapsible%open]
  107. ======
  108. `default_value`:::
  109. (Required, double)
  110. The feature value if the field value is not in the `target_map`.
  111. `feature_name`:::
  112. (Required, string)
  113. The name of the resulting feature.
  114. `field`:::
  115. (Required, string)
  116. The field name to encode.
  117. `target_map`:::
  118. (Required, object map of string:double)
  119. Object that maps the field value to the target mean value.
  120. `custom`:::
  121. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=custom-preprocessor]
  122. ======
  123. //End target mean encoding
  124. =====
  125. //End preprocessors
  126. //Begin trained model
  127. `trained_model`::
  128. (Required, object)
  129. The definition of the trained model.
  130. +
  131. .Properties of `trained_model`
  132. [%collapsible%open]
  133. =====
  134. //Begin tree
  135. `tree`::
  136. (Required, object)
  137. The definition for a binary decision tree.
  138. +
  139. .Properties of `tree`
  140. [%collapsible%open]
  141. ======
  142. `classification_labels`:::
  143. (Optional, string) An array of classification labels (used for
  144. `classification`).
  145. `feature_names`:::
  146. (Required, string)
  147. Features expected by the tree, in their expected order.
  148. `target_type`:::
  149. (Required, string)
  150. String indicating the model target type; `regression` or `classification`.
  151. `tree_structure`:::
  152. (Required, object)
  153. An array of `tree_node` objects. The nodes must be in ordinal order by their
  154. `tree_node.node_index` value.
  155. ======
  156. //End tree
  157. //Begin tree node
  158. `tree_node`::
  159. (Required, object)
  160. The definition of a node in a tree.
  161. +
  162. --
  163. There are two major types of nodes: leaf nodes and non-leaf nodes.
  164. * Leaf nodes only need `node_index` and `leaf_value` defined.
  165. * All other nodes need `split_feature`, `left_child`, `right_child`,
  166. `threshold`, `decision_type`, and `default_left` defined.
  167. --
  168. +
  169. .Properties of `tree_node`
  170. [%collapsible%open]
  171. ======
  172. `decision_type`::
  173. (Optional, string)
  174. Indicates the decision type for the positive case (in other words, when to
  175. choose the left node). Supported values are `lt`, `lte`, `gt`, and `gte`. Defaults to `lte`.
  176. `default_left`::
  177. (Optional, Boolean)
  178. Indicates whether to default to the left when the feature is missing. Defaults
  179. to `true`.
  180. `leaf_value`::
  181. (Optional, double)
  182. The leaf value of the node, if the node is a leaf (in other words, it has no
  183. children).
  184. `left_child`::
  185. (Optional, integer)
  186. The index of the left child.
  187. `node_index`::
  188. (Required, integer)
  189. The index of the current node.
  190. `right_child`::
  191. (Optional, integer)
  192. The index of the right child.
  193. `split_feature`::
  194. (Optional, integer)
  195. The index of the feature value in the feature array.
  196. `split_gain`::
  197. (Optional, double) The information gain from the split.
  198. `threshold`::
  199. (Optional, double)
  200. The decision threshold with which to compare the feature value.
  201. ======
  202. //End tree node
  203. //Begin ensemble
  204. `ensemble`::
  205. (Optional, object)
  206. The definition for an ensemble model. See <<ml-put-trained-models-model-example>>.
  207. +
  208. .Properties of `ensemble`
  209. [%collapsible%open]
  210. ======
  211. //Begin aggregate output
  212. `aggregate_output`::
  213. (Required, object)
  214. An aggregated output object that defines how to aggregate the outputs of the
  215. `trained_models`. Supported objects are `weighted_mode`, `weighted_sum`,
  216. `logistic_regression`, and `exponent`. See <<ml-put-trained-models-aggregated-output-example>>.
  217. +
  218. .Properties of `aggregate_output`
  219. [%collapsible%open]
  220. =======
  221. //Begin logistic regression
  222. `logistic_regression`::
  223. (Optional, object)
  224. This `aggregate_output` type works with binary classification (classification
  225. for values [0, 1]). It multiplies the outputs (in the case of the `ensemble`
  226. model, the inference model values) by the supplied `weights`. The resulting
  227. vector is summed and passed to a
  228. {wikipedia}/Sigmoid_function[`sigmoid` function]. The result
  229. of the `sigmoid` function is considered the probability of class 1 (`P_1`),
  230. consequently, the probability of class 0 is `1 - P_1`. The class with the
  231. highest probability (either 0 or 1) is then returned. For more information about
  232. logistic regression, see
  233. {wikipedia}/Logistic_regression[this wiki article].
  234. +
  235. .Properties of `logistic_regression`
  236. [%collapsible%open]
  237. ========
  238. `weights`:::
  239. (Required, double)
  240. The weights to multiply by the input values (the inference values of the trained
  241. models).
  242. ========
  243. //End logistic regression
  244. //Begin weighted sum
  245. `weighted_sum`::
  246. (Optional, object)
  247. This `aggregate_output` type works with regression. It returns the weighted sum
  248. of the input values.
  249. +
  250. .Properties of `weighted_sum`
  251. [%collapsible%open]
  252. ========
  253. `weights`:::
  254. (Required, double)
  255. The weights to multiply by the input values (the inference values of the trained
  256. models).
  257. ========
  258. //End weighted sum
  259. //Begin weighted mode
  260. `weighted_mode`::
  261. (Optional, object)
  262. This `aggregate_output` type works with regression or classification. It takes
  263. a weighted vote of the input values. The most common input value (taking the
  264. weights into account) is returned.
  265. +
  266. .Properties of `weighted_mode`
  267. [%collapsible%open]
  268. ========
  269. `weights`:::
  270. (Required, double)
  271. The weights to multiply by the input values (the inference values of the trained
  272. models).
  273. ========
  274. //End weighted mode
  275. //Begin exponent
  276. `exponent`::
  277. (Optional, object)
  278. This `aggregate_output` type works with regression. It takes a weighted sum of
  279. the input values and passes the result to an exponent function
  280. (`e^x` where `x` is the sum of the weighted values).
  281. +
  282. .Properties of `exponent`
  283. [%collapsible%open]
  284. ========
  285. `weights`:::
  286. (Required, double)
  287. The weights to multiply by the input values (the inference values of the trained
  288. models).
  289. ========
  290. //End exponent
  291. =======
  292. //End aggregate output
  293. `classification_labels`::
  294. (Optional, string)
  295. An array of classification labels.
  296. `feature_names`::
  297. (Optional, string)
  298. Features expected by the ensemble, in their expected order.
  299. `target_type`::
  300. (Required, string)
  301. String indicating the model target type; `regression` or `classification`.
  302. `trained_models`::
  303. (Required, object)
  304. An array of `trained_model` objects. Supported trained models are `tree` and
  305. `ensemble`.
  306. ======
  307. //End ensemble
  308. =====
  309. //End trained model
  310. ====
  311. //End definition
  312. `description`::
  313. (Optional, string)
  314. A human-readable description of the {infer} trained model.
  315. `estimated_heap_memory_usage_bytes`::
  316. (Optional, integer) deprecated:[7.16.0,Replaced by `model_size_bytes`]
  317. `estimated_operations`::
  318. (Optional, integer)
  319. The estimated number of operations to use the trained model during inference.
  320. This property is supported only if `defer_definition_decompression` is `true` or
  321. the model definition is not supplied.
  322. //Begin inference_config
  323. `inference_config`::
  324. (Required, object)
  325. The default configuration for inference. This can be: `regression`,
  326. `classification`, `fill_mask`, `ner`, `pass_through`, `question_answering`,
  327. `text_classification`, `text_embedding`, or `zero_shot_classification`.
  328. If `regression` or `classification`, it must match the `target_type` of the
  329. underlying `definition.trained_model`. If `fill_mask`, `ner`,
  330. `question_answering`, `text_classification`, or `text_embedding`, the
  331. `model_type` must be `pytorch`.
  332. +
  333. .Properties of `inference_config`
  334. [%collapsible%open]
  335. ====
  336. `classification`:::
  337. (Optional, object)
  338. Classification configuration for inference.
  339. +
  340. .Properties of classification inference
  341. [%collapsible%open]
  342. =====
  343. `num_top_classes`::::
  344. (Optional, integer)
  345. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-num-top-classes]
  346. `num_top_feature_importance_values`::::
  347. (Optional, integer)
  348. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-num-top-feature-importance-values]
  349. `prediction_field_type`::::
  350. (Optional, string)
  351. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-prediction-field-type]
  352. `results_field`::::
  353. (Optional, string)
  354. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  355. `top_classes_results_field`::::
  356. (Optional, string)
  357. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-top-classes-results-field]
  358. =====
  359. `fill_mask`:::
  360. (Optional, object)
  361. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-fill-mask]
  362. +
  363. .Properties of fill_mask inference
  364. [%collapsible%open]
  365. =====
  366. `num_top_classes`::::
  367. (Optional, integer)
  368. Number of top predicted tokens to return for replacing the mask token. Defaults to `0`.
  369. `results_field`::::
  370. (Optional, string)
  371. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  372. `tokenization`::::
  373. (Optional, object)
  374. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  375. +
  376. .Properties of tokenization
  377. [%collapsible%open]
  378. ======
  379. `bert`::::
  380. (Optional, object)
  381. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  382. +
  383. .Properties of bert
  384. [%collapsible%open]
  385. =======
  386. `do_lower_case`::::
  387. (Optional, boolean)
  388. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  389. `max_sequence_length`::::
  390. (Optional, integer)
  391. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  392. `truncate`::::
  393. (Optional, string)
  394. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  395. `with_special_tokens`::::
  396. (Optional, boolean)
  397. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  398. =======
  399. `roberta`::::
  400. (Optional, object)
  401. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  402. +
  403. .Properties of roberta
  404. [%collapsible%open]
  405. =======
  406. `add_prefix_space`::::
  407. (Optional, boolean)
  408. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  409. `max_sequence_length`::::
  410. (Optional, integer)
  411. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  412. `truncate`::::
  413. (Optional, string)
  414. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  415. `with_special_tokens`::::
  416. (Optional, boolean)
  417. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  418. =======
  419. `mpnet`::::
  420. (Optional, object)
  421. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  422. +
  423. .Properties of mpnet
  424. [%collapsible%open]
  425. =======
  426. `do_lower_case`::::
  427. (Optional, boolean)
  428. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  429. `max_sequence_length`::::
  430. (Optional, integer)
  431. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  432. `truncate`::::
  433. (Optional, string)
  434. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  435. `with_special_tokens`::::
  436. (Optional, boolean)
  437. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  438. =======
  439. ======
  440. =====
  441. `ner`:::
  442. (Optional, object)
  443. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-ner]
  444. +
  445. .Properties of ner inference
  446. [%collapsible%open]
  447. =====
  448. `classification_labels`::::
  449. (Optional, string)
  450. An array of classification labels. NER only supports Inside-Outside-Beginning
  451. labels (IOB) and only persons, organizations, locations, and miscellaneous.
  452. Example: ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC",
  453. "I-MISC"]
  454. `results_field`::::
  455. (Optional, string)
  456. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  457. `tokenization`::::
  458. (Optional, object)
  459. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  460. +
  461. .Properties of tokenization
  462. [%collapsible%open]
  463. ======
  464. `bert`::::
  465. (Optional, object)
  466. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  467. +
  468. .Properties of bert
  469. [%collapsible%open]
  470. =======
  471. `do_lower_case`::::
  472. (Optional, boolean)
  473. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  474. `max_sequence_length`::::
  475. (Optional, integer)
  476. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  477. `truncate`::::
  478. (Optional, string)
  479. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  480. `with_special_tokens`::::
  481. (Optional, boolean)
  482. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  483. =======
  484. `roberta`::::
  485. (Optional, object)
  486. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  487. +
  488. .Properties of roberta
  489. [%collapsible%open]
  490. =======
  491. `add_prefix_space`::::
  492. (Optional, boolean)
  493. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  494. `max_sequence_length`::::
  495. (Optional, integer)
  496. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  497. `truncate`::::
  498. (Optional, string)
  499. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  500. `with_special_tokens`::::
  501. (Optional, boolean)
  502. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  503. =======
  504. `mpnet`::::
  505. (Optional, object)
  506. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  507. +
  508. .Properties of mpnet
  509. [%collapsible%open]
  510. =======
  511. `do_lower_case`::::
  512. (Optional, boolean)
  513. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  514. `max_sequence_length`::::
  515. (Optional, integer)
  516. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  517. `truncate`::::
  518. (Optional, string)
  519. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  520. `with_special_tokens`::::
  521. (Optional, boolean)
  522. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  523. =======
  524. ======
  525. =====
  526. `pass_through`:::
  527. (Optional, object)
  528. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-pass-through]
  529. +
  530. .Properties of pass_through inference
  531. [%collapsible%open]
  532. =====
  533. `results_field`::::
  534. (Optional, string)
  535. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  536. `tokenization`::::
  537. (Optional, object)
  538. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  539. +
  540. .Properties of tokenization
  541. [%collapsible%open]
  542. ======
  543. `bert`::::
  544. (Optional, object)
  545. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  546. +
  547. .Properties of bert
  548. [%collapsible%open]
  549. =======
  550. `do_lower_case`::::
  551. (Optional, boolean)
  552. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  553. `max_sequence_length`::::
  554. (Optional, integer)
  555. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  556. `truncate`::::
  557. (Optional, string)
  558. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  559. `with_special_tokens`::::
  560. (Optional, boolean)
  561. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  562. =======
  563. `roberta`::::
  564. (Optional, object)
  565. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  566. +
  567. .Properties of roberta
  568. [%collapsible%open]
  569. =======
  570. `add_prefix_space`::::
  571. (Optional, boolean)
  572. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  573. `max_sequence_length`::::
  574. (Optional, integer)
  575. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  576. `truncate`::::
  577. (Optional, string)
  578. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  579. `with_special_tokens`::::
  580. (Optional, boolean)
  581. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  582. =======
  583. `mpnet`::::
  584. (Optional, object)
  585. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  586. +
  587. .Properties of mpnet
  588. [%collapsible%open]
  589. =======
  590. `do_lower_case`::::
  591. (Optional, boolean)
  592. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  593. `max_sequence_length`::::
  594. (Optional, integer)
  595. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  596. `truncate`::::
  597. (Optional, string)
  598. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  599. `with_special_tokens`::::
  600. (Optional, boolean)
  601. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  602. =======
  603. ======
  604. =====
  605. `question_answering`:::
  606. (Optional, object)
  607. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-question-answering]
  608. +
  609. .Properties of question_answering inference
  610. [%collapsible%open]
  611. =====
  612. `max_answer_length`::::
  613. (Optional, integer)
  614. The maximum number of words in the answer. Defaults to `15`.
  615. `results_field`::::
  616. (Optional, string)
  617. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  618. `tokenization`::::
  619. (Optional, object)
  620. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  621. +
  622. Recommended to set `max_sequence_length` to `386` with a `span` of `128` and set
  623. `truncate` to `none`.
  624. +
  625. .Properties of tokenization
  626. [%collapsible%open]
  627. ======
  628. `bert`::::
  629. (Optional, object)
  630. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  631. +
  632. .Properties of bert
  633. [%collapsible%open]
  634. =======
  635. `do_lower_case`::::
  636. (Optional, boolean)
  637. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  638. `max_sequence_length`::::
  639. (Optional, integer)
  640. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  641. `span`::::
  642. (Optional, integer)
  643. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  644. `truncate`::::
  645. (Optional, string)
  646. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  647. `with_special_tokens`::::
  648. (Optional, boolean)
  649. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  650. =======
  651. `roberta`::::
  652. (Optional, object)
  653. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  654. +
  655. .Properties of roberta
  656. [%collapsible%open]
  657. =======
  658. `add_prefix_space`::::
  659. (Optional, boolean)
  660. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  661. `max_sequence_length`::::
  662. (Optional, integer)
  663. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  664. `span`::::
  665. (Optional, integer)
  666. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  667. `truncate`::::
  668. (Optional, string)
  669. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  670. `with_special_tokens`::::
  671. (Optional, boolean)
  672. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  673. =======
  674. `mpnet`::::
  675. (Optional, object)
  676. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  677. +
  678. .Properties of mpnet
  679. [%collapsible%open]
  680. =======
  681. `do_lower_case`::::
  682. (Optional, boolean)
  683. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  684. `max_sequence_length`::::
  685. (Optional, integer)
  686. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  687. `span`::::
  688. (Optional, integer)
  689. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  690. `truncate`::::
  691. (Optional, string)
  692. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  693. `with_special_tokens`::::
  694. (Optional, boolean)
  695. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  696. =======
  697. ======
  698. =====
  699. `regression`:::
  700. (Optional, object)
  701. Regression configuration for inference.
  702. +
  703. .Properties of regression inference
  704. [%collapsible%open]
  705. =====
  706. `num_top_feature_importance_values`::::
  707. (Optional, integer)
  708. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-regression-num-top-feature-importance-values]
  709. `results_field`::::
  710. (Optional, string)
  711. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  712. =====
  713. `text_classification`:::
  714. (Optional, object)
  715. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-text-classification]
  716. +
  717. .Properties of text_classification inference
  718. [%collapsible%open]
  719. =====
  720. `classification_labels`::::
  721. (Optional, array) An array of classification labels.
  722. `num_top_classes`::::
  723. (Optional, integer)
  724. Specifies the number of top class predictions to return. Defaults to all classes (-1).
  725. `results_field`::::
  726. (Optional, string)
  727. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  728. `tokenization`::::
  729. (Optional, object)
  730. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  731. +
  732. .Properties of tokenization
  733. [%collapsible%open]
  734. ======
  735. `bert`::::
  736. (Optional, object)
  737. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  738. +
  739. .Properties of bert
  740. [%collapsible%open]
  741. =======
  742. `do_lower_case`::::
  743. (Optional, boolean)
  744. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  745. `max_sequence_length`::::
  746. (Optional, integer)
  747. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  748. `span`::::
  749. (Optional, integer)
  750. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  751. `truncate`::::
  752. (Optional, string)
  753. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  754. `with_special_tokens`::::
  755. (Optional, boolean)
  756. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  757. =======
  758. `roberta`::::
  759. (Optional, object)
  760. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  761. +
  762. .Properties of roberta
  763. [%collapsible%open]
  764. =======
  765. `add_prefix_space`::::
  766. (Optional, boolean)
  767. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  768. `max_sequence_length`::::
  769. (Optional, integer)
  770. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  771. `span`::::
  772. (Optional, integer)
  773. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  774. `truncate`::::
  775. (Optional, string)
  776. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  777. `with_special_tokens`::::
  778. (Optional, boolean)
  779. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  780. =======
  781. `mpnet`::::
  782. (Optional, object)
  783. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  784. +
  785. .Properties of mpnet
  786. [%collapsible%open]
  787. =======
  788. `do_lower_case`::::
  789. (Optional, boolean)
  790. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  791. `max_sequence_length`::::
  792. (Optional, integer)
  793. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  794. `truncate`::::
  795. (Optional, string)
  796. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  797. `with_special_tokens`::::
  798. (Optional, boolean)
  799. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  800. =======
  801. ======
  802. =====
  803. `text_embedding`:::
  804. (Optional, object)
  805. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-text-embedding]
  806. +
  807. .Properties of text_embedding inference
  808. [%collapsible%open]
  809. =====
  810. `results_field`::::
  811. (Optional, string)
  812. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  813. `tokenization`::::
  814. (Optional, object)
  815. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  816. +
  817. .Properties of tokenization
  818. [%collapsible%open]
  819. ======
  820. `bert`::::
  821. (Optional, object)
  822. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  823. +
  824. .Properties of bert
  825. [%collapsible%open]
  826. =======
  827. `do_lower_case`::::
  828. (Optional, boolean)
  829. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  830. `max_sequence_length`::::
  831. (Optional, integer)
  832. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  833. `truncate`::::
  834. (Optional, string)
  835. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  836. `with_special_tokens`::::
  837. (Optional, boolean)
  838. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  839. =======
  840. `roberta`::::
  841. (Optional, object)
  842. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  843. +
  844. .Properties of roberta
  845. [%collapsible%open]
  846. =======
  847. `add_prefix_space`::::
  848. (Optional, boolean)
  849. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  850. `max_sequence_length`::::
  851. (Optional, integer)
  852. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  853. `truncate`::::
  854. (Optional, string)
  855. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  856. `with_special_tokens`::::
  857. (Optional, boolean)
  858. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  859. =======
  860. `mpnet`::::
  861. (Optional, object)
  862. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  863. +
  864. .Properties of mpnet
  865. [%collapsible%open]
  866. =======
  867. `do_lower_case`::::
  868. (Optional, boolean)
  869. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  870. `max_sequence_length`::::
  871. (Optional, integer)
  872. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  873. `truncate`::::
  874. (Optional, string)
  875. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  876. `with_special_tokens`::::
  877. (Optional, boolean)
  878. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  879. =======
  880. ======
  881. =====
  882. `text_similarity`:::
  883. (Optional, object)
  884. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-text-similarity]
  885. +
  886. .Properties of text_similarity inference
  887. [%collapsible%open]
  888. =====
  889. `span_score_combination_function`::::
  890. (Optional, string)
  891. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-text-similarity-span-score-func]
  892. `tokenization`::::
  893. (Optional, object)
  894. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  895. +
  896. .Properties of tokenization
  897. [%collapsible%open]
  898. ======
  899. `bert`::::
  900. (Optional, object)
  901. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  902. +
  903. .Properties of bert
  904. [%collapsible%open]
  905. =======
  906. `do_lower_case`::::
  907. (Optional, boolean)
  908. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  909. `max_sequence_length`::::
  910. (Optional, integer)
  911. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  912. `span`::::
  913. (Optional, integer)
  914. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  915. `truncate`::::
  916. (Optional, string)
  917. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  918. `with_special_tokens`::::
  919. (Optional, boolean)
  920. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  921. =======
  922. `roberta`::::
  923. (Optional, object)
  924. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  925. +
  926. .Properties of roberta
  927. [%collapsible%open]
  928. =======
  929. `add_prefix_space`::::
  930. (Optional, boolean)
  931. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  932. `max_sequence_length`::::
  933. (Optional, integer)
  934. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  935. `span`::::
  936. (Optional, integer)
  937. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  938. `truncate`::::
  939. (Optional, string)
  940. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  941. `with_special_tokens`::::
  942. (Optional, boolean)
  943. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  944. =======
  945. `mpnet`::::
  946. (Optional, object)
  947. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  948. +
  949. .Properties of mpnet
  950. [%collapsible%open]
  951. =======
  952. `do_lower_case`::::
  953. (Optional, boolean)
  954. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  955. `max_sequence_length`::::
  956. (Optional, integer)
  957. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  958. `span`::::
  959. (Optional, integer)
  960. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
  961. `truncate`::::
  962. (Optional, string)
  963. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  964. `with_special_tokens`::::
  965. (Optional, boolean)
  966. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  967. =======
  968. ======
  969. =====
  970. `zero_shot_classification`:::
  971. (Optional, object)
  972. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-zero-shot-classification]
  973. +
  974. .Properties of zero_shot_classification inference
  975. [%collapsible%open]
  976. =====
  977. `classification_labels`::::
  978. (Required, array)
  979. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-zero-shot-classification-classification-labels]
  980. `hypothesis_template`::::
  981. (Optional, string)
  982. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-zero-shot-classification-hypothesis-template]
  983. `labels`::::
  984. (Optional, array)
  985. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-zero-shot-classification-labels]
  986. `multi_label`::::
  987. (Optional, boolean)
  988. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-zero-shot-classification-multi-label]
  989. `results_field`::::
  990. (Optional, string)
  991. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-results-field]
  992. `tokenization`::::
  993. (Optional, object)
  994. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization]
  995. +
  996. .Properties of tokenization
  997. [%collapsible%open]
  998. ======
  999. `bert`::::
  1000. (Optional, object)
  1001. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert]
  1002. +
  1003. .Properties of bert
  1004. [%collapsible%open]
  1005. =======
  1006. `do_lower_case`::::
  1007. (Optional, boolean)
  1008. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  1009. `max_sequence_length`::::
  1010. (Optional, integer)
  1011. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  1012. `truncate`::::
  1013. (Optional, string)
  1014. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  1015. `with_special_tokens`::::
  1016. (Optional, boolean)
  1017. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
  1018. =======
  1019. `roberta`::::
  1020. (Optional, object)
  1021. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
  1022. +
  1023. .Properties of roberta
  1024. [%collapsible%open]
  1025. =======
  1026. `add_prefix_space`::::
  1027. (Optional, boolean)
  1028. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-add-prefix-space]
  1029. `max_sequence_length`::::
  1030. (Optional, integer)
  1031. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  1032. `truncate`::::
  1033. (Optional, string)
  1034. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  1035. `with_special_tokens`::::
  1036. (Optional, boolean)
  1037. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta-with-special-tokens]
  1038. =======
  1039. `mpnet`::::
  1040. (Optional, object)
  1041. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet]
  1042. +
  1043. .Properties of mpnet
  1044. [%collapsible%open]
  1045. =======
  1046. `do_lower_case`::::
  1047. (Optional, boolean)
  1048. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
  1049. `max_sequence_length`::::
  1050. (Optional, integer)
  1051. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
  1052. `truncate`::::
  1053. (Optional, string)
  1054. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
  1055. `with_special_tokens`::::
  1056. (Optional, boolean)
  1057. include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-mpnet-with-special-tokens]
  1058. =======
  1059. ======
  1060. =====
  1061. ====
  1062. //End of inference_config
  1063. //Begin input
  1064. `input`::
  1065. (Required, object)
  1066. The input field names for the model definition.
  1067. +
  1068. .Properties of `input`
  1069. [%collapsible%open]
  1070. ====
  1071. `field_names`:::
  1072. (Required, array)
  1073. An array of input field names for the model.
  1074. ====
  1075. //End input
  1076. // Begin location
  1077. `location`::
  1078. (Optional, object)
  1079. The model definition location. If neither `definition` nor
  1080. `compressed_definition` is specified, `location` is required.
  1081. +
  1082. .Properties of `location`
  1083. [%collapsible%open]
  1084. ====
  1085. `index`:::
  1086. (Required, object)
  1087. Indicates that the model definition is stored in an index. This object must be
  1088. empty as the index for storing model definitions is configured automatically.
  1089. ====
  1090. // End location
  1091. `metadata`::
  1092. (Optional, object)
  1093. An object map that contains metadata about the model.
  1094. `model_size_bytes`::
  1095. (Optional, integer)
  1096. The estimated memory usage in bytes to keep the trained model in memory. This
  1097. property is supported only if `defer_definition_decompression` is `true` or the
  1098. model definition is not supplied.
  1099. `model_type`::
  1100. (Optional, string)
  1101. The created model type. By default the model type is `tree_ensemble`.
  1102. Appropriate types are:
  1103. +
  1104. --
  1105. * `tree_ensemble`: The model definition is an ensemble model of decision trees.
  1106. * `lang_ident`: A special type reserved for language identification models.
  1107. * `pytorch`: The stored definition is a PyTorch (specifically a TorchScript) model. Currently only
  1108. NLP models are supported. For more information, refer to {ml-docs}/ml-nlp.html[{nlp-cap}].
  1109. --
  1110. `tags`::
  1111. (Optional, array)
  1112. An array of tags to organize the model.
  1113. [[ml-put-trained-models-example]]
  1114. == {api-examples-title}
  1115. [[ml-put-trained-models-preprocessor-example]]
  1116. === Preprocessor examples
  1117. The example below shows a `frequency_encoding` preprocessor object:
  1118. [source,js]
  1119. ----------------------------------
  1120. {
  1121. "frequency_encoding":{
  1122. "field":"FlightDelayType",
  1123. "feature_name":"FlightDelayType_frequency",
  1124. "frequency_map":{
  1125. "Carrier Delay":0.6007414737092798,
  1126. "NAS Delay":0.6007414737092798,
  1127. "Weather Delay":0.024573576178086153,
  1128. "Security Delay":0.02476631010889467,
  1129. "No Delay":0.6007414737092798,
  1130. "Late Aircraft Delay":0.6007414737092798
  1131. }
  1132. }
  1133. }
  1134. ----------------------------------
  1135. //NOTCONSOLE
  1136. The next example shows a `one_hot_encoding` preprocessor object:
  1137. [source,js]
  1138. ----------------------------------
  1139. {
  1140. "one_hot_encoding":{
  1141. "field":"FlightDelayType",
  1142. "hot_map":{
  1143. "Carrier Delay":"FlightDelayType_Carrier Delay",
  1144. "NAS Delay":"FlightDelayType_NAS Delay",
  1145. "No Delay":"FlightDelayType_No Delay",
  1146. "Late Aircraft Delay":"FlightDelayType_Late Aircraft Delay"
  1147. }
  1148. }
  1149. }
  1150. ----------------------------------
  1151. //NOTCONSOLE
  1152. This example shows a `target_mean_encoding` preprocessor object:
  1153. [source,js]
  1154. ----------------------------------
  1155. {
  1156. "target_mean_encoding":{
  1157. "field":"FlightDelayType",
  1158. "feature_name":"FlightDelayType_targetmean",
  1159. "target_map":{
  1160. "Carrier Delay":39.97465788139886,
  1161. "NAS Delay":39.97465788139886,
  1162. "Security Delay":203.171206225681,
  1163. "Weather Delay":187.64705882352948,
  1164. "No Delay":39.97465788139886,
  1165. "Late Aircraft Delay":39.97465788139886
  1166. },
  1167. "default_value":158.17995752420433
  1168. }
  1169. }
  1170. ----------------------------------
  1171. //NOTCONSOLE
  1172. [[ml-put-trained-models-model-example]]
  1173. === Model examples
  1174. The first example shows a `trained_model` object:
  1175. [source,js]
  1176. ----------------------------------
  1177. {
  1178. "tree":{
  1179. "feature_names":[
  1180. "DistanceKilometers",
  1181. "FlightTimeMin",
  1182. "FlightDelayType_NAS Delay",
  1183. "Origin_targetmean",
  1184. "DestRegion_targetmean",
  1185. "DestCityName_targetmean",
  1186. "OriginAirportID_targetmean",
  1187. "OriginCityName_frequency",
  1188. "DistanceMiles",
  1189. "FlightDelayType_Late Aircraft Delay"
  1190. ],
  1191. "tree_structure":[
  1192. {
  1193. "decision_type":"lt",
  1194. "threshold":9069.33437193022,
  1195. "split_feature":0,
  1196. "split_gain":4112.094574306927,
  1197. "node_index":0,
  1198. "default_left":true,
  1199. "left_child":1,
  1200. "right_child":2
  1201. },
  1202. ...
  1203. {
  1204. "node_index":9,
  1205. "leaf_value":-27.68987349695448
  1206. },
  1207. ...
  1208. ],
  1209. "target_type":"regression"
  1210. }
  1211. }
  1212. ----------------------------------
  1213. //NOTCONSOLE
  1214. The following example shows an `ensemble` model object:
  1215. [source,js]
  1216. ----------------------------------
  1217. "ensemble":{
  1218. "feature_names":[
  1219. ...
  1220. ],
  1221. "trained_models":[
  1222. {
  1223. "tree":{
  1224. "feature_names":[],
  1225. "tree_structure":[
  1226. {
  1227. "decision_type":"lte",
  1228. "node_index":0,
  1229. "leaf_value":47.64069875778043,
  1230. "default_left":false
  1231. }
  1232. ],
  1233. "target_type":"regression"
  1234. }
  1235. },
  1236. ...
  1237. ],
  1238. "aggregate_output":{
  1239. "weighted_sum":{
  1240. "weights":[
  1241. ...
  1242. ]
  1243. }
  1244. },
  1245. "target_type":"regression"
  1246. }
  1247. ----------------------------------
  1248. //NOTCONSOLE
  1249. [[ml-put-trained-models-aggregated-output-example]]
  1250. === Aggregated output example
  1251. Example of a `logistic_regression` object:
  1252. [source,js]
  1253. ----------------------------------
  1254. "aggregate_output" : {
  1255. "logistic_regression" : {
  1256. "weights" : [2.0, 1.0, 0.5, -1.0, 5.0, 1.0, 1.0]
  1257. }
  1258. }
  1259. ----------------------------------
  1260. //NOTCONSOLE
  1261. Example of a `weighted_sum` object:
  1262. [source,js]
  1263. ----------------------------------
  1264. "aggregate_output" : {
  1265. "weighted_sum" : {
  1266. "weights" : [1.0, -1.0, 0.5, 1.0, 5.0]
  1267. }
  1268. }
  1269. ----------------------------------
  1270. //NOTCONSOLE
  1271. Example of a `weighted_mode` object:
  1272. [source,js]
  1273. ----------------------------------
  1274. "aggregate_output" : {
  1275. "weighted_mode" : {
  1276. "weights" : [1.0, 1.0, 1.0, 1.0, 1.0]
  1277. }
  1278. }
  1279. ----------------------------------
  1280. //NOTCONSOLE
  1281. Example of an `exponent` object:
  1282. [source,js]
  1283. ----------------------------------
  1284. "aggregate_output" : {
  1285. "exponent" : {
  1286. "weights" : [1.0, 1.0, 1.0, 1.0, 1.0]
  1287. }
  1288. }
  1289. ----------------------------------
  1290. //NOTCONSOLE
  1291. [[ml-put-trained-models-json-schema]]
  1292. === Trained models JSON schema
  1293. For the full JSON schema of trained models, see the
  1294. https://github.com/elastic/ml-json-schemas[ml-json-schemas repository].