
[DOCS] Adds DeBERTA v2 to the tokenizers list in API docs (#112752) (#114203)

Co-authored-by: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com>
István Zoltán Szabó 1 year ago
parent commit bca80f7797

+ 81 - 0
docs/reference/ingest/processors/inference.asciidoc

@@ -169,6 +169,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
 
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -224,6 +236,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
 
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -304,6 +328,23 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
 
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`span`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
+
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -363,6 +404,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
 
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -424,6 +477,22 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
 
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`span`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
+
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -515,6 +584,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
 
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
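
For illustration only (not part of the committed diff): a minimal sketch of an ingest pipeline that overrides the new `deberta_v2` tokenization options in an inference processor. The pipeline name, model ID, and task type are assumptions; the model is assumed to already be deployed.

[source,console]
----
PUT _ingest/pipeline/deberta-v2-example
{
  "processors": [
    {
      "inference": {
        "model_id": "my-deberta-model", <1>
        "inference_config": {
          "text_classification": {
            "tokenization": {
              "deberta_v2": {
                "truncate": "first" <2>
              }
            }
          }
        }
      }
    }
  ]
}
----
<1> Hypothetical model ID; substitute any deployed DeBERTa v2 or v3 style model.
<2> Per-pipeline override of the truncation behaviour described by the shared `truncate-deberta-v2` tag.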

+ 27 - 0
docs/reference/ml/ml-shared.asciidoc

@@ -988,6 +988,7 @@ values are
 +
 --
 * `bert`: Use for BERT-style models
+* `deberta_v2`: Use for DeBERTa v2 and v3-style models
 * `mpnet`: Use for MPNet-style models
 * `roberta`: Use for RoBERTa-style and BART-style models
 * experimental:[] `xlm_roberta`: Use for XLMRoBERTa-style models
@@ -1037,6 +1038,19 @@ sequence. Therefore, do not use `second` in this case.
 
 end::inference-config-nlp-tokenization-truncate[]
 
+tag::inference-config-nlp-tokenization-truncate-deberta-v2[]
+Indicates how tokens are truncated when they exceed `max_sequence_length`.
+The default value is `first`.
++
+--
+* `balanced`: One or both of the first and second sequences may be truncated so as to balance the tokens included from both sequences.
+* `none`: No truncation occurs; the inference request receives an error.
+* `first`: Only the first sequence is truncated.
+* `second`: Only the second sequence is truncated. If there is just one sequence, that sequence is truncated.
+--
+
+end::inference-config-nlp-tokenization-truncate-deberta-v2[]
+
 tag::inference-config-nlp-tokenization-bert-with-special-tokens[]
 Tokenize with special tokens. The tokens typically included in BERT-style tokenization are:
 +
@@ -1050,10 +1064,23 @@ tag::inference-config-nlp-tokenization-bert-ja-with-special-tokens[]
 Tokenize with special tokens if `true`.
 end::inference-config-nlp-tokenization-bert-ja-with-special-tokens[]
 
+tag::inference-config-nlp-tokenization-deberta-v2[]
+DeBERTa-style tokenization is to be performed with the enclosed settings.
+end::inference-config-nlp-tokenization-deberta-v2[]
+
 tag::inference-config-nlp-tokenization-max-sequence-length[]
 Specifies the maximum number of tokens allowed to be output by the tokenizer.
 end::inference-config-nlp-tokenization-max-sequence-length[]
 
+tag::inference-config-nlp-tokenization-deberta-v2-with-special-tokens[]
+Tokenize with special tokens. The tokens typically included in DeBERTa-style tokenization are:
++
+--
+* `[CLS]`: The first token of the sequence being classified.
+* `[SEP]`: Indicates sequence separation and sequence end.
+--
+end::inference-config-nlp-tokenization-deberta-v2-with-special-tokens[]
+
 tag::inference-config-nlp-tokenization-roberta[]
 RoBERTa-style tokenization is to be performed with the enclosed settings.
 end::inference-config-nlp-tokenization-roberta[]
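
For context (not part of the diff): a sketch of where the `balanced` truncation described above is useful, namely a two-sequence task where both the query text and the document may be shortened. The model ID and input text are hypothetical.

[source,console]
----
POST _ml/trained_models/my-deberta-model/_infer
{
  "docs": [
    {
      "text_field": "A long passage whose tokens, combined with the query, exceed max_sequence_length ..."
    }
  ],
  "inference_config": {
    "text_similarity": {
      "text": "Which passage answers the question?",
      "tokenization": {
        "deberta_v2": {
          "truncate": "balanced" <1>
        }
      }
    }
  }
}
----
<1> Tokens may be dropped from the first and/or second sequence so that both contribute to the truncated input; with `none`, the same request would instead return an error.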

+ 12 - 0
docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc

@@ -137,6 +137,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 (Optional, string)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
 =======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
 `roberta`::::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
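
For illustration (not part of the diff): a sketch of a per-request tokenization override with the infer trained model API, assuming a hypothetical deployed model `my-deberta-model`.

[source,console]
----
POST _ml/trained_models/my-deberta-model/_infer
{
  "docs": [
    {
      "text_field": "Elasticsearch bundles trained model inference with ingest pipelines."
    }
  ],
  "inference_config": {
    "text_classification": {
      "tokenization": {
        "deberta_v2": {
          "truncate": "first" <1>
        }
      }
    }
  }
}
----
<1> Overrides the truncation configured on the model for this single request only.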

+ 31 - 0
docs/reference/ml/trained-models/apis/put-trained-models.asciidoc

@@ -773,6 +773,37 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
 (Optional, boolean)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
 ====
+`deberta_v2`::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+====
+`do_lower_case`:::
+(Optional, boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
++
+--
+Defaults to `false`.
+--
+
+`max_sequence_length`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
+
+`span`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
+
+`truncate`:::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+
+`with_special_tokens`:::
+(Optional, boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2-with-special-tokens]
+====
 `roberta`::
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
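
For illustration (not part of the diff): a sketch of a model configuration using the full set of `deberta_v2` tokenization properties added by this change. The model ID, task type, and field name are assumptions; a real PyTorch model definition would still have to be uploaded separately (for example with Eland).

[source,console]
----
PUT _ml/trained_models/my-deberta-model
{
  "model_type": "pytorch",
  "input": {
    "field_names": ["text_field"]
  },
  "inference_config": {
    "text_classification": {
      "tokenization": {
        "deberta_v2": {
          "do_lower_case": false,
          "max_sequence_length": 512,
          "truncate": "first",
          "with_special_tokens": true <1>
        }
      }
    }
  }
}
----
<1> Adds the `[CLS]` and `[SEP]` tokens described by the `deberta-v2-with-special-tokens` tag.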