
[DOCS] Documents the rerank task type of the Inference API (#107404)

István Zoltán Szabó 1 year ago
commit 2e847e8817

+ 108 - 29
docs/reference/inference/post-inference.asciidoc

@@ -69,27 +69,40 @@ seconds.
 ==== {api-request-body-title}
 
 `input`::
-(Required, array of strings)
+(Required, string or array of strings)
 The text on which you want to perform the {infer} task.
 `input` can be a single string or an array.
++
+--
 [NOTE]
 ====
-Inference endpoints for the `completion` task type currently only support a single string as input.
+Inference endpoints for the `completion` task type currently only support a
+single string as input.
 ====
+--
+
+`query`::
+(Required, string)
+Only for `rerank` {infer} endpoints. The search query text.
 
 
 [discrete]
 [[post-inference-api-example]]
 ==== {api-examples-title}
 
-The following example performs sparse embedding on the example sentence.
+
+[discrete]
+[[inference-example-completion]]
+===== Completion example
+
+The following example performs a completion on the example question.
 
 
 [source,console]
 ------------------------------------------------------------
-POST _inference/sparse_embedding/my-elser-model
+POST _inference/completion/openai_chat_completions
 {
-  "input": "The sky above the port was the color of television tuned to a dead channel."
+  "input": "What is Elastic?"
 }
 ------------------------------------------------------------
 // TEST[skip:TBD]
@@ -101,39 +114,90 @@ The API returns the following response:
 [source,console-result]
 ------------------------------------------------------------
 {
-  "sparse_embedding": [
+  "completion": [
     {
-      "port": 2.1259406,
-      "sky": 1.7073475,
-      "color": 1.6922266,
-      "dead": 1.6247464,
-      "television": 1.3525393,
-      "above": 1.2425821,
-      "tuned": 1.1440028,
-      "colors": 1.1218185,
-      "tv": 1.0111054,
-      "ports": 1.0067928,
-      "poem": 1.0042328,
-      "channel": 0.99471164,
-      "tune": 0.96235967,
-      "scene": 0.9020516,
-      (...)
-    },
-    (...)
+      "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management."
+    }
   ]
 }
 ------------------------------------------------------------
 // NOTCONSOLE
 
+[discrete]
+[[inference-example-rerank]]
+===== Rerank example
 
-The next example performs a completion on the example question.
+The following example performs reranking on the example input.
+
+[source,console]
+------------------------------------------------------------
+POST _inference/rerank/cohere_rerank
+{
+  "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], 
+  "query": "star wars main character" 
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+The API returns the following response:
+
+
+[source,console-result]
+------------------------------------------------------------
+{
+  "rerank": [
+    {
+      "index": "2",
+      "relevance_score": "0.011597361",
+      "text": "leia"
+    },
+    {
+      "index": "0",
+      "relevance_score": "0.006338922",
+      "text": "luke"
+    },
+    {
+      "index": "5",
+      "relevance_score": "0.0016166499",
+      "text": "star"
+    },
+    {
+      "index": "4",
+      "relevance_score": "0.0011695103",
+      "text": "r2d2"
+    },
+    {
+      "index": "1",
+      "relevance_score": "5.614787E-4",
+      "text": "like"
+    },
+    {
+      "index": "6",
+      "relevance_score": "3.7850367E-4",
+      "text": "wars"
+    },
+    {
+      "index": "3",
+      "relevance_score": "1.2508839E-5",
+      "text": "chewy"
+    }
+  ]
+}
+------------------------------------------------------------
+
+
+[discrete]
+[[inference-example-sparse]]
+===== Sparse embedding example
+
+The following example performs sparse embedding on the example sentence.
 
 
 [source,console]
 ------------------------------------------------------------
-POST _inference/completion/openai_chat_completions
+POST _inference/sparse_embedding/my-elser-model
 {
-  "input": "What is Elastic?"
+  "input": "The sky above the port was the color of television tuned to a dead channel."
 }
 ------------------------------------------------------------
 // TEST[skip:TBD]
@@ -145,10 +209,25 @@ The API returns the following response:
 [source,console-result]
 ------------------------------------------------------------
 {
-  "completion": [
+  "sparse_embedding": [
     {
-      "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management."
-    }
+      "port": 2.1259406,
+      "sky": 1.7073475,
+      "color": 1.6922266,
+      "dead": 1.6247464,
+      "television": 1.3525393,
+      "above": 1.2425821,
+      "tuned": 1.1440028,
+      "colors": 1.1218185,
+      "tv": 1.0111054,
+      "ports": 1.0067928,
+      "poem": 1.0042328,
+      "channel": 0.99471164,
+      "tune": 0.96235967,
+      "scene": 0.9020516,
+      (...)
+    },
+    (...)
   ]
 }
 ------------------------------------------------------------
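
As the rerank example above suggests, the `index` field in each result refers to the position of the corresponding string in the request's `input` array (so `"index": "2"` points at `"leia"`). A minimal sketch, assuming the example response has been parsed into a Python dictionary named `response`, of pairing the results back with the original inputs:

[source,python]
------------------------------------------------------------
# Minimal sketch: pair each rerank result with its original input string.
# Assumes `response` holds the parsed JSON of the rerank example above
# (only the first two results are repeated here for brevity).
inputs = ["luke", "like", "leia", "chewy", "r2d2", "star", "wars"]

response = {
    "rerank": [
        {"index": "2", "relevance_score": "0.011597361", "text": "leia"},
        {"index": "0", "relevance_score": "0.006338922", "text": "luke"},
    ]
}

# `index` points back at the position of the corresponding string in the
# request's `input` array; `relevance_score` orders the results.
ranked = [
    (inputs[int(hit["index"])], float(hit["relevance_score"]))
    for hit in response["rerank"]
]
print(ranked)  # [('leia', 0.011597361), ('luke', 0.006338922)]
------------------------------------------------------------
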

+ 72 - 25
docs/reference/inference/put-inference.asciidoc

@@ -57,24 +57,27 @@ The unique identifier of the {infer} endpoint.
 `<task_type>`::
 (Required, string)
 The type of the {infer} task that the model will perform. Available task types:
+* `completion`,
+* `rerank`,
 * `sparse_embedding`,
-* `text_embedding`,
-* `completion`
+* `text_embedding`.
 
 
 [discrete]
 [[put-inference-api-request-body]]
-== {api-request-body-title}
+==== {api-request-body-title}
 
 `service`::
 (Required, string)
 The type of service supported for the specified task type.
 Available services:
-* `cohere`: specify the `text_embedding` task type to use the Cohere service.
+* `cohere`: specify the `text_embedding` or the `rerank` task type to use the
+Cohere service.
 * `elser`: specify the `sparse_embedding` task type to use the ELSER service.
 * `hugging_face`: specify the `text_embedding` task type to use the Hugging Face
 service.
-* `openai`: specify the `text_embedding` task type to use the OpenAI service.
+* `openai`: specify the `completion` or `text_embedding` task type to use the
+OpenAI service.
 * `elasticsearch`: specify the `text_embedding` task type to use the E5
 built-in model or text embedding models uploaded by Eland.
 
@@ -100,7 +103,8 @@ the same name and the updated API key.
 
 `embedding_type`::
 (Optional, string)
-Specifies the types of embeddings you want to get back. Defaults to `float`.
+Only for `text_embedding`. Specifies the types of embeddings you want to get
+back. Defaults to `float`.
 Valid values are:
   * `byte`: use it for signed int8 embeddings (this is a synonym of `int8`).
   * `float`: use it for the default float embeddings.
@@ -108,10 +112,13 @@ Valid values are:
 
 `model_id`::
 (Optional, string)
-The name of the model to use for the {infer} task. To review the available
-models, refer to the
-https://docs.cohere.com/reference/embed[Cohere docs]. Defaults to
-`embed-english-v2.0`.
+The name of the model to use for the {infer} task.
+To review the available `rerank` models, refer to the
+https://docs.cohere.com/reference/rerank-1[Cohere docs].
+
+To review the available `text_embedding` models, refer to the
+https://docs.cohere.com/reference/embed[Cohere docs]. The default value for
+`text_embedding` is `embed-english-v2.0`.
 =====
 +
 .`service_settings` for the `elser` service
@@ -210,11 +217,34 @@ allocations. Must be a power of 2. Max allowed value is 32.
 Settings to configure the {infer} task. These settings are specific to the
 `<task_type>` you specified.
 +
+.`task_settings` for the `completion` task type
+[%collapsible%closed]
+=====
+`user`:::
+(Optional, string)
+For `openai` service only. Specifies the user issuing the request, which can be
+used for abuse detection.
+=====
++
+.`task_settings` for the `rerank` task type
+[%collapsible%closed]
+=====
+`return_documents`:::
+(Optional, boolean)
+For `cohere` service only. Specifies whether to return the document text in
+the results.
+
+`top_n`:::
+(Optional, integer)
+The number of most relevant documents to return. Defaults to the number of
+documents.
+=====
++
 .`task_settings` for the `text_embedding` task type
 [%collapsible%closed]
 =====
 `input_type`:::
-(optional, string)
+(Optional, string)
 For `cohere` service only. Specifies the type of input passed to the model.
 Valid values are:
   * `classification`: use it for embeddings passed through a text classifier.
@@ -236,15 +266,8 @@ maximum token length. Defaults to `END`. Valid values are:
 
 `user`:::
 (optional, string)
-For `openai` service only. Specifies the user issuing the request, which can be used for abuse detection.
-=====
-+
-.`task_settings` for the `completion` task type
-[%collapsible%closed]
-=====
-`user`:::
-(optional, string)
-For `openai` service only. Specifies the user issuing the request, which can be used for abuse detection.
+For `openai` service only. Specifies the user issuing the request, which can be
+used for abuse detection.
 =====
 
 
@@ -260,7 +283,7 @@ This section contains example API calls for every service type.
 ===== Cohere service
 
 The following example shows how to create an {infer} endpoint called
-`cohere_embeddings` to perform a `text_embedding` task type.
+`cohere-embeddings` to perform a `text_embedding` task type.
 
 [source,console]
 ------------------------------------------------------------
@@ -277,6 +300,30 @@ PUT _inference/text_embedding/cohere-embeddings
 // TEST[skip:TBD]
 
 
+The following example shows how to create an {infer} endpoint called
+`cohere-rerank` to perform a `rerank` task type.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/rerank/cohere-rerank 
+{
+    "service": "cohere",
+    "service_settings": {
+        "api_key": "<API-KEY>", 
+        "model_id": "rerank-english-v3.0"
+    },
+    "task_settings": {
+        "top_n": 10,
+        "return_documents": true
+    }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+For more examples, also review the 
+https://docs.cohere.com/docs/elasticsearch-and-cohere#rerank-search-results-with-cohere-and-elasticsearch[Cohere documentation].
+
+
 [discrete]
 [[inference-example-e5]]
 ===== E5 via the elasticsearch service
@@ -414,11 +461,11 @@ been
 ===== OpenAI service
 
 The following example shows how to create an {infer} endpoint called
-`openai_embeddings` to perform a `text_embedding` task type.
+`openai-embeddings` to perform a `text_embedding` task type.
 
 [source,console]
 ------------------------------------------------------------
-PUT _inference/text_embedding/openai_embeddings
+PUT _inference/text_embedding/openai-embeddings
 {
     "service": "openai",
     "service_settings": {
@@ -430,11 +477,11 @@ PUT _inference/text_embedding/openai_embeddings
 // TEST[skip:TBD]
 
 The next example shows how to create an {infer} endpoint called
-`openai_completion` to perform a `completion` task type.
+`openai-completion` to perform a `completion` task type.
 
 [source,console]
 ------------------------------------------------------------
-PUT _inference/completion/openai_completion
+PUT _inference/completion/openai-completion
 {
     "service": "openai",
     "service_settings": {