2 years ago · 9d01def3dc
--- a/docs/reference/search/search-your-data/semantic-search-elser.asciidoc
+++ b/docs/reference/search/search-your-data/semantic-search-elser.asciidoc
@@ -14,7 +14,7 @@ The instructions in this tutorial shows you how to use ELSER to perform semantic
 
				 search on your data.
			
 
				 
			
 
				 NOTE: Only the first 512 extracted tokens per field are considered during 
			
 
				-semantic search with ELSER v1. Refer to 
			
 
				+semantic search with ELSER. Refer to 
			
 
				 {ml-docs}/ml-nlp-limitations.html#ml-nlp-elser-v1-limit-512[this page] for more 
			
 
				 information.
			
 
				 
			
@@ -44,15 +44,16 @@ you must provide suitably sized nodes yourself.
 
				 
			
 
				 First, the mapping of the destination index - the index that contains the tokens 
			
 
				 that the model created based on your text - must be created.  The destination 
			
 
				-index must have a field with the <<rank-features, `rank_features`>> field type 
			
 
				-to index the ELSER output.
			
 
				+index must have a field with the 
			
 
				+<<sparse-vector, `sparse_vector`>> or <<rank-features, `rank_features`>> field type to index the 
			
 
				+ELSER output.
			
 
				 
			
 
				-NOTE: ELSER output must be ingested into a field with the `rank_features` field 
			
 
				-type. Otherwise, {es} interprets the token-weight pairs as a massive amount of 
			
 
				-fields in a document. If you get an error similar to this 
			
 
				+NOTE: ELSER output must be ingested into a field with the `sparse_vector` or 
			
 
				+`rank_features` field type. Otherwise, {es} interprets the token-weight pairs as 
			
 
				+a massive number of fields in a document. If you get an error similar to this 
			
 
				 `"Limit of total fields [1000] has been exceeded while adding new fields"` then 
			
 
				 the ELSER output field is not mapped properly and it has a field type different 
			
 
				-than `rank_features`.
			
 
				+than `sparse_vector` or `rank_features`.
			
 
				 
			
 
				 [source,console]
			
 
				 ----
			
@@ -61,7 +62,7 @@ PUT my-index
 
				   "mappings": {
			
 
				     "properties": {
			
 
				       "ml.tokens": { <1>
			
 
				-        "type": "rank_features" <2>
			
 
				+        "type": "sparse_vector" <2>
			
 
				       },
			
 
				       "text": { <3>
			
 
				         "type": "text" <4>
			
@@ -72,7 +73,7 @@ PUT my-index
 
				 ----
			
 
				 // TEST[skip:TBD]
			
 
				 <1> The name of the field to contain the generated tokens.
			
 
				-<2> The field to contain the tokens is a `rank_features` field.
			
 
				+<2> The field to contain the tokens is a `sparse_vector` field.
			
 
				 <3> The name of the field from which to create the sparse vector representation. 
			
 
				 In this example, the name of the field is `text`.
			
 
				 <4> The field type is `text` in this example.
			
@@ -90,12 +91,12 @@ that is being ingested in the pipeline.
 
				 
			
 
				 [source,console]
			
 
				 ----
			
 
				-PUT _ingest/pipeline/elser-v1-test
			
 
				+PUT _ingest/pipeline/elser-v2-test
			
 
				 {
			
 
				   "processors": [
			
 
				     {
			
 
				       "inference": {
			
 
				-        "model_id": ".elser_model_1",
			
 
				+        "model_id": ".elser_model_2",
			
 
				         "target_field": "ml",
			
 
				         "field_map": { <1>
			
 
				           "text": "text_field"
			
@@ -155,7 +156,7 @@ POST _reindex?wait_for_completion=false
 
				   },
			
 
				   "dest": {
			
 
				     "index": "my-index",
			
 
				-    "pipeline": "elser-v1-test"
			
 
				+    "pipeline": "elser-v2-test"
			
 
				   }
			
 
				 }
			
 
				 ----
			
@@ -192,7 +193,7 @@ GET my-index/_search
 
				    "query":{
			
 
				       "text_expansion":{
			
 
				          "ml.tokens":{
			
 
				-            "model_id":".elser_model_1",
			
 
				+            "model_id":".elser_model_2",
			
 
				             "model_text":"How to avoid muscle soreness after running?"
			
 
				          }
			
 
				       }
			
@@ -236,7 +237,7 @@ weights.
 
				                "exercises":0.36694175,
			
 
				                (...)
			
 
				             },
			
 
				-            "model_id":".elser_model_1"
			
 
				+            "model_id":".elser_model_2"
			
 
				          }
			
 
				       }
			
 
				    },
			
@@ -276,7 +277,7 @@ GET my-index/_search
 
				           "text_expansion": { 
			
 
				             "ml.tokens": {
			
 
				               "model_text": "How to avoid muscle soreness after running?",
			
 
				-              "model_id": ".elser_model_1",
			
 
				+              "model_id": ".elser_model_2",
			
 
				               "boost": 1 <2>
			
 
				             }
			
 
				           }
			
@@ -342,7 +343,7 @@ PUT my-index
 
				     },
			
 
				     "properties": {
			
 
				       "ml.tokens": {
			
 
				-        "type": "rank_features" 
			
 
				+        "type": "sparse_vector" 
			
 
				       },
			
 
				       "text": { 
			
 
				         "type": "text" 
			
@@ -359,7 +360,7 @@ PUT my-index
 
				 ==== Further reading
			
 
				 
			
 
				 * {ml-docs}/ml-nlp-elser.html[How to download and deploy ELSER]
			
 
				-* {ml-docs}/ml-nlp-limitations.html#ml-nlp-elser-v1-limit-512[ELSER v1 limitation]
			
 
				+* {ml-docs}/ml-nlp-limitations.html#ml-nlp-elser-v1-limit-512[ELSER limitation]
			
 
				 * https://www.elastic.co/blog/may-2023-launch-information-retrieval-elasticsearch-ai-model[Improving information retrieval in the Elastic Stack: Introducing Elastic Learned Sparse Encoder, our new retrieval model]
			
 
				 
			
 
				 [discrete]
			
--- a/docs/reference/tab-widgets/semantic-search/field-mappings.asciidoc
+++ b/docs/reference/tab-widgets/semantic-search/field-mappings.asciidoc
@@ -1,15 +1,15 @@
 
				 // tag::elser[]
			
 
				 
			
 
				 ELSER produces token-weight pairs as output from the input text and the query. 
			
 
				-The {es} <<rank-features,`rank_features`>> field type can store these 
			
 
				+The {es} <<sparse-vector,`sparse_vector`>> field type can store these 
			
 
				 token-weight pairs as numeric feature vectors. The index must have a field with 
			
 
				-the `rank_features` field type to index the tokens that ELSER generates.
			
 
				+the `sparse_vector` field type to index the tokens that ELSER generates.
			
 
				 
			
 
				 To create a mapping for your ELSER index, refer to the 
			
 
				 <<elser-mappings,Create the index mapping section>> of the tutorial. The example 
			
 
				 shows how to create an index mapping for `my-index` that defines the 
			
 
				 `my_embeddings.tokens` field - which will contain the ELSER output - as a 
			
 
				-`rank_features` field.
			
 
				+`sparse_vector` field.
			
 
				 
			
 
				 [source,console]
			
 
				 ----
			
@@ -18,7 +18,7 @@ PUT my-index
 
				   "mappings": {
			
 
				     "properties": {
			
 
				       "my_embeddings.tokens": { <1>
			
 
				-        "type": "rank_features" <2>
			
 
				+        "type": "sparse_vector" <2>
			
 
				       },
			
 
				       "my_text_field": { <3>
			
 
				         "type": "text" <4>
			
@@ -28,7 +28,7 @@ PUT my-index
 
				 }
			
 
				 ----
			
 
				 <1> The name of the field that will contain the tokens generated by ELSER.
			
 
				-<2> The field that contains the tokens must be a `rank_features` field.
			
 
				+<2> The field that contains the tokens must be a `sparse_vector` field.
			
 
				 <3> The name of the field from which to create the sparse vector representation. 
			
 
				 In this example, the name of the field is `my_text_field`.
			
 
				 <4> The field type is `text` in this example.
			
--- a/docs/reference/tab-widgets/semantic-search/generate-embeddings.asciidoc
+++ b/docs/reference/tab-widgets/semantic-search/generate-embeddings.asciidoc
@@ -21,7 +21,7 @@ PUT _ingest/pipeline/my-text-embeddings-pipeline
 
				   "processors": [
			
 
				     {
			
 
				       "inference": {
			
 
				-        "model_id": ".elser_model_1",
			
 
				+        "model_id": ".elser_model_2",
			
 
				         "target_field": "my_embeddings",
			
 
				         "field_map": { <1>
			
 
				           "my_text_field": "text_field"
			
--- a/docs/reference/tab-widgets/semantic-search/hybrid-search.asciidoc
+++ b/docs/reference/tab-widgets/semantic-search/hybrid-search.asciidoc
@@ -22,7 +22,7 @@ GET my-index/_search
 
				       "query": {
			
 
				         "text_expansion": {
			
 
				           "my_embeddings.tokens": {
			
 
				-            "model_id": ".elser_model_1",
			
 
				+            "model_id": ".elser_model_2",
			
 
				             "model_text": "the query string"
			
 
				           }
			
 
				         }
			
--- a/docs/reference/tab-widgets/semantic-search/search.asciidoc
+++ b/docs/reference/tab-widgets/semantic-search/search.asciidoc
@@ -12,7 +12,7 @@ GET my-index/_search
 
				    "query":{
			
 
				       "text_expansion":{
			
 
				          "my_embeddings.tokens":{ <1>
			
 
				-            "model_id":".elser_model_1",
			
 
				+            "model_id":".elser_model_2",
			
 
				             "model_text":"the query string"
			
 
				          }
			
 
				       }
			
@@ -20,7 +20,7 @@ GET my-index/_search
 
				 }
			
 
				 ----
			
 
				 // TEST[skip:TBD]
			
 
				-<1> The field of type `rank_features`.
			
 
				+<1> The field of type `sparse_vector`.
			
 
				 
			
 
				 // end::elser[]