Browse Source

Enable `exclude_source_vectors` by default for new indices (#131907)

This commit sets `index.mapping.exclude_source_vectors` to `true` by default
for newly created indices. When enabled, vector fields (`dense_vector`,
`sparse_vector`, `rank_vector`) are excluded from `_source` on disk and are
not returned in API responses unless explicitly requested.

The change improves indexing performance, reduces storage size, and avoids
unnecessary payload bloat in responses. Vector values continue to be rehydrated
transparently for partial updates, reindex, and recovery.

Existing indices are not affected and continue to store vectors in `_source`
by default.
Jim Ferenczi 1 month ago
parent
commit
8036a0849f
36 changed files with 503 additions and 235 deletions
  1. 26 0
      docs/changelog/131907.yaml
  2. 83 8
      docs/reference/elasticsearch/mapping-reference/dense-vector.md
  3. 72 2
      docs/reference/elasticsearch/mapping-reference/rank-vectors.md
  4. 76 6
      docs/reference/elasticsearch/mapping-reference/sparse-vector.md
  5. 2 5
      modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java
  6. 1 3
      modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java
  7. 4 0
      rest-api-spec/build.gradle
  8. 14 0
      rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml
  9. 1 3
      rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml
  10. 1 3
      rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml
  11. 37 15
      rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml
  12. 3 3
      server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java
  13. 1 5
      server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
  14. 5 5
      server/src/main/java/org/elasticsearch/index/IndexSettings.java
  15. 1 0
      server/src/main/java/org/elasticsearch/index/IndexVersions.java
  16. 5 3
      server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java
  17. 2 2
      server/src/main/java/org/elasticsearch/index/get/ShardGetService.java
  18. 1 1
      server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
  19. 12 12
      server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
  20. 16 69
      server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java
  21. 43 12
      server/src/main/java/org/elasticsearch/index/translog/Translog.java
  22. 2 6
      server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java
  23. 1 16
      server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
  24. 4 11
      server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java
  25. 1 3
      server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java
  26. 1 8
      server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java
  27. 11 4
      server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java
  28. 2 0
      x-pack/plugin/build.gradle
  29. 1 1
      x-pack/plugin/ml/qa/ml-with-security/build.gradle
  30. 30 1
      x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java
  31. 3 0
      x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java
  32. 9 3
      x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml
  33. 2 2
      x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java
  34. 10 10
      x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java
  35. 19 10
      x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml
  36. 1 3
      x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml

+ 26 - 0
docs/changelog/131907.yaml

@@ -0,0 +1,26 @@
+pr: 131907
+summary: Enable `exclude_source_vectors` by default for new indices
+area: Vector Search
+type: breaking
+issues: []
+breaking:
+  title: Enable `exclude_source_vectors` by default for new indices
+  area: Search
+  details: |-
+    The `exclude_source_vectors` setting is now enabled by default for newly created indices.
+    This means that vector fields (e.g., `dense_vector`) are no longer stored in the `_source` field
+    by default, although they remain fully accessible through search and retrieval operations.
+
+    Instead of being persisted in `_source`, vectors are now rehydrated on demand from the underlying
+    index structures when needed. This reduces index size and improves performance for typical vector
+    search workloads where the original vector values do not need to be part of the `_source`.
+
+    If your use case requires vector fields to be stored in `_source`, you can disable this behavior by
+    setting `exclude_source_vectors: false` at index creation time.
+  impact: |-
+    Vector fields will no longer be stored in `_source` by default for new indices. Applications or tools
+    that expect to see vector fields in `_source` (for raw document inspection)
+    may need to be updated or configured to explicitly retain vectors using `exclude_source_vectors: false`.
+
+    Retrieval of vector fields via search or the `_source` API remains fully supported.
+  notable: true

+ 83 - 8
docs/reference/elasticsearch/mapping-reference/dense-vector.md

@@ -102,6 +102,81 @@ PUT my-index-2
 
 {{es}} uses the [HNSW algorithm](https://arxiv.org/abs/1603.09320) to support efficient kNN search. Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved speed.
 
+## Accessing `dense_vector` fields in search responses
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, `dense_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+
+To retrieve vector values explicitly, you can use:
+
+* The `fields` option to request specific vector fields directly:
+
+```console
+POST my-index-2/_search
+{
+  "fields": ["my_vector"]
+}
+```
+
+* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
+
+```console
+POST my-index-2/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  }
+}
+```
+
+### Storage behavior and `_source`
+
+By default, `dense_vector` fields are **not stored in `_source`** on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
+This setting is enabled by default for newly created indices and can only be set at index creation time.
+
+When enabled:
+
+* `dense_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual.
+* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
+
+This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
+
+### Rehydration and precision
+
+When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality.
+
+### Storing original vectors in `_source`
+
+If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
+
+```console
+PUT my-index-include-vectors
+{
+  "settings": {
+    "index.mapping.exclude_source_vectors": false
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "dense_vector"
+      }
+    }
+  }
+}
+```
+
+When this setting is disabled:
+
+* `dense_vector` fields are stored as part of the `_source`, exactly as indexed.
+* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
+* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
+
+This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
+
 ## Automatically quantize vectors for kNN search [dense-vector-quantization]
 
 The `dense_vector` type supports quantization to reduce the memory footprint required when [searching](docs-content://solutions/search/vector/knn.md#approximate-knn) `float` vectors. The three following quantization strategies are supported:
@@ -266,16 +341,16 @@ $$$dense-vector-index-options$$$
 `type`
 :   (Required, string) The type of kNN algorithm to use. Can be either any of:
     * `hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) for scalable approximate kNN search. This supports all `element_type` values.
-    * `int8_hnsw` - The default index type for some float vectors: 
-        
-      * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions. 
+    * `int8_hnsw` - The default index type for some float vectors:
+
+      * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions.
       * {applies_to}`stack: ga 9.0` Default for all float vectors.
-      
+
       This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatic scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 4x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
     * `int4_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatic scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 8x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
     * `bbq_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatic binary quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 32x at the cost of accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
-        
-      {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions. 
+
+      {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions.
     * `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values.
     * `int8_flat` - This utilizes a brute-force search algorithm in addition to automatic scalar quantization. Only supports `element_type` of `float`.
     * `int4_flat` - This utilizes a brute-force search algorithm in addition to automatic half-byte scalar quantization. Only supports `element_type` of `float`.
@@ -295,8 +370,8 @@ $$$dense-vector-index-options$$$
 :   (Optional, object) An optional section that configures automatic vector rescoring on knn queries for the given field. Only applicable to quantized index types.
 :::::{dropdown} Properties of rescore_vector
 `oversample`
-:   (required, float) The amount to oversample the search results by. This value should be one of the following: 
-    * Greater than `1.0` and less than `10.0` 
+:   (required, float) The amount to oversample the search results by. This value should be one of the following:
+    * Greater than `1.0` and less than `10.0`
     * Exactly `0` to indicate no oversampling and rescoring should occur {applies_to}`stack: ga 9.1`
     :   The higher the value, the more vectors will be gathered and rescored with the raw values per shard.
     :   In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.

+ 72 - 2
docs/reference/elasticsearch/mapping-reference/rank-vectors.md

@@ -108,11 +108,81 @@ $$$rank-vectors-element-type$$$
 `dims`
 :   (Optional, integer) Number of vector dimensions. Can’t exceed `4096`. If `dims` is not specified, it will be set to the length of the first vector added to the field.
 
+## Accessing `rank_vectors` fields in search responses
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, `rank_vectors` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+
+To retrieve vector values explicitly, you can use:
+
+* The `fields` option to request specific vector fields directly:
+
+```console
+POST my-index-2/_search
+{
+  "fields": ["my_vector"]
+}
+```
+
+* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
+
+```console
+POST my-index-2/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  }
+}
+```
+
+### Storage behavior and `_source`
+
+By default, `rank_vectors` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
+This setting is enabled by default for newly created indices and can only be set at index creation time.
+
+When enabled:
+
+* `rank_vectors` fields are removed from `_source` and the rest of the `_source` is stored as usual.
+* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
+
+This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
+
+### Rehydration and precision
+
+When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality.
+
+### Storing original vectors in `_source`
+
+If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
+
+```console
+PUT my-index-include-vectors
+{
+  "settings": {
+    "index.mapping.exclude_source_vectors": false
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "rank_vectors",
+        "dims": 128
+      }
+    }
+  }
+}
+```
 
-## Synthetic `_source` [rank-vectors-synthetic-source]
+When this setting is disabled:
 
-`rank_vectors` fields support [synthetic `_source`](mapping-source-field.md#synthetic-source) .
+* `rank_vectors` fields are stored as part of the `_source`, exactly as indexed.
+* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
+* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
 
+This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
 
 ## Scoring with rank vectors [rank-vectors-scoring]
 

+ 76 - 6
docs/reference/elasticsearch/mapping-reference/sparse-vector.md

@@ -57,12 +57,6 @@ See [semantic search with ELSER](docs-content://solutions/search/semantic-search
 
 The following parameters are accepted by `sparse_vector` fields:
 
-[store](/reference/elasticsearch/mapping-reference/mapping-store.md)
-:   Indicates whether the field value should be stored and retrievable independently of the [_source](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. Accepted values: true or false (default). The field’s data is stored using term vectors, a disk-efficient structure compared to the original JSON input. The input map can be retrieved during a search request via the [`fields` parameter](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param). To benefit from reduced disk usage, you must either:
-
-    * Exclude the field from [_source](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering).
-    * Use [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source).
-
 index_options {applies_to}`stack: ga 9.1`
 :   (Optional, object) You can set index options for your  `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. If pruning options are not set in your [`sparse_vector` query](/reference/query-languages/query-dsl/query-dsl-sparse-vector-query.md), Elasticsearch will use the default options configured for the field, if any.
 
@@ -96,6 +90,82 @@ This ensures that:
 * The tokens that are kept are frequent enough and have significant scoring.
 * Very infrequent tokens that may not have as high of a score are removed.
 
+## Accessing `sparse_vector` fields in search responses
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, `sparse_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+
+To retrieve vector values explicitly, you can use:
+
+* The `fields` option to request specific vector fields directly:
+
+```console
+POST my-index-2/_search
+{
+  "fields": ["my_vector"]
+}
+```
+
+* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
+
+```console
+POST my-index-2/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  }
+}
+```
+
+### Storage behavior and `_source`
+
+By default, `sparse_vector` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
+This setting is enabled by default for newly created indices and can only be set at index creation time.
+
+When enabled:
+
+* `sparse_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual.
+* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
+
+This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
+
+### Rehydration and precision
+
+When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format.
+Internally, vectors are stored as floats with 9 significant bits for the precision, so the rehydrated values will have reduced precision.
+This lossy representation is intended to save space while preserving search quality.
+
+### Storing original vectors in `_source`
+
+If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
+
+```console
+PUT my-index-include-vectors
+{
+  "settings": {
+    "index.mapping.exclude_source_vectors": false
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "sparse_vector"
+      }
+    }
+  }
+}
+```
+
+When this setting is disabled:
+
+* `sparse_vector` fields are stored as part of the `_source`, exactly as indexed.
+* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
+* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
+
+This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
 
 ## Multi-value sparse vectors [index-multi-value-sparse-vectors]
 

+ 2 - 5
modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java

@@ -23,7 +23,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@@ -182,14 +181,13 @@ public class ReindexBasicTests extends ReindexTestCase {
     }
 
     public void testReindexIncludeVectors() throws Exception {
-        assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS);
         var resp1 = prepareCreate("test").setSettings(
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build()
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build()
         ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get();
         assertAcked(resp1);
 
         var resp2 = prepareCreate("test_reindex").setSettings(
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build()
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build()
         ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get();
         assertAcked(resp2);
 
@@ -237,5 +235,4 @@ public class ReindexBasicTests extends ReindexTestCase {
             searchResponse.decRef();
         }
     }
-
 }

+ 1 - 3
modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java

@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@@ -158,9 +157,8 @@ public class UpdateByQueryBasicTests extends ReindexTestCase {
     }
 
     public void testUpdateByQueryIncludeVectors() throws Exception {
-        assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS);
         var resp1 = prepareCreate("test").setSettings(
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build()
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build()
         ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get();
         assertAcked(resp1);
 

+ 4 - 0
rest-api-spec/build.gradle

@@ -90,6 +90,10 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
   task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
   task.skipTest("cluster.info/30_info_thread_pool/Cluster HTTP Info", "The search_throttled thread pool has been removed")
   task.skipTest("synonyms/80_synonyms_from_index/Fail loading synonyms from index if synonyms_set doesn't exist", "Synonyms do no longer fail if the synonyms_set doesn't exist")
+  task.skipTest("get/100_synthetic_source/indexed dense vectors", "Vectors are not returned by default")
+  task.skipTest("get/100_synthetic_source/non-indexed dense vectors", "Vectors are not returned by default")
+  task.skipTest("search.vectors/90_sparse_vector/stored sparse_vector synthetic source", "Vectors are not returned by default")
+  task.skipTest("search.vectors/90_sparse_vector/sparse_vector synthetic source", "Vectors are not returned by default")
   task.skipTest("update/100_synthetic_source/keyword", "synthetic recovery source means _recovery_source field will not be present")
   task.skipTest("update/100_synthetic_source/stored text", "synthetic recovery source means _recovery_source field will not be present")
 })

+ 14 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml

@@ -427,6 +427,11 @@ indexed dense vectors:
   - requires:
       cluster_features: ["gte_v8.5.0"]
       reason: introduced in 8.5.0
+      test_runner_features: [ capabilities ]
+      capabilities:
+        - method: GET
+          path: /_search
+          capabilities: [ exclude_source_vectors_setting ]
 
   - do:
       indices.create:
@@ -457,6 +462,8 @@ indexed dense vectors:
       get:
         index: test
         id:    1
+        _source_exclude_vectors: false
+
   - match: {_index: "test"}
   - match: {_id: "1"}
   - match: {_version: 1}
@@ -472,6 +479,11 @@ non-indexed dense vectors:
   - requires:
       cluster_features: ["gte_v8.5.0"]
       reason: introduced in 8.5.0
+      test_runner_features: [ capabilities ]
+      capabilities:
+        - method: GET
+          path: /_search
+          capabilities: [ exclude_source_vectors_setting ]
 
   - do:
       indices.create:
@@ -501,6 +513,8 @@ non-indexed dense vectors:
       get:
         index: test
         id:    1
+        _source_exclude_vectors: false
+
   - match: {_index: "test"}
   - match: {_id: "1"}
   - match: {_version: 1}

+ 1 - 3
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml

@@ -5,7 +5,7 @@ setup:
       capabilities:
         - method: GET
           path: /_search
-          capabilities: [ synthetic_vectors_setting ]
+          capabilities: [ exclude_source_vectors_setting ]
   - skip:
       features: "headers"
 
@@ -13,8 +13,6 @@ setup:
       indices.create:
         index: test
         body:
-          settings:
-            index.mapping.synthetic_vectors: true
           mappings:
             properties:
               name:

+ 1 - 3
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml

@@ -5,7 +5,7 @@ setup:
       capabilities:
         - method: GET
           path: /_search
-          capabilities: [ synthetic_vectors_setting ]
+          capabilities: [ exclude_source_vectors_setting ]
   - skip:
       features: "headers"
 
@@ -13,8 +13,6 @@ setup:
       indices.create:
         index: test
         body:
-          settings:
-            index.mapping.synthetic_vectors: true
           mappings:
             properties:
               name:

+ 37 - 15
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml

@@ -387,8 +387,13 @@
 "sparse_vector synthetic source":
 
   - requires:
-      cluster_features: [ "mapper.source.mode_from_index_setting" ]
+      cluster_features: [ "mapper.source.mode_from_index_setting"]
       reason: "Source mode configured through index setting"
+      test_runner_features: [ capabilities, "close_to" ]
+      capabilities:
+        - method: GET
+          path: /_search
+          capabilities: [ exclude_vectors_param, exclude_source_vectors_setting ]
 
   - do:
       indices.create:
@@ -402,6 +407,18 @@
               ml.tokens:
                 type: sparse_vector
 
+  - do:
+      indices.create:
+        index: test_include_vectors
+        body:
+          settings:
+            index:
+              mapping.exclude_source_vectors: false
+          mappings:
+            properties:
+              ml.tokens:
+                type: sparse_vector
+
   - match: { acknowledged: true }
 
   - do:
@@ -421,7 +438,7 @@
 
   - do:
       index:
-        index: test
+        index: test_include_vectors
         id: "2"
         body:
           ml:
@@ -431,7 +448,7 @@
 
   - do:
       index:
-        index: test
+        index: test_include_vectors
         id: "3"
         body:
           ml:
@@ -446,20 +463,17 @@
       get:
         index: test
         id: "1"
+        _source_exclude_vectors: false
 
-  - match:
-      _source:
-        ml:
-          tokens:
-            running: 2.4097164
-            good: 2.170997
-            run: 2.052153
-            race: 1.4575411
-            for: 1.1908325
+  - close_to: { _source.ml.tokens.running: { value: 2.4097164, error: 0.01 } }
+  - close_to: { _source.ml.tokens.good: { value: 2.170997, error: 0.01 } }
+  - close_to: { _source.ml.tokens.run: { value: 2.052153, error: 0.01 } }
+  - close_to: { _source.ml.tokens.race: { value: 1.4575411, error: 0.01 } }
+  - close_to: { _source.ml.tokens.for: { value: 1.1908325, error: 0.01 } }
 
   - do:
       get:
-        index: test
+        index: test_include_vectors
         id: "2"
 
   - match:
@@ -467,7 +481,7 @@
 
   - do:
       get:
-        index: test
+        index: test_include_vectors
         id: "3"
 
   - match:
@@ -527,8 +541,14 @@
 "stored sparse_vector synthetic source":
 
   - requires:
-      cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ]
       reason: "sparse_vector supports store parameter"
+      cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ]
+      test_runner_features: [ capabilities, "close_to" ]
+      capabilities:
+        - method: GET
+          path: /_search
+          capabilities: [ exclude_vectors_param ]
+
 
   - do:
       indices.create:
@@ -567,6 +587,8 @@
       search:
         index: test
         body:
+          _source:
+            exclude_vectors: false
           fields: [ "ml.tokens" ]
 
   - match:

+ 3 - 3
server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java

@@ -88,9 +88,9 @@ public class ExistsIT extends ESIntegTestCase {
             // object fields
             singletonMap("bar", barObject),
             singletonMap("bar", singletonMap("baz", 42)),
-            // sparse_vector field empty
-            singletonMap("vec", emptyMap()),
-            // sparse_vector field non-empty
+            // sparse_vector field
+            singletonMap("vec", singletonMap("6", 100)),
+            // sparse_vector field
             singletonMap("vec", singletonMap("1", 100)),
             // empty doc
             emptyMap() };

+ 1 - 5
server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

@@ -49,8 +49,6 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
-
 /**
  * Encapsulates all valid index level settings.
  * @see Property#IndexScope
@@ -243,9 +241,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
         if (IndexSettings.DOC_VALUES_SKIPPER) {
             settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER);
         }
-        if (SYNTHETIC_VECTORS) {
-            settings.add(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING);
-        }
+        settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING);
         BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings);
     };
 

+ 5 - 5
server/src/main/java/org/elasticsearch/index/IndexSettings.java

@@ -848,12 +848,12 @@ public final class IndexSettings {
         Property.Final
     );
 
-    public static final boolean SYNTHETIC_VECTORS = new FeatureFlag("mapping_synthetic_vectors").isEnabled();
-    public static final Setting<Boolean> INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING = Setting.boolSetting(
-        "index.mapping.synthetic_vectors",
-        false,
+    public static final Setting<Boolean> INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING = Setting.boolSetting(
+        "index.mapping.exclude_source_vectors",
+        settings -> String.valueOf(SETTING_INDEX_VERSION_CREATED.get(settings).onOrAfter(IndexVersions.EXCLUDE_SOURCE_VECTORS_DEFAULT)),
         Property.IndexScope,
-        Property.Final
+        Property.Final,
+        Property.ServerlessPublic
     );
 
     private final Index index;

+ 1 - 0
server/src/main/java/org/elasticsearch/index/IndexVersions.java

@@ -181,6 +181,7 @@ public class IndexVersions {
     public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
     public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
     public static final IndexVersion IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF = def(9_034_0_00, Version.LUCENE_10_2_2);
+    public static final IndexVersion EXCLUDE_SOURCE_VECTORS_DEFAULT = def(9_035_0_00, Version.LUCENE_10_2_2);
 
     /*
      * STOP! READ THIS FIRST! No, really,

+ 5 - 3
server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java

@@ -41,8 +41,10 @@ public abstract class TranslogOperationAsserter {
                 if (engineConfig.getIndexSettings().isRecoverySourceSyntheticEnabled()
                     || engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false
                     || engineConfig.getMapperService().mappingLookup().syntheticVectorFields().isEmpty() == false) {
-                    return super.assertSameIndexOperation(synthesizeSource(engineConfig, o1), o2)
-                        || super.assertSameIndexOperation(o1, synthesizeSource(engineConfig, o2));
+                    // For synthetic source and synthetic fields, check that the resulting source maps are equivalent
+                    // rather than byte-identical, since ordering might not be preserved.
+                    return Translog.Index.equalsWithoutAutoGeneratedTimestamp(synthesizeSource(engineConfig, o1), o2, false)
+                        || Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, synthesizeSource(engineConfig, o2), false);
                 }
                 return false;
             }
@@ -99,6 +101,6 @@ public abstract class TranslogOperationAsserter {
     }
 
     public boolean assertSameIndexOperation(Translog.Index o1, Translog.Index o2) throws IOException {
-        return Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, o2);
+        return Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, o2, true);
     }
 }

+ 2 - 2
server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

@@ -60,7 +60,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
-import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING;
+import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING;
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO;
 
@@ -418,7 +418,7 @@ public final class ShardGetService extends AbstractIndexShardComponent {
      */
     public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) {
         if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) {
-            return INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(indexSettings.getSettings());
+            return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings());
         }
         return fetchSourceContext.excludeVectors();
     }

+ 1 - 1
server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

@@ -794,7 +794,7 @@ public final class DocumentParser {
             DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder(
                 fieldName,
                 context.indexSettings().getIndexVersionCreated(),
-                IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings())
+                IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings())
             );
             builder.dimensions(mappers.size());
             DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext);

+ 12 - 12
server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

@@ -117,7 +117,7 @@ import java.util.stream.Stream;
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED;
 import static org.elasticsearch.common.Strings.format;
 import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
-import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING;
+import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING;
 import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER;
 import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER;
 
@@ -255,9 +255,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();
 
         final IndexVersion indexVersionCreated;
-        final boolean isSyntheticVector;
+        final boolean isExcludeSourceVectors;
 
-        public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) {
+        public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) {
             super(name);
             this.indexVersionCreated = indexVersionCreated;
             // This is defined as updatable because it can be updated once, from [null] to a valid dim size,
@@ -289,7 +289,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
                         }
                     }
                 });
-            this.isSyntheticVector = isSyntheticVector;
+            this.isExcludeSourceVectors = isExcludeSourceVectors;
             final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION);
             final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW);
             final boolean defaultBBQ8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW);
@@ -431,7 +431,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
             // Validate again here because the dimensions or element type could have been set programmatically,
             // which affects index option validity
             validate();
-            boolean isSyntheticVectorFinal = (context.isSourceSynthetic() == false) && indexed.getValue() && isSyntheticVector;
+            boolean isExcludeSourceVectorsFinal = context.isSourceSynthetic() == false && indexed.getValue() && isExcludeSourceVectors;
             return new DenseVectorFieldMapper(
                 leafName(),
                 new DenseVectorFieldType(
@@ -448,7 +448,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
                 builderParams(this, context),
                 indexOptions.getValue(),
                 indexVersionCreated,
-                isSyntheticVectorFinal
+                isExcludeSourceVectorsFinal
             );
         }
     }
@@ -2391,7 +2391,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
         (n, c) -> new Builder(
             n,
             c.getIndexSettings().getIndexVersionCreated(),
-            INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
+            INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
         ),
         notInMultiFields(CONTENT_TYPE)
     );
@@ -2850,7 +2850,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
 
     private final DenseVectorIndexOptions indexOptions;
     private final IndexVersion indexCreatedVersion;
-    private final boolean isSyntheticVector;
+    private final boolean isExcludeSourceVectors;
 
     private DenseVectorFieldMapper(
         String simpleName,
@@ -2858,12 +2858,12 @@ public class DenseVectorFieldMapper extends FieldMapper {
         BuilderParams params,
         DenseVectorIndexOptions indexOptions,
         IndexVersion indexCreatedVersion,
-        boolean isSyntheticVector
+        boolean isExcludeSourceVectorsFinal
     ) {
         super(simpleName, mappedFieldType, params);
         this.indexOptions = indexOptions;
         this.indexCreatedVersion = indexCreatedVersion;
-        this.isSyntheticVector = isSyntheticVector;
+        this.isExcludeSourceVectors = isExcludeSourceVectorsFinal;
     }
 
     @Override
@@ -2985,7 +2985,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
 
     @Override
     public FieldMapper.Builder getMergeBuilder() {
-        return new Builder(leafName(), indexCreatedVersion, isSyntheticVector).init(this);
+        return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors).init(this);
     }
 
     private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) {
@@ -3041,7 +3041,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
 
     @Override
     public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() {
-        if (isSyntheticVector) {
+        if (isExcludeSourceVectors) {
             var syntheticField = new IndexedSyntheticFieldLoader(indexCreatedVersion, fieldType().similarity);
             return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyVectorAsList);
         }

+ 16 - 69
server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

@@ -12,11 +12,8 @@ package org.elasticsearch.index.mapper.vectors;
 import org.apache.lucene.document.FeatureField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.TermVectors;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
@@ -42,8 +39,6 @@ import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.inference.WeightedToken;
 import org.elasticsearch.inference.WeightedTokensUtils;
-import org.elasticsearch.search.fetch.StoredFieldsSpec;
-import org.elasticsearch.search.lookup.Source;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
 import org.elasticsearch.xcontent.DeprecationHandler;
 import org.elasticsearch.xcontent.NamedXContentRegistry;
@@ -62,7 +57,7 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.stream.Stream;
 
-import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING;
+import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING;
 import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
 import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
@@ -94,8 +89,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
     public static class Builder extends FieldMapper.Builder {
         private final IndexVersion indexVersionCreated;
-
-        private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
+        private final Parameter<Boolean> stored;
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();
         private final Parameter<SparseVectorIndexOptions> indexOptions = new Parameter<>(
             SPARSE_VECTOR_INDEX_OPTIONS,
@@ -107,12 +101,13 @@ public class SparseVectorFieldMapper extends FieldMapper {
             Objects::toString
         ).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck);
 
-        private boolean isSyntheticVector;
+        private final boolean isExcludeSourceVectors;
 
-        public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) {
+        public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) {
             super(name);
+            this.stored = Parameter.boolParam("store", false, m -> toType(m).fieldType().isStored(), () -> isExcludeSourceVectors);
             this.indexVersionCreated = indexVersionCreated;
-            this.isSyntheticVector = isSyntheticVector;
+            this.isExcludeSourceVectors = isExcludeSourceVectors;
         }
 
         public Builder setStored(boolean value) {
@@ -132,19 +127,18 @@ public class SparseVectorFieldMapper extends FieldMapper {
                 builderIndexOptions = SparseVectorIndexOptions.getDefaultIndexOptions(indexVersionCreated);
             }
 
-            final boolean syntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector;
-            final boolean storedFinal = stored.getValue() || syntheticVectorFinal;
+            final boolean isExcludeSourceVectorsFinal = isExcludeSourceVectors && context.isSourceSynthetic() == false && stored.get();
             return new SparseVectorFieldMapper(
                 leafName(),
                 new SparseVectorFieldType(
                     indexVersionCreated,
                     context.buildFullName(leafName()),
-                    storedFinal,
+                    stored.get(),
                     meta.getValue(),
                     builderIndexOptions
                 ),
                 builderParams(this, context),
-                syntheticVectorFinal
+                isExcludeSourceVectorsFinal
             );
         }
 
@@ -206,7 +200,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
         return new Builder(
             n,
             c.indexVersionCreated(),
-            INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
+            INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
         );
     }, notInMultiFields(CONTENT_TYPE));
 
@@ -251,9 +245,6 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
         @Override
         public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
-            if (isStored()) {
-                return new SparseVectorValueFetcher(name());
-            }
             return SourceValueFetcher.identity(name(), context, format);
         }
 
@@ -313,16 +304,17 @@ public class SparseVectorFieldMapper extends FieldMapper {
         }
     }
 
-    private final boolean isSyntheticVector;
+    private final boolean isExcludeSourceVectors;
 
     private SparseVectorFieldMapper(
         String simpleName,
         MappedFieldType mappedFieldType,
         BuilderParams builderParams,
-        boolean isSyntheticVector
+        boolean isExcludeSourceVectors
     ) {
         super(simpleName, mappedFieldType, builderParams);
-        this.isSyntheticVector = isSyntheticVector;
+        assert isExcludeSourceVectors == false || fieldType().isStored();
+        this.isExcludeSourceVectors = isExcludeSourceVectors;
     }
 
     @Override
@@ -335,7 +327,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
     @Override
     public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() {
-        if (isSyntheticVector) {
+        if (isExcludeSourceVectors) {
             var syntheticField = new SparseVectorSyntheticFieldLoader(fullPath(), leafName());
             return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyAsMap);
         }
@@ -349,7 +341,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
     @Override
     public FieldMapper.Builder getMergeBuilder() {
-        return new Builder(leafName(), this.fieldType().indexVersionCreated, this.isSyntheticVector).init(this);
+        return new Builder(leafName(), this.fieldType().indexVersionCreated, this.isExcludeSourceVectors).init(this);
     }
 
     @Override
@@ -433,51 +425,6 @@ public class SparseVectorFieldMapper extends FieldMapper {
             || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0));
     }
 
-    private static class SparseVectorValueFetcher implements ValueFetcher {
-        private final String fieldName;
-        private TermVectors termVectors;
-
-        private SparseVectorValueFetcher(String fieldName) {
-            this.fieldName = fieldName;
-        }
-
-        @Override
-        public void setNextReader(LeafReaderContext context) {
-            try {
-                termVectors = context.reader().termVectors();
-            } catch (IOException exc) {
-                throw new UncheckedIOException(exc);
-            }
-        }
-
-        @Override
-        public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValues) throws IOException {
-            if (termVectors == null) {
-                return List.of();
-            }
-            var terms = termVectors.get(doc, fieldName);
-            if (terms == null) {
-                return List.of();
-            }
-
-            var termsEnum = terms.iterator();
-            PostingsEnum postingsScratch = null;
-            Map<String, Float> result = new LinkedHashMap<>();
-            while (termsEnum.next() != null) {
-                postingsScratch = termsEnum.postings(postingsScratch);
-                postingsScratch.nextDoc();
-                result.put(termsEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(postingsScratch.freq()));
-                assert postingsScratch.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
-            }
-            return List.of(result);
-        }
-
-        @Override
-        public StoredFieldsSpec storedFieldsSpec() {
-            return StoredFieldsSpec.NO_REQUIREMENTS;
-        }
-    }
-
     private static class SparseVectorSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader {
         private final String fullPath;
         private final String leafName;

+ 43 - 12
server/src/main/java/org/elasticsearch/index/translog/Translog.java

@@ -22,6 +22,7 @@ import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.lucene.uid.Versions;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.IOUtils;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.Releasable;
@@ -35,6 +36,8 @@ import org.elasticsearch.index.seqno.SequenceNumbers;
 import org.elasticsearch.index.shard.AbstractIndexShardComponent;
 import org.elasticsearch.index.shard.IndexShardComponent;
 import org.elasticsearch.index.shard.ShardId;
+import org.elasticsearch.search.lookup.Source;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
 
 import java.io.Closeable;
 import java.io.EOFException;
@@ -1226,9 +1229,9 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
         @Override
         public long estimateSize() {
             return (2 * id.length()) + source.length() + (routing != null ? 2 * routing.length() : 0) + (4 * Long.BYTES); // timestamp,
-                                                                                                                          // seq_no,
-                                                                                                                          // primary_term,
-                                                                                                                          // and version
+            // seq_no, primary_term, and version
         }
 
         public String id() {
@@ -1275,7 +1278,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
             }
 
             Index other = (Index) o;
-            return autoGeneratedIdTimestamp == other.autoGeneratedIdTimestamp && equalsWithoutAutoGeneratedTimestamp(this, other);
+            return autoGeneratedIdTimestamp == other.autoGeneratedIdTimestamp && equalsWithoutAutoGeneratedTimestamp(this, other, true);
         }
 
         @Override
@@ -1311,15 +1314,43 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
             return autoGeneratedIdTimestamp;
         }
 
-        public static boolean equalsWithoutAutoGeneratedTimestamp(Translog.Index o1, Translog.Index o2) {
-            return o1.version == o2.version
-                && o1.seqNo == o2.seqNo
-                && o1.primaryTerm == o2.primaryTerm
-                && o1.id.equals(o2.id)
-                && o1.source.equals(o2.source)
-                && Objects.equals(o1.routing, o2.routing);
-        }
+        /**
+         * Compares two index operations on every field except the auto-generated id timestamp.
+         *
+         * @param o1 the first operation
+         * @param o2 the second operation
+         * @param checkSourceBytes if {@code true}, the raw sources are compared with {@code equals};
+         *                         if {@code false}, both sources are parsed and compared as maps
+         * @return {@code true} if version, seqNo, primaryTerm, id, routing and the source (under the
+         *         selected comparison) all match; {@code false} otherwise, including when a source
+         *         cannot be parsed
+         */
+        public static boolean equalsWithoutAutoGeneratedTimestamp(Translog.Index o1, Translog.Index o2, boolean checkSourceBytes) {
+            if (o1.version != o2.version
+                || o1.seqNo != o2.seqNo
+                || o1.primaryTerm != o2.primaryTerm
+                || o1.id.equals(o2.id) == false
+                || Objects.equals(o1.routing, o2.routing) == false) {
+                return false;
+            }
+
+            if (checkSourceBytes) {
+                return o1.source.equals(o2.source);
+            }
 
+            // Compare the parsed content instead of the raw bytes: map equality does not depend on key order.
+            var source1 = Source.fromBytes(o1.source);
+            var source2 = Source.fromBytes(o2.source);
+            try (
+                var parser1 = XContentHelper.createParserNotCompressed(
+                    XContentParserConfiguration.EMPTY,
+                    source1.internalSourceRef(),
+                    source1.sourceContentType()
+                );
+                var parser2 = XContentHelper.createParserNotCompressed(
+                    XContentParserConfiguration.EMPTY,
+                    source2.internalSourceRef(),
+                    source2.sourceContentType()
+                )
+            ) {
+                var map1 = parser1.map();
+                var map2 = parser2.map();
+                return map2.equals(map1);
+            } catch (IOException exc) {
+                // A source that cannot be parsed is reported as a mismatch rather than propagated.
+                return false;
+            }
+        }
     }
 
     public static final class Delete extends Operation {

+ 2 - 6
server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

@@ -12,8 +12,6 @@ package org.elasticsearch.rest.action.search;
 import java.util.HashSet;
 import java.util.Set;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
-
 /**
  * A {@link Set} of "capabilities" supported by the {@link RestSearchAction}.
  */
@@ -55,10 +53,10 @@ public final class SearchCapabilities {
     private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param";
     private static final String DENSE_VECTOR_UPDATABLE_BBQ = "dense_vector_updatable_bbq";
     private static final String FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV = "field_exists_query_for_text_fields_no_index_or_dv";
-    private static final String SYNTHETIC_VECTORS_SETTING = "synthetic_vectors_setting";
     private static final String UPDATE_FIELD_TO_BBQ_DISK = "update_field_to_bbq_disk";
     private static final String KNN_FILTER_ON_NESTED_FIELDS_CAPABILITY = "knn_filter_on_nested_fields";
     private static final String BUCKET_SCRIPT_PARENT_MULTI_BUCKET_ERROR = "bucket_script_parent_multi_bucket_error";
+    private static final String EXCLUDE_SOURCE_VECTORS_SETTING = "exclude_source_vectors_setting";
 
     public static final Set<String> CAPABILITIES;
     static {
@@ -86,9 +84,7 @@ public final class SearchCapabilities {
         capabilities.add(UPDATE_FIELD_TO_BBQ_DISK);
         capabilities.add(KNN_FILTER_ON_NESTED_FIELDS_CAPABILITY);
         capabilities.add(BUCKET_SCRIPT_PARENT_MULTI_BUCKET_ERROR);
-        if (SYNTHETIC_VECTORS) {
-            capabilities.add(SYNTHETIC_VECTORS_SETTING);
-        }
+        capabilities.add(EXCLUDE_SOURCE_VECTORS_SETTING);
         CAPABILITIES = Set.copyOf(capabilities);
     }
 }

+ 1 - 16
server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

@@ -2479,22 +2479,7 @@ public class DenseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase
         DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) ft;
         return switch (vectorFieldType.getElementType()) {
             case BYTE -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions());
-            case FLOAT -> {
-                float[] floats = new float[vectorFieldType.getVectorDimensions()];
-                float magnitude = 0;
-                for (int i = 0; i < floats.length; i++) {
-                    float f = randomFloat();
-                    floats[i] = f;
-                    magnitude += f * f;
-                }
-                magnitude = (float) Math.sqrt(magnitude);
-                if (VectorSimilarity.DOT_PRODUCT.equals(vectorFieldType.getSimilarity())) {
-                    for (int i = 0; i < floats.length; i++) {
-                        floats[i] /= magnitude;
-                    }
-                }
-                yield floats;
-            }
+            case FLOAT -> randomNormalizedVector(vectorFieldType.getVectorDimensions());
             case BIT -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions() / 8);
         };
     }

+ 4 - 11
server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

@@ -118,7 +118,7 @@ public class SparseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase
 
     protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException {
         b.field("type", "sparse_vector");
-        b.field("store", false);
+        b.field("store", true);
 
         b.startObject("meta");
         b.endObject();
@@ -421,7 +421,7 @@ public class SparseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase
             .startObject("properties")
             .startObject("field")
             .field("type", "sparse_vector")
-            .field("store", true)
+            .field("store", false)
             .endObject()
             .endObject()
             .endObject()
@@ -474,23 +474,16 @@ public class SparseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean syntheticSource) {
-        boolean withStore = randomBoolean();
         return new SyntheticSourceSupport() {
             @Override
             public boolean preservesExactSource() {
-                return withStore == false;
+                return false;
             }
 
             @Override
             public SyntheticSourceExample example(int maxValues) {
                 var sample = getSampleValueForDocument();
-                return new SyntheticSourceExample(sample, sample, b -> {
-                    if (withStore) {
-                        minimalStoreMapping(b);
-                    } else {
-                        minimalMapping(b);
-                    }
-                });
+                return new SyntheticSourceExample(sample, sample, b -> { minimalMapping(b); });
             }
 
             @Override

+ 1 - 3
server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java

@@ -38,7 +38,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.hamcrest.Matchers.equalTo;
 
 public class SyntheticVectorFieldsRecoveryTests extends EngineTestCase {
@@ -69,7 +68,7 @@ public class SyntheticVectorFieldsRecoveryTests extends EngineTestCase {
             builder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.SYNTHETIC.name());
             builder.put(IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE_SETTING.getKey(), useSyntheticRecovery);
         }
-        builder.put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true);
+        builder.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true);
         return builder.build();
     }
 
@@ -113,7 +112,6 @@ public class SyntheticVectorFieldsRecoveryTests extends EngineTestCase {
     }
 
     public void testSnapshotRecovery() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         List<Translog.Index> expectedOperations = new ArrayList<>();
         int size = randomIntBetween(10, 50);
         for (int i = 0; i < size; i++) {

+ 1 - 8
server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java

@@ -26,12 +26,10 @@ import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent;
 
 public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     public void testSyntheticVectorsMinimalValidDocument() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         for (XContentType type : XContentType.values()) {
             BytesReference source = generateRandomDoc(type, true, true, false, false, false);
             assertSyntheticVectors(buildVectorMapping(), source, type);
@@ -39,7 +37,6 @@ public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     }
 
     public void testSyntheticVectorsFullDocument() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         for (XContentType type : XContentType.values()) {
             BytesReference source = generateRandomDoc(type, true, true, true, true, false);
             assertSyntheticVectors(buildVectorMapping(), source, type);
@@ -47,7 +44,6 @@ public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     }
 
     public void testSyntheticVectorsWithUnmappedFields() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         for (XContentType type : XContentType.values()) {
             BytesReference source = generateRandomDoc(type, true, true, true, true, true);
             assertSyntheticVectors(buildVectorMapping(), source, type);
@@ -55,7 +51,6 @@ public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     }
 
     public void testSyntheticVectorsMissingRootFields() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         for (XContentType type : XContentType.values()) {
             BytesReference source = generateRandomDoc(type, false, false, false, false, false);
             assertSyntheticVectors(buildVectorMapping(), source, type);
@@ -63,7 +58,6 @@ public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     }
 
     public void testSyntheticVectorsPartialNestedContent() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         for (XContentType type : XContentType.values()) {
             BytesReference source = generateRandomDoc(type, true, true, true, false, false);
             assertSyntheticVectors(buildVectorMapping(), source, type);
@@ -71,7 +65,6 @@ public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     }
 
     public void testFlatPathDocument() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         for (XContentType type : XContentType.values()) {
             BytesReference source = generateRandomDocWithFlatPath(type);
             assertSyntheticVectors(buildVectorMapping(), source, type);
@@ -248,7 +241,7 @@ public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase {
     }
 
     private void assertSyntheticVectors(String mapping, BytesReference source, XContentType xContentType) throws IOException {
-        var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build();
+        var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build();
         MapperService mapperService = createMapperService(settings, mapping);
         var parsedDoc = mapperService.documentMapper().parse(new SourceToParse("0", source, xContentType));
         try (var directory = newDirectory()) {

+ 11 - 4
server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java

@@ -36,7 +36,6 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.function.LongSupplier;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO;
 import static org.hamcrest.Matchers.equalTo;
@@ -138,11 +137,19 @@ public class ShardGetServiceTests extends IndexShardTestCase {
                 "foo": "foo"
             }
             """, Arrays.toString(vector));
-        runGetFromTranslogWithOptions(docToIndex, "\"enabled\": true", null, docToIndex, "\"text\"", "foo", "\"dense_vector\"", false);
+        runGetFromTranslogWithOptions(
+            docToIndex,
+            "\"enabled\": true",
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), false).build(),
+            docToIndex,
+            "\"text\"",
+            "foo",
+            "\"dense_vector\"",
+            false
+        );
     }
 
     public void testGetFromTranslogWithSyntheticVector() throws IOException {
-        assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS);
         float[] vector = new float[2048];
         for (int i = 0; i < vector.length; i++) {
             vector[i] = randomByte();
@@ -156,7 +163,7 @@ public class ShardGetServiceTests extends IndexShardTestCase {
         runGetFromTranslogWithOptions(
             docToIndex,
             "\"enabled\": true",
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(),
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build(),
             docToIndex,
             "\"text\"",
             "foo",

+ 2 - 0
x-pack/plugin/build.gradle

@@ -139,6 +139,8 @@ tasks.named("yamlRestCompatTestTransform").configure({ task ->
   task.skipTest("esql/192_lookup_join_on_aliases/alias-pattern-multiple", "Error message changed")
   task.skipTest("esql/192_lookup_join_on_aliases/fails when alias or pattern resolves to multiple", "Error message changed")
   task.skipTest("esql/10_basic/Test wrong LIMIT parameter", "Error message changed")
+  task.skipTest("ml/sparse_vector_search/Search on a sparse_vector field with dots in the field names", "Vectors are no longer returned by default")
+  task.skipTest("ml/sparse_vector_search/Search on a nested sparse_vector field with dots in the field names and conflicting child fields", "Vectors are no longer returned by default")
   task.skipTest("esql/190_lookup_join/lookup-no-key-only-key", "Requires the fix")
 })
 

+ 1 - 1
x-pack/plugin/ml/qa/ml-with-security/build.gradle

@@ -8,7 +8,7 @@ dependencies {
 // bring in machine learning rest test suite
 restResources {
   restApi {
-    include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get', 'count', 'ingest', 'bulk', 'ml', 'cat'
+    include '_common', 'capabilities', 'cluster', 'nodes', 'indices', 'index', 'search', 'get', 'count', 'ingest', 'bulk', 'ml', 'cat'
   }
   restTests {
     includeXpack 'ml'

+ 30 - 1
x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java

@@ -21,6 +21,7 @@ import java.util.Map;
 
 import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.hasSize;
 
 /**
@@ -100,6 +101,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
 
     private static final String TOP_LEVEL_KNN_TEMPLATE = """
         {
+          "_source": {
+            "exclude_vectors": false
+          },
           "knn": {
               "field": "%s",
               "k": 5,
@@ -114,6 +118,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
         }""";
     private static final String TOP_LEVEL_KNN_FILTER_TEMPLATE = """
         {
+          "_source": {
+            "exclude_vectors": false
+          },
           "knn": {
               "field": "%s",
               "k": 5,
@@ -129,6 +136,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
         }""";
     private static final String TOP_LEVEL_KNN_HYBRID_ALL = """
         {
+          "_source": {
+            "exclude_vectors": false
+          },
           "knn": {
               "field": "embedding",
               "k": 3,
@@ -146,6 +156,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
         }""";
     private static final String TOP_LEVEL_KNN_HYBRID_MATCH = """
         {
+          "_source": {
+            "exclude_vectors": false
+          },
           "knn": {
               "field": "embedding",
               "k": 3,
@@ -163,6 +176,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
 
     private static final String QUERY_DSL_KNN_TEMPLATE = """
         {
+          "_source": {
+            "exclude_vectors": false
+          },
           "query": {
               "knn" : {
                   "field": "%s",
@@ -178,6 +194,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
         }""";
     private static final String QUERY_DSL_KNN_FILTER_TEMPLATE = """
         {
+          "_source": {
+            "exclude_vectors": false
+          },
           "query": {
               "knn" : {
                   "field": "%s",
@@ -194,6 +213,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
         }""";
     private static final String QUERY_DSL_KNN_HYBRID_ALL = """
         {
+             "_source": {
+                "exclude_vectors": false
+             },
              "query": {
                  "bool": {
                      "should": [
@@ -220,6 +242,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
          }""";
     private static final String QUERY_DSL_KNN_HYBRID_MATCH = """
         {
+             "_source": {
+                "exclude_vectors": false
+             },
              "query": {
                  "bool": {
                      "should": [
@@ -554,7 +579,11 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase {
             // The top hit should have the search prefix
             assertEquals(searchPrefix + "my words", sourceText);
             List<Double> foundEmbedding = (List<Double>) MapHelper.dig("_source.embedding", topHit);
-            assertEquals(embeddings.get(0), foundEmbedding);
+            var expectedEmbeddings = embeddings.get(0);
+            assertThat(foundEmbedding.size(), equalTo(expectedEmbeddings.size()));
+            for (int i = 0; i < foundEmbedding.size(); i++) {
+                assertEquals(expectedEmbeddings.get(i), foundEmbedding.get(i), 0.01f);
+            }
         }
     }
 

+ 3 - 0
x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java

@@ -276,6 +276,9 @@ public class TextExpansionQueryIT extends PyTorchModelRestTestCase {
 
         request.setJsonEntity(Strings.format("""
             {
+                "_source": {
+                    "exclude_vectors": false
+                },
                 "query": {
                   "text_expansion": {
                     "%s": {

+ 9 - 3
x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml

@@ -980,19 +980,25 @@ setup:
 
 ---
 "rrf retriever with inner_hits for sub-retriever":
+  - skip:
+      features: [ "headers" ]
   - requires:
       capabilities:
         - method: POST
           path: /_search
-          capabilities: [ nested_retriever_inner_hits_support ]
+          capabilities: [ nested_retriever_inner_hits_support, exclude_source_vectors_setting ]
       test_runner_features: capabilities
       reason: "Support for propagating nested retrievers' inner hits to the top-level compound retriever is required"
 
   - do:
+      headers:
+        # Force JSON content type so that we use a parser that interprets the floating-point score as a double
+        Content-Type: application/json
       search:
-        _source: false
         index: test
         body:
+          _source:
+            exclude_vectors: false
           retriever:
             rrf:
               retrievers: [
@@ -1058,7 +1064,7 @@ setup:
   - match: { hits.hits.0.inner_hits.nested_data_field.hits.total.value: 1 }
   - match: { hits.hits.0.inner_hits.nested_data_field.hits.hits.0.fields.nested_inner_hits.0.data.0: foo }
   - match: { hits.hits.0.inner_hits.nested_vector_field.hits.total.value: 1 }
-  - match: { hits.hits.0.inner_hits.nested_vector_field.hits.hits.0.fields.nested_inner_hits.0.paragraph_id: [ 1 ] }
+  - match: { hits.hits.0.inner_hits.nested_vector_field.hits.hits.0.fields.nested_inner_hits.0.paragraph_id: [ 1.0 ] }
 
   - match: { hits.hits.1.inner_hits.nested_data_field.hits.total.value: 3 }
   - match: { hits.hits.1.inner_hits.nested_data_field.hits.hits.0.fields.nested_inner_hits.0.data.0: bar }

+ 2 - 2
x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java

@@ -20,7 +20,7 @@ import org.elasticsearch.xpack.rank.vectors.mapper.RankVectorsFieldMapper;
 
 import java.util.Map;
 
-import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING;
+import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING;
 import static org.elasticsearch.index.mapper.FieldMapper.notInMultiFields;
 import static org.elasticsearch.xpack.rank.vectors.mapper.RankVectorsFieldMapper.CONTENT_TYPE;
 
@@ -41,7 +41,7 @@ public class RankVectorsPlugin extends Plugin implements MapperPlugin {
                 n,
                 c.indexVersionCreated(),
                 getLicenseState(),
-                INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
+                INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
             );
         }, notInMultiFields(CONTENT_TYPE)));
     }

+ 10 - 10
x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java

@@ -113,13 +113,13 @@ public class RankVectorsFieldMapper extends FieldMapper {
 
         private final IndexVersion indexCreatedVersion;
         private final XPackLicenseState licenseState;
-        private final boolean isSyntheticVector;
+        private final boolean isExcludeSourceVectors;
 
-        public Builder(String name, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, boolean isSyntheticVector) {
+        public Builder(String name, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, boolean isExcludeSourceVectors) {
             super(name);
             this.indexCreatedVersion = indexCreatedVersion;
             this.licenseState = licenseState;
-            this.isSyntheticVector = isSyntheticVector;
+            this.isExcludeSourceVectors = isExcludeSourceVectors;
         }
 
         public Builder dimensions(int dimensions) {
@@ -141,7 +141,7 @@ public class RankVectorsFieldMapper extends FieldMapper {
             // Validate again here because the dimensions or element type could have been set programmatically,
             // which affects index option validity
             validate();
-            boolean isSyntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector;
+            boolean isExcludeSourceVectorsFinal = context.isSourceSynthetic() == false && isExcludeSourceVectors;
             return new RankVectorsFieldMapper(
                 leafName(),
                 new RankVectorsFieldType(
@@ -154,7 +154,7 @@ public class RankVectorsFieldMapper extends FieldMapper {
                 builderParams(this, context),
                 indexCreatedVersion,
                 licenseState,
-                isSyntheticVectorFinal
+                isExcludeSourceVectorsFinal
             );
         }
     }
@@ -252,7 +252,7 @@ public class RankVectorsFieldMapper extends FieldMapper {
 
     private final IndexVersion indexCreatedVersion;
     private final XPackLicenseState licenseState;
-    private final boolean isSyntheticVector;
+    private final boolean isExcludeSourceVectors;
 
     private RankVectorsFieldMapper(
         String simpleName,
@@ -260,12 +260,12 @@ public class RankVectorsFieldMapper extends FieldMapper {
         BuilderParams params,
         IndexVersion indexCreatedVersion,
         XPackLicenseState licenseState,
-        boolean isSyntheticVector
+        boolean isExcludeSourceVectors
     ) {
         super(simpleName, fieldType, params);
         this.indexCreatedVersion = indexCreatedVersion;
         this.licenseState = licenseState;
-        this.isSyntheticVector = isSyntheticVector;
+        this.isExcludeSourceVectors = isExcludeSourceVectors;
     }
 
     @Override
@@ -396,7 +396,7 @@ public class RankVectorsFieldMapper extends FieldMapper {
 
     @Override
     public FieldMapper.Builder getMergeBuilder() {
-        return new Builder(leafName(), indexCreatedVersion, licenseState, isSyntheticVector).init(this);
+        return new Builder(leafName(), indexCreatedVersion, licenseState, isExcludeSourceVectors).init(this);
     }
 
     @Override
@@ -406,7 +406,7 @@ public class RankVectorsFieldMapper extends FieldMapper {
 
     @Override
     public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() {
-        if (isSyntheticVector) {
+        if (isExcludeSourceVectors) {
             var syntheticField = new DocValuesSyntheticFieldLoader();
             return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyVectorsAsList);
         }

+ 19 - 10
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml

@@ -372,6 +372,11 @@ teardown:
   - requires:
       cluster_features: [ "gte_v8.16.0" ]
       reason: dots in field names allowed starting in in 8.16.0
+      test_runner_features: [ capabilities, "close_to" ]
+      capabilities:
+        - method: GET
+          path: /_search
+          capabilities: [ exclude_source_vectors_setting ]
 
   - do:
       indices.create:
@@ -409,17 +414,14 @@ teardown:
       get:
         index: index-with-sparse-vector2
         id: "has-dots"
+        _source_exclude_vectors: false
 
-  - match:
-      _source:
-        ml:
-          tokens:
-            running: 2.4097164
-            good: 2.170997
-            run: 2.052153
-            race: 1.4575411
-            for: 1.1908325
-            5.0k: 2.489943
+  - close_to: { _source.ml.tokens.running: { value: 2.4097164, error: 0.01 } }
+  - close_to: { _source.ml.tokens.good: { value: 2.170997, error: 0.01 } }
+  - close_to: { _source.ml.tokens.run: { value: 2.052153, error: 0.01 } }
+  - close_to: { _source.ml.tokens.race: { value: 1.4575411, error: 0.01 } }
+  - close_to: { _source.ml.tokens.for: { value: 1.1908325, error: 0.01 } }
+  - close_to: { _source.ml.tokens.5\\.0k: { value: 2.489943, error: 0.01 } }
 
   - do:
       search:
@@ -439,6 +441,11 @@ teardown:
   - requires:
       cluster_features: [ "gte_v8.16.0" ]
       reason: dots in field names allowed starting in in 8.16.0
+      test_runner_features: [ capabilities, "close_to" ]
+      capabilities:
+        - method: GET
+          path: /_search
+          capabilities: [ exclude_source_vectors_setting ]
 
   - do:
       indices.create:
@@ -479,6 +486,7 @@ teardown:
       get:
         index: index-with-sparse-vector3
         id: "parent-foo"
+        _source_exclude_vectors: false
 
   - match:
       _source:
@@ -491,6 +499,7 @@ teardown:
       get:
         index: index-with-sparse-vector3
         id: "parent-foo-bar"
+        _source_exclude_vectors: false
 
   - match:
       _source:

+ 1 - 3
x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml

@@ -6,7 +6,7 @@ setup:
       capabilities:
         - method: GET
           path: /_search
-          capabilities: [ synthetic_vectors_setting ]
+          capabilities: [ exclude_source_vectors_setting ]
   - skip:
       features: "headers"
 
@@ -14,8 +14,6 @@ setup:
       indices.create:
         index: test
         body:
-          settings:
-            index.mapping.synthetic_vectors: true
           mappings:
             properties:
               name: