Browse Source

Allow including semantic field embeddings in _source (#134717)

Adds support for returning `_inference_fields` (embeddings for `semantic_text` fields)
as part of `_source` when `_source.exclude_vectors` is explicitly set to `false`.
This enables use cases like reindexing documents without recomputing embeddings.
By default, embeddings remain excluded.
Jim Ferenczi 3 weeks ago
parent
commit
8933c15cc6

+ 5 - 0
docs/changelog/134717.yaml

@@ -0,0 +1,5 @@
+pr: 134717
+summary: Allow including semantic field embeddings in `_source`
+area: Vector Search
+type: enhancement
+issues: []

+ 89 - 0
docs/reference/elasticsearch/mapping-reference/semantic-text.md

@@ -413,6 +413,95 @@ If you want to avoid unnecessary inference and keep existing embeddings:
     * Use **partial updates through the Bulk API**.
     * Omit any `semantic_text` fields that did not change from the `doc` object in your request.
 
+## Returning semantic field embeddings in `_source`
+
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, the embeddings generated for `semantic_text` fields are stored internally and **not included in `_source`** when retrieving documents.
+
+To return the complete inference fields, embeddings included, in `_source`, set the `_source.exclude_vectors` option to `false`.
+This works with the
+[Get](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-get),
+[Search](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search),
+and
+[Reindex](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-reindex)
+APIs.
+
+```console
+POST my-index/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  },
+  "query": {
+    "match_all": {}
+  }
+}
+```
+
+The embeddings will appear under `_inference_fields` in `_source`.
+
+**Use cases**
+
+Including embeddings in `_source` is useful when you want to:
+
+* Reindex documents into another index **with the same `inference_id`** without re-running inference.
+* Export or migrate documents while preserving their embeddings.
+* Inspect or debug the raw embeddings generated for your content.
+
+### Example: Reindex while preserving embeddings
+
+```console
+POST _reindex
+{
+  "source": {
+    "index": "my-index-src",
+    "_source": {
+      "exclude_vectors": false            <1>
+    }
+  },
+  "dest": {
+    "index": "my-index-dest"
+  }
+}
+```
+
+1. Sends the source documents with their stored embeddings to the destination index.
+
+::::{warning}
+If the target index’s `semantic_text` field does **not** use the **same `inference_id`** as the source index,
+the documents will **fail the reindex task**.
+Matching `inference_id` values are required to reuse the existing embeddings.
+::::
+
+With `exclude_vectors` set to `false` on the source, documents are reindexed without triggering inference again, **as long as the target `semantic_text` field uses the same `inference_id` as the source**.
+
+::::{note}
+**For versions prior to 9.2.0**
+
+Older versions do not support the `exclude_vectors` option for retrieving the embeddings of `semantic_text` fields.
+To return the `_inference_fields`, use the `fields` option in a search request instead:
+
+```console
+POST test-index/_search
+{
+  "query": {
+    "match": {
+      "my_semantic_field": "Which country is Paris in?"
+    }
+  },
+  "fields": [
+    "_inference_fields"
+  ]
+}
+```
+
+This returns the chunked embeddings used for semantic search under `_inference_fields` in `_source`.
+Note that the `fields` option is **not** available for the Reindex API.
+::::
+
 ## Customizing `semantic_text` indexing [custom-indexing]
 
 `semantic_text` uses defaults for indexing data based on the {{infer}} endpoint

+ 1 - 1
modules/reindex/src/main/java/org/elasticsearch/reindex/AbstractAsyncBulkByScrollAction.java

@@ -214,7 +214,7 @@ public abstract class AbstractAsyncBulkByScrollAction<
             // always include vectors in the response unless explicitly set
             var fetchSource = sourceBuilder.fetchSource();
             if (fetchSource == null) {
-                sourceBuilder.fetchSource(FetchSourceContext.FETCH_ALL_SOURCE);
+                sourceBuilder.fetchSource(FetchSourceContext.FETCH_ALL_SOURCE_EXCLUDE_INFERENCE_FIELDS);
             } else if (fetchSource.excludeVectors() == null) {
                 sourceBuilder.excludeVectors(false);
             }

+ 1 - 0
server/src/main/java/org/elasticsearch/TransportVersions.java

@@ -330,6 +330,7 @@ public class TransportVersions {
     public static final TransportVersion NEW_SEMANTIC_QUERY_INTERCEPTORS = def(9_162_0_00);
     public static final TransportVersion ESQL_LOOKUP_JOIN_ON_EXPRESSION = def(9_163_0_00);
     public static final TransportVersion INFERENCE_REQUEST_ADAPTIVE_RATE_LIMITING_REMOVED = def(9_164_0_00);
+    public static final TransportVersion SEARCH_SOURCE_EXCLUDE_INFERENCE_FIELDS_PARAM = def(9_165_0_00);
 
     /*
      * STOP! READ THIS FIRST! No, really,

+ 8 - 14
server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java

@@ -49,11 +49,9 @@ import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.engine.VersionConflictEngineException;
 import org.elasticsearch.index.get.GetResult;
 import org.elasticsearch.index.mapper.DocumentMapper;
-import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper;
 import org.elasticsearch.index.mapper.MapperException;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.MappingLookup;
-import org.elasticsearch.index.mapper.RoutingFieldMapper;
 import org.elasticsearch.index.mapper.SourceToParse;
 import org.elasticsearch.index.seqno.SequenceNumbers;
 import org.elasticsearch.index.shard.IndexShard;
@@ -65,6 +63,7 @@ import org.elasticsearch.injection.guice.Inject;
 import org.elasticsearch.node.NodeClosedException;
 import org.elasticsearch.plugins.internal.DocumentParsingProvider;
 import org.elasticsearch.plugins.internal.XContentMeteringParserDecorator;
+import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportRequestOptions;
 import org.elasticsearch.transport.TransportService;
@@ -366,8 +365,13 @@ public class TransportShardBulkAction extends TransportWriteAction<BulkShardRequ
         if (opType == DocWriteRequest.OpType.UPDATE) {
             final UpdateRequest updateRequest = (UpdateRequest) context.getCurrent();
             try {
-                var gFields = getStoredFieldsSpec(context.getPrimary());
-                updateResult = updateHelper.prepare(updateRequest, context.getPrimary(), nowInMillisSupplier, gFields);
+                updateResult = updateHelper.prepare(
+                    updateRequest,
+                    context.getPrimary(),
+                    nowInMillisSupplier,
+                    // Include inference fields so that partial updates can still retrieve embeddings for fields that weren't updated.
+                    FetchSourceContext.FETCH_ALL_SOURCE
+                );
             } catch (Exception failure) {
                 // we may fail translating a update to index or delete operation
                 // we use index result to communicate failure while translating update request
@@ -443,16 +447,6 @@ public class TransportShardBulkAction extends TransportWriteAction<BulkShardRequ
         return true;
     }
 
-    private static String[] getStoredFieldsSpec(IndexShard indexShard) {
-        if (InferenceMetadataFieldsMapper.isEnabled(indexShard.mapperService().mappingLookup())) {
-            if (indexShard.mapperService().mappingLookup().inferenceFields().size() > 0) {
-                // Retrieves the inference metadata field containing the inference results for all semantic fields defined in the mapping.
-                return new String[] { RoutingFieldMapper.NAME, InferenceMetadataFieldsMapper.NAME };
-            }
-        }
-        return new String[] { RoutingFieldMapper.NAME };
-    }
-
     private static boolean handleMappingUpdateRequired(
         BulkPrimaryExecutionContext context,
         MappingUpdatePerformer mappingUpdater,

+ 9 - 1
server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java

@@ -54,6 +54,7 @@ import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.injection.guice.Inject;
 import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.threadpool.ThreadPool.Names;
@@ -215,7 +216,14 @@ public class TransportUpdateAction extends TransportInstanceSingleOperationActio
             assert ThreadPool.assertCurrentThreadPool(Names.SYSTEM_WRITE, Names.WRITE);
             return deleteInferenceResults(
                 request,
-                updateHelper.prepare(request, indexShard, threadPool::absoluteTimeInMillis), // Gets the doc using the engine
+                // Gets the doc using the engine
+                updateHelper.prepare(
+                    request,
+                    indexShard,
+                    threadPool::absoluteTimeInMillis,
+                    // Exclude inference fields to ensure embeddings are recomputed.
+                    FetchSourceContext.FETCH_ALL_SOURCE_EXCLUDE_INFERENCE_FIELDS
+                ),
                 indexService.getMetadata(),
                 mappingLookup
             );

+ 5 - 10
server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java

@@ -33,6 +33,7 @@ import org.elasticsearch.script.ScriptService;
 import org.elasticsearch.script.UpdateCtxMap;
 import org.elasticsearch.script.UpdateScript;
 import org.elasticsearch.script.UpsertCtxMap;
+import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.search.lookup.Source;
 import org.elasticsearch.search.lookup.SourceFilter;
 import org.elasticsearch.xcontent.XContentType;
@@ -58,16 +59,10 @@ public class UpdateHelper {
     /**
      * Prepares an update request by converting it into an index or delete request or an update response (no action).
      */
-    public Result prepare(UpdateRequest request, IndexShard indexShard, LongSupplier nowInMillis) throws IOException {
-        // TODO: Don't hard-code gFields
-        return prepare(request, indexShard, nowInMillis, new String[] { RoutingFieldMapper.NAME });
-    }
-
-    /**
-     * Prepares an update request by converting it into an index or delete request or an update response (no action).
-     */
-    public Result prepare(UpdateRequest request, IndexShard indexShard, LongSupplier nowInMillis, String[] gFields) throws IOException {
-        final GetResult getResult = indexShard.getService().getForUpdate(request.id(), request.ifSeqNo(), request.ifPrimaryTerm(), gFields);
+    public Result prepare(UpdateRequest request, IndexShard indexShard, LongSupplier nowInMillis, FetchSourceContext fetchSourceContext)
+        throws IOException {
+        final GetResult getResult = indexShard.getService()
+            .getForUpdate(request.id(), request.ifSeqNo(), request.ifPrimaryTerm(), fetchSourceContext);
         return prepare(indexShard, request, getResult, nowInMillis);
     }
 

+ 30 - 21
server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

@@ -58,7 +58,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Function;
-import java.util.stream.Collectors;
 
 import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING;
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
@@ -217,16 +216,16 @@ public final class ShardGetService extends AbstractIndexShardComponent {
         );
     }
 
-    public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm, String[] gFields) throws IOException {
+    public GetResult getForUpdate(String id, long ifSeqNo, long ifPrimaryTerm, FetchSourceContext fetchSourceContext) throws IOException {
         return doGet(
             id,
-            gFields,
+            new String[] { RoutingFieldMapper.NAME },
             true,
             Versions.MATCH_ANY,
             VersionType.INTERNAL,
             ifSeqNo,
             ifPrimaryTerm,
-            FetchSourceContext.FETCH_ALL_SOURCE,
+            fetchSourceContext,
             false,
             indexShard::get
         );
@@ -290,14 +289,8 @@ public final class ShardGetService extends AbstractIndexShardComponent {
         // check first if stored fields to be loaded don't contain an object field
         MappingLookup mappingLookup = mapperService.mappingLookup();
         final Set<String> storedFieldSet = new HashSet<>();
-        boolean hasInferenceMetadataFields = false;
         if (storedFields != null) {
             for (String field : storedFields) {
-                if (field.equals(InferenceMetadataFieldsMapper.NAME)
-                    && InferenceMetadataFieldsMapper.isEnabled(indexShard.mapperService().mappingLookup())) {
-                    hasInferenceMetadataFields = true;
-                    continue;
-                }
                 Mapper fieldMapper = mappingLookup.getMapper(field);
                 if (fieldMapper == null) {
                     if (mappingLookup.objectMappers().get(field) != null) {
@@ -313,10 +306,16 @@ public final class ShardGetService extends AbstractIndexShardComponent {
         Map<String, DocumentField> metadataFields = null;
         DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
 
-        var res = maybeExcludeSyntheticVectorFields(mappingLookup, indexSettings, fetchSourceContext, null);
+        var res = maybeExcludeVectorFields(mappingLookup, indexSettings, fetchSourceContext, null);
         if (res.v1() != fetchSourceContext) {
             fetchSourceContext = res.v1();
         }
+
+        if (mappingLookup.inferenceFields().isEmpty() == false
+            && shouldExcludeInferenceFieldsFromSource(indexSettings, fetchSourceContext) == false) {
+            storedFieldSet.add(InferenceMetadataFieldsMapper.NAME);
+        }
+
         var sourceFilter = res.v2();
         SourceLoader loader = forceSyntheticSource
             ? new SourceLoader.Synthetic(
@@ -389,7 +388,7 @@ public final class ShardGetService extends AbstractIndexShardComponent {
                 source = source.filter(filter);
             }
 
-            if (hasInferenceMetadataFields) {
+            if (storedFieldSet.contains(InferenceMetadataFieldsMapper.NAME)) {
                 /**
                  * Adds the {@link InferenceMetadataFieldsMapper#NAME} field from the document fields
                  * to the original _source if it has been requested.
@@ -417,10 +416,21 @@ public final class ShardGetService extends AbstractIndexShardComponent {
      * Returns {@code true} if vector fields are explicitly marked to be excluded and {@code false} otherwise.
      */
     public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) {
-        if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) {
-            return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings());
-        }
-        return fetchSourceContext.excludeVectors();
+        var explicit = shouldExcludeVectorsFromSourceExplicit(fetchSourceContext);
+        return explicit != null ? explicit : INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings());
+    }
+
+    private static Boolean shouldExcludeVectorsFromSourceExplicit(FetchSourceContext fetchSourceContext) {
+        return fetchSourceContext != null ? fetchSourceContext.excludeVectors() : null;
+    }
+
+    public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) {
+        var explicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext);
+        return explicit != null ? explicit : INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings());
+    }
+
+    private static Boolean shouldExcludeInferenceFieldsFromSourceExplicit(FetchSourceContext fetchSourceContext) {
+        return fetchSourceContext != null ? fetchSourceContext.excludeInferenceFields() : null;
     }
 
     /**
@@ -428,7 +438,7 @@ public final class ShardGetService extends AbstractIndexShardComponent {
      * unless vectors are explicitly requested to be included in the source.
      * Returns {@code null} when vectors should not be filtered out.
      */
-    public static Tuple<FetchSourceContext, SourceFilter> maybeExcludeSyntheticVectorFields(
+    public static Tuple<FetchSourceContext, SourceFilter> maybeExcludeVectorFields(
         MappingLookup mappingLookup,
         IndexSettings indexSettings,
         FetchSourceContext fetchSourceContext,
@@ -457,7 +467,7 @@ public final class ShardGetService extends AbstractIndexShardComponent {
             }
             // Exclude vectors from semantic text fields, as they are processed separately
             return inferenceFieldsAut == null || inferenceFieldsAut.run(f.name()) == false;
-        }).map(f -> f.name()).collect(Collectors.toList());
+        }).map(MappedFieldType::name).toList();
 
         var sourceFilter = excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new));
         if (lateExcludes.size() > 0) {
@@ -466,15 +476,14 @@ public final class ShardGetService extends AbstractIndexShardComponent {
              * This ensures that vector fields are available to sub-fetch phases, but excluded during the {@link FetchSourcePhase}.
              */
             if (fetchSourceContext != null && fetchSourceContext.excludes() != null) {
-                for (var exclude : fetchSourceContext.excludes()) {
-                    lateExcludes.add(exclude);
-                }
+                lateExcludes.addAll(Arrays.asList(fetchSourceContext.excludes()));
             }
             var newFetchSourceContext = fetchSourceContext == null
                 ? FetchSourceContext.of(true, false, null, lateExcludes.toArray(String[]::new))
                 : FetchSourceContext.of(
                     fetchSourceContext.fetchSource(),
                     fetchSourceContext.excludeVectors(),
+                    fetchSourceContext.excludeInferenceFields(),
                     fetchSourceContext.includes(),
                     lateExcludes.toArray(String[]::new)
                 );

+ 1 - 0
server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java

@@ -945,6 +945,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R
         this.fetchSourceContext = FetchSourceContext.of(
             fetchSourceContext.fetchSource(),
             excludeVectors,
+            fetchSourceContext.excludeInferenceFields(),
             fetchSourceContext.includes(),
             fetchSourceContext.excludes()
         );

+ 20 - 2
server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java

@@ -17,6 +17,7 @@ import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
 import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
 import org.elasticsearch.index.mapper.IdLoader;
+import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper;
 import org.elasticsearch.index.mapper.SourceLoader;
 import org.elasticsearch.search.LeafNestedDocuments;
 import org.elasticsearch.search.NestedDocuments;
@@ -25,6 +26,8 @@ import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.SearchHits;
 import org.elasticsearch.search.SearchShardTarget;
 import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
+import org.elasticsearch.search.fetch.subphase.FetchFieldsContext;
+import org.elasticsearch.search.fetch.subphase.FieldAndFormat;
 import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
 import org.elasticsearch.search.fetch.subphase.InnerHitsPhase;
 import org.elasticsearch.search.internal.SearchContext;
@@ -46,7 +49,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.function.Supplier;
 
-import static org.elasticsearch.index.get.ShardGetService.maybeExcludeSyntheticVectorFields;
+import static org.elasticsearch.index.get.ShardGetService.maybeExcludeVectorFields;
+import static org.elasticsearch.index.get.ShardGetService.shouldExcludeInferenceFieldsFromSource;
 
 /**
  * Fetch phase of a search request, used to fetch the actual top matching documents to be returned to the client, identified
@@ -113,12 +117,14 @@ public final class FetchPhase {
     }
 
     private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Profiler profiler, RankDocShardInfo rankDocs) {
+        var lookup = context.getSearchExecutionContext().getMappingLookup();
+
         // Optionally remove sparse and dense vector fields early to:
         // - Reduce the in-memory size of the source
         // - Speed up retrieval of the synthetic source
         // Note: These vectors will no longer be accessible via _source for any sub-fetch processors,
         // but they are typically accessed through doc values instead (e.g: re-scorer).
-        var res = maybeExcludeSyntheticVectorFields(
+        var res = maybeExcludeVectorFields(
             context.getSearchExecutionContext().getMappingLookup(),
             context.getSearchExecutionContext().getIndexSettings(),
             context.fetchSourceContext(),
@@ -127,6 +133,18 @@ public final class FetchPhase {
         if (context.fetchSourceContext() != res.v1()) {
             context.fetchSourceContext(res.v1());
         }
+
+        if (lookup.inferenceFields().isEmpty() == false
+            && shouldExcludeInferenceFieldsFromSource(context.indexShard().indexSettings(), context.fetchSourceContext()) == false) {
+            // Rehydrate the inference fields into the _source since the request or the index default asks for them.
+            var fetchFieldsContext = context.fetchFieldsContext();
+            if (fetchFieldsContext == null) {
+                fetchFieldsContext = new FetchFieldsContext(new ArrayList<>());
+            }
+            fetchFieldsContext.fields().add(new FieldAndFormat(InferenceMetadataFieldsMapper.NAME, null));
+            context.fetchFieldsContext(fetchFieldsContext);
+        }
+
         SourceLoader sourceLoader = context.newSourceLoader(res.v2());
         FetchContext fetchContext = new FetchContext(context, sourceLoader);
 

+ 66 - 8
server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java

@@ -33,6 +33,12 @@ import java.util.Objects;
 
 /**
  * Context used to fetch the {@code _source}.
+ * <p>
+ * The {@code exclude_vectors} and {@code exclude_inference_fields} flags control whether vectors and inference fields metadata
+ * are rehydrated into the {@code _source}. By default, these are not included in the {@code _source}.
+ * <p>
+ * The {@code exclude_inference_fields} flag defaults to the value of {@code exclude_vectors} at the REST layer, but it is exposed
+ * at the transport layer to allow internal APIs (such as reindex) to make more granular decisions.
  */
 public class FetchSourceContext implements Writeable, ToXContentObject {
 
@@ -40,12 +46,32 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
     public static final ParseField INCLUDES_FIELD = new ParseField("includes", "include");
     public static final ParseField EXCLUDES_FIELD = new ParseField("excludes", "exclude");
 
-    public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(true, null, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
-    public static final FetchSourceContext FETCH_ALL_SOURCE = new FetchSourceContext(true, false, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
+    public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(
+        true,
+        null,
+        null,
+        Strings.EMPTY_ARRAY,
+        Strings.EMPTY_ARRAY
+    );
+    public static final FetchSourceContext FETCH_ALL_SOURCE = new FetchSourceContext(
+        true,
+        false,
+        false,
+        Strings.EMPTY_ARRAY,
+        Strings.EMPTY_ARRAY
+    );
+    public static final FetchSourceContext FETCH_ALL_SOURCE_EXCLUDE_INFERENCE_FIELDS = new FetchSourceContext(
+        true,
+        false,
+        true,
+        Strings.EMPTY_ARRAY,
+        Strings.EMPTY_ARRAY
+    );
 
     public static final FetchSourceContext DO_NOT_FETCH_SOURCE = new FetchSourceContext(
         false,
         null,
+        null,
         Strings.EMPTY_ARRAY,
         Strings.EMPTY_ARRAY
     );
@@ -53,6 +79,7 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
     private final String[] includes;
     private final String[] excludes;
     private final Boolean excludeVectors;
+    private final Boolean excludeInferenceFields;
 
     public static FetchSourceContext of(boolean fetchSource) {
         return fetchSource ? FETCH_SOURCE : DO_NOT_FETCH_SOURCE;
@@ -68,15 +95,35 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
         @Nullable String[] includes,
         @Nullable String[] excludes
     ) {
-        if (excludeVectors == null && (includes == null || includes.length == 0) && (excludes == null || excludes.length == 0)) {
+        return of(fetchSource, excludeVectors, null, includes, excludes);
+    }
+
+    public static FetchSourceContext of(
+        boolean fetchSource,
+        Boolean excludeVectors,
+        Boolean excludeInferenceFields,
+        @Nullable String[] includes,
+        @Nullable String[] excludes
+    ) {
+        if (excludeVectors == null
+            && excludeInferenceFields == null
+            && (includes == null || includes.length == 0)
+            && (excludes == null || excludes.length == 0)) {
             return of(fetchSource);
         }
-        return new FetchSourceContext(fetchSource, excludeVectors, includes, excludes);
+        return new FetchSourceContext(fetchSource, excludeVectors, excludeInferenceFields, includes, excludes);
     }
 
-    private FetchSourceContext(boolean fetchSource, Boolean excludeVectors, @Nullable String[] includes, @Nullable String[] excludes) {
+    private FetchSourceContext(
+        boolean fetchSource,
+        @Nullable Boolean excludeVectors,
+        @Nullable Boolean excludeInferenceFields,
+        @Nullable String[] includes,
+        @Nullable String[] excludes
+    ) {
         this.fetchSource = fetchSource;
         this.excludeVectors = excludeVectors;
+        this.excludeInferenceFields = excludeInferenceFields;
         this.includes = includes == null ? Strings.EMPTY_ARRAY : includes;
         this.excludes = excludes == null ? Strings.EMPTY_ARRAY : excludes;
     }
@@ -84,9 +131,11 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
     public static FetchSourceContext readFrom(StreamInput in) throws IOException {
         final boolean fetchSource = in.readBoolean();
         final Boolean excludeVectors = isVersionCompatibleWithExcludeVectors(in.getTransportVersion()) ? in.readOptionalBoolean() : null;
+        final Boolean excludeInferenceFields = in.getTransportVersion()
+            .onOrAfter(TransportVersions.SEARCH_SOURCE_EXCLUDE_INFERENCE_FIELDS_PARAM) ? in.readOptionalBoolean() : null;
         final String[] includes = in.readStringArray();
         final String[] excludes = in.readStringArray();
-        return of(fetchSource, excludeVectors, includes, excludes);
+        return of(fetchSource, excludeVectors, excludeInferenceFields, includes, excludes);
     }
 
     @Override
@@ -95,6 +144,9 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
         if (isVersionCompatibleWithExcludeVectors(out.getTransportVersion())) {
             out.writeOptionalBoolean(excludeVectors);
         }
+        if (out.getTransportVersion().onOrAfter(TransportVersions.SEARCH_SOURCE_EXCLUDE_INFERENCE_FIELDS_PARAM)) {
+            out.writeOptionalBoolean(excludeInferenceFields);
+        }
         out.writeStringArray(includes);
         out.writeStringArray(excludes);
     }
@@ -112,6 +164,10 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
         return this.excludeVectors;
     }
 
+    public Boolean excludeInferenceFields() {
+        return this.excludeInferenceFields;
+    }
+
     public String[] includes() {
         return this.includes;
     }
@@ -251,7 +307,8 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
                 parser.getTokenLocation()
             );
         }
-        return FetchSourceContext.of(fetchSource, excludeVectors, includes, excludes);
+        // The exclude_inference_fields option is not exposed at the REST layer and defaults to the exclude_vectors value.
+        return FetchSourceContext.of(fetchSource, excludeVectors, excludeVectors, includes, excludes);
     }
 
     private static String[] parseStringArray(XContentParser parser, String currentFieldName) throws IOException {
@@ -298,6 +355,7 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
 
         if (fetchSource != that.fetchSource) return false;
         if (excludeVectors != that.excludeVectors) return false;
+        if (excludeInferenceFields != that.excludeInferenceFields) return false;
         if (Arrays.equals(excludes, that.excludes) == false) return false;
         if (Arrays.equals(includes, that.includes) == false) return false;
 
@@ -306,7 +364,7 @@ public class FetchSourceContext implements Writeable, ToXContentObject {
 
     @Override
     public int hashCode() {
-        int result = Objects.hash(fetchSource, excludeVectors);
+        int result = Objects.hash(fetchSource, excludeVectors, excludeInferenceFields);
         result = 31 * result + Arrays.hashCode(includes);
         result = 31 * result + Arrays.hashCode(excludes);
         return result;

+ 1 - 1
server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java

@@ -43,7 +43,7 @@ import static org.hamcrest.Matchers.equalTo;
 public class ShardGetServiceTests extends IndexShardTestCase {
 
     private GetResult getForUpdate(IndexShard indexShard, String id, long ifSeqNo, long ifPrimaryTerm) throws IOException {
-        return indexShard.getService().getForUpdate(id, ifSeqNo, ifPrimaryTerm, new String[] { RoutingFieldMapper.NAME });
+        return indexShard.getService().getForUpdate(id, ifSeqNo, ifPrimaryTerm, FetchSourceContext.FETCH_ALL_SOURCE);
     }
 
     public void testGetForUpdate() throws IOException {

+ 9 - 7
server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourceContextTests.java

@@ -33,9 +33,11 @@ public class FetchSourceContextTests extends AbstractXContentSerializingTestCase
 
     @Override
     protected FetchSourceContext createTestInstance() {
+        Boolean excludeVectors = randomBoolean() ? null : randomBoolean();
         return FetchSourceContext.of(
             true,
-            randomBoolean() ? null : randomBoolean(),
+            excludeVectors,
+            excludeVectors,
             randomArray(0, 5, String[]::new, () -> randomAlphaOfLength(5)),
             randomArray(0, 5, String[]::new, () -> randomAlphaOfLength(5))
         );
@@ -44,21 +46,21 @@ public class FetchSourceContextTests extends AbstractXContentSerializingTestCase
     @Override
     protected FetchSourceContext mutateInstance(FetchSourceContext instance) {
         return switch (randomInt(2)) {
-            case 0 -> FetchSourceContext.of(
-                true,
-                instance.excludeVectors() != null ? instance.excludeVectors() == false : randomBoolean(),
-                instance.includes(),
-                instance.excludes()
-            );
+            case 0 -> {
+                boolean excludeVectorsMod = instance.excludeVectors() != null ? instance.excludeVectors() == false : randomBoolean();
+                yield FetchSourceContext.of(true, excludeVectorsMod, excludeVectorsMod, instance.includes(), instance.excludes());
+            }
             case 1 -> FetchSourceContext.of(
                 true,
                 instance.excludeVectors(),
+                instance.excludeVectors(),
                 randomArray(instance.includes().length + 1, instance.includes().length + 5, String[]::new, () -> randomAlphaOfLength(5)),
                 instance.excludes()
             );
             case 2 -> FetchSourceContext.of(
                 true,
                 instance.excludeVectors(),
+                instance.excludeVectors(),
                 instance.includes(),
                 randomArray(instance.excludes().length + 1, instance.excludes().length + 5, String[]::new, () -> randomAlphaOfLength(5))
             );

+ 143 - 19
x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml

@@ -71,7 +71,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -105,7 +106,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -139,7 +141,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -173,7 +176,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -246,7 +250,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             nested:
               path: sparse_field.inference.chunks
@@ -293,7 +298,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             nested:
               path: dense_field.inference.chunks
@@ -364,9 +370,114 @@ setup:
         body:
           source:
             index: test-index
+            _source:
+              exclude_vectors: false
           dest:
             index: destination-index
 
+  - match: { created: 1 }
+  - match: { updated: 0 }
+  - match: { version_conflicts: 0 }
+  - match: { failures: [ ] }
+
+  - do:
+      indices.refresh: { }
+
+  - do:
+      get:
+        index: destination-index
+        id: doc_1
+
+  - match: { _source.sparse_field: "inference test" }
+  - match: { _source.dense_field: "another inference test" }
+  - match: { _source.non_inference_field: "non inference test" }
+
+  - do:
+      search:
+        index: destination-index
+        body:
+          _source:
+            exclude_vectors: false
+          query:
+            match_all: {}
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.total.relation: eq }
+
+  - length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks: 1 }
+  - length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field: 1 }
+  - exists: hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.embeddings
+  - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.start_offset: 0 }
+  - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.end_offset: 14 }
+
+  - length: { hits.hits.0._source._inference_fields.dense_field.inference.chunks: 1 }
+  - length: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field: 1 }
+  - exists: hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.embeddings
+  - match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.start_offset: 0 }
+  - match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.end_offset: 22 }
+
+---
+"Reindex recomputes embeddings by default":
+  - do:
+      index:
+        index: test-index
+        id: doc_1
+        body:
+          sparse_field: "inference test"
+          dense_field: "another inference test"
+          non_inference_field: "non inference test"
+        refresh: true
+
+  - do:
+      indices.create:
+        index: destination-index
+        body:
+          settings:
+            index:
+              mapping:
+                semantic_text:
+                  use_legacy_format: false
+          mappings:
+            properties:
+              sparse_field:
+                type: semantic_text
+                inference_id: dense-inference-id
+              dense_field:
+                type: semantic_text
+                inference_id: sparse-inference-id
+              non_inference_field:
+                type: text
+
+  # reindex fails when preserving embeddings: the copied embeddings do not match the destination fields' inference_ids
+  - do:
+      catch: bad_request
+      reindex:
+        wait_for_completion: true
+        body:
+          source:
+            index: test-index
+            _source:
+              exclude_vectors: false
+          dest:
+            index: destination-index
+
+  - length: { failures: 1 }
+
+  # reindex succeeds when embeddings are recomputed
+  - do:
+      reindex:
+        wait_for_completion: true
+        body:
+          source:
+            index: test-index
+          dest:
+            index: destination-index
+
+  - match: { created: 1 }
+  - match: { updated: 0 }
+  - match: { version_conflicts: 0 }
+  - match: { failures: [ ] }
+
   - do:
       indices.refresh: { }
 
@@ -383,7 +494,8 @@ setup:
       search:
         index: destination-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -472,7 +584,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -518,7 +631,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -601,7 +715,8 @@ setup:
       search:
         index: mapping-update-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -636,7 +751,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -670,7 +786,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -721,7 +838,8 @@ setup:
       search:
         index: test-elser-2-default-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -782,7 +900,8 @@ setup:
       search:
         index: test-in-object-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: {}
 
@@ -997,7 +1116,8 @@ setup:
             semantic:
               field: sparse_field
               query: "inference test"
-          fields: [ "_inference_fields" ]
+          _source:
+            exclude_vectors: false
 
   - match: { hits.total.value: 1 }
   - match: { hits.total.relation: eq }
@@ -1032,7 +1152,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -1092,7 +1213,8 @@ setup:
       search:
         index: destination-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -1137,7 +1259,8 @@ setup:
       search:
         index: test-multi-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -1162,7 +1285,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 

+ 26 - 0
x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_bwc.yml

@@ -676,6 +676,32 @@ setup:
   - match: { hits.total.value: 1 }
   - not_exists: hits.hits.0._source._inference_fields
 
+---
+"Skip fetching _inference_fields with exclude_vectors":
+  - requires:
+      cluster_features: semantic_text.skip_inference_fields
+      reason: Skip _inference_fields when search is performed on legacy semantic_text format.
+
+  - do:
+      index:
+        index: test-index
+        id: doc_1
+        body:
+          sparse_field: "test value"
+        refresh: true
+
+  - do:
+      search:
+        index: test-index
+        body:
+          _source:
+            exclude_vectors: false
+          query:
+            match_all: { }
+
+  - match: { hits.total.value: 1 }
+  - not_exists: hits.hits.0._source._inference_fields
+
 ---
 "Empty semantic_text field skips embedding generation":
   - requires:

+ 44 - 22
x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml

@@ -77,7 +77,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -102,7 +103,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -127,7 +129,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -150,7 +153,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -177,7 +181,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -200,7 +205,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -252,7 +258,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -281,7 +288,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -318,7 +326,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -413,7 +422,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -446,7 +456,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -507,7 +518,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -538,7 +550,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -607,7 +620,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -668,7 +682,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -710,7 +725,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -767,7 +783,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -800,7 +817,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -845,7 +863,8 @@ setup:
       search:
         index: test-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -922,7 +941,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -974,7 +994,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }
 
@@ -1013,7 +1034,8 @@ setup:
       search:
         index: test-copy-to-index
         body:
-          fields: [ _inference_fields ]
+          _source:
+            exclude_vectors: false
           query:
             match_all: { }