7 mēneši atpakaļ · f0d5220178
--- a/docs/changelog/123763.yaml
+++ b/docs/changelog/123763.yaml
@@ -0,0 +1,5 @@
 
				+pr: 123763
			
 
				+summary: Skip semantic_text embedding generation when no content is provided.
			
 
				+area: Relevance
			
 
				+type: enhancement
			
 
				+issues: []
			
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@@ -50,7 +50,8 @@ public class InferenceFeatures implements FeatureSpecification {
 
				             SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
			
 
				             SEMANTIC_KNN_FILTER_FIX,
			
 
				             TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE,
			
 
				-            SemanticTextFieldMapper.SEMANTIC_TEXT_BIT_VECTOR_SUPPORT
			
 
				+            SemanticTextFieldMapper.SEMANTIC_TEXT_BIT_VECTOR_SUPPORT,
			
 
				+            SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT
			
 
				         );
			
 
				     }
			
 
				 }
			
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java
@@ -563,7 +563,7 @@ public class ShardBulkInferenceActionFilter implements MappedActionFilter {
 
				                             }
			
 
				                             continue;
			
 
				                         }
			
 
				-                        ensureResponseAccumulatorSlot(itemIndex);
			
 
				+                        var slot = ensureResponseAccumulatorSlot(itemIndex);
			
 
				                         final List<String> values;
			
 
				                         try {
			
 
				                             values = SemanticTextUtils.nodeStringValues(field, valueObj);
			
@@ -580,7 +580,13 @@ public class ShardBulkInferenceActionFilter implements MappedActionFilter {
 
				                         List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>());
			
 
				                         int offsetAdjustment = 0;
			
 
				                         for (String v : values) {
			
 
				-                            fieldRequests.add(new FieldInferenceRequest(itemIndex, field, sourceField, v, order++, offsetAdjustment));
			
 
				+                            if (v.isBlank()) {
			
 
				+                                slot.addOrUpdateResponse(
			
 
				+                                    new FieldInferenceResponse(field, sourceField, v, order++, 0, null, EMPTY_CHUNKED_INFERENCE)
			
 
				+                                );
			
 
				+                            } else {
			
 
				+                                fieldRequests.add(new FieldInferenceRequest(itemIndex, field, sourceField, v, order++, offsetAdjustment));
			
 
				+                            }
			
 
				 
			
 
				                             // When using the inference metadata fields format, all the input values are concatenated so that the
			
 
				                             // chunk text offsets are expressed in the context of a single string. Calculate the offset adjustment
			
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
@@ -117,6 +117,7 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
 
				     public static final NodeFeature SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX = new NodeFeature(
			
 
				         "semantic_text.always_emit_inference_id_fix"
			
 
				     );
			
 
				+    public static final NodeFeature SEMANTIC_TEXT_HANDLE_EMPTY_INPUT = new NodeFeature("semantic_text.handle_empty_input");
			
 
				     public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields");
			
 
				     public static final NodeFeature SEMANTIC_TEXT_BIT_VECTOR_SUPPORT = new NodeFeature("semantic_text.bit_vector_support");
			
 
				 
			
@@ -403,7 +404,7 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
 
				         }
			
 
				 
			
 
				         final SemanticTextFieldMapper mapper;
			
 
				-        if (fieldType().getModelSettings() == null) {
			
 
				+        if (fieldType().getModelSettings() == null && field.inference().modelSettings() != null) {
			
 
				             mapper = addDynamicUpdate(context, field);
			
 
				         } else {
			
 
				             Conflicts conflicts = new Conflicts(fullFieldName);
			
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java
@@ -335,7 +335,7 @@ public class ShardBulkInferenceActionFilterTests extends ESTestCase {
 
				                 // item 3
			
 
				                 assertNull(bulkShardRequest.items()[3].getPrimaryResponse());
			
 
				                 actualRequest = getIndexRequestOrNull(bulkShardRequest.items()[3].request());
			
 
				-                assertInferenceResults(useLegacyFormat, actualRequest, "obj.field1", EXPLICIT_NULL, 0);
			
 
				+                assertInferenceResults(useLegacyFormat, actualRequest, "obj.field1", EXPLICIT_NULL, null);
			
 
				 
			
 
				                 // item 4
			
 
				                 assertNull(bulkShardRequest.items()[4].getPrimaryResponse());
			
@@ -368,6 +368,59 @@ public class ShardBulkInferenceActionFilterTests extends ESTestCase {
 
				         awaitLatch(chainExecuted, 10, TimeUnit.SECONDS);
			
 
				     }
			
 
				 
			
 
				+    @SuppressWarnings({ "unchecked", "rawtypes" })
			
 
				+    public void testHandleEmptyInput() throws Exception {
			
 
				+        StaticModel model = StaticModel.createRandomInstance();
			
 
				+        ShardBulkInferenceActionFilter filter = createFilter(
			
 
				+            threadPool,
			
 
				+            Map.of(model.getInferenceEntityId(), model),
			
 
				+            randomIntBetween(1, 10),
			
 
				+            useLegacyFormat,
			
 
				+            true
			
 
				+        );
			
 
				+
			
 
				+        CountDownLatch chainExecuted = new CountDownLatch(1);
			
 
				+        ActionFilterChain actionFilterChain = (task, action, request, listener) -> {
			
 
				+            try {
			
 
				+                BulkShardRequest bulkShardRequest = (BulkShardRequest) request;
			
 
				+                assertNull(bulkShardRequest.getInferenceFieldMap());
			
 
				+                assertThat(bulkShardRequest.items().length, equalTo(3));
			
 
				+
			
 
				+                // Index with an empty string
			
 
				+                assertNull(bulkShardRequest.items()[0].getPrimaryResponse());
			
 
				+                IndexRequest actualRequest = getIndexRequestOrNull(bulkShardRequest.items()[0].request());
			
 
				+                assertInferenceResults(useLegacyFormat, actualRequest, "semantic_text_field", "", 0);
			
 
				+
			
 
				+                // Index with a single whitespace character
			
 
				+                assertNull(bulkShardRequest.items()[1].getPrimaryResponse());
			
 
				+                actualRequest = getIndexRequestOrNull(bulkShardRequest.items()[1].request());
			
 
				+                assertInferenceResults(useLegacyFormat, actualRequest, "semantic_text_field", " ", 0);
			
 
				+
			
 
				+                // Update with multiple whitespace characters
			
 
				+                assertNull(bulkShardRequest.items()[2].getPrimaryResponse());
			
 
				+                actualRequest = getIndexRequestOrNull(bulkShardRequest.items()[2].request());
			
 
				+                assertInferenceResults(useLegacyFormat, actualRequest, "semantic_text_field", "  ", 0);
			
 
				+            } finally {
			
 
				+                chainExecuted.countDown();
			
 
				+            }
			
 
				+        };
			
 
				+        ActionListener actionListener = mock(ActionListener.class);
			
 
				+        Task task = mock(Task.class);
			
 
				+        Map<String, InferenceFieldMetadata> inferenceFieldMap = Map.of(
			
 
				+            "semantic_text_field",
			
 
				+            new InferenceFieldMetadata("semantic_text_field", model.getInferenceEntityId(), new String[] { "semantic_text_field" })
			
 
				+        );
			
 
				+
			
 
				+        BulkItemRequest[] items = new BulkItemRequest[3];
			
 
				+        items[0] = new BulkItemRequest(0, new IndexRequest("index").source(Map.of("semantic_text_field", "")));
			
 
				+        items[1] = new BulkItemRequest(1, new IndexRequest("index").source(Map.of("semantic_text_field", " ")));
			
 
				+        items[2] = new BulkItemRequest(2, new UpdateRequest().doc(new IndexRequest("index").source(Map.of("semantic_text_field", "  "))));
			
 
				+        BulkShardRequest request = new BulkShardRequest(new ShardId("test", "test", 0), WriteRequest.RefreshPolicy.NONE, items);
			
 
				+        request.setInferenceFieldMap(inferenceFieldMap);
			
 
				+        filter.apply(task, TransportShardBulkAction.ACTION_NAME, request, actionListener, actionFilterChain);
			
 
				+        awaitLatch(chainExecuted, 10, TimeUnit.SECONDS);
			
 
				+    }
			
 
				+
			
 
				     @SuppressWarnings({ "unchecked", "rawtypes" })
			
 
				     public void testManyRandomDocs() throws Exception {
			
 
				         Map<String, StaticModel> inferenceModelMap = new HashMap<>();
			
@@ -591,7 +644,7 @@ public class ShardBulkInferenceActionFilterTests extends ESTestCase {
 
				         IndexRequest request,
			
 
				         String fieldName,
			
 
				         Object expectedOriginalValue,
			
 
				-        int expectedChunkCount
			
 
				+        Integer expectedChunkCount
			
 
				     ) {
			
 
				         final Map<String, Object> requestMap = request.sourceAsMap();
			
 
				         if (useLegacyFormat) {
			
@@ -601,13 +654,11 @@ public class ShardBulkInferenceActionFilterTests extends ESTestCase {
 
				             );
			
 
				 
			
 
				             List<Object> chunks = (List<Object>) XContentMapValues.extractValue(getChunksFieldName(fieldName), requestMap);
			
 
				-            if (expectedChunkCount > 0) {
			
 
				+            if (expectedChunkCount == null) {
			
 
				+                assertNull(chunks);
			
 
				+            } else {
			
 
				                 assertNotNull(chunks);
			
 
				                 assertThat(chunks.size(), equalTo(expectedChunkCount));
			
 
				-            } else {
			
 
				-                // If the expected chunk count is 0, we expect that no inference has been performed. In this case, the source should not be
			
 
				-                // transformed, and thus the semantic text field structure should not be created.
			
 
				-                assertNull(chunks);
			
 
				             }
			
 
				         } else {
			
 
				             assertThat(XContentMapValues.extractValue(fieldName, requestMap, EXPLICIT_NULL), equalTo(expectedOriginalValue));
			
@@ -627,8 +678,11 @@ public class ShardBulkInferenceActionFilterTests extends ESTestCase {
 
				                 inferenceMetadataFields,
			
 
				                 EXPLICIT_NULL
			
 
				             );
			
 
				+
			
 
				+            // When using the new format, the chunks field should always exist
			
 
				+            int expectedSize = expectedChunkCount == null ? 0 : expectedChunkCount;
			
 
				             assertNotNull(chunks);
			
 
				-            assertThat(chunks.size(), equalTo(expectedChunkCount));
			
 
				+            assertThat(chunks.size(), equalTo(expectedSize));
			
 
				         }
			
 
				     }
			
 
				 
			
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml
@@ -1005,3 +1005,174 @@ setup:
 
				   - match: { hits.hits.0._source.dense_field: "another inference test" }
			
 
				   - match: { hits.hits.0._source.non_inference_field: "non inference test" }
			
 
				   - exists: hits.hits.0._source._inference_fields
			
 
				+
			
 
				+---
			
 
				+"Empty semantic_text field skips embedding generation":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          sparse_field: ""
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_2
			
 
				+        body:
			
 
				+          sparse_field: "   "
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-index
			
 
				+        body:
			
 
				+          fields: [ _inference_fields ]
			
 
				+          query:
			
 
				+            match_all: { }
			
 
				+
			
 
				+  - match: { hits.total.value: 2 }
			
 
				+  - match: { hits.hits.0._source.sparse_field: "" }
			
 
				+  - match: { hits.hits.1._source.sparse_field: "   " }
			
 
				+  - not_exists: hits.hits.0._source._inference_fields
			
 
				+  - not_exists: hits.hits.1._source._inference_fields
			
 
				+
			
 
				+---
			
 
				+"Reindexing with empty or whitespace semantic_text skips embedding generation":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          sparse_field: "  "
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      indices.create:
			
 
				+        index: destination-index
			
 
				+        body:
			
 
				+          settings:
			
 
				+            index:
			
 
				+              mapping:
			
 
				+                semantic_text:
			
 
				+                  use_legacy_format: false
			
 
				+          mappings:
			
 
				+            properties:
			
 
				+              sparse_field:
			
 
				+                type: semantic_text
			
 
				+                inference_id: sparse-inference-id
			
 
				+
			
 
				+  - do:
			
 
				+      reindex:
			
 
				+        wait_for_completion: true
			
 
				+        body:
			
 
				+          source:
			
 
				+            index: test-index
			
 
				+          dest:
			
 
				+            index: destination-index
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      get:
			
 
				+        index: destination-index
			
 
				+        id: doc_1
			
 
				+
			
 
				+  - match: { _source.sparse_field: "  " }
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: destination-index
			
 
				+        body:
			
 
				+          fields: [ _inference_fields ]
			
 
				+          query:
			
 
				+            match_all: { }
			
 
				+
			
 
				+  - not_exists: hits.hits.0._source._inference_fields
			
 
				+
			
 
				+---
			
 
				+"Empty Multi-Field skips embedding generation":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      indices.create:
			
 
				+        index: test-multi-index
			
 
				+        body:
			
 
				+          settings:
			
 
				+            index:
			
 
				+              mapping:
			
 
				+                semantic_text:
			
 
				+                  use_legacy_format: false
			
 
				+          mappings:
			
 
				+            properties:
			
 
				+              field:
			
 
				+                type: semantic_text
			
 
				+                inference_id: sparse-inference-id
			
 
				+                fields:
			
 
				+                  sparse:
			
 
				+                    type: semantic_text
			
 
				+                    inference_id: sparse-inference-id
			
 
				+
			
 
				+  - do:
			
 
				+      bulk:
			
 
				+        index: test-multi-index
			
 
				+        refresh: true
			
 
				+        body: |
			
 
				+          {"index":{"_id": "1"}}
			
 
				+          {"field": ["you know, for testing", "now with chunks"]}
			
 
				+          {"index":{"_id": "2"}}
			
 
				+          {"field": ["", "  "]}
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-multi-index
			
 
				+        body:
			
 
				+          fields: [ _inference_fields ]
			
 
				+          query:
			
 
				+            match_all: { }
			
 
				+
			
 
				+  - exists: hits.hits.0._source._inference_fields
			
 
				+  - not_exists: hits.hits.1._source._inference_fields
			
 
				+
			
 
				+---
			
 
				+"Multi chunks skips empty input embedding generation":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          sparse_field: ["some test data", "    ", "now with chunks"]
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-index
			
 
				+        body:
			
 
				+          fields: [ _inference_fields ]
			
 
				+          query:
			
 
				+            match_all: { }
			
 
				+
			
 
				+  - match: { hits.total.value: 1 }
			
 
				+
			
 
				+  - length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks: 1 }
			
 
				+  - length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field: 2 }
			
 
				+  - exists: hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.embeddings
			
 
				+  - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.start_offset: 0 }
			
 
				+  - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.end_offset: 14 }
			
 
				+  - exists: hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.1.embeddings
			
 
				+  - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.1.start_offset: 20 }
			
 
				+  - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.1.end_offset: 35 }
			
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_bwc.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_bwc.yml
@@ -675,3 +675,67 @@ setup:
 
				 
			
 
				   - match: { hits.total.value: 1 }
			
 
				   - not_exists: hits.hits.0._source._inference_fields
			
 
				+
			
 
				+---
			
 
				+"Empty semantic_text field skips embedding generation":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          sparse_field: ""
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_2
			
 
				+        body:
			
 
				+          sparse_field: "  "
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-index
			
 
				+        body:
			
 
				+          query:
			
 
				+            match_all: { }
			
 
				+
			
 
				+  - match: { hits.total.value: 2 }
			
 
				+  - match: { hits.hits.0._source.sparse_field.text: "" }
			
 
				+  - length: { hits.hits.0._source.sparse_field.inference.chunks: 0 }
			
 
				+  - match: { hits.hits.1._source.sparse_field.text: "  " }
			
 
				+  - length: { hits.hits.1._source.sparse_field.inference.chunks: 0 }
			
 
				+
			
 
				+---
			
 
				+"Multi chunks skips empty input embedding generation":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          sparse_field: ["some test data", "    ", "now with chunks"]
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-index
			
 
				+        body:
			
 
				+          query:
			
 
				+            match_all: { }
			
 
				+
			
 
				+  - match: { hits.total.value: 1 }
			
 
				+
			
 
				+  - length: { hits.hits.0._source.sparse_field.inference.chunks: 2 }
			
 
				+  - match: { hits.hits.0._source.sparse_field.inference.chunks.0.text: "some test data" }
			
 
				+  - exists: hits.hits.0._source.sparse_field.inference.chunks.0.embeddings
			
 
				+  - match: { hits.hits.0._source.sparse_field.inference.chunks.1.text: "now with chunks" }
			
 
				+  - exists: hits.hits.0._source.sparse_field.inference.chunks.1.embeddings
			
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml
@@ -291,3 +291,48 @@ setup:
 
				   - match:      { hits.hits.0._id: "doc_1" }
			
 
				   - not_exists: hits.hits.0.highlight.title
			
 
				 
			
 
				+---
			
 
				+"Highlighting and multi chunks with empty input":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				+
			
 
				+  - do:
			
 
				+      indices.create:
			
 
				+        index: test-multi-chunk-index
			
 
				+        body:
			
 
				+          settings:
			
 
				+            index.mapping.semantic_text.use_legacy_format: false
			
 
				+          mappings:
			
 
				+            properties:
			
 
				+              semantic_text_field:
			
 
				+                type: semantic_text
			
 
				+                inference_id: sparse-inference-id
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-multi-chunk-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          semantic_text_field: ["some test data", "    ", "now with chunks"]
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-multi-chunk-index
			
 
				+        body:
			
 
				+          query:
			
 
				+            semantic:
			
 
				+              field: "semantic_text_field"
			
 
				+              query: "test"
			
 
				+          highlight:
			
 
				+            fields:
			
 
				+              semantic_text_field:
			
 
				+                type: "semantic"
			
 
				+                number_of_fragments: 3
			
 
				+
			
 
				+  - match: { hits.total.value: 1 }
			
 
				+  - match: { hits.hits.0._id: "doc_1" }
			
 
				+  - length: { hits.hits.0.highlight.semantic_text_field: 2 }
			
 
				+  - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
			
 
				+  - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
			
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml
@@ -243,4 +243,48 @@ setup:
 
				   - match:  { hits.hits.0.highlight.body.0: "You Know, for Search!" }
			
 
				   - match:  { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
			
 
				 
			
 
				+---
			
 
				+"Highlighting and multi chunks with empty input":
			
 
				+  - requires:
			
 
				+      cluster_features: "semantic_text.handle_empty_input"
			
 
				+      reason: Skips embedding generation when semantic_text is empty or contains only whitespace, effective from 8.19 and 9.1.0.
			
 
				 
			
 
				+  - do:
			
 
				+      indices.create:
			
 
				+        index: test-multi-chunk-index
			
 
				+        body:
			
 
				+          settings:
			
 
				+            index.mapping.semantic_text.use_legacy_format: true
			
 
				+          mappings:
			
 
				+            properties:
			
 
				+              semantic_text_field:
			
 
				+                type: semantic_text
			
 
				+                inference_id: sparse-inference-id
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index: test-multi-chunk-index
			
 
				+        id: doc_1
			
 
				+        body:
			
 
				+          semantic_text_field: ["some test data", "    ", "now with chunks"]
			
 
				+        refresh: true
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test-multi-chunk-index
			
 
				+        body:
			
 
				+          query:
			
 
				+            semantic:
			
 
				+              field: "semantic_text_field"
			
 
				+              query: "test"
			
 
				+          highlight:
			
 
				+            fields:
			
 
				+              semantic_text_field:
			
 
				+                type: "semantic"
			
 
				+                number_of_fragments: 3
			
 
				+
			
 
				+  - match: { hits.total.value: 1 }
			
 
				+  - match: { hits.hits.0._id: "doc_1" }
			
 
				+  - length: { hits.hits.0.highlight.semantic_text_field: 2 }
			
 
				+  - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
			
 
				+  - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }