8 months ago · 0db2f0a027
--- a/docs/reference/mapping/types/semantic-text.asciidoc
+++ b/docs/reference/mapping/types/semantic-text.asciidoc
@@ -133,14 +133,13 @@ You can extract the most relevant fragments from a semantic text field by using
 
															 POST test-index/_search
														
 
															 {
														
 
															     "query": {
														
 
															-        "semantic": {
														
 
															-            "field": "my_semantic_field"
														
 
															+        "match": {
														
 
															+            "my_semantic_field": "Which country is Paris in?"
														
 
															         }
														
 
															     },
														
 
															     "highlight": {
														
 
															         "fields": {
														
 
															             "my_semantic_field": {
														
 
															-                "type": "semantic",
														
 
															                 "number_of_fragments": 2,  <1>
														
 
															                 "order": "score"           <2>
														
 
															             }
														
@@ -152,6 +151,33 @@ POST test-index/_search
 
															 <1> Specifies the maximum number of fragments to return.
														
 
															 <2> Sorts highlighted fragments by score when set to `score`. By default, fragments will be output in the order they appear in the field (order: none).
														
 
															+Highlighting is supported on fields other than `semantic_text`.
														
 
															+However, if you want to restrict highlighting to the semantic highlighter and return no fragments when the field is not of type `semantic_text`,
														
 
															+you can explicitly enforce the `semantic` highlighter in the query:
														
 
															+
														
 
															+[source,console]
														
 
															+------------------------------------------------------------
														
 
															+POST test-index/_search
														
 
															+{
														
 
															+    "query": {
														
 
															+        "match": {
														
 
															+            "my_field": "Which country is Paris in?"
														
 
															+        }
														
 
															+    },
														
 
															+    "highlight": {
														
 
															+        "fields": {
														
 
															+            "my_field": {
														
 
															+                "type": "semantic",         <1>
														
 
															+                "number_of_fragments": 2,
														
 
															+                "order": "score"
														
 
															+            }
														
 
															+        }
														
 
															+    }
														
 
															+}
														
 
															+------------------------------------------------------------
														
 
															+// TEST[skip:Requires inference endpoint]
														
 
															+<1> Ensures that highlighting is applied exclusively to `semantic_text` fields.
														
 
															+
														
 
															 [discrete]
														
 
															 [[custom-indexing]]
														
 
															 ==== Customizing `semantic_text` indexing
														
--- a/docs/reference/search/search-your-data/highlighting.asciidoc
+++ b/docs/reference/search/search-your-data/highlighting.asciidoc
@@ -37,8 +37,8 @@ GET /_search
 
															 // TEST[setup:my_index]
														
 
															 {es} supports three highlighters: `unified`, `plain`, and `fvh` (fast vector
														
 
															-highlighter). You can specify the highlighter `type` you want to use
														
 
															-for each field.
														
 
															+highlighter) for `text` and `keyword` fields and the `semantic` highlighter for `semantic_text` fields.
														
 
															+You can specify the highlighter `type` you want to use for each field or rely on the field type's default highlighter.
														
 
															 [discrete]
														
 
															 [[unified-highlighter]]
														
@@ -48,7 +48,19 @@ highlighter breaks the text into sentences and uses the BM25 algorithm to score
 
															 individual sentences as if they were documents in the corpus. It also supports
														
 
															 accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. The `unified`
														
 
															 highlighter can combine matches from multiple fields into one result (see
														
 
															-`matched_fields`). This is the default highlighter.
														
 
															+`matched_fields`).
														
 
															+
														
 
															+This is the default highlighter for all `text` and `keyword` fields.
														
 
															+
														
 
															+[discrete]
														
 
															+[[semantic-highlighter]]
														
 
															+==== Semantic Highlighter
														
 
															+
														
 
															+The `semantic` highlighter is specifically designed for use with the <<semantic-text, `semantic_text`>> field.
														
 
															+It identifies and extracts the most relevant fragments from the field based on semantic
														
 
															+similarity between the query and each fragment.
														
 
															+
														
 
															+By default, <<semantic-text, `semantic_text`>> fields use the semantic highlighter.
														
 
															 [discrete]
														
 
															 [[plain-highlighter]]
														
--- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -45,6 +45,7 @@ import org.elasticsearch.index.query.QueryShardException;
 
															 import org.elasticsearch.index.query.SearchExecutionContext;
														
 
															 import org.elasticsearch.search.DocValueFormat;
														
 
															 import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
														
 
															+import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
														
 
															 import org.elasticsearch.search.lookup.SearchLookup;
														
 
															 import java.io.IOException;
														
@@ -221,6 +222,13 @@ public abstract class MappedFieldType {
 
															         return null;
														
 
															     }
														
 
															+    /**
														
 
															+     * Returns the default highlighter type to use when highlighting the field.
														
 
															+     */
														
 
															+    public String getDefaultHighlighter() {
														
 
															+        return DefaultHighlighter.NAME;
														
 
															+    }
														
 
															+
														
 
															     /** Generates a query that will only match documents that contain the given value.
														
 
															      *  The default implementation returns a {@link TermQuery} over the value bytes
														
 
															      *  @throws IllegalArgumentException if {@code value} cannot be converted to the expected data type or if the field is not searchable
														
--- a/server/src/main/java/org/elasticsearch/search/SearchModule.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -936,7 +936,7 @@ public class SearchModule {
 
															         NamedRegistry<Highlighter> highlighters = new NamedRegistry<>("highlighter");
														
 
															         highlighters.register("fvh", new FastVectorHighlighter(settings));
														
 
															         highlighters.register("plain", new PlainHighlighter());
														
 
															-        highlighters.register("unified", new DefaultHighlighter());
														
 
															+        highlighters.register(DefaultHighlighter.NAME, new DefaultHighlighter());
														
 
															         highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters);
														
 
															         return unmodifiableMap(highlighters.getRegistry());
														
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -49,7 +49,7 @@ import java.util.function.Predicate;
 
															 import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
														
 
															 public class DefaultHighlighter implements Highlighter {
														
 
															-
														
 
															+    public static final String NAME = "unified";
														
 
															     public static final NodeFeature UNIFIED_HIGHLIGHTER_MATCHED_FIELDS = new NodeFeature("unified_highlighter_matched_fields", true);
														
 
															     @Override
														
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
@@ -66,7 +66,7 @@ public class HighlightPhase implements FetchSubPhase {
 
															                 Map<String, Function<HitContext, FieldHighlightContext>> contextBuilders = fieldContext.builders;
														
 
															                 for (String field : contextBuilders.keySet()) {
														
 
															                     FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext);
														
 
															-                    Highlighter highlighter = getHighlighter(fieldContext.field);
														
 
															+                    Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType);
														
 
															                     HighlightField highlightField = highlighter.highlight(fieldContext);
														
 
															                     if (highlightField != null) {
														
 
															                         // Note that we make sure to use the original field name in the response. This is because the
														
@@ -80,10 +80,10 @@ public class HighlightPhase implements FetchSubPhase {
 
															         };
														
 
															     }
														
 
															-    private Highlighter getHighlighter(SearchHighlightContext.Field field) {
														
 
															+    private Highlighter getHighlighter(SearchHighlightContext.Field field, MappedFieldType fieldType) {
														
 
															         String highlighterType = field.fieldOptions().highlighterType();
														
 
															         if (highlighterType == null) {
														
 
															-            highlighterType = "unified";
														
 
															+            highlighterType = fieldType.getDefaultHighlighter();
														
 
															         }
														
 
															         Highlighter highlighter = highlighters.get(highlighterType);
														
 
															         if (highlighter == null) {
														
@@ -103,8 +103,6 @@ public class HighlightPhase implements FetchSubPhase {
 
															         Map<String, Function<HitContext, FieldHighlightContext>> builders = new LinkedHashMap<>();
														
 
															         StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS;
														
 
															         for (SearchHighlightContext.Field field : highlightContext.fields()) {
														
 
															-            Highlighter highlighter = getHighlighter(field);
														
 
															-
														
 
															             Collection<String> fieldNamesToHighlight = context.getSearchExecutionContext().getMatchingFieldNames(field.field());
														
 
															             boolean fieldNameContainsWildcards = field.field().contains("*");
														
@@ -112,6 +110,7 @@ public class HighlightPhase implements FetchSubPhase {
 
															             boolean sourceRequired = false;
														
 
															             for (String fieldName : fieldNamesToHighlight) {
														
 
															                 MappedFieldType fieldType = context.getSearchExecutionContext().getFieldType(fieldName);
														
 
															+                Highlighter highlighter = getHighlighter(field, fieldType);
														
 
															                 // We should prevent highlighting if a field is anything but a text, match_only_text,
														
 
															                 // or keyword field.
														
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@@ -37,6 +37,7 @@ public class InferenceFeatures implements FeatureSpecification {
 
															     }
														
 
															     private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
														
 
															+    private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
														
 
															     @Override
														
 
															     public Set<NodeFeature> getTestFeatures() {
														
@@ -52,7 +53,8 @@ public class InferenceFeatures implements FeatureSpecification {
 
															             SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES,
														
 
															             SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED,
														
 
															             TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
														
 
															-            SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT
														
 
															+            SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
														
 
															+            SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT
														
 
															         );
														
 
															     }
														
 
															 }
														
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
@@ -73,6 +73,7 @@ import org.elasticsearch.xcontent.XContentType;
 
															 import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
														
 
															 import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults;
														
 
															 import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder;
														
 
															+import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter;
														
 
															 import java.io.IOException;
														
 
															 import java.io.UncheckedIOException;
														
@@ -582,6 +583,11 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
 
															             return TextFieldMapper.CONTENT_TYPE;
														
 
															         }
														
 
															+        @Override
														
 
															+        public String getDefaultHighlighter() {
														
 
															+            return SemanticTextHighlighter.NAME;
														
 
															+        }
														
 
															+
														
 
															         public String getInferenceId() {
														
 
															             return inferenceId;
														
 
															         }
														
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml
@@ -55,22 +55,32 @@ setup:
 
															             index.mapping.semantic_text.use_legacy_format: false
														
 
															           mappings:
														
 
															             properties:
														
 
															+              title:
														
 
															+                type: text
														
 
															               body:
														
 
															                 type: semantic_text
														
 
															                 inference_id: dense-inference-id
														
 
															----
														
 
															-"Highlighting using a sparse embedding model":
														
 
															   - do:
														
 
															       index:
														
 
															         index: test-sparse-index
														
 
															         id: doc_1
														
 
															         body:
														
 
															+          title: "Elasticsearch"
														
 
															           body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
														
 
															         refresh: true
														
 
															-  - match: { result: created }
														
 
															+  - do:
														
 
															+      index:
														
 
															+        index: test-dense-index
														
 
															+        id: doc_1
														
 
															+        body:
														
 
															+          title: "Elasticsearch"
														
 
															+          body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
														
 
															+        refresh: true
														
 
															+---
														
 
															+"Highlighting using a sparse embedding model":
														
 
															   - do:
														
 
															       search:
														
 
															         index: test-sparse-index
														
@@ -153,16 +163,6 @@ setup:
 
															 ---
														
 
															 "Highlighting using a dense embedding model":
														
 
															-  - do:
														
 
															-      index:
														
 
															-        index: test-dense-index
														
 
															-        id: doc_1
														
 
															-        body:
														
 
															-          body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
														
 
															-        refresh: true
														
 
															-
														
 
															-  - match: { result: created }
														
 
															-
														
 
															   - do:
														
 
															       search:
														
 
															         index: test-dense-index
														
@@ -243,4 +243,51 @@ setup:
 
															   - match:  { hits.hits.0.highlight.body.0: "You Know, for Search!" }
														
 
															   - match:  { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
														
 
															+---
														
 
															+"Default highlighter for fields":
														
 
															+  - requires:
														
 
															+        cluster_features: "semantic_text.highlighter.default"
														
 
															+        reason: semantic text field defaults to the semantic highlighter
														
 
															+
														
 
															+  - do:
														
 
															+      search:
														
 
															+        index: test-dense-index
														
 
															+        body:
														
 
															+          query:
														
 
															+            match:
														
 
															+              body: "What is Elasticsearch?"
														
 
															+          highlight:
														
 
															+            fields:
														
 
															+              body:
														
 
															+                order: "score"
														
 
															+                number_of_fragments: 2
														
 
															+
														
 
															+  - match:  { hits.total.value: 1 }
														
 
															+  - match:  { hits.hits.0._id: "doc_1" }
														
 
															+  - length: { hits.hits.0.highlight.body: 2 }
														
 
															+  - match:  { hits.hits.0.highlight.body.0: "You Know, for Search!" }
														
 
															+  - match:  { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
														
 
															+
														
 
															+---
														
 
															+"semantic highlighter ignores non-inference fields":
														
 
															+  - requires:
														
 
															+      cluster_features: "semantic_text.highlighter.default"
														
 
															+      reason: semantic text field defaults to the semantic highlighter
														
 
															+
														
 
															+  - do:
														
 
															+      search:
														
 
															+        index: test-dense-index
														
 
															+        body:
														
 
															+          query:
														
 
															+            match:
														
 
															+              title: "Elasticsearch"
														
 
															+          highlight:
														
 
															+            fields:
														
 
															+              title:
														
 
															+                type: semantic
														
 
															+                number_of_fragments: 2
														
 
															+
														
 
															+  - match:      { hits.total.value: 1 }
														
 
															+  - match:      { hits.hits.0._id: "doc_1" }
														
 
															+  - not_exists: hits.hits.0.highlight.title