Browse Source

Add option to store `sparse_vector` outside `_source` (#117917) (#118018)

This PR introduces an option for `sparse_vector` to store its values separately from `_source` by using term vectors.
This capability is primarily needed by the semantic text field.
Jim Ferenczi 10 months ago
parent
commit
e1304593b2

+ 5 - 0
docs/changelog/117917.yaml

@@ -0,0 +1,5 @@
+pr: 117917
+summary: Add option to store `sparse_vector` outside `_source`
+area: Mapping
+type: feature
+issues: []

+ 17 - 0
docs/reference/mapping/types/sparse-vector.asciidoc

@@ -26,6 +26,23 @@ PUT my-index
 
 See <<semantic-search-elser, semantic search with ELSER>> for a complete example on adding documents to a `sparse_vector` mapped field using ELSER.
 
+[[sparse-vectors-params]]
+==== Parameters for `sparse_vector` fields
+
+The following parameters are accepted by `sparse_vector` fields:
+
+[horizontal]
+
+<<mapping-store,store>>::
+
+Indicates whether the field value should be stored and retrievable independently of the <<mapping-source-field,_source>> field.
+Accepted values: true or false (default).
+The field's data is stored using term vectors, a disk-efficient structure compared to the original JSON input.
+The input map can be retrieved during a search request via the <<search-fields-param,`fields` parameter>>.
+To benefit from reduced disk usage, you must either:
+  * Exclude the field from <<source-filtering, _source>>.
+  * Use <<synthetic-source,synthetic `_source`>>.
+
 [[index-multi-value-sparse-vectors]]
 ==== Multi-value sparse vectors
 

+ 117 - 0
rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml

@@ -472,3 +472,120 @@
 
   - match:
       _source.ml.tokens: {}
+
+---
+"stored sparse_vector":
+
+  - requires:
+      cluster_features: [ "mapper.sparse_vector.store_support" ]
+      reason: "sparse_vector supports store parameter"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              ml.tokens:
+                type: sparse_vector
+                store: true
+
+  - match: { acknowledged: true }
+  - do:
+      index:
+        index: test
+        id: "1"
+        body:
+          ml:
+            tokens:
+              running: 2
+              good: 3
+              run: 5
+              race: 7
+              for: 9
+
+  - match: { result: "created" }
+
+  - do:
+      indices.refresh: { }
+
+  - do:
+      search:
+        index: test
+        body:
+          fields: [ "ml.tokens" ]
+
+  - length: { hits.hits.0.fields.ml\.tokens: 1 }
+  - length: { hits.hits.0.fields.ml\.tokens.0: 5 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.running: 2.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.good: 3.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.run: 5.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.race: 7.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.for: 9.0 }
+
+---
+"stored sparse_vector synthetic source":
+
+  - requires:
+      cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ]
+      reason: "sparse_vector supports store parameter"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            index:
+              mapping.source.mode: synthetic
+          mappings:
+            properties:
+              ml.tokens:
+                type: sparse_vector
+                store: true
+
+  - match: { acknowledged: true }
+
+  - do:
+      index:
+        index: test
+        id: "1"
+        body:
+          ml:
+            tokens:
+              running: 2
+              good: 3
+              run: 5
+              race: 7
+              for: 9
+
+  - match: { result: "created" }
+
+  - do:
+      indices.refresh: { }
+
+  - do:
+      search:
+        index: test
+        body:
+          fields: [ "ml.tokens" ]
+
+  - match:
+      hits.hits.0._source: {
+        ml: {
+          tokens: {
+            running: 2.0,
+            good: 3.0,
+            run: 5.0,
+            race: 7.0,
+            for: 9.0
+          }
+        }
+      }
+
+  - length: { hits.hits.0.fields.ml\.tokens: 1 }
+  - length: { hits.hits.0.fields.ml\.tokens.0: 5 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.running: 2.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.good: 3.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.run: 5.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.race: 7.0 }
+  - match:  { hits.hits.0.fields.ml\.tokens.0.for: 9.0 }

+ 3 - 1
server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

@@ -56,6 +56,7 @@ public class MapperFeatures implements FeatureSpecification {
     );
 
     public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed");
+    public static final NodeFeature SPARSE_VECTOR_STORE_SUPPORT = new NodeFeature("mapper.sparse_vector.store_support");
 
     @Override
     public Set<NodeFeature> getTestFeatures() {
@@ -68,7 +69,8 @@ public class MapperFeatures implements FeatureSpecification {
             MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT,
             DocumentParser.FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE,
             CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX,
-            META_FETCH_FIELDS_ERROR_CODE_CHANGED
+            META_FETCH_FIELDS_ERROR_CODE_CHANGED,
+            SPARSE_VECTOR_STORE_SUPPORT
         );
     }
 }

+ 147 - 8
server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

@@ -11,6 +11,12 @@ package org.elasticsearch.index.mapper.vectors;
 
 import org.apache.lucene.document.FeatureField;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.TermVectors;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
@@ -25,14 +31,22 @@ import org.elasticsearch.index.mapper.DocumentParserContext;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
+import org.elasticsearch.index.mapper.SourceLoader;
 import org.elasticsearch.index.mapper.SourceValueFetcher;
 import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.search.lookup.Source;
+import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser.Token;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
 
@@ -52,8 +66,12 @@ public class SparseVectorFieldMapper extends FieldMapper {
     static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions.NEW_SPARSE_VECTOR;
     static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions.SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT;
 
-    public static class Builder extends FieldMapper.Builder {
+    private static SparseVectorFieldMapper toType(FieldMapper in) {
+        return (SparseVectorFieldMapper) in;
+    }
 
+    public static class Builder extends FieldMapper.Builder {
+        private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
         private final Parameter<Map<String, String>> meta = Parameter.metaParam();
 
         public Builder(String name) {
@@ -62,14 +80,14 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
         @Override
         protected Parameter<?>[] getParameters() {
-            return new Parameter<?>[] { meta };
+            return new Parameter<?>[] { stored, meta };
         }
 
         @Override
         public SparseVectorFieldMapper build(MapperBuilderContext context) {
             return new SparseVectorFieldMapper(
                 leafName(),
-                new SparseVectorFieldType(context.buildFullName(leafName()), meta.getValue()),
+                new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue()),
                 builderParams(this, context)
             );
         }
@@ -87,8 +105,8 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
     public static final class SparseVectorFieldType extends MappedFieldType {
 
-        public SparseVectorFieldType(String name, Map<String, String> meta) {
-            super(name, true, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
+        public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta) {
+            super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
         }
 
         @Override
@@ -103,6 +121,9 @@ public class SparseVectorFieldMapper extends FieldMapper {
 
         @Override
         public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
+            if (isStored()) {
+                return new SparseVectorValueFetcher(name());
+            }
             return SourceValueFetcher.identity(name(), context, format);
         }
 
@@ -135,6 +156,14 @@ public class SparseVectorFieldMapper extends FieldMapper {
         super(simpleName, mappedFieldType, builderParams);
     }
 
+    @Override
+    protected SyntheticSourceSupport syntheticSourceSupport() {
+        if (fieldType().isStored()) {
+            return new SyntheticSourceSupport.Native(new SparseVectorSyntheticFieldLoader(fullPath(), leafName()));
+        }
+        return super.syntheticSourceSupport();
+    }
+
     @Override
     public Map<String, NamedAnalyzer> indexAnalyzers() {
         return Map.of(mappedFieldType.name(), Lucene.KEYWORD_ANALYZER);
@@ -189,9 +218,9 @@ public class SparseVectorFieldMapper extends FieldMapper {
                     // based on recommendations from this paper: https://arxiv.org/pdf/2305.18494.pdf
                     IndexableField currentField = context.doc().getByKey(key);
                     if (currentField == null) {
-                        context.doc().addWithKey(key, new FeatureField(fullPath(), feature, value));
-                    } else if (currentField instanceof FeatureField && ((FeatureField) currentField).getFeatureValue() < value) {
-                        ((FeatureField) currentField).setFeatureValue(value);
+                        context.doc().addWithKey(key, new XFeatureField(fullPath(), feature, value, fieldType().isStored()));
+                    } else if (currentField instanceof XFeatureField && ((XFeatureField) currentField).getFeatureValue() < value) {
+                        ((XFeatureField) currentField).setFeatureValue(value);
                     }
                 } else {
                     throw new IllegalArgumentException(
@@ -219,4 +248,114 @@ public class SparseVectorFieldMapper extends FieldMapper {
         return CONTENT_TYPE;
     }
 
+    private static class SparseVectorValueFetcher implements ValueFetcher {
+        private final String fieldName;
+        private TermVectors termVectors;
+
+        private SparseVectorValueFetcher(String fieldName) {
+            this.fieldName = fieldName;
+        }
+
+        @Override
+        public void setNextReader(LeafReaderContext context) {
+            try {
+                termVectors = context.reader().termVectors();
+            } catch (IOException exc) {
+                throw new UncheckedIOException(exc);
+            }
+        }
+
+        @Override
+        public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValues) throws IOException {
+            if (termVectors == null) {
+                return List.of();
+            }
+            var terms = termVectors.get(doc, fieldName);
+            if (terms == null) {
+                return List.of();
+            }
+
+            var termsEnum = terms.iterator();
+            PostingsEnum postingsScratch = null;
+            Map<String, Float> result = new LinkedHashMap<>();
+            while (termsEnum.next() != null) {
+                postingsScratch = termsEnum.postings(postingsScratch);
+                postingsScratch.nextDoc();
+                result.put(termsEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(postingsScratch.freq()));
+                assert postingsScratch.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
+            }
+            return List.of(result);
+        }
+
+        @Override
+        public StoredFieldsSpec storedFieldsSpec() {
+            return StoredFieldsSpec.NO_REQUIREMENTS;
+        }
+    }
+
+    private static class SparseVectorSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader {
+        private final String fullPath;
+        private final String leafName;
+
+        private TermsEnum termsDocEnum;
+
+        private SparseVectorSyntheticFieldLoader(String fullPath, String leafName) {
+            this.fullPath = fullPath;
+            this.leafName = leafName;
+        }
+
+        @Override
+        public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {
+            return Stream.of();
+        }
+
+        @Override
+        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
+            var fieldInfos = leafReader.getFieldInfos().fieldInfo(fullPath);
+            if (fieldInfos == null || fieldInfos.hasVectors() == false) {
+                return null;
+            }
+            return docId -> {
+                var terms = leafReader.termVectors().get(docId, fullPath);
+                if (terms == null) {
+                    return false;
+                }
+                termsDocEnum = terms.iterator();
+                if (termsDocEnum.next() == null) {
+                    termsDocEnum = null;
+                    return false;
+                }
+                return true;
+            };
+        }
+
+        @Override
+        public boolean hasValue() {
+            return termsDocEnum != null;
+        }
+
+        @Override
+        public void write(XContentBuilder b) throws IOException {
+            assert termsDocEnum != null;
+            PostingsEnum reuse = null;
+            b.startObject(leafName);
+            do {
+                reuse = termsDocEnum.postings(reuse);
+                reuse.nextDoc();
+                b.field(termsDocEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(reuse.freq()));
+            } while (termsDocEnum.next() != null);
+            b.endObject();
+        }
+
+        @Override
+        public String fieldName() {
+            return leafName;
+        }
+
+        @Override
+        public void reset() {
+            termsDocEnum = null;
+        }
+    }
+
 }

+ 177 - 0
server/src/main/java/org/elasticsearch/index/mapper/vectors/XFeatureField.java

@@ -0,0 +1,177 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.elasticsearch.index.mapper.vectors;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
+import org.apache.lucene.document.FeatureField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
+
+/**
+ * This class is forked from the Lucene {@link FeatureField} implementation to enable support for storing term vectors.
+ * It should be removed once apache/lucene#14034 becomes available.
+ */
+public final class XFeatureField extends Field {
+    private static final FieldType FIELD_TYPE = new FieldType();
+    private static final FieldType FIELD_TYPE_STORE_TERM_VECTORS = new FieldType();
+
+    static {
+        FIELD_TYPE.setTokenized(false);
+        FIELD_TYPE.setOmitNorms(true);
+        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+
+        FIELD_TYPE_STORE_TERM_VECTORS.setTokenized(false);
+        FIELD_TYPE_STORE_TERM_VECTORS.setOmitNorms(true);
+        FIELD_TYPE_STORE_TERM_VECTORS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+        FIELD_TYPE_STORE_TERM_VECTORS.setStoreTermVectors(true);
+    }
+
+    private float featureValue;
+
+    /**
+     * Create a feature.
+     *
+     * @param fieldName The name of the field to store the information into. All features may be
+     *     stored in the same field.
+     * @param featureName The name of the feature, eg. 'pagerank`. It will be indexed as a term.
+     * @param featureValue The value of the feature, must be a positive, finite, normal float.
+     */
+    public XFeatureField(String fieldName, String featureName, float featureValue) {
+        this(fieldName, featureName, featureValue, false);
+    }
+
+    /**
+     * Create a feature.
+     *
+     * @param fieldName    The name of the field to store the information into. All features may be
+     *                     stored in the same field.
+     * @param featureName  The name of the feature, eg. 'pagerank`. It will be indexed as a term.
+     * @param featureValue The value of the feature, must be a positive, finite, normal float.
+     */
+    public XFeatureField(String fieldName, String featureName, float featureValue, boolean storeTermVectors) {
+        super(fieldName, featureName, storeTermVectors ? FIELD_TYPE_STORE_TERM_VECTORS : FIELD_TYPE);
+        setFeatureValue(featureValue);
+    }
+
+    /**
+     * Update the feature value of this field.
+     */
+    public void setFeatureValue(float featureValue) {
+        if (Float.isFinite(featureValue) == false) {
+            throw new IllegalArgumentException(
+                "featureValue must be finite, got: " + featureValue + " for feature " + fieldsData + " on field " + name
+            );
+        }
+        if (featureValue < Float.MIN_NORMAL) {
+            throw new IllegalArgumentException(
+                "featureValue must be a positive normal float, got: "
+                    + featureValue
+                    + " for feature "
+                    + fieldsData
+                    + " on field "
+                    + name
+                    + " which is less than the minimum positive normal float: "
+                    + Float.MIN_NORMAL
+            );
+        }
+        this.featureValue = featureValue;
+    }
+
+    @Override
+    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+        FeatureTokenStream stream;
+        if (reuse instanceof FeatureTokenStream) {
+            stream = (FeatureTokenStream) reuse;
+        } else {
+            stream = new FeatureTokenStream();
+        }
+
+        int freqBits = Float.floatToIntBits(featureValue);
+        stream.setValues((String) fieldsData, freqBits >>> 15);
+        return stream;
+    }
+
+    /**
+     * This is useful if you have multiple features sharing a name and you want to take action to
+     * deduplicate them.
+     *
+     * @return the feature value of this field.
+     */
+    public float getFeatureValue() {
+        return featureValue;
+    }
+
+    private static final class FeatureTokenStream extends TokenStream {
+        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+        private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
+        private boolean used = true;
+        private String value = null;
+        private int freq = 0;
+
+        private FeatureTokenStream() {}
+
+        /**
+         * Sets the values
+         */
+        void setValues(String value, int freq) {
+            this.value = value;
+            this.freq = freq;
+        }
+
+        @Override
+        public boolean incrementToken() {
+            if (used) {
+                return false;
+            }
+            clearAttributes();
+            termAttribute.append(value);
+            freqAttribute.setTermFrequency(freq);
+            used = true;
+            return true;
+        }
+
+        @Override
+        public void reset() {
+            used = false;
+        }
+
+        @Override
+        public void close() {
+            value = null;
+        }
+    }
+
+    static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15;
+
+    static float decodeFeatureValue(float freq) {
+        if (freq > MAX_FREQ) {
+            // This is never used in practice but callers of the SimScorer API might
+            // occasionally call it on eg. Float.MAX_VALUE to compute the max score
+            // so we need to be consistent.
+            return Float.MAX_VALUE;
+        }
+        int tf = (int) freq; // lossless
+        int featureBits = tf << 15;
+        return Float.intBitsToFloat(featureBits);
+    }
+}

+ 119 - 16
server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

@@ -11,19 +11,26 @@ package org.elasticsearch.index.mapper.vectors;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
-import org.apache.lucene.document.FeatureField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.search.lookup.Source;
 import org.elasticsearch.test.index.IndexVersionUtils;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
@@ -33,18 +40,25 @@ import org.junit.AssumptionViolatedException;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
 import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION;
 import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION;
+import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 
 public class SparseVectorFieldMapperTests extends MapperTestCase {
 
     @Override
     protected Object getSampleValueForDocument() {
-        return Map.of("ten", 10, "twenty", 20);
+        Map<String, Float> map = new LinkedHashMap<>();
+        map.put("ten", 10f);
+        map.put("twenty", 20f);
+        return map;
     }
 
     @Override
@@ -92,14 +106,18 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
 
         List<IndexableField> fields = doc1.rootDoc().getFields("field");
         assertEquals(2, fields.size());
-        assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class));
-        FeatureField featureField1 = null;
-        FeatureField featureField2 = null;
+        if (IndexVersion.current().luceneVersion().major == 10) {
+            // TODO: Update to use Lucene's FeatureField after upgrading to Lucene 10.1.
+            assertThat(IndexVersion.current().luceneVersion().minor, equalTo(0));
+        }
+        assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
+        XFeatureField featureField1 = null;
+        XFeatureField featureField2 = null;
         for (IndexableField field : fields) {
             if (field.stringValue().equals("ten")) {
-                featureField1 = (FeatureField) field;
+                featureField1 = (XFeatureField) field;
             } else if (field.stringValue().equals("twenty")) {
-                featureField2 = (FeatureField) field;
+                featureField2 = (XFeatureField) field;
             } else {
                 throw new UnsupportedOperationException();
             }
@@ -116,14 +134,14 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
 
         List<IndexableField> fields = parsedDocument.rootDoc().getFields("field");
         assertEquals(2, fields.size());
-        assertThat(fields.get(0), Matchers.instanceOf(FeatureField.class));
-        FeatureField featureField1 = null;
-        FeatureField featureField2 = null;
+        assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
+        XFeatureField featureField1 = null;
+        XFeatureField featureField2 = null;
         for (IndexableField field : fields) {
             if (field.stringValue().equals("foo.bar")) {
-                featureField1 = (FeatureField) field;
+                featureField1 = (XFeatureField) field;
             } else if (field.stringValue().equals("foobar")) {
-                featureField2 = (FeatureField) field;
+                featureField2 = (XFeatureField) field;
             } else {
                 throw new UnsupportedOperationException();
             }
@@ -171,13 +189,13 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
         }));
 
         // then validate that the generate document stored both values appropriately and we have only the max value stored
-        FeatureField barField = ((FeatureField) doc1.rootDoc().getByKey("foo.field\\.bar"));
+        XFeatureField barField = ((XFeatureField) doc1.rootDoc().getByKey("foo.field\\.bar"));
         assertEquals(20, barField.getFeatureValue(), 1);
 
-        FeatureField storedBarField = ((FeatureField) doc1.rootDoc().getFields("foo.field").get(1));
+        XFeatureField storedBarField = ((XFeatureField) doc1.rootDoc().getFields("foo.field").get(1));
         assertEquals(20, storedBarField.getFeatureValue(), 1);
 
-        assertEquals(3, doc1.rootDoc().getFields().stream().filter((f) -> f instanceof FeatureField).count());
+        assertEquals(3, doc1.rootDoc().getFields().stream().filter((f) -> f instanceof XFeatureField).count());
     }
 
     public void testCannotBeUsedInMultiFields() {
@@ -192,6 +210,53 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
         assertThat(e.getMessage(), containsString("Field [feature] of type [sparse_vector] can't be used in multifields"));
     }
 
+    public void testStoreIsNotUpdateable() throws IOException {
+        var mapperService = createMapperService(fieldMapping(this::minimalMapping));
+        XContentBuilder mapping = jsonBuilder().startObject()
+            .startObject("_doc")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "sparse_vector")
+            .field("store", true)
+            .endObject()
+            .endObject()
+            .endObject()
+            .endObject();
+        var exc = expectThrows(
+            Exception.class,
+            () -> mapperService.merge("_doc", new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE)
+        );
+        assertThat(exc.getMessage(), containsString("Cannot update parameter [store]"));
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testValueFetcher() throws Exception {
+        for (boolean store : new boolean[] { true, false }) {
+            var mapperService = createMapperService(fieldMapping(store ? this::minimalStoreMapping : this::minimalMapping));
+            var mapper = mapperService.documentMapper();
+            try (Directory directory = newDirectory()) {
+                RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
+                var sourceToParse = source(this::writeField);
+                ParsedDocument doc1 = mapper.parse(sourceToParse);
+                iw.addDocument(doc1.rootDoc());
+                iw.close();
+                try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
+                    LeafReader leafReader = getOnlyLeafReader(reader);
+                    var searchContext = createSearchExecutionContext(mapperService, new IndexSearcher(leafReader));
+                    var fieldType = mapper.mappers().getFieldType("field");
+                    var valueFetcher = fieldType.valueFetcher(searchContext, null);
+                    valueFetcher.setNextReader(leafReader.getContext());
+
+                    var source = Source.fromBytes(sourceToParse.source());
+                    var result = valueFetcher.fetchValues(source, 0, List.of());
+                    assertThat(result.size(), equalTo(1));
+                    assertThat(result.get(0), instanceOf(Map.class));
+                    assertThat(toFloats((Map<String, ?>) result.get(0)), equalTo(toFloats((Map<String, ?>) source.source().get("field"))));
+                }
+            }
+        }
+    }
+
     @Override
     protected Object generateRandomInputValue(MappedFieldType ft) {
         assumeFalse("Test implemented in a follow up", true);
@@ -205,7 +270,29 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport(boolean syntheticSource) {
-        throw new AssumptionViolatedException("not supported");
+        boolean withStore = randomBoolean();
+        return new SyntheticSourceSupport() {
+            @Override
+            public boolean preservesExactSource() {
+                return withStore == false;
+            }
+
+            @Override
+            public SyntheticSourceExample example(int maxValues) {
+                return new SyntheticSourceExample(getSampleValueForDocument(), getSampleValueForDocument(), b -> {
+                    if (withStore) {
+                        minimalStoreMapping(b);
+                    } else {
+                        minimalMapping(b);
+                    }
+                });
+            }
+
+            @Override
+            public List<SyntheticSourceInvalidExample> invalidExample() {
+                return List.of();
+            }
+        };
     }
 
     @Override
@@ -276,4 +363,20 @@ public class SparseVectorFieldMapperTests extends MapperTestCase {
         })));
         assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_8X));
     }
+
+    /**
+     * Handles float/double conversion when reading/writing with xcontent by converting all numbers to floats.
+     */
+    private Map<String, Float> toFloats(Map<String, ?> value) {
+        // preserve order
+        Map<String, Float> result = new LinkedHashMap<>();
+        for (var entry : value.entrySet()) {
+            if (entry.getValue() instanceof Number num) {
+                result.put(entry.getKey(), num.floatValue());
+            } else {
+                throw new IllegalArgumentException("Expected Number, got: " + value.getClass().getSimpleName());
+            }
+        }
+        return result;
+    }
 }

+ 2 - 2
server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java

@@ -18,13 +18,13 @@ import java.util.Collections;
 public class SparseVectorFieldTypeTests extends FieldTypeTestCase {
 
     public void testDocValuesDisabled() {
-        MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", Collections.emptyMap());
+        MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", false, Collections.emptyMap());
         assertFalse(fieldType.hasDocValues());
         expectThrows(IllegalArgumentException.class, () -> fieldType.fielddataBuilder(FieldDataContext.noRuntimeFields("test")));
     }
 
     public void testIsNotAggregatable() {
-        MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", Collections.emptyMap());
+        MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", false, Collections.emptyMap());
         assertFalse(fieldType.isAggregatable());
     }
 }

+ 2 - 2
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

@@ -7,7 +7,6 @@
 
 package org.elasticsearch.xpack.inference.mapper;
 
-import org.apache.lucene.document.FeatureField;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexableField;
@@ -47,6 +46,7 @@ import org.elasticsearch.index.mapper.ParsedDocument;
 import org.elasticsearch.index.mapper.SourceToParse;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
 import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
+import org.elasticsearch.index.mapper.vectors.XFeatureField;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
 import org.elasticsearch.inference.Model;
@@ -1130,7 +1130,7 @@ public class SemanticTextFieldMapperTests extends MapperTestCase {
     private static void assertSparseFeatures(LuceneDocument doc, String fieldName, int expectedCount) {
         int count = 0;
         for (IndexableField field : doc.getFields()) {
-            if (field instanceof FeatureField featureField) {
+            if (field instanceof XFeatureField featureField) {
                 assertThat(featureField.name(), equalTo(fieldName));
                 ++count;
             }