
Cut over stored fields to ZSTD for compression. (#103374)

This cuts over stored fields with `index.codec: default` (the default) to ZSTD with level 0 and blocks of at most 128 documents or 14kB, and `index.codec: best_compression` to ZSTD with level 3 and blocks of at most 2,048 documents or 240kB.
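
For reference, a minimal sketch of selecting the compression mode per index through the existing `index.codec` setting, using the Settings builder from the Java codebase (the helper class is illustrative):

```java
import org.elasticsearch.common.settings.Settings;

public class CodecSettingSketch {
    // With the feature flag enabled, "default" maps to ZSTD level 0 and
    // "best_compression" to ZSTD level 3, as described above.
    public static Settings bestCompressionSettings() {
        return Settings.builder().put("index.codec", "best_compression").build();
    }
}
```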

Compared with the current codecs, this yields similar indexing and retrieval speed with much better space efficiency. Benchmarks on the `elastic/logs` track suggest about 10% better storage efficiency and slightly faster ingestion.

The Lucene codec infrastructure records the codec on a per-segment basis and ensures that this change is backward-compatible. Segments will get progressively migrated to ZSTD as they get merged in the background.

Bindings for ZSTD are provided by the Panama FFI API on JDK 21+ and by JNA on older JDKs.
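
As a rough illustration of that binding surface (method names are taken from `Zstd814StoredFieldsFormat` in this diff, so treat this as a sketch against internal APIs, and it assumes the native zstd library loads on your platform):

```java
import java.nio.charset.StandardCharsets;

import org.elasticsearch.nativeaccess.CloseableByteBuffer;
import org.elasticsearch.nativeaccess.NativeAccess;
import org.elasticsearch.nativeaccess.Zstd;

public class ZstdRoundTripSketch {
    public static void main(String[] args) {
        byte[] data = "the quick brown fox ".repeat(64).getBytes(StandardCharsets.UTF_8);
        NativeAccess nativeAccess = NativeAccess.instance(); // Panama FFI on JDK 21+, JNA otherwise
        Zstd zstd = nativeAccess.getZstd();
        try (
            CloseableByteBuffer src = nativeAccess.newBuffer(data.length);
            CloseableByteBuffer compressed = nativeAccess.newBuffer(zstd.compressBound(data.length));
            CloseableByteBuffer restored = nativeAccess.newBuffer(data.length)
        ) {
            src.buffer().put(data).flip();
            int compressedLen = zstd.compress(compressed, src, 0); // level 0, as in BEST_SPEED
            // Assumption: decompress consumes the source between position and limit,
            // mirroring how the decompressor below hands it an exactly-sized buffer.
            compressed.buffer().limit(compressedLen);
            int restoredLen = zstd.decompress(restored, compressed);
            assert restoredLen == data.length;
            System.out.println(data.length + " bytes -> " + compressedLen + " bytes");
        }
    }
}
```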

ZSTD support is currently behind a feature flag, so it won't be enabled immediately when this change is merged; enabling it will require a follow-up change.
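
The flag itself is declared in `CodecService` below; a small sketch of checking it. The system-property spelling is an assumption based on the usual Elasticsearch feature-flag convention (flags are also typically on by default in snapshot builds):

```java
import org.elasticsearch.common.util.FeatureFlag;

public class ZstdFlagSketch {
    // Same declaration as CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG below.
    static final FeatureFlag ZSTD_STORED_FIELDS = new FeatureFlag("zstd_stored_fields");

    public static void main(String[] args) {
        // Assumed convention: release builds would enable this via
        // -Des.zstd_stored_fields_feature_flag_enabled=true
        System.out.println("zstd stored fields enabled: " + ZSTD_STORED_FIELDS.isEnabled());
    }
}
```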

Co-authored-by: Mark Vieira <portugee@gmail.com>
Co-authored-by: Ryan Ernst <ryan@iernst.net>
Adrien Grand · 1 year ago · commit 49ffa045a6

+ 16 - 0
docs/changelog/103374.yaml

@@ -0,0 +1,16 @@
+pr: 103374
+summary: Cut over stored fields to ZSTD for compression
+area: Search
+type: enhancement
+issues: []
+highlight:
+  title: Stored fields are now compressed with ZStandard instead of LZ4/DEFLATE
+  body: |-
+    Stored fields are now compressed by splitting documents into blocks, which
+    are then compressed independently with ZStandard. `index.codec: default`
+    (default) uses blocks of at most 14kB or 128 documents compressed at level
+    0, while `index.codec: best_compression` uses blocks of at most 240kB or
+    2048 documents compressed at level 3. On most datasets that we tested
+    against, this yielded storage improvements in the order of 10%, slightly
+    faster indexing and similar retrieval latencies.
+  notable: true

+ 3 - 0
server/src/main/java/module-info.java

@@ -6,6 +6,7 @@
  * Side Public License, v 1.
  */
 
+import org.elasticsearch.index.codec.Elasticsearch814Codec;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
 import org.elasticsearch.plugins.internal.RestExtension;
 
@@ -243,6 +244,7 @@ module org.elasticsearch.server {
     exports org.elasticsearch.index.codec;
     exports org.elasticsearch.index.codec.tsdb;
     exports org.elasticsearch.index.codec.bloomfilter;
+    exports org.elasticsearch.index.codec.zstd;
     exports org.elasticsearch.index.engine;
     exports org.elasticsearch.index.fielddata;
     exports org.elasticsearch.index.fielddata.fieldcomparator;
@@ -433,6 +435,7 @@ module org.elasticsearch.server {
         with
             org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat,
             org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
+    provides org.apache.lucene.codecs.Codec with Elasticsearch814Codec;
 
     exports org.elasticsearch.cluster.routing.allocation.shards
         to

+ 25 - 5
server/src/main/java/org/elasticsearch/index/codec/CodecService.java

@@ -11,7 +11,9 @@ package org.elasticsearch.index.codec;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.FeatureFlag;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
 import org.elasticsearch.index.mapper.MapperService;
 
 import java.util.HashMap;
@@ -25,22 +27,40 @@ import java.util.Map;
  */
 public class CodecService {
 
+    public static final FeatureFlag ZSTD_STORED_FIELDS_FEATURE_FLAG = new FeatureFlag("zstd_stored_fields");
+
     private final Map<String, Codec> codecs;
 
     public static final String DEFAULT_CODEC = "default";
+    public static final String LEGACY_DEFAULT_CODEC = "legacy_default"; // escape hatch
     public static final String BEST_COMPRESSION_CODEC = "best_compression";
+    public static final String LEGACY_BEST_COMPRESSION_CODEC = "legacy_best_compression"; // escape hatch
+
     /** the raw unfiltered lucene default. useful for testing */
     public static final String LUCENE_DEFAULT_CODEC = "lucene_default";
 
     public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
         final var codecs = new HashMap<String, Codec>();
-        if (mapperService == null) {
-            codecs.put(DEFAULT_CODEC, new Lucene99Codec());
-            codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Lucene99Codec.Mode.BEST_COMPRESSION));
+
+        Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, bigArrays);
+        if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) {
+            codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
         } else {
-            codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, bigArrays));
-            codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays));
+            codecs.put(DEFAULT_CODEC, legacyBestSpeedCodec);
         }
+        codecs.put(LEGACY_DEFAULT_CODEC, legacyBestSpeedCodec);
+
+        Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
+        if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) {
+            codecs.put(
+                BEST_COMPRESSION_CODEC,
+                new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
+            );
+        } else {
+            codecs.put(BEST_COMPRESSION_CODEC, legacyBestCompressionCodec);
+        }
+        codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec);
+
         codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
         for (String codec : Codec.availableCodecs()) {
             codecs.put(codec, Codec.forName(codec));
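
A hedged usage sketch of the lookup table built above, going through the same `codec(String)` accessor that `CodecTests` exercises further down:

```java
import org.apache.lucene.codecs.Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.codec.CodecService;

public class CodecLookupSketch {
    public static void main(String[] args) {
        // A null MapperService is accepted; per-field formats fall back to defaults.
        CodecService codecService = new CodecService(null, BigArrays.NON_RECYCLING_INSTANCE);
        Codec byDefault = codecService.codec(CodecService.DEFAULT_CODEC);          // ZSTD when the flag is on
        Codec escapeHatch = codecService.codec(CodecService.LEGACY_DEFAULT_CODEC); // always the LZ4-based format
        System.out.println(byDefault.getName() + " / " + escapeHatch.getName());
    }
}
```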

+ 130 - 0
server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java

@@ -0,0 +1,130 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99Codec;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
+
+/**
+ * Elasticsearch codec as of 8.14. This extends the Lucene 9.9 codec to compress stored fields with ZSTD instead of LZ4/DEFLATE. See
+ * {@link Zstd814StoredFieldsFormat}.
+ */
+public class Elasticsearch814Codec extends FilterCodec {
+
+    private final StoredFieldsFormat storedFieldsFormat;
+
+    private final PostingsFormat defaultPostingsFormat;
+    private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
+        @Override
+        public PostingsFormat getPostingsFormatForField(String field) {
+            return Elasticsearch814Codec.this.getPostingsFormatForField(field);
+        }
+    };
+
+    private final DocValuesFormat defaultDVFormat;
+    private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+        @Override
+        public DocValuesFormat getDocValuesFormatForField(String field) {
+            return Elasticsearch814Codec.this.getDocValuesFormatForField(field);
+        }
+    };
+
+    private final KnnVectorsFormat defaultKnnVectorsFormat;
+    private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
+        @Override
+        public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+            return Elasticsearch814Codec.this.getKnnVectorsFormatForField(field);
+        }
+    };
+
+    /** Public no-arg constructor, needed for SPI loading at read-time. */
+    public Elasticsearch814Codec() {
+        this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
+    }
+
+    /**
+     * Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of
+     * worse space-efficiency or vice-versa.
+     */
+    public Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode mode) {
+        super("Elasticsearch814", new Lucene99Codec());
+        this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode);
+        this.defaultPostingsFormat = new Lucene99PostingsFormat();
+        this.defaultDVFormat = new Lucene90DocValuesFormat();
+        this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+    }
+
+    @Override
+    public StoredFieldsFormat storedFieldsFormat() {
+        return storedFieldsFormat;
+    }
+
+    @Override
+    public final PostingsFormat postingsFormat() {
+        return postingsFormat;
+    }
+
+    @Override
+    public final DocValuesFormat docValuesFormat() {
+        return docValuesFormat;
+    }
+
+    @Override
+    public final KnnVectorsFormat knnVectorsFormat() {
+        return knnVectorsFormat;
+    }
+
+    /**
+     * Returns the postings format that should be used for writing new segments of <code>field</code>.
+     *
+     * <p>The default implementation always returns "Lucene99".
+     *
+     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+     * future versions of Lucene are only guaranteed to be able to read the default implementation.
+     */
+    public PostingsFormat getPostingsFormatForField(String field) {
+        return defaultPostingsFormat;
+    }
+
+    /**
+     * Returns the docvalues format that should be used for writing new segments of <code>field</code>.
+     *
+     * <p>The default implementation always returns "Lucene90".
+     *
+     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+     * future versions of Lucene are only guaranteed to be able to read the default implementation.
+     */
+    public DocValuesFormat getDocValuesFormatForField(String field) {
+        return defaultDVFormat;
+    }
+
+    /**
+     * Returns the vectors format that should be used for writing new segments of <code>field</code>.
+     *
+     * <p>The default implementation always returns "Lucene99".
+     *
+     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+     * future versions of Lucene are only guaranteed to be able to read the default implementation.
+     */
+    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+        return defaultKnnVectorsFormat;
+    }
+}
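
Because the codec is registered through SPI (see the `META-INF/services` entry below), Lucene can resolve it by name when reading existing segments; a quick sketch:

```java
import org.apache.lucene.codecs.Codec;

public class CodecSpiSketch {
    public static void main(String[] args) {
        // Uses the public no-arg constructor above; BEST_SPEED is the default mode.
        Codec codec = Codec.forName("Elasticsearch814");
        System.out.println(codec.storedFieldsFormat());
    }
}
```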

+ 52 - 0
server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java

@@ -0,0 +1,52 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99Codec;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.mapper.MapperService;
+
+/**
+ * Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new
+ * changes in {@link PerFieldMapperCodec}.
+ */
+public final class LegacyPerFieldMapperCodec extends Lucene99Codec {
+
+    private final PerFieldFormatSupplier formatSupplier;
+
+    public LegacyPerFieldMapperCodec(Lucene99Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
+        super(compressionMode);
+        this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
+        // If the below assertion fails, it is a sign that Lucene released a new codec. You must then update LegacyPerFieldMapperCodec to
+        // extend this new Lucene codec.
+        assert Codec.forName(Lucene.LATEST_CODEC).getClass() == getClass().getSuperclass()
+            : "LegacyPerFieldMapperCodec must be on the latest lucene codec: " + Lucene.LATEST_CODEC;
+    }
+
+    @Override
+    public PostingsFormat getPostingsFormatForField(String field) {
+        return formatSupplier.getPostingsFormatForField(field);
+    }
+
+    @Override
+    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+        return formatSupplier.getKnnVectorsFormatForField(field);
+    }
+
+    @Override
+    public DocValuesFormat getDocValuesFormatForField(String field) {
+        return formatSupplier.getDocValuesFormatForField(field);
+    }
+
+}

+ 123 - 0
server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

@@ -0,0 +1,123 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.IndexMode;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
+import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
+import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
+import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
+
+import java.util.Objects;
+
+/**
+ * Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and
+ * vectors.
+ */
+public class PerFieldFormatSupplier {
+
+    private final MapperService mapperService;
+    private final BigArrays bigArrays;
+    private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
+    private final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
+    private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
+    private final ES87TSDBDocValuesFormat tsdbDocValuesFormat;
+
+    private final ES812PostingsFormat es812PostingsFormat;
+
+    public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays) {
+        this.mapperService = mapperService;
+        this.bigArrays = Objects.requireNonNull(bigArrays);
+        this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
+        this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
+        this.es812PostingsFormat = new ES812PostingsFormat();
+    }
+
+    public PostingsFormat getPostingsFormatForField(String field) {
+        if (useBloomFilter(field)) {
+            return bloomFilterPostingsFormat;
+        }
+        return internalGetPostingsFormatForField(field);
+    }
+
+    private PostingsFormat internalGetPostingsFormatForField(String field) {
+        if (mapperService != null) {
+            final PostingsFormat format = mapperService.mappingLookup().getPostingsFormat(field);
+            if (format != null) {
+                return format;
+            }
+        }
+        // return our own posting format using PFOR
+        return es812PostingsFormat;
+    }
+
+    boolean useBloomFilter(String field) {
+        if (mapperService == null) {
+            return false;
+        }
+        IndexSettings indexSettings = mapperService.getIndexSettings();
+        if (mapperService.mappingLookup().isDataStreamTimestampFieldEnabled()) {
+            // For time series indices, the _id isn't randomly generated but is
+            // derived from the dimension fields and the timestamp field, so during
+            // indexing version/seq_no/term need to be looked up, and having a bloom
+            // filter can speed this up significantly.
+            return indexSettings.getMode() == IndexMode.TIME_SERIES
+                && IdFieldMapper.NAME.equals(field)
+                && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(indexSettings.getSettings());
+        } else {
+            return IdFieldMapper.NAME.equals(field) && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(indexSettings.getSettings());
+        }
+    }
+
+    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+        if (mapperService != null) {
+            Mapper mapper = mapperService.mappingLookup().getMapper(field);
+            if (mapper instanceof DenseVectorFieldMapper vectorMapper) {
+                return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat);
+            }
+        }
+        return knnVectorsFormat;
+    }
+
+    public DocValuesFormat getDocValuesFormatForField(String field) {
+        if (useTSDBDocValuesFormat(field)) {
+            return tsdbDocValuesFormat;
+        }
+        return docValuesFormat;
+    }
+
+    boolean useTSDBDocValuesFormat(final String field) {
+        if (excludeFields(field)) {
+            return false;
+        }
+
+        return mapperService != null && isTimeSeriesModeIndex() && mapperService.getIndexSettings().isES87TSDBCodecEnabled();
+    }
+
+    private boolean excludeFields(String fieldName) {
+        // Avoid using tsdb codec for fields like _seq_no, _primary_term.
+        // But _tsid and _ts_routing_hash should always use the tsdb codec.
+        return fieldName.startsWith("_") && fieldName.equals("_tsid") == false && fieldName.equals("_ts_routing_hash") == false;
+    }
+
+    private boolean isTimeSeriesModeIndex() {
+        return mapperService != null && IndexMode.TIME_SERIES == mapperService.getIndexSettings().getMode();
+    }
+
+}
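
A short usage sketch of the supplier; the fallbacks shown in the comments follow directly from the null checks above:

```java
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.codec.PerFieldFormatSupplier;

public class FormatSupplierSketch {
    public static void main(String[] args) {
        // With no MapperService, the supplier falls back to the defaults above
        // (ES812PostingsFormat, Lucene90DocValuesFormat, Lucene99HnswVectorsFormat)
        // and never applies the _id bloom filter or the TSDB doc-values format.
        PerFieldFormatSupplier supplier = new PerFieldFormatSupplier(null, BigArrays.NON_RECYCLING_INSTANCE);
        System.out.println(supplier.getPostingsFormatForField("_id"));
        System.out.println(supplier.getDocValuesFormatForField("@timestamp"));
    }
}
```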

+ 12 - 82
server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java

@@ -12,19 +12,10 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.util.BigArrays;
-import org.elasticsearch.index.IndexMode;
-import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
-import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
-import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
-import org.elasticsearch.index.mapper.IdFieldMapper;
-import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
 import org.elasticsearch.index.mapper.MapperService;
-import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
 
 /**
  * {@link PerFieldMapperCodec This Lucene codec} provides the default
@@ -34,93 +25,32 @@ import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
  * per index in real time via the mapping API. If no specific postings format or vector format is
  * configured for a specific field the default postings or vector format is used.
  */
-public final class PerFieldMapperCodec extends Lucene99Codec {
+public final class PerFieldMapperCodec extends Elasticsearch814Codec {
 
-    private final MapperService mapperService;
-    private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
-    private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
-    private final ES87TSDBDocValuesFormat tsdbDocValuesFormat;
+    private final PerFieldFormatSupplier formatSupplier;
 
-    private final ES812PostingsFormat es812PostingsFormat;
-
-    static {
-        assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMapperCodec.class)
-            : "PerFieldMapperCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
-    }
-
-    public PerFieldMapperCodec(Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
+    public PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
         super(compressionMode);
-        this.mapperService = mapperService;
-        this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
-        this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
-        this.es812PostingsFormat = new ES812PostingsFormat();
+        this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
+        // If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
+        // codec that delegates to this new Lucene codec, and make PerFieldMapperCodec extend this new Elasticsearch codec.
+        assert Codec.forName(Lucene.LATEST_CODEC).getClass() == delegate.getClass()
+            : "PerFieldMapperCodec must be on the latest lucene codec: " + Lucene.LATEST_CODEC;
     }
 
     @Override
     public PostingsFormat getPostingsFormatForField(String field) {
-        if (useBloomFilter(field)) {
-            return bloomFilterPostingsFormat;
-        }
-        return internalGetPostingsFormatForField(field);
-    }
-
-    private PostingsFormat internalGetPostingsFormatForField(String field) {
-        final PostingsFormat format = mapperService.mappingLookup().getPostingsFormat(field);
-        if (format != null) {
-            return format;
-        }
-        // return our own posting format using PFOR
-        return es812PostingsFormat;
-    }
-
-    boolean useBloomFilter(String field) {
-        IndexSettings indexSettings = mapperService.getIndexSettings();
-        if (mapperService.mappingLookup().isDataStreamTimestampFieldEnabled()) {
-            // In case for time series indices, they _id isn't randomly generated,
-            // but based on dimension fields and timestamp field, so during indexing
-            // version/seq_no/term needs to be looked up and having a bloom filter
-            // can speed this up significantly.
-            return indexSettings.getMode() == IndexMode.TIME_SERIES
-                && IdFieldMapper.NAME.equals(field)
-                && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(indexSettings.getSettings());
-        } else {
-            return IdFieldMapper.NAME.equals(field) && IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.get(indexSettings.getSettings());
-        }
+        return formatSupplier.getPostingsFormatForField(field);
     }
 
     @Override
     public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
-        Mapper mapper = mapperService.mappingLookup().getMapper(field);
-        if (mapper instanceof DenseVectorFieldMapper vectorMapper) {
-            return vectorMapper.getKnnVectorsFormatForField(super.getKnnVectorsFormatForField(field));
-        }
-        return super.getKnnVectorsFormatForField(field);
+        return formatSupplier.getKnnVectorsFormatForField(field);
     }
 
     @Override
     public DocValuesFormat getDocValuesFormatForField(String field) {
-        if (useTSDBDocValuesFormat(field)) {
-            return tsdbDocValuesFormat;
-        }
-        return docValuesFormat;
-    }
-
-    boolean useTSDBDocValuesFormat(final String field) {
-        if (excludeFields(field)) {
-            return false;
-        }
-
-        return mapperService != null && isTimeSeriesModeIndex() && mapperService.getIndexSettings().isES87TSDBCodecEnabled();
-    }
-
-    private boolean excludeFields(String fieldName) {
-        // Avoid using tsdb codec for fields like _seq_no, _primary_term.
-        // But _tsid and _ts_routing_hash should always use the tsdb codec.
-        return fieldName.startsWith("_") && fieldName.equals("_tsid") == false && fieldName.equals("_ts_routing_hash") == false;
-    }
-
-    private boolean isTimeSeriesModeIndex() {
-        return IndexMode.TIME_SERIES == mapperService.getIndexSettings().getMode();
+        return formatSupplier.getDocValuesFormatForField(field);
     }
 
 }

+ 212 - 0
server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java

@@ -0,0 +1,212 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec.zstd;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ByteBuffersDataInput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.nativeaccess.CloseableByteBuffer;
+import org.elasticsearch.nativeaccess.NativeAccess;
+import org.elasticsearch.nativeaccess.Zstd;
+
+import java.io.IOException;
+
+/**
+ * {@link org.apache.lucene.codecs.StoredFieldsFormat} that compresses blocks of data using ZStandard.
+ *
+ * Unlike Lucene's default stored fields format, this format does not make use of dictionaries (even though ZStandard has great support for
+ * dictionaries!). This is mostly due to the fact that LZ4/DEFLATE have short sliding windows that they can use to find duplicate strings
+ * (64kB and 32kB respectively). In contrast, ZSTD doesn't have such a limitation and can better take advantage of large compression
+ * buffers.
+ */
+public final class Zstd814StoredFieldsFormat extends Lucene90CompressingStoredFieldsFormat {
+
+    // ZSTD has special optimizations for inputs that are less than 16kB and less than 256kB. So subtract a bit of memory from 16kB and
+    // 256kB to make our inputs unlikely to grow beyond 16kB for BEST_SPEED and 256kB for BEST_COMPRESSION.
+    private static final int BEST_SPEED_BLOCK_SIZE = (16 - 2) * 1_024;
+    private static final int BEST_COMPRESSION_BLOCK_SIZE = (256 - 16) * 1_024;
+
+    /** Attribute key for compression mode. */
+    public static final String MODE_KEY = Zstd814StoredFieldsFormat.class.getSimpleName() + ".mode";
+
+    public enum Mode {
+        BEST_SPEED(0, BEST_SPEED_BLOCK_SIZE, 128),
+        BEST_COMPRESSION(3, BEST_COMPRESSION_BLOCK_SIZE, 2048);
+
+        final int level, blockSizeInBytes, blockDocCount;
+
+        Mode(int level, int blockSizeInBytes, int blockDocCount) {
+            this.level = level;
+            this.blockSizeInBytes = blockSizeInBytes;
+            this.blockDocCount = blockDocCount;
+        }
+    }
+
+    private final Mode mode;
+
+    public Zstd814StoredFieldsFormat(Mode mode) {
+        super("ZstdStoredFields814", new ZstdCompressionMode(mode.level), mode.blockSizeInBytes, mode.blockDocCount, 10);
+        this.mode = mode;
+    }
+
+    @Override
+    public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
+        // Both modes are compatible; we only put an attribute for debugging purposes.
+        String previous = si.putAttribute(MODE_KEY, mode.name());
+        if (previous != null && previous.equals(mode.name()) == false) {
+            throw new IllegalStateException(
+                "found existing value for " + MODE_KEY + " for segment: " + si.name + "old=" + previous + ", new=" + mode.name()
+            );
+        }
+        return super.fieldsWriter(directory, si, context);
+    }
+
+    private static class ZstdCompressionMode extends CompressionMode {
+        private final int level;
+
+        ZstdCompressionMode(int level) {
+            this.level = level;
+        }
+
+        @Override
+        public Compressor newCompressor() {
+            return new ZstdCompressor(level);
+        }
+
+        @Override
+        public Decompressor newDecompressor() {
+            return new ZstdDecompressor();
+        }
+
+        @Override
+        public String toString() {
+            return "ZSTD(level=" + level + ")";
+        }
+    }
+
+    private static final class ZstdDecompressor extends Decompressor {
+
+        // Buffer for copying between the DataInput and native memory. No hard science behind this number, it just tries to be high enough
+        // to benefit from bulk copying and low enough to keep heap usage under control.
+        final byte[] copyBuffer = new byte[4096];
+
+        ZstdDecompressor() {}
+
+        @Override
+        public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
+            if (originalLength == 0) {
+                bytes.offset = 0;
+                bytes.length = 0;
+                return;
+            }
+
+            final NativeAccess nativeAccess = NativeAccess.instance();
+            final Zstd zstd = nativeAccess.getZstd();
+
+            final int compressedLength = in.readVInt();
+
+            try (
+                CloseableByteBuffer src = nativeAccess.newBuffer(compressedLength);
+                CloseableByteBuffer dest = nativeAccess.newBuffer(originalLength)
+            ) {
+
+                while (src.buffer().position() < compressedLength) {
+                    final int numBytes = Math.min(copyBuffer.length, compressedLength - src.buffer().position());
+                    in.readBytes(copyBuffer, 0, numBytes);
+                    src.buffer().put(copyBuffer, 0, numBytes);
+                }
+                src.buffer().flip();
+
+                final int decompressedLen = zstd.decompress(dest, src);
+                if (decompressedLen != originalLength) {
+                    throw new CorruptIndexException("Expected " + originalLength + " decompressed bytes, got " + decompressedLen, in);
+                }
+
+                bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, length);
+                dest.buffer().get(offset, bytes.bytes, 0, length);
+                bytes.offset = 0;
+                bytes.length = length;
+            }
+        }
+
+        @Override
+        public Decompressor clone() {
+            return new ZstdDecompressor();
+        }
+    }
+
+    private static class ZstdCompressor extends Compressor {
+
+        final int level;
+        // Buffer for copying between the DataInput and native memory. No hard science behind this number, it just tries to be high enough
+        // to benefit from bulk copying and low enough to keep heap usage under control.
+        final byte[] copyBuffer = new byte[4096];
+
+        ZstdCompressor(int level) {
+            this.level = level;
+        }
+
+        @Override
+        public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
+            final NativeAccess nativeAccess = NativeAccess.instance();
+            final Zstd zstd = nativeAccess.getZstd();
+
+            final int srcLen = Math.toIntExact(buffersInput.length());
+            if (srcLen == 0) {
+                return;
+            }
+
+            final int compressBound = zstd.compressBound(srcLen);
+
+            // NOTE: We are allocating/deallocating native buffers on each call. We could save allocations by reusing these buffers, though
+            // this would come at the expense of higher permanent memory usage. Benchmarks suggested that there is some performance to save
+            // there, but it wouldn't be a game changer either.
+            // Also note that calls to #compress implicitly allocate memory under the hood for e.g. hash tables and chain tables that help
+            // identify duplicate strings. So if we wanted to avoid allocating memory on every compress call, we should also look into
+            // reusing compression contexts, which are not small and would increase permanent memory usage as well.
+            try (
+                CloseableByteBuffer src = nativeAccess.newBuffer(srcLen);
+                CloseableByteBuffer dest = nativeAccess.newBuffer(compressBound)
+            ) {
+
+                while (buffersInput.position() < buffersInput.length()) {
+                    final int numBytes = Math.min(copyBuffer.length, (int) (buffersInput.length() - buffersInput.position()));
+                    buffersInput.readBytes(copyBuffer, 0, numBytes);
+                    src.buffer().put(copyBuffer, 0, numBytes);
+                }
+                src.buffer().flip();
+
+                final int compressedLen = zstd.compress(dest, src, level);
+                out.writeVInt(compressedLen);
+
+                for (int written = 0; written < compressedLen;) {
+                    final int numBytes = Math.min(copyBuffer.length, compressedLen - written);
+                    dest.buffer().get(copyBuffer, 0, numBytes);
+                    out.writeBytes(copyBuffer, 0, numBytes);
+                    written += numBytes;
+                    assert written == dest.buffer().position();
+                }
+            }
+        }
+
+        @Override
+        public void close() throws IOException {}
+    }
+}
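
To see the format end to end, here is a hedged write/read sketch following the same pattern as the `CodecTests` legacy tests below, only with the new ZSTD codec (it assumes the native zstd bindings are available at runtime):

```java
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.elasticsearch.index.codec.Elasticsearch814Codec;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

public class ZstdStoredFieldsSketch {
    public static void main(String[] args) throws Exception {
        Codec codec = new Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION);
        try (Directory dir = new ByteBuffersDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig().setCodec(codec))) {
            Document doc = new Document();
            doc.add(new StoredField("message", "stored fields are zstd-compressed per block"));
            writer.addDocument(doc);
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                System.out.println(reader.storedFields().document(0).get("message"));
            }
        }
    }
}
```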

+ 1 - 0
server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec

@@ -0,0 +1 @@
+org.elasticsearch.index.codec.Elasticsearch814Codec

+ 39 - 21
server/src/test/java/org/elasticsearch/index/codec/CodecTests.java

@@ -12,10 +12,11 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
 import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.KeywordField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.util.LuceneTestCase.SuppressCodecs;
 import org.elasticsearch.TransportVersion;
@@ -31,6 +32,7 @@ import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.script.ScriptCompiler;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
+import org.hamcrest.Matchers;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -43,35 +45,51 @@ public class CodecTests extends ESTestCase {
     public void testResolveDefaultCodecs() throws Exception {
         CodecService codecService = createCodecService();
         assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class));
-        assertThat(codecService.codec("default"), instanceOf(Lucene99Codec.class));
+        assertThat(codecService.codec("default"), instanceOf(Elasticsearch814Codec.class));
     }
 
     public void testDefault() throws Exception {
         Codec codec = createCodecService().codec("default");
-        assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_SPEED, codec);
+        assertEquals(
+            "Zstd814StoredFieldsFormat(compressionMode=ZSTD(level=0), chunkSize=14336, maxDocsPerChunk=128, blockShift=10)",
+            codec.storedFieldsFormat().toString()
+        );
     }
 
     public void testBestCompression() throws Exception {
         Codec codec = createCodecService().codec("best_compression");
-        assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_COMPRESSION, codec);
+        assertEquals(
+            "Zstd814StoredFieldsFormat(compressionMode=ZSTD(level=3), chunkSize=245760, maxDocsPerChunk=2048, blockShift=10)",
+            codec.storedFieldsFormat().toString()
+        );
+    }
+
+    public void testLegacyDefault() throws Exception {
+        Codec codec = createCodecService().codec("legacy_default");
+        assertThat(codec, Matchers.instanceOf(Lucene99Codec.class));
+        assertThat(codec.storedFieldsFormat(), Matchers.instanceOf(Lucene90StoredFieldsFormat.class));
+        // Make sure the legacy codec is writable
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setCodec(codec))) {
+            Document doc = new Document();
+            doc.add(new KeywordField("string_field", "abc", Field.Store.YES));
+            doc.add(new IntField("int_field", 42, Field.Store.YES));
+            w.addDocument(doc);
+            try (DirectoryReader r = DirectoryReader.open(w)) {}
+        }
     }
 
-    // write some docs with it, inspect .si to see this was the used compression
-    private void assertStoredFieldsCompressionEquals(Lucene99Codec.Mode expected, Codec actual) throws Exception {
-        Directory dir = newDirectory();
-        IndexWriterConfig iwc = newIndexWriterConfig(null);
-        iwc.setCodec(actual);
-        IndexWriter iw = new IndexWriter(dir, iwc);
-        iw.addDocument(new Document());
-        iw.commit();
-        iw.close();
-        DirectoryReader ir = DirectoryReader.open(dir);
-        SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
-        String v = sr.getSegmentInfo().info.getAttribute(Lucene90StoredFieldsFormat.MODE_KEY);
-        assertNotNull(v);
-        assertEquals(expected, Lucene99Codec.Mode.valueOf(v));
-        ir.close();
-        dir.close();
+    public void testLegacyBestCompression() throws Exception {
+        Codec codec = createCodecService().codec("legacy_best_compression");
+        assertThat(codec, Matchers.instanceOf(Lucene99Codec.class));
+        assertThat(codec.storedFieldsFormat(), Matchers.instanceOf(Lucene90StoredFieldsFormat.class));
+        // Make sure the legacy codec is writable
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setCodec(codec))) {
+            Document doc = new Document();
+            doc.add(new KeywordField("string_field", "abc", Field.Store.YES));
+            doc.add(new IntField("int_field", 42, Field.Store.YES));
+            w.addDocument(doc);
+            try (DirectoryReader r = DirectoryReader.open(w)) {}
+        }
     }
 
     private CodecService createCodecService() throws IOException {

+ 17 - 16
server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java

@@ -8,7 +8,6 @@
 
 package org.elasticsearch.index.codec;
 
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.compress.CompressedXContent;
 import org.elasticsearch.common.settings.Settings;
@@ -63,7 +62,7 @@ public class PerFieldMapperCodecTests extends ESTestCase {
         """;
 
     public void testUseBloomFilter() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(false, randomBoolean(), false);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(false, randomBoolean(), false);
         assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(true));
         assertThat(perFieldMapperCodec.getPostingsFormatForField("_id"), instanceOf(ES87BloomFilterPostingsFormat.class));
         assertThat(perFieldMapperCodec.useBloomFilter("another_field"), is(false));
@@ -71,7 +70,7 @@ public class PerFieldMapperCodecTests extends ESTestCase {
     }
 
     public void testUseBloomFilterWithTimestampFieldEnabled() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, false);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, true, false);
         assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(true));
         assertThat(perFieldMapperCodec.getPostingsFormatForField("_id"), instanceOf(ES87BloomFilterPostingsFormat.class));
         assertThat(perFieldMapperCodec.useBloomFilter("another_field"), is(false));
@@ -79,13 +78,13 @@ public class PerFieldMapperCodecTests extends ESTestCase {
     }
 
     public void testUseBloomFilterWithTimestampFieldEnabled_noTimeSeriesMode() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, false, false);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, false, false);
         assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(false));
         assertThat(perFieldMapperCodec.getPostingsFormatForField("_id"), instanceOf(ES812PostingsFormat.class));
     }
 
     public void testUseBloomFilterWithTimestampFieldEnabled_disableBloomFilter() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, true);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, true, true);
         assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(false));
         assertThat(perFieldMapperCodec.getPostingsFormatForField("_id"), instanceOf(ES812PostingsFormat.class));
         assertWarnings(
@@ -94,28 +93,29 @@ public class PerFieldMapperCodecTests extends ESTestCase {
     }
 
     public void testUseES87TSDBEncodingForTimestampField() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, true);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, true, true);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(true));
     }
 
     public void testDoNotUseES87TSDBEncodingForTimestampFieldNonTimeSeriesIndex() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, false, true);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, false, true);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(false));
     }
 
     public void testEnableES87TSDBCodec() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, MAPPING_1);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, true, MAPPING_1);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("gauge")), is(true));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(true));
     }
 
     public void testDisableES87TSDBCodec() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(false, true, MAPPING_1);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(false, true, MAPPING_1);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("gauge")), is(false));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(false));
     }
 
-    private PerFieldMapperCodec createCodec(boolean timestampField, boolean timeSeries, boolean disableBloomFilter) throws IOException {
+    private PerFieldFormatSupplier createFormatSupplier(boolean timestampField, boolean timeSeries, boolean disableBloomFilter)
+        throws IOException {
         Settings.Builder settings = Settings.builder();
         if (timeSeries) {
             settings.put(IndexSettings.MODE.getKey(), "time_series");
@@ -140,31 +140,32 @@ public class PerFieldMapperCodecTests extends ESTestCase {
                 """;
             mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
         }
-        return new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, BigArrays.NON_RECYCLING_INSTANCE);
+        return new PerFieldFormatSupplier(mapperService, BigArrays.NON_RECYCLING_INSTANCE);
     }
 
     public void testUseES87TSDBEncodingSettingDisabled() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(false, true, MAPPING_2);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(false, true, MAPPING_2);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(false));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("counter")), is(false));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("gauge")), is(false));
     }
 
     public void testUseTimeSeriesModeDisabledCodecDisabled() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, false, MAPPING_2);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, false, MAPPING_2);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(false));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("counter")), is(false));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("gauge")), is(false));
     }
 
     public void testUseTimeSeriesModeAndCodecEnabled() throws IOException {
-        PerFieldMapperCodec perFieldMapperCodec = createCodec(true, true, MAPPING_2);
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, true, MAPPING_2);
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(true));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("counter")), is(true));
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("gauge")), is(true));
     }
 
-    private PerFieldMapperCodec createCodec(boolean enableES87TSDBCodec, boolean timeSeries, String mapping) throws IOException {
+    private PerFieldFormatSupplier createFormatSupplier(boolean enableES87TSDBCodec, boolean timeSeries, String mapping)
+        throws IOException {
         Settings.Builder settings = Settings.builder();
         if (timeSeries) {
             settings.put(IndexSettings.MODE.getKey(), "time_series");
@@ -173,7 +174,7 @@ public class PerFieldMapperCodecTests extends ESTestCase {
         settings.put(IndexSettings.TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING.getKey(), enableES87TSDBCodec);
         MapperService mapperService = MapperTestUtils.newMapperService(xContentRegistry(), createTempDir(), settings.build(), "test");
         mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
-        return new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, BigArrays.NON_RECYCLING_INSTANCE);
+        return new PerFieldFormatSupplier(mapperService, BigArrays.NON_RECYCLING_INSTANCE);
     }
 
 }

+ 23 - 0
server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestCompressionStoredFieldsFormatTests.java

@@ -0,0 +1,23 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec.zstd;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase;
+import org.elasticsearch.index.codec.Elasticsearch814Codec;
+
+public class Zstd814BestCompressionStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase {
+
+    private final Codec codec = new Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION);
+
+    @Override
+    protected Codec getCodec() {
+        return codec;
+    }
+}

+ 23 - 0
server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestSpeedStoredFieldsFormatTests.java

@@ -0,0 +1,23 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec.zstd;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase;
+import org.elasticsearch.index.codec.Elasticsearch814Codec;
+
+public class Zstd814BestSpeedStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase {
+
+    private final Codec codec = new Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
+
+    @Override
+    protected Codec getCodec() {
+        return codec;
+    }
+}

+ 2 - 2
test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java

@@ -10,7 +10,6 @@ package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -43,6 +42,7 @@ import org.elasticsearch.index.analysis.NameOrDefinition;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
 import org.elasticsearch.index.codec.PerFieldMapperCodec;
+import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.IndexFieldDataCache;
@@ -243,7 +243,7 @@ public abstract class MapperServiceTestCase extends FieldTypeTestCase {
         CheckedConsumer<DirectoryReader, IOException> test
     ) throws IOException {
         IndexWriterConfig iwc = new IndexWriterConfig(IndexShard.buildIndexAnalyzer(mapperService)).setCodec(
-            new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, BigArrays.NON_RECYCLING_INSTANCE)
+            new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, BigArrays.NON_RECYCLING_INSTANCE)
         );
         try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) {
             builder.accept(iw);