@@ -0,0 +1,212 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.index.codec.zstd;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ByteBuffersDataInput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.nativeaccess.CloseableByteBuffer;
+import org.elasticsearch.nativeaccess.NativeAccess;
+import org.elasticsearch.nativeaccess.Zstd;
+
+import java.io.IOException;
+
+/**
+ * {@link org.apache.lucene.codecs.StoredFieldsFormat} that compresses blocks of data using ZStandard.
+ *
+ * Unlike Lucene's default stored fields format, this format does not make use of dictionaries (even though ZStandard has great support
+ * for dictionaries!). This is mostly because dictionaries compensate for the short sliding windows that LZ4 and DEFLATE use to find
+ * duplicate strings (64kB and 32kB respectively). In contrast, ZSTD doesn't have such a limitation and can better take advantage of
+ * large compression buffers.
+ */
+public final class Zstd814StoredFieldsFormat extends Lucene90CompressingStoredFieldsFormat {
+
+    // ZSTD has special optimizations for inputs that are less than 16kB and less than 256kB. So subtract a bit of memory from 16kB and
+    // 256kB to make our inputs unlikely to grow beyond 16kB for BEST_SPEED and 256kB for BEST_COMPRESSION.
+    private static final int BEST_SPEED_BLOCK_SIZE = (16 - 2) * 1_024;
+    private static final int BEST_COMPRESSION_BLOCK_SIZE = (256 - 16) * 1_024;
+
+    /** Attribute key for compression mode. */
+    public static final String MODE_KEY = Zstd814StoredFieldsFormat.class.getSimpleName() + ".mode";
+
+    public enum Mode {
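+        // Each mode carries the ZSTD compression level, the target uncompressed block size, and the maximum number of
+        // documents per block; the latter two are passed to Lucene90CompressingStoredFieldsFormat below as the chunk
+        // size and the maximum number of docs per chunk.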
+        BEST_SPEED(0, BEST_SPEED_BLOCK_SIZE, 128),
+        BEST_COMPRESSION(3, BEST_COMPRESSION_BLOCK_SIZE, 2048);
+
+        final int level, blockSizeInBytes, blockDocCount;
+
+        Mode(int level, int blockSizeInBytes, int blockDocCount) {
+            this.level = level;
+            this.blockSizeInBytes = blockSizeInBytes;
+            this.blockDocCount = blockDocCount;
+        }
+    }
+
+    private final Mode mode;
+
+    public Zstd814StoredFieldsFormat(Mode mode) {
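+        // "ZstdStoredFields814" is the format name recorded in the index, and the trailing 10 is the block shift used
+        // by the stored fields index.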
+        super("ZstdStoredFields814", new ZstdCompressionMode(mode.level), mode.blockSizeInBytes, mode.blockDocCount, 10);
+        this.mode = mode;
+    }
+
+    @Override
+    public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
+        // Both modes are compatible; we only put an attribute for debug purposes.
+        String previous = si.putAttribute(MODE_KEY, mode.name());
+        if (previous != null && previous.equals(mode.name()) == false) {
+            throw new IllegalStateException(
+ "found existing value for " + MODE_KEY + " for segment: " + si.name + "old=" + previous + ", new=" + mode.name()
|
|
|
+ );
|
|
|
+ }
|
|
|
+ return super.fieldsWriter(directory, si, context);
|
|
|
+    }
+
+    private static class ZstdCompressionMode extends CompressionMode {
+        private final int level;
+
+        ZstdCompressionMode(int level) {
+            this.level = level;
+        }
+
+        @Override
+        public Compressor newCompressor() {
+            return new ZstdCompressor(level);
+        }
+
+        @Override
+        public Decompressor newDecompressor() {
+            return new ZstdDecompressor();
+        }
+
+        @Override
+        public String toString() {
+            return "ZSTD(level=" + level + ")";
+        }
+    }
+
+    private static final class ZstdDecompressor extends Decompressor {
+
+        // Buffer for copying between the DataInput and native memory. No hard science behind this number, it just tries to be high enough
+        // to benefit from bulk copying and low enough to keep heap usage under control.
+        final byte[] copyBuffer = new byte[4096];
+
+        ZstdDecompressor() {}
+
+        @Override
+        public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
+            if (originalLength == 0) {
+                bytes.offset = 0;
+                bytes.length = 0;
+                return;
+            }
+
+            final NativeAccess nativeAccess = NativeAccess.instance();
+            final Zstd zstd = nativeAccess.getZstd();
+
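+            // The on-disk layout mirrors what ZstdCompressor#compress writes below: a vInt holding the compressed
+            // length, followed by that many bytes of compressed data.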
+            final int compressedLength = in.readVInt();
+
+            try (
+                CloseableByteBuffer src = nativeAccess.newBuffer(compressedLength);
+                CloseableByteBuffer dest = nativeAccess.newBuffer(originalLength)
+            ) {
+
+                while (src.buffer().position() < compressedLength) {
+                    final int numBytes = Math.min(copyBuffer.length, compressedLength - src.buffer().position());
+                    in.readBytes(copyBuffer, 0, numBytes);
+                    src.buffer().put(copyBuffer, 0, numBytes);
+                }
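+                // The copy loop leaves the native buffer in write mode; flip it so ZSTD reads from position 0.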
+                src.buffer().flip();
+
+                final int decompressedLen = zstd.decompress(dest, src);
+                if (decompressedLen != originalLength) {
+                    throw new CorruptIndexException("Expected " + originalLength + " decompressed bytes, got " + decompressedLen, in);
+                }
+
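+                // Only the requested window [offset, offset+length) of the decompressed block is copied back onto the heap.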
+                bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, length);
+                dest.buffer().get(offset, bytes.bytes, 0, length);
+                bytes.offset = 0;
+                bytes.length = length;
+            }
+        }
+
+        @Override
+        public Decompressor clone() {
+            return new ZstdDecompressor();
+        }
+    }
+
+    private static class ZstdCompressor extends Compressor {
+
+        final int level;
+        // Buffer for copying between the DataInput / DataOutput and native memory. No hard science behind this number, it just tries to
+        // be high enough to benefit from bulk copying and low enough to keep heap usage under control.
+        final byte[] copyBuffer = new byte[4096];
+
+        ZstdCompressor(int level) {
+            this.level = level;
+        }
+
+        @Override
+        public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
+            final NativeAccess nativeAccess = NativeAccess.instance();
+            final Zstd zstd = nativeAccess.getZstd();
+
+            final int srcLen = Math.toIntExact(buffersInput.length());
+            if (srcLen == 0) {
+                return;
+            }
+
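+            // compressBound is the worst-case compressed size for srcLen input bytes, so dest can always hold the output.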
+            final int compressBound = zstd.compressBound(srcLen);
+
+            // NOTE: We are allocating/deallocating native buffers on each call. We could save allocations by reusing these buffers, though
+            // this would come at the expense of higher permanent memory usage. Benchmarks suggested that there is some performance to save
+            // there, but it wouldn't be a game changer either.
+            // Also note that calls to #compress implicitly allocate memory under the hood for e.g. hash tables and chain tables that help
+            // identify duplicate strings. So if we wanted to avoid allocating memory on every compress call, we should also look into
+            // reusing compression contexts, which are not small and would increase permanent memory usage as well.
+            try (
+                CloseableByteBuffer src = nativeAccess.newBuffer(srcLen);
+                CloseableByteBuffer dest = nativeAccess.newBuffer(compressBound)
+            ) {
+
+                while (buffersInput.position() < buffersInput.length()) {
+                    final int numBytes = Math.min(copyBuffer.length, (int) (buffersInput.length() - buffersInput.position()));
+                    buffersInput.readBytes(copyBuffer, 0, numBytes);
+                    src.buffer().put(copyBuffer, 0, numBytes);
+                }
+                src.buffer().flip();
+
+                final int compressedLen = zstd.compress(dest, src, level);
+                out.writeVInt(compressedLen);
+
+                for (int written = 0; written < compressedLen;) {
+                    final int numBytes = Math.min(copyBuffer.length, compressedLen - written);
+                    dest.buffer().get(copyBuffer, 0, numBytes);
+                    out.writeBytes(copyBuffer, 0, numBytes);
+                    written += numBytes;
+                    assert written == dest.buffer().position();
+                }
+            }
+        }
+
+        @Override
+        public void close() throws IOException {}
+    }
+}