Browse Source

Add ordinal range encode for tsid (#133018)

When a keyword field is the primary sort field, we store the starting
document of each ordinal (ordinal range encoding) instead of blocks of
ordinals. By default, this encoding is not used when the average number of
documents per ordinal is less than 512, since storing block values may be
more efficient and safer in that case. Reading a large range of
documents—a common pattern in ES|QL—can be more efficient with this approach.
Nhat Nguyen 1 month ago
parent
commit
3d48dd57cc

+ 1 - 1
benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java

@@ -258,7 +258,7 @@ public class TSDBDocValuesMergeBenchmark {
         );
         config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
         config.setMergePolicy(new LogByteSizeMergePolicy());
-        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, optimizedMergeEnabled);
+        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
         config.setCodec(new Elasticsearch900Lucene101Codec() {
 
             @Override

+ 5 - 0
docs/changelog/133018.yaml

@@ -0,0 +1,5 @@
+pr: 133018
+summary: Add ordinal range encode for tsid
+area: TSDB
+type: enhancement
+issues: []

+ 56 - 12
server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

@@ -62,11 +62,14 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
     final int maxDoc;
     private byte[] termsDictBuffer;
     private final int skipIndexIntervalSize;
+    private final int minDocsPerOrdinalForOrdinalRangeEncoding;
     final boolean enableOptimizedMerge;
+    private final int primarySortFieldNumber;
 
     ES819TSDBDocValuesConsumer(
         SegmentWriteState state,
         int skipIndexIntervalSize,
+        int minDocsPerOrdinalForOrdinalRangeEncoding,
         boolean enableOptimizedMerge,
         String dataCodec,
         String dataExtension,
@@ -75,6 +78,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
     ) throws IOException {
         this.termsDictBuffer = new byte[1 << 14];
         this.dir = state.directory;
+        this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
+        this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos);
         this.context = state.context;
         boolean success = false;
         try {
@@ -124,6 +129,12 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
         writeField(field, producer, -1, null);
     }
 
+    /**
+     * Decides whether the ordinals of {@code field} are written with ordinal range encoding
+     * (one start document per ordinal) rather than as blocks of ordinals.
+     * <p>
+     * Range encoding is selected only when the segment has more than one document, the field is the
+     * primary sort field, and the average number of documents per ordinal is at least
+     * {@code minDocsPerOrdinalForOrdinalRangeEncoding}.
+     *
+     * @param field            the field being written
+     * @param maxOrd           the number of distinct ordinals; {@code -1} is passed for fields written without ordinals
+     * @param numDocsWithValue the number of documents that have a value for the field
+     * @return {@code true} if ordinal range encoding should be used for this field
+     */
+    private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) {
+        // Fields without ordinals arrive with maxOrd == -1. Rejecting non-positive values up front keeps the
+        // division below away from zero and negative divisors, whatever threshold the format was built with.
+        if (maxOrd <= 0) {
+            return false;
+        }
+        // NOTE(review): the average is computed from documents, not values. The range-encoding writer reads a
+        // single ordinal per document, so confirm this is only reached for fields with one value per document.
+        return maxDoc > 1
+            && field.number == primarySortFieldNumber
+            && (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding;
+    }
+
     private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, long maxOrd, OffsetsAccumulator offsetsAccumulator)
         throws IOException {
         int numDocsWithValue = 0;
@@ -149,19 +160,53 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
         try {
             if (numValues > 0) {
                 assert numDocsWithValue > 0;
-                // Special case for maxOrd of 1, signal -1 that no blocks will be written
-                meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1);
                 final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput();
-                final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance(
-                    meta,
-                    new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
-                    1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
-                    ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
-                );
+                DirectMonotonicWriter indexWriter = null;
 
                 final long valuesDataOffset = data.getFilePointer();
-                // Special case for maxOrd of 1, skip writing the blocks
-                if (maxOrd != 1) {
+                if (maxOrd == 1) {
+                    // Special case for maxOrd of 1, signal -1 that no blocks will be written
+                    meta.writeInt(-1);
+                } else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue)) {
+                    // When a field is sorted, use ordinal range encode for long runs of the same ordinal.
+                    meta.writeInt(-2);
+                    meta.writeVInt(Math.toIntExact(maxOrd));
+                    meta.writeByte((byte) ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT);
+                    values = valuesProducer.getSortedNumeric(field);
+                    if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
+                        disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                    }
+                    DirectMonotonicWriter startDocs = DirectMonotonicWriter.getInstance(
+                        meta,
+                        data,
+                        maxOrd + 1,
+                        ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT
+                    );
+                    long lastOrd = 0;
+                    startDocs.add(0);
+                    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                        if (disiAccumulator != null) {
+                            disiAccumulator.addDocId(doc);
+                        }
+                        if (offsetsAccumulator != null) {
+                            offsetsAccumulator.addDoc(1);
+                        }
+                        final long nextOrd = values.nextValue();
+                        if (nextOrd != lastOrd) {
+                            lastOrd = nextOrd;
+                            startDocs.add(doc);
+                        }
+                    }
+                    startDocs.add(maxDoc);
+                    startDocs.finish();
+                } else {
+                    indexWriter = DirectMonotonicWriter.getInstance(
+                        meta,
+                        new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
+                        1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
+                        ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
+                    );
+                    meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
                     final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
                     int bufferSize = 0;
                     final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
@@ -204,8 +249,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
                 }
 
                 final long valuesDataLength = data.getFilePointer() - valuesDataOffset;
-                if (maxOrd != 1) {
-                    // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
+                if (indexWriter != null) {
                     indexWriter.finish();
                 }
                 final long indexDataOffset = data.getFilePointer();

+ 17 - 2
server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java

@@ -104,21 +104,35 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues
         return Boolean.parseBoolean(System.getProperty(OPTIMIZED_MERGE_ENABLED_NAME, Boolean.TRUE.toString()));
     }
 
+    /**
+     * The default minimum number of documents per ordinal required to use ordinal range encoding.
+     * If the average number of documents per ordinal is below this threshold, it is more efficient to encode doc values in blocks.
+     * A much smaller value may be used in tests to exercise ordinal range encoding more frequently.
+     */
+    public static final int ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL = 512;
+
+    /**
+     * The block shift used in DirectMonotonicWriter when encoding the start docs of each ordinal with ordinal range encoding.
+     */
+    public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12;
+
     final int skipIndexIntervalSize;
+    final int minDocsPerOrdinalForRangeEncoding;
     private final boolean enableOptimizedMerge;
 
     /** Default constructor. */
     public ES819TSDBDocValuesFormat() {
-        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT);
+        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT);
     }
 
     /** Doc values fields format with specified skipIndexIntervalSize. */
-    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, boolean enableOptimizedMerge) {
+    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) {
         super(CODEC_NAME);
         if (skipIndexIntervalSize < 2) {
             throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
         }
         this.skipIndexIntervalSize = skipIndexIntervalSize;
+        this.minDocsPerOrdinalForRangeEncoding = minDocsPerOrdinalForRangeEncoding;
         this.enableOptimizedMerge = enableOptimizedMerge;
     }
 
@@ -127,6 +141,7 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues
         return new ES819TSDBDocValuesConsumer(
             state,
             skipIndexIntervalSize,
+            minDocsPerOrdinalForRangeEncoding,
             enableOptimizedMerge,
             DATA_CODEC,
             DATA_EXTENSION,

+ 188 - 133
server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

@@ -24,6 +24,7 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedNumericDocValues;
@@ -31,6 +32,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.internal.hppc.IntObjectHashMap;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
@@ -53,7 +55,7 @@ import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.
 
 final class ES819TSDBDocValuesProducer extends DocValuesProducer {
     final IntObjectHashMap<NumericEntry> numerics;
-    private int primarySortFieldNumber = -1;
+    private final int primarySortFieldNumber;
     final IntObjectHashMap<BinaryEntry> binaries;
     final IntObjectHashMap<SortedEntry> sorted;
     final IntObjectHashMap<SortedSetEntry> sortedSets;
@@ -73,11 +75,13 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         this.sortedNumerics = new IntObjectHashMap<>();
         this.skippers = new IntObjectHashMap<>();
         this.maxDoc = state.segmentInfo.maxDoc();
+        this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos);
         this.merging = false;
 
         // read in the entries from the metadata file.
         int version = -1;
         String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+
         try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) {
             Throwable priorE = null;
 
@@ -92,14 +96,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
                 );
 
                 readFields(in, state.fieldInfos);
-                final var indexSort = state.segmentInfo.getIndexSort();
-                if (indexSort != null && indexSort.getSort().length > 0) {
-                    var primarySortField = indexSort.getSort()[0];
-                    var sortField = state.fieldInfos.fieldInfo(primarySortField.getField());
-                    if (sortField != null) {
-                        primarySortFieldNumber = sortField.number;
-                    }
-                }
+
             } catch (Throwable exception) {
                 priorE = exception;
             } finally {
@@ -148,6 +145,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         IndexInput data,
         int maxDoc,
         int version,
+        int primarySortFieldNumber,
         boolean merging
     ) {
         this.numerics = numerics;
@@ -159,6 +157,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         this.data = data.clone();
         this.maxDoc = maxDoc;
         this.version = version;
+        this.primarySortFieldNumber = primarySortFieldNumber;
         this.merging = merging;
     }
 
@@ -174,6 +173,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
             data,
             maxDoc,
             version,
+            primarySortFieldNumber,
             true
         );
     }
@@ -467,6 +467,47 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
     }
 
     abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
+        // Exclusive upper bound for doc ids: advance() reports NO_MORE_DOCS once the target reaches it.
+        private final int maxDoc;
+        // Current position of the iterator; -1 until it has been positioned.
+        protected int doc = -1;
+
+        /**
+         * @param maxDoc exclusive upper bound of the doc ids this iterator can be positioned on
+         */
+        BaseDenseNumericValues(int maxDoc) {
+            this.maxDoc = maxDoc;
+        }
+
+        /** Returns the current position, as last set by {@link #advance(int)} or {@link #advanceExact(int)}. */
+        @Override
+        public final int docID() {
+            return doc;
+        }
+
+        /** Moves to the doc id immediately after the current one by delegating to {@link #advance(int)}. */
+        @Override
+        public final int nextDoc() throws IOException {
+            return advance(doc + 1);
+        }
+
+        /**
+         * Positions the iterator on {@code target} itself, since no doc id below {@code maxDoc} is skipped.
+         * Returns {@code NO_MORE_DOCS} when {@code target} is at or beyond {@code maxDoc}.
+         */
+        @Override
+        public final int advance(int target) throws IOException {
+            if (target >= maxDoc) {
+                return doc = NO_MORE_DOCS;
+            }
+            return doc = target;
+        }
+
+        /**
+         * Positions the iterator on {@code target} and always reports that a value exists.
+         * No bounds check is performed on {@code target}.
+         */
+        @Override
+        public final boolean advanceExact(int target) {
+            doc = target;
+            return true;
+        }
+
+        /** Returns {@code maxDoc} as the iteration cost. */
+        @Override
+        public final long cost() {
+            return maxDoc;
+        }
+
+        // Default implementation returns null; it is not final, so subclasses may override it.
+        // NOTE(review): null presumably tells the caller that no bulk read is available - confirm against
+        // BlockLoader.OptionalColumnAtATimeReader.
+        @Override
+        public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
+            return null;
+        }
+
         abstract long lookAheadValueAt(int targetDoc) throws IOException;
 
         BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException {
@@ -474,6 +515,39 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         }
     }
 
+    /**
+     * Base class for numeric doc values whose iteration is driven by an {@link IndexedDISI}.
+     * Every positioning method delegates to the wrapped {@code disi}; subclasses only supply the value lookup.
+     */
+    abstract static class BaseSparseNumericValues extends NumericDocValues {
+        /** Iterator that all positioning calls are forwarded to. */
+        protected final IndexedDISI disi;
+
+        BaseSparseNumericValues(IndexedDISI disi) {
+            this.disi = disi;
+        }
+
+        /** Current position of the underlying {@code disi}. */
+        @Override
+        public final int docID() {
+            return disi.docID();
+        }
+
+        /** Forwards to {@code disi.nextDoc()}. */
+        @Override
+        public final int nextDoc() throws IOException {
+            return disi.nextDoc();
+        }
+
+        /** Forwards to {@code disi.advance(target)}. */
+        @Override
+        public final int advance(int target) throws IOException {
+            return disi.advance(target);
+        }
+
+        /** Forwards to {@code disi.advanceExact(target)}. */
+        @Override
+        public final boolean advanceExact(int target) throws IOException {
+            return disi.advanceExact(target);
+        }
+
+        /** Iteration cost as reported by the underlying {@code disi}. */
+        @Override
+        public final long cost() {
+            return disi.cost();
+        }
+    }
+
     abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
 
         final SortedSetEntry entry;
@@ -939,6 +1013,24 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         data.close();
     }
 
+    /**
+     * Resolves the field number of the primary (first) index sort field of a segment.
+     * <p>
+     * Only an ascending primary sort qualifies. {@code -1} is returned when the segment has no index sort,
+     * when the primary sort is reversed, or when the sort field is not present in {@code fieldInfos}.
+     *
+     * @param segmentInfo the segment whose index sort is inspected
+     * @param fieldInfos  the field infos used to look up the sort field by name
+     * @return the field number of the ascending primary sort field, or {@code -1} if there is none
+     */
+    static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos) {
+        final var sort = segmentInfo.getIndexSort();
+        if (sort == null || sort.getSort().length == 0) {
+            return -1;
+        }
+        final SortField primary = sort.getSort()[0];
+        if (primary.getReverse()) {
+            // a descending primary sort does not qualify
+            return -1;
+        }
+        final FieldInfo primaryInfo = fieldInfos.fieldInfo(primary.getField());
+        return primaryInfo == null ? -1 : primaryInfo.number;
+    }
+
     private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
         for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
             FieldInfo info = infos.fieldInfo(fieldNumber);
@@ -988,9 +1080,14 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         entry.numDocsWithField = meta.readInt();
         if (entry.numValues > 0) {
             final int indexBlockShift = meta.readInt();
-            // Special case, -1 means there are no blocks, so no need to load the metadata for it
-            // -1 is written when there the cardinality of a field is exactly one.
-            if (indexBlockShift != -1) {
+            if (indexBlockShift == -1) {
+                // single ordinal, no block index
+            } else if (indexBlockShift == -2) {
+                // encoded ordinal range, no block index
+                final int numOrds = meta.readVInt();
+                final int blockShift = meta.readByte();
+                entry.sortedOrdinals = DirectMonotonicReader.loadMeta(meta, numOrds + 1, blockShift);
+            } else {
                 entry.indexMeta = DirectMonotonicReader.loadMeta(
                     meta,
                     1 + ((entry.numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
@@ -1103,6 +1200,49 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         abstract long advance(long index) throws IOException;
     }
 
+    /**
+     * Maps a document to its ordinal using the start document recorded for each ordinal.
+     * <p>
+     * The reader keeps the index of the current range together with its exclusive end, which is read from
+     * {@code startDocs} at {@code currentIndex + 1}. Lookups that stay inside the current range are answered
+     * from that cached state without touching {@code startDocs}.
+     */
+    static final class SortedOrdinalReader {
+        final long maxOrd;
+        final DirectMonotonicReader startDocs;
+        // Index of the range resolved most recently; -1 until the first lookup.
+        private long currentIndex = -1;
+        // First doc after the current range; -1 until the first lookup, so any doc misses the cached range.
+        private long rangeEndExclusive = -1;
+
+        SortedOrdinalReader(long maxOrd, DirectMonotonicReader startDocs) {
+            this.maxOrd = maxOrd;
+            this.startDocs = startDocs;
+        }
+
+        /**
+         * Returns the index of the range containing {@code doc} and remembers that range for later calls.
+         * A doc equal to the cached end moves to the next range directly; a doc further ahead is located
+         * with a binary search.
+         */
+        long readValueAndAdvance(int doc) {
+            if (doc >= rangeEndExclusive) {
+                // left the cached range: step to the adjacent range or search for a later one
+                currentIndex = doc == rangeEndExclusive ? currentIndex + 1 : searchRange(doc);
+                rangeEndExclusive = startDocs.get(currentIndex + 1);
+            }
+            return currentIndex;
+        }
+
+        /**
+         * Binary-searches {@code startDocs} between {@code currentIndex + 1} and {@code maxOrd} for {@code doc}.
+         * A negative search result is converted with {@code -2 - result}.
+         * NOTE(review): presumably the search follows the {@code -(insertionPoint) - 1} convention, so the
+         * conversion yields the entry just before the insertion point - confirm against DirectMonotonicReader.
+         */
+        private long searchRange(int doc) {
+            final long found = startDocs.binarySearch(currentIndex + 1, maxOrd, doc);
+            final long index = found < 0 ? -2 - found : found;
+            assert index < maxOrd : "invalid range " + index + " for doc " + doc + " in maxOrd " + maxOrd;
+            return index;
+        }
+
+        /**
+         * Returns the index of the range containing {@code targetDoc} without changing the cached range.
+         */
+        long lookAheadValue(int targetDoc) {
+            return targetDoc < rangeEndExclusive ? currentIndex : searchRange(targetDoc);
+        }
+    }
+
     private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOException {
         if (entry.docsWithFieldOffset == -2) {
             // empty
@@ -1113,56 +1253,17 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
             // Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value
             if (entry.docsWithFieldOffset == -1) {
                 // Special case when all docs have a value
-                return new BaseDenseNumericValues() {
-
-                    private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
-                    private int doc = -1;
-
+                return new BaseDenseNumericValues(maxDoc) {
                     @Override
                     public long longValue() {
                         // Only one ordinal!
                         return 0L;
                     }
 
-                    @Override
-                    public int docID() {
-                        return doc;
-                    }
-
-                    @Override
-                    public int nextDoc() throws IOException {
-                        return advance(doc + 1);
-                    }
-
-                    @Override
-                    public int advance(int target) throws IOException {
-                        if (target >= maxDoc) {
-                            return doc = NO_MORE_DOCS;
-                        }
-                        return doc = target;
-                    }
-
-                    @Override
-                    public boolean advanceExact(int target) {
-                        doc = target;
-                        return true;
-                    }
-
-                    @Override
-                    public long cost() {
-                        return maxDoc;
-                    }
-
                     @Override
                     long lookAheadValueAt(int targetDoc) throws IOException {
                         return 0L;  // Only one ordinal!
                     }
-
-                    @Override
-                    public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset)
-                        throws IOException {
-                        return null;
-                    }
                 };
             } else {
                 final IndexedDISI disi = new IndexedDISI(
@@ -1173,36 +1274,47 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
                     entry.denseRankPower,
                     entry.numValues
                 );
-                return new NumericDocValues() {
-
-                    @Override
-                    public int advance(int target) throws IOException {
-                        return disi.advance(target);
-                    }
-
+                return new BaseSparseNumericValues(disi) {
                     @Override
-                    public boolean advanceExact(int target) throws IOException {
-                        return disi.advanceExact(target);
-                    }
-
-                    @Override
-                    public int nextDoc() throws IOException {
-                        return disi.nextDoc();
+                    public long longValue() throws IOException {
+                        return 0L;  // Only one ordinal!
                     }
-
+                };
+            }
+        } else if (entry.sortedOrdinals != null) {
+            final var ordinalsReader = new SortedOrdinalReader(
+                maxOrd,
+                DirectMonotonicReader.getInstance(
+                    entry.sortedOrdinals,
+                    data.randomAccessSlice(entry.valuesOffset, entry.valuesLength),
+                    true
+                )
+            );
+            if (entry.docsWithFieldOffset == -1) {
+                return new BaseDenseNumericValues(maxDoc) {
                     @Override
-                    public int docID() {
-                        return disi.docID();
+                    long lookAheadValueAt(int targetDoc) {
+                        return ordinalsReader.lookAheadValue(targetDoc);
                     }
 
                     @Override
-                    public long cost() {
-                        return disi.cost();
+                    public long longValue() {
+                        return ordinalsReader.readValueAndAdvance(doc);
                     }
-
+                };
+            } else {
+                final var disi = new IndexedDISI(
+                    data,
+                    entry.docsWithFieldOffset,
+                    entry.docsWithFieldLength,
+                    entry.jumpTableEntryCount,
+                    entry.denseRankPower,
+                    entry.numValues
+                );
+                return new BaseSparseNumericValues(disi) {
                     @Override
                     public long longValue() {
-                        return 0L;
+                        return ordinalsReader.readValueAndAdvance(disi.docID());
                     }
                 };
             }
@@ -1218,10 +1330,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
         if (entry.docsWithFieldOffset == -1) {
             // dense
-            return new BaseDenseNumericValues() {
-
-                private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
-                private int doc = -1;
+            return new BaseDenseNumericValues(maxDoc) {
                 private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
                 private long currentBlockIndex = -1;
                 private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
@@ -1230,35 +1339,6 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
                 private long[] lookaheadBlock;
                 private IndexInput lookaheadData = null;
 
-                @Override
-                public int docID() {
-                    return doc;
-                }
-
-                @Override
-                public int nextDoc() throws IOException {
-                    return advance(doc + 1);
-                }
-
-                @Override
-                public int advance(int target) throws IOException {
-                    if (target >= maxDoc) {
-                        return doc = NO_MORE_DOCS;
-                    }
-                    return doc = target;
-                }
-
-                @Override
-                public boolean advanceExact(int target) {
-                    doc = target;
-                    return true;
-                }
-
-                @Override
-                public long cost() {
-                    return maxDoc;
-                }
-
                 @Override
                 public long longValue() throws IOException {
                     final int index = doc;
@@ -1374,37 +1454,11 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
                 entry.denseRankPower,
                 entry.numValues
             );
-            return new NumericDocValues() {
-
+            return new BaseSparseNumericValues(disi) {
                 private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
                 private long currentBlockIndex = -1;
                 private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
 
-                @Override
-                public int advance(int target) throws IOException {
-                    return disi.advance(target);
-                }
-
-                @Override
-                public boolean advanceExact(int target) throws IOException {
-                    return disi.advanceExact(target);
-                }
-
-                @Override
-                public int nextDoc() throws IOException {
-                    return disi.nextDoc();
-                }
-
-                @Override
-                public int docID() {
-                    return disi.docID();
-                }
-
-                @Override
-                public long cost() {
-                    return disi.cost();
-                }
-
                 @Override
                 public long longValue() throws IOException {
                     final int index = disi.index();
@@ -1610,6 +1664,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         DirectMonotonicReader.Meta indexMeta;
         long valuesOffset;
         long valuesLength;
+        DirectMonotonicReader.Meta sortedOrdinals;
     }
 
     static class BinaryEntry {

+ 5 - 1
server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java

@@ -58,7 +58,11 @@ public class DocValuesCodecDuelTests extends ESTestCase {
             Codec codec = new Elasticsearch900Lucene101Codec() {
 
                 final DocValuesFormat docValuesFormat = randomBoolean()
-                    ? new ES819TSDBDocValuesFormat()
+                    ? new ES819TSDBDocValuesFormat(
+                        ESTestCase.randomIntBetween(1, 4096),
+                        ESTestCase.randomIntBetween(1, 512),
+                        random().nextBoolean()
+                    )
                     : new TestES87TSDBDocValuesFormat();
 
                 @Override

+ 107 - 0
server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java

@@ -13,6 +13,7 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -21,10 +22,14 @@ import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSortField;
@@ -45,6 +50,9 @@ import java.lang.reflect.Field;
 import java.util.Arrays;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.IntSupplier;
+
+import static org.hamcrest.Matchers.equalTo;
 
 public class TsdbDocValueBwcTests extends ESTestCase {
 
@@ -260,6 +268,105 @@ public class TsdbDocValueBwcTests extends ESTestCase {
         }
     }
 
+    public void testEncodeOrdinalRange() throws IOException { // checks host.name doc values agree with host.id on an index sorted by host.name
+        try (var dir = newDirectory()) {
+            int iters = between(5, 20);
+            for (int iter = 0; iter < iters; iter++) { // each iteration opens a new writer on the same directory with a re-randomized codec
+                var config = new IndexWriterConfig();
+                String hostNameField = "host.name";
+                String hostIdField = "host.id";
+                config.setIndexSort(new Sort(new SortField(hostNameField, SortField.Type.STRING, false))); // host.name is the only (primary) sort field
+                int thresholdRange = random().nextInt(3); // selects one of three threshold regimes for this iteration
+                IntSupplier nextOrdinalRangeThreshold = () -> {
+                    if (thresholdRange == 0) {
+                        return between(1, 5); // small threshold
+                    } else if (thresholdRange == 1) {
+                        return between(5, 20); // medium threshold
+                    } else {
+                        return Integer.MAX_VALUE; // presumably keeps ordinal range encoding off -- TODO confirm against the format
+                    }
+                };
+                config.setCodec(
+                    TestUtil.alwaysDocValuesFormat(
+                        new ES819TSDBDocValuesFormat(
+                            random().nextInt(16, 128),
+                            nextOrdinalRangeThreshold.getAsInt(), // threshold drawn from the regime chosen above
+                            random().nextBoolean()
+                        )
+                    )
+                );
+                try (IndexWriter writer = new IndexWriter(dir, config)) {
+                    int numDocs = between(50, 500);
+                    for (int d = 0; d < numDocs; d++) {
+                        Document doc = new Document();
+                        int hostId = random().nextInt(100);
+                        if (random().nextInt(100) <= 10) { // 11 of 100 outcomes: delete by host id instead of indexing
+                            writer.deleteDocuments(LongPoint.newExactQuery(hostIdField, hostId));
+                        } else {
+                            String hostName = String.format(Locale.ROOT, "host-%02d", hostId);
+                            doc.add(new LongPoint("host.id", hostId)); // same name as hostIdField; this point field is what the delete query matches
+                            doc.add(new SortedDocValuesField(hostNameField, new BytesRef(hostName)));
+                            doc.add(new NumericDocValuesField(hostIdField, hostId));
+                            writer.addDocument(doc);
+                        }
+
+                        if (random().nextInt(100) <= 5) { // occasionally add a doc that has neither host field
+                            Document dummy = new Document();
+                            dummy.add(new SortedDocValuesField("dummy", new BytesRef("dummy")));
+                            writer.addDocument(dummy);
+                        }
+                        if (random().nextInt(100) <= 10) { // occasionally flush
+                            writer.flush();
+                        }
+                        if (random().nextInt(100) <= 5) { // occasionally force-merge with a random segment-count argument
+                            writer.forceMerge(between(1, 10));
+                        }
+                    }
+                }
+                try (DirectoryReader reader = DirectoryReader.open(dir)) {
+                    for (LeafReaderContext leaf : reader.leaves()) {
+                        // sequential pass: visit every doc that has a host id and check its host name
+                        NumericDocValues hostIdDv = leaf.reader().getNumericDocValues(hostIdField);
+                        SortedDocValues hostNameDv = leaf.reader().getSortedDocValues(hostNameField);
+                        if (hostIdDv == null) { // leaf exposes no host.id values, so host.name must be absent as well
+                            assertNull(hostNameDv);
+                            continue;
+                        }
+                        {
+                            int docId;
+                            while ((docId = hostIdDv.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+                                assertTrue(hostNameDv.advanceExact(docId));
+                                String hostName = hostNameDv.lookupOrd(hostNameDv.ordValue()).utf8ToString();
+                                String expectedHostName = String.format(Locale.ROOT, "host-%02d", hostIdDv.longValue()); // rebuild the name from the stored id
+                                assertThat(hostName, equalTo(expectedHostName));
+                            }
+                        }
+                        int checkIters = between(1, 20); // random-access pass: probe a few non-decreasing doc ids
+                        int nextDoc = 0;
+                        for (int n = 0; n < checkIters; n++) {
+                            if (nextDoc >= leaf.reader().maxDoc()) { // NOTE(review): nextDoc appears to stay below maxDoc, so this reset looks unreachable -- confirm
+                                nextDoc = 0;
+                            }
+                            nextDoc = nextDoc + random().nextInt(leaf.reader().maxDoc() - nextDoc); // random forward step, result stays below maxDoc
+                            if (hostIdDv.docID() == DocIdSetIterator.NO_MORE_DOCS || nextDoc > hostIdDv.docID()) { // NOTE(review): re-opens when the target is ahead; confirm '<' was not intended
+                                hostIdDv = leaf.reader().getNumericDocValues(hostIdField);
+                                hostNameDv = leaf.reader().getSortedDocValues(hostNameField);
+                            }
+                            if (hostIdDv.advanceExact(nextDoc)) {
+                                assertTrue(hostNameDv.advanceExact(nextDoc));
+                                String hostName = hostNameDv.lookupOrd(hostNameDv.ordValue()).utf8ToString();
+                                String expectedHostName = String.format(Locale.ROOT, "host-%02d", hostIdDv.longValue());
+                                assertThat(hostName, equalTo(expectedHostName));
+                            } else { // doc has no host id (for example a dummy doc), so it must have no host name either
+                                assertFalse(hostNameDv.advanceExact(nextDoc));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField, Codec codec) {
         var config = new IndexWriterConfig();
         config.setIndexSort(

+ 5 - 1
server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java

@@ -65,7 +65,11 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests
 
     private final Codec codec = new Elasticsearch900Lucene101Codec() {
 
-        final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat();
+        final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
+            ESTestCase.randomIntBetween(1, 4096),
+            ESTestCase.randomIntBetween(1, 512),
+            random().nextBoolean()
+        );
 
         @Override
         public DocValuesFormat getDocValuesFormatForField(String field) {

+ 4 - 2
server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java

@@ -18,13 +18,15 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD
     @Override
     protected Codec getCodec() {
         // small interval size to test with many intervals
-        return TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextBoolean()));
+        return TestUtil.alwaysDocValuesFormat(
+            new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean())
+        );
     }
 
     public void testSkipIndexIntervalSize() {
         IllegalArgumentException ex = expectThrows(
             IllegalArgumentException.class,
-            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextBoolean())
+            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean())
         );
         assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1"));
     }