Browse Source

Optimize tsdb codec for segment merging (#124392)

By implementing the getMergeInstance() method in ES87TSDBDocValuesProducer
Martijn van Groningen 7 months ago
parent
commit
45f2eb205f

+ 60 - 19
server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java

@@ -51,22 +51,31 @@ import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SKIP_IN
 import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
 
 public class ES87TSDBDocValuesProducer extends DocValuesProducer {
-    private final Map<String, NumericEntry> numerics = new HashMap<>();
-    private final Map<String, BinaryEntry> binaries = new HashMap<>();
-    private final Map<String, SortedEntry> sorted = new HashMap<>();
-    private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
-    private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
-    private final Map<String, DocValuesSkipperEntry> skippers = new HashMap<>();
+    private final Map<String, NumericEntry> numerics;
+    private final Map<String, BinaryEntry> binaries;
+    private final Map<String, SortedEntry> sorted;
+    private final Map<String, SortedSetEntry> sortedSets;
+    private final Map<String, SortedNumericEntry> sortedNumerics;
+    private final Map<String, DocValuesSkipperEntry> skippers;
     private final IndexInput data;
     private final int maxDoc;
+    private final int version;
+    private final boolean merging;
 
     ES87TSDBDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension)
         throws IOException {
-        String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+        this.numerics = new HashMap<>();
+        this.binaries = new HashMap<>();
+        this.sorted = new HashMap<>();
+        this.sortedSets = new HashMap<>();
+        this.sortedNumerics = new HashMap<>();
+        this.skippers = new HashMap<>();
         this.maxDoc = state.segmentInfo.maxDoc();
+        this.merging = false;
 
         // read in the entries from the metadata file.
         int version = -1;
+        String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
         try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) {
             Throwable priorE = null;
 
@@ -112,6 +121,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
             CodecUtil.retrieveChecksum(data);
 
             success = true;
+            this.version = version;
         } finally {
             if (success == false) {
                 IOUtils.closeWhileHandlingException(this.data);
@@ -119,6 +129,35 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
         }
     }
 
+    private ES87TSDBDocValuesProducer(
+        Map<String, NumericEntry> numerics,
+        Map<String, BinaryEntry> binaries,
+        Map<String, SortedEntry> sorted,
+        Map<String, SortedSetEntry> sortedSets,
+        Map<String, SortedNumericEntry> sortedNumerics,
+        Map<String, DocValuesSkipperEntry> skippers,
+        IndexInput data,
+        int maxDoc,
+        int version,
+        boolean merging
+    ) {
+        this.numerics = numerics;
+        this.binaries = binaries;
+        this.sorted = sorted;
+        this.sortedSets = sortedSets;
+        this.sortedNumerics = sortedNumerics;
+        this.skippers = skippers;
+        this.data = data.clone();
+        this.maxDoc = maxDoc;
+        this.version = version;
+        this.merging = merging;
+    }
+
+    @Override
+    public DocValuesProducer getMergeInstance() {
+        return new ES87TSDBDocValuesProducer(numerics, binaries, sorted, sortedSets, sortedNumerics, skippers, data, maxDoc, version, true);
+    }
+
     @Override
     public NumericDocValues getNumeric(FieldInfo field) throws IOException {
         NumericEntry entry = numerics.get(field.name);
@@ -151,7 +190,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
             } else {
                 // variable length
                 final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
-                final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
+                final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData, merging);
                 return new DenseBinaryDocValues(maxDoc) {
                     final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);
 
@@ -189,7 +228,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
             } else {
                 // variable length
                 final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
-                final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
+                final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData, merging);
                 return new SparseBinaryDocValues(disi) {
                     final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);
 
@@ -353,7 +392,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
 
         @Override
         public TermsEnum termsEnum() throws IOException {
-            return new TermsDict(entry.termsDictEntry, data);
+            return new TermsDict(entry.termsDictEntry, data, merging);
         }
     }
 
@@ -361,11 +400,13 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
 
         final SortedSetEntry entry;
         final IndexInput data;
+        final boolean merging;
         final TermsEnum termsEnum;
 
-        BaseSortedSetDocValues(SortedSetEntry entry, IndexInput data) throws IOException {
+        BaseSortedSetDocValues(SortedSetEntry entry, IndexInput data, boolean merging) throws IOException {
             this.entry = entry;
             this.data = data;
+            this.merging = merging;
             this.termsEnum = termsEnum();
         }
 
@@ -391,7 +432,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
 
         @Override
         public TermsEnum termsEnum() throws IOException {
-            return new TermsDict(entry.termsDictEntry, data);
+            return new TermsDict(entry.termsDictEntry, data, merging);
         }
     }
 
@@ -412,17 +453,17 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
         long currentCompressedBlockStart = -1;
         long currentCompressedBlockEnd = -1;
 
-        TermsDict(TermsDictEntry entry, IndexInput data) throws IOException {
+        TermsDict(TermsDictEntry entry, IndexInput data, boolean merging) throws IOException {
             this.entry = entry;
             RandomAccessInput addressesSlice = data.randomAccessSlice(entry.termsAddressesOffset, entry.termsAddressesLength);
-            blockAddresses = DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice);
+            blockAddresses = DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice, merging);
             bytes = data.slice("terms", entry.termsDataOffset, entry.termsDataLength);
             blockMask = (1L << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1;
             RandomAccessInput indexAddressesSlice = data.randomAccessSlice(
                 entry.termsIndexAddressesOffset,
                 entry.termsIndexAddressesLength
             );
-            indexAddresses = DirectMonotonicReader.getInstance(entry.termsIndexAddressesMeta, indexAddressesSlice);
+            indexAddresses = DirectMonotonicReader.getInstance(entry.termsIndexAddressesMeta, indexAddressesSlice, merging);
             indexBytes = data.randomAccessSlice(entry.termsIndexOffset, entry.termsIndexLength);
             term = new BytesRef(entry.maxTermLength);
 
@@ -647,7 +688,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
 
         SortedNumericEntry ordsEntry = entry.ordsEntry;
         final SortedNumericDocValues ords = getSortedNumeric(ordsEntry, entry.termsDictEntry.termsDictSize);
-        return new BaseSortedSetDocValues(entry, data) {
+        return new BaseSortedSetDocValues(entry, data, merging) {
 
             int i = 0;
             int count = 0;
@@ -1079,7 +1120,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
         // makes things slower.
 
         final RandomAccessInput indexSlice = data.randomAccessSlice(entry.indexOffset, entry.indexLength);
-        final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice);
+        final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice, merging);
         final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength);
 
         final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
@@ -1210,7 +1251,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
     private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IOException {
         assert entry.numValues > 0;
         final RandomAccessInput indexSlice = data.randomAccessSlice(entry.indexOffset, entry.indexLength);
-        final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice);
+        final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice, merging);
 
         final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength);
         final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
@@ -1247,7 +1288,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer {
         }
 
         final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
-        final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput);
+        final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging);
 
         final NumericValues values = getValues(entry, maxOrd);