2 年之前 · 146b605269
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/DecodeDecreasingIntegerBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/DecodeDecreasingIntegerBenchmark.java
@@ -37,7 +37,7 @@ import java.util.concurrent.TimeUnit;
 
				 public class DecodeDecreasingIntegerBenchmark {
			
 
				     private static final int SEED = 17;
			
 
				     private static final int BLOCK_SIZE = 128;
			
 
				-    @Param({ "4", "8", "12", "16", "24", "28", "32", "36", "40", "44", "48", "52", "56", "64" })
			
 
				+    @Param({ "4", "8", "12", "16", "20", "24", "28", "32", "36", "40", "44", "48", "52", "56", "60", "64" })
			
 
				     private int bitsPerValue;
			
 
				 
			
 
				     private final AbstractDocValuesForUtilBenchmark decode;
			
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/DecodeIncreasingIntegerBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/DecodeIncreasingIntegerBenchmark.java
@@ -37,7 +37,7 @@ import java.util.concurrent.TimeUnit;
 
				 public class DecodeIncreasingIntegerBenchmark {
			
 
				     private static final int SEED = 17;
			
 
				     private static final int BLOCK_SIZE = 128;
			
 
				-    @Param({ "4", "8", "12", "16", "24", "28", "32", "36", "40", "44", "48", "52", "56", "64" })
			
 
				+    @Param({ "4", "8", "12", "16", "20", "24", "28", "32", "36", "40", "44", "48", "52", "56", "60", "64" })
			
 
				     private int bitsPerValue;
			
 
				 
			
 
				     private final AbstractDocValuesForUtilBenchmark decode;
			
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/DecodeNonSortedIntegerBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/DecodeNonSortedIntegerBenchmark.java
@@ -37,7 +37,7 @@ import java.util.concurrent.TimeUnit;
 
				 public class DecodeNonSortedIntegerBenchmark {
			
 
				     private static final int SEED = 17;
			
 
				     private static final int BLOCK_SIZE = 128;
			
 
				-    @Param({ "4", "8", "12", "16", "24", "28", "32", "36", "40", "44", "48", "52", "56", "64" })
			
 
				+    @Param({ "4", "8", "12", "16", "20", "24", "28", "32", "36", "40", "44", "48", "52", "56", "60", "64" })
			
 
				     private int bitsPerValue;
			
 
				 
			
 
				     private final AbstractDocValuesForUtilBenchmark decode;
			
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/EncodeDecreasingIntegerBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/EncodeDecreasingIntegerBenchmark.java
@@ -37,7 +37,7 @@ import java.util.concurrent.TimeUnit;
 
				 public class EncodeDecreasingIntegerBenchmark {
			
 
				     private static final int SEED = 17;
			
 
				     private static final int BLOCK_SIZE = 128;
			
 
				-    @Param({ "4", "8", "12", "16", "24", "28", "32", "36", "40", "44", "48", "52", "56", "64" })
			
 
				+    @Param({ "4", "8", "12", "16", "20", "24", "28", "32", "36", "40", "44", "48", "52", "56", "60", "64" })
			
 
				     private int bitsPerValue;
			
 
				 
			
 
				     private final AbstractDocValuesForUtilBenchmark encode;
			
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/EncodeIncreasingIntegerBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/EncodeIncreasingIntegerBenchmark.java
@@ -37,7 +37,7 @@ import java.util.concurrent.TimeUnit;
 
				 public class EncodeIncreasingIntegerBenchmark {
			
 
				     private static final int SEED = 17;
			
 
				     private static final int BLOCK_SIZE = 128;
			
 
				-    @Param({ "4", "8", "12", "16", "24", "28", "32", "36", "40", "44", "48", "52", "56", "64" })
			
 
				+    @Param({ "4", "8", "12", "16", "20", "24", "28", "32", "36", "40", "44", "48", "52", "56", "60", "64" })
			
 
				     private int bitsPerValue;
			
 
				 
			
 
				     private final AbstractDocValuesForUtilBenchmark encode;
			
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/EncodeNonSortedIntegerBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/EncodeNonSortedIntegerBenchmark.java
@@ -37,7 +37,7 @@ import java.util.concurrent.TimeUnit;
 
				 public class EncodeNonSortedIntegerBenchmark {
			
 
				     private static final int SEED = 17;
			
 
				     private static final int BLOCK_SIZE = 128;
			
 
				-    @Param({ "4", "8", "12", "16", "24", "28", "32", "36", "40", "44", "48", "52", "56", "64" })
			
 
				+    @Param({ "4", "8", "12", "16", "20", "24", "28", "32", "36", "40", "44", "48", "52", "56", "60", "64" })
			
 
				     private int bitsPerValue;
			
 
				 
			
 
				     private final AbstractDocValuesForUtilBenchmark encode;
			
--- a/docs/changelog/93371.yaml
+++ b/docs/changelog/93371.yaml
@@ -0,0 +1,31 @@
 
				+pr: 93371
			
 
				+summary: "Encode using 40, 48 and 56 bits per value"
			
 
				+area: TSDB
			
 
				+type: feature
			
 
				+issues: []
			
 
				+highlight:
			
 
				+  title: "Encode using 40, 48 and 56 bits per value"
			
 
				+  body: |-
			
 
				+    We use the encoding as follows:
			
 
				+    * for values taking [33, 40] bits per value, encode using 40 bits per value
			
 
				+    * for values taking [41, 48] bits per value, encode using 48 bits per value
			
 
				+    * for values taking [49, 56] bits per value, encode using 56 bits per value
			
 
				+
			
 
				+    This is an improvement over the encoding used by ForUtil which does not
			
 
				+    apply any compression for values taking more than 32 bits per value.
			
 
				+
			
 
				+    Note that 40, 48 and 56 bits per value represent exact multiples of
			
 
				+    bytes (5, 6 and 7 bytes per value). As a result, we always write values
			
 
				+    using 3, 2 or 1 bytes fewer than the 8 bytes required for a long value.
			
 
				+
			
 
				+    Looking at the savings in stored bytes, for a block of 128 (long) values we
			
 
				+    would normally store 128 x 8 bytes = 1024 bytes, while now we have the following:
			
 
				+    * 40 bits per value: write 645 bytes instead of 1024, saving 379 bytes (37%)
			
 
				+    * 48 bits per value: write 772 bytes instead of 1024, saving 252 bytes (24%)
			
 
				+    * 56 bits per value: write 897 bytes instead of 1024, saving 127 bytes (12%)
			
 
				+
			
 
				+    We also apply compression to gauge metrics under the assumption that
			
 
				+    compressing values taking more than 32 bits per value works well for
			
 
				+    floating point values, because of the way floating point values are
			
 
				+    represented (IEEE 754 format).
			
 
				+  notable: true
			
--- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
@@ -123,7 +123,8 @@ public class PerFieldMapperCodec extends Lucene95Codec {
 
				             final MappingLookup mappingLookup = mapperService.mappingLookup();
			
 
				             if (mappingLookup.getMapper(field) instanceof NumberFieldMapper) {
			
 
				                 final MappedFieldType fieldType = mappingLookup.getFieldType(field);
			
 
				-                return TimeSeriesParams.MetricType.COUNTER.equals(fieldType.getMetricType());
			
 
				+                return TimeSeriesParams.MetricType.COUNTER.equals(fieldType.getMetricType())
			
 
				+                    || TimeSeriesParams.MetricType.GAUGE.equals(fieldType.getMetricType());
			
 
				             }
			
 
				         }
			
 
				         return false;
			
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java
@@ -10,12 +10,18 @@ package org.elasticsearch.index.codec.tsdb;
 
				 
			
 
				 import org.apache.lucene.store.DataInput;
			
 
				 import org.apache.lucene.store.DataOutput;
			
 
				+import org.elasticsearch.common.util.ByteUtils;
			
 
				 
			
 
				 import java.io.IOException;
			
 
				 
			
 
				 public class DocValuesForUtil {
			
 
				+    private static final int BITS_IN_FOUR_BYTES = 4 * Byte.SIZE;
			
 
				+    private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE;
			
 
				+    private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;
			
 
				+    private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE;
			
 
				     private final ForUtil forUtil = new ForUtil();
			
 
				     private final int blockSize;
			
 
				+    private final byte[] encoded;
			
 
				 
			
 
				     public DocValuesForUtil() {
			
 
				         this(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
			
@@ -23,9 +29,25 @@ public class DocValuesForUtil {
 
				 
			
 
				     private DocValuesForUtil(int blockSize) {
			
 
				         this.blockSize = blockSize;
			
 
				+        this.encoded = new byte[1024];
			
 
				     }
			
 
				 
			
 
				-    public void encode(long[] in, int bitsPerValue, DataOutput out) throws IOException {
			
 
				+    public static int roundBits(int bitsPerValue) {
			
 
				+        if (bitsPerValue > 24 && bitsPerValue <= 32) {
			
 
				+            return BITS_IN_FOUR_BYTES;
			
 
				+        } else if (bitsPerValue > 32 && bitsPerValue <= BITS_IN_FIVE_BYTES) {
			
 
				+            return BITS_IN_FIVE_BYTES;
			
 
				+        } else if (bitsPerValue > BITS_IN_FIVE_BYTES && bitsPerValue <= BITS_IN_SIX_BYTES) {
			
 
				+            return BITS_IN_SIX_BYTES;
			
 
				+        } else if (bitsPerValue > BITS_IN_SIX_BYTES && bitsPerValue <= BITS_IN_SEVEN_BYTES) {
			
 
				+            return BITS_IN_SEVEN_BYTES;
			
 
				+        } else if (bitsPerValue > BITS_IN_SEVEN_BYTES) {
			
 
				+            return Long.BYTES * Byte.SIZE;
			
 
				+        }
			
 
				+        return bitsPerValue;
			
 
				+    }
			
 
				+
			
 
				+    public void encode(long[] in, int bitsPerValue, final DataOutput out) throws IOException {
			
 
				         if (bitsPerValue <= 24) { // these bpvs are handled efficiently by ForUtil
			
 
				             forUtil.encode(in, bitsPerValue, out);
			
 
				         } else if (bitsPerValue <= 32) {
			
@@ -33,24 +55,49 @@ public class DocValuesForUtil {
 
				             for (int i = 0; i < blockSize / 2; ++i) {
			
 
				                 out.writeLong(in[i]);
			
 
				             }
			
 
				+        } else if (bitsPerValue == BITS_IN_FIVE_BYTES || bitsPerValue == BITS_IN_SIX_BYTES || bitsPerValue == BITS_IN_SEVEN_BYTES) {
			
 
				+            encodeFiveSixOrSevenBytesPerValue(in, bitsPerValue, out);
			
 
				         } else {
			
 
				+            assert bitsPerValue > 56 : "bitsPerValue must be greater than 56 but was [" + bitsPerValue + "]";
			
 
				             for (long l : in) {
			
 
				                 out.writeLong(l);
			
 
				             }
			
 
				         }
			
 
				     }
			
 
				 
			
 
				-    public void decode(int bitsPerValue, DataInput in, long[] out) throws IOException {
			
 
				+    private void encodeFiveSixOrSevenBytesPerValue(long[] in, int bitsPerValue, final DataOutput out) throws IOException {
			
 
				+        int bytesPerValue = bitsPerValue / Byte.SIZE;
			
 
				+        for (int i = 0; i < in.length; ++i) {
			
 
				+            ByteUtils.writeLongLE(in[i], this.encoded, i * bytesPerValue);
			
 
				+        }
			
 
				+        out.writeBytes(this.encoded, bytesPerValue * in.length);
			
 
				+    }
			
 
				+
			
 
				+    public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
			
 
				         if (bitsPerValue <= 24) {
			
 
				             forUtil.decode(bitsPerValue, in, out);
			
 
				         } else if (bitsPerValue <= 32) {
			
 
				             in.readLongs(out, 0, blockSize / 2);
			
 
				             expand32(out);
			
 
				+        } else if (bitsPerValue == BITS_IN_FIVE_BYTES || bitsPerValue == BITS_IN_SIX_BYTES || bitsPerValue == BITS_IN_SEVEN_BYTES) {
			
 
				+            decodeFiveSixOrSevenBytesPerValue(bitsPerValue, in, out);
			
 
				         } else {
			
 
				+            assert bitsPerValue > 56 : "bitsPerValue must be greater than 56 but was [" + bitsPerValue + "]";
			
 
				             in.readLongs(out, 0, blockSize);
			
 
				         }
			
 
				     }
			
 
				 
			
 
				+    private void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
			
 
				+        // NOTE: we expect multibyte values to be written "least significant byte" first
			
 
				+        int bytesPerValue = bitsPerValue / Byte.SIZE;
			
 
				+        long mask = (1L << bitsPerValue) - 1;
			
 
				+        byte[] buffer = new byte[bytesPerValue * blockSize + Long.BYTES - bytesPerValue];
			
 
				+        in.readBytes(buffer, 0, bytesPerValue * blockSize);
			
 
				+        for (int i = 0; i < blockSize; ++i) {
			
 
				+            out[i] = ByteUtils.readLongLE(buffer, i * bytesPerValue) & mask;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				     private static void collapse32(long[] arr) {
			
 
				         for (int i = 0; i < 64; ++i) {
			
 
				             arr[i] = (arr[i] << 32) | arr[64 + i];
			
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java
@@ -125,8 +125,7 @@ public class ES87TSDBDocValuesEncoder {
 
				             or |= l;
			
 
				         }
			
 
				 
			
 
				-        final int bitsPerValue = or == 0 ? 0 : PackedInts.unsignedBitsRequired(or);
			
 
				-
			
 
				+        int bitsPerValue = or == 0 ? 0 : DocValuesForUtil.roundBits(PackedInts.unsignedBitsRequired(or));
			
 
				         out.writeVInt((bitsPerValue << tokenBits) | token);
			
 
				         if (bitsPerValue > 0) {
			
 
				             forUtil.encode(in, bitsPerValue, out);
			
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java
@@ -39,7 +39,7 @@ public class DocValuesForUtilTests extends LuceneTestCase {
 
				 
			
 
				         for (int i = 0; i < iterations; ++i) {
			
 
				             final int bpv = TestUtil.nextInt(random(), 1, 64);
			
 
				-            bpvs[i] = bpv;
			
 
				+            bpvs[i] = DocValuesForUtil.roundBits(bpv);
			
 
				             for (int j = 0; j < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++j) {
			
 
				                 values[i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE + j] = bpv == 64
			
 
				                     ? random().nextLong()
			
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java
@@ -17,6 +17,7 @@ import org.apache.lucene.util.NumericUtils;
 
				 
			
 
				 import java.io.IOException;
			
 
				 import java.util.Arrays;
			
 
				+import java.util.Random;
			
 
				 
			
 
				 public class ES87TSDBDocValuesEncoderTests extends LuceneTestCase {
			
 
				 
			
@@ -135,6 +136,41 @@ public class ES87TSDBDocValuesEncoderTests extends LuceneTestCase {
 
				         doTest(arr, expectedNumBytes);
			
 
				     }
			
 
				 
			
 
				+    public void testFloatingPointValues() throws IOException {
			
 
				+        long[] arr = new long[blockSize];
			
 
				+        // NOTE: these values are crafted in such a way that after applying GCD encoding we get values represented using 36 bits per value.
			
 
				+        for (int i = 0; i < blockSize; ++i) {
			
 
				+            double value = (i % 2 == 1) ? (i * 1956.0) : (i * 356923.5);
			
 
				+            arr[i] = Double.doubleToLongBits(value);
			
 
				+        }
			
 
				+        // NOTE: 36 bits per value strictly required, but we round to 40 bits per value to write exactly 5 bytes per value
			
 
				+        final long expectedNumBytes = 6 // token (2 bytes) + GCD (4 bytes)
			
 
				+            + (blockSize * 40) / Byte.SIZE; // data
			
 
				+        doTest(arr, expectedNumBytes);
			
 
				+    }
			
 
				+
			
 
				+    public void testBitsPerValueFullRange() throws IOException {
			
 
				+        final Random random = new Random(17);
			
 
				+        long[] arr = new long[blockSize];
			
 
				+        long constant = 1;
			
 
				+        for (int bitsPerValue = 0; bitsPerValue <= 64; bitsPerValue++) {
			
 
				+            for (int i = 0; i < blockSize; ++i) {
			
 
				+                if (bitsPerValue == 0) {
			
 
				+                    arr[i] = constant;
			
 
				+                } else {
			
 
				+                    arr[i] = random.nextLong(0, bitsPerValue <= 62 ? 1L << bitsPerValue : Long.MAX_VALUE);
			
 
				+                }
			
 
				+            }
			
 
				+            long actualBitsPerValue = DocValuesForUtil.roundBits(bitsPerValue);
			
 
				+            int actualTokenBytes = bitsPerValue < 16 ? 1 : 2;
			
 
				+            final long expectedNumBytes = bitsPerValue == 0
			
 
				+                ? 2
			
 
				+                : actualTokenBytes // token
			
 
				+                    + (blockSize * actualBitsPerValue) / Byte.SIZE; // data
			
 
				+            doTest(arr, expectedNumBytes);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				     private void doTest(long[] arr, long expectedNumBytes) throws IOException {
			
 
				         final long[] expected = arr.clone();
			
 
				         try (Directory dir = newDirectory()) {