فهرست منبع

Merge pull request #14780 from jasontedor/variable-length-long

Add variable-length long encoding
Jason Tedor 10 سال پیش
والد
کامیت
d2ffcba890

+ 15 - 0
core/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java

@@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexFormatTooNewException;
 import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.elasticsearch.Version;
@@ -234,6 +235,20 @@ public abstract class StreamInput extends InputStream {
         return i | ((b & 0x7FL) << 56);
     }
 
+    /**
+     * Reads a long stored in the variable-length zig-zag format, i.e. the
+     * counterpart of {@link StreamOutput#writeZLong(long)}. Each byte carries
+     * seven payload bits, least-significant group first; a set high bit means
+     * another byte follows.
+     *
+     * @return the decoded (possibly negative) value
+     * @throws IOException if a tenth consecutive byte still has its continuation
+     *                     bit set, or if thrown while reading a byte
+     */
+    public long readZLong() throws IOException {
+        long encoded = 0L;
+        int shift = 0;
+        long next = readByte();
+        while ((next & 0x80L) != 0) {
+            // continuation byte: fold its seven payload bits in at the current offset
+            encoded |= (next & 0x7F) << shift;
+            shift += 7;
+            if (shift > 63) {
+                throw new IOException("variable-length stream is too long");
+            }
+            next = readByte();
+        }
+        // the terminating byte has a clear high bit, so it can be shifted in as-is
+        encoded |= next << shift;
+        return BitUtil.zigZagDecode(encoded);
+    }
+
     @Nullable
     public Text readOptionalText() throws IOException {
         int length = readInt();

+ 21 - 3
core/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java

@@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexFormatTooNewException;
 import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.ElasticsearchException;
@@ -172,9 +173,9 @@ public abstract class StreamOutput extends OutputStream {
     }
 
     /**
-     * Writes an long in a variable-length format.  Writes between one and nine
-     * bytes.  Smaller values take fewer bytes.  Negative numbers are not
-     * supported.
+     * Writes a non-negative long in a variable-length format.
+     * Writes between one and nine bytes. Smaller values take fewer bytes.
+     * Negative numbers are not supported.
      */
     public void writeVLong(long i) throws IOException {
         assert i >= 0;
@@ -185,6 +186,23 @@ public abstract class StreamOutput extends OutputStream {
         writeByte((byte) i);
     }
 
+    /**
+     * Writes a long in a variable-length format, using between one and ten bytes.
+     * The value is first remapped by sliding the sign bit into the least-significant
+     * bit and is then encoded as an unsigned number, e.g.
+     * 0 -&gt; 0, -1 -&gt; 1, 1 -&gt; 2, ..., Long.MIN_VALUE -&gt; -1, Long.MAX_VALUE -&gt; -2.
+     * Numbers with a small absolute value therefore get a short encoding.
+     * If the numbers are known to be non-negative, use {@link #writeVLong(long)}.
+     *
+     * @param i the value to write; may be negative
+     * @throws IOException if thrown while writing a byte
+     */
+    public void writeZLong(long i) throws IOException {
+        // zig-zag encoding cf. https://developers.google.com/protocol-buffers/docs/encoding?hl=en
+        long remaining = BitUtil.zigZagEncode(i);
+        // while anything is left above the low seven bits, emit those seven bits
+        // with the continuation flag (0x80) set
+        while ((remaining >>> 7) != 0L) {
+            final int payload = (int) (remaining & 0x7F);
+            writeByte((byte) (payload | 0x80));
+            remaining >>>= 7;
+        }
+        // last group: high bit clear marks the end of the number
+        writeByte((byte) (remaining & 0x7F));
+    }
+
     public void writeOptionalString(@Nullable String str) throws IOException {
         if (str == null) {
             writeBoolean(false);

+ 43 - 0
core/src/test/java/org/elasticsearch/common/io/stream/StreamTests.java

@@ -0,0 +1,43 @@
+package org.elasticsearch.common.io.stream;
+
+import org.elasticsearch.common.bytes.ByteBufferBytesReference;
+import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests for the zig-zag variable-length long encoding: values written with
+ * {@code writeZLong} must read back unchanged through {@code readZLong}, and a
+ * handful of known values must produce the exact expected bytes.
+ */
+public class StreamTests extends ESTestCase {
+    /** Round-trips 1024 random longs through writeZLong / readZLong. */
+    public void testRandomVLongSerialization() throws IOException {
+        for (int round = 0; round < 1024; round++) {
+            final long expected = randomLong();
+            final BytesStreamOutput sink = new BytesStreamOutput();
+            sink.writeZLong(expected);
+            final long actual = sink.bytes().streamInput().readZLong();
+            assertEquals(expected, actual);
+        }
+    }
+
+    /** Checks the exact byte sequence for small values and for both extremes of the long range. */
+    public void testSpecificVLongSerialization() throws IOException {
+        // each entry pairs a value with the bytes writeZLong is expected to emit for it
+        final List<Tuple<Long, byte[]>> cases = Arrays.asList(
+                new Tuple<>(0L, new byte[]{0}),
+                new Tuple<>(-1L, new byte[]{1}),
+                new Tuple<>(1L, new byte[]{2}),
+                new Tuple<>(-2L, new byte[]{3}),
+                new Tuple<>(2L, new byte[]{4}),
+                new Tuple<>(Long.MIN_VALUE, new byte[]{-1, -1, -1, -1, -1, -1, -1, -1, -1, 1}),
+                new Tuple<>(Long.MAX_VALUE, new byte[]{-2, -1, -1, -1, -1, -1, -1, -1, -1, 1})
+        );
+        for (Tuple<Long, byte[]> testCase : cases) {
+            final long number = testCase.v1();
+            final byte[] wireBytes = testCase.v2();
+
+            // encoding direction: value -> expected bytes
+            final BytesStreamOutput sink = new BytesStreamOutput();
+            sink.writeZLong(number);
+            assertArrayEquals(Long.toString(number), wireBytes, sink.bytes().toBytes());
+
+            // decoding direction: expected bytes -> value
+            final ByteBufferBytesReference source = new ByteBufferBytesReference(ByteBuffer.wrap(wireBytes));
+            assertEquals(Arrays.toString(wireBytes), number, source.streamInput().readZLong());
+        }
+    }
+}