1
0
Эх сурвалжийг харах

Speed `getIntLE` from `BytesReference` (#90147)

This speeds up `getIntLE` from `BytesReference` which we'll be using in
the upcoming dense representations for aggregations. Here's the
performance:

```
           (type)  Mode  Cnt   Before  Error  After    Error  Units
            array  avgt    7   1.036 ± 0.062   0.261 ± 0.022  ns/op
paged_bytes_array  avgt    7   5.189 ± 0.172   5.317 ± 0.196  ns/op
  composite_256kb  avgt    7  30.792 ± 0.834  11.240 ± 0.387  ns/op
composite_262344b  avgt    7  32.503 ± 1.017  11.155 ± 0.358  ns/op
    composite_1mb  avgt    7  25.189 ± 0.449   8.379 ± 0.193  ns/op
```

The `array` method is how we'll use slices that don't span the edges of
a netty buffer. The `paged_bytes_array` method doesn't really change and
represents the default for internal stuff. I'll bet we could make it
faster too, but I don't know that we use it in the hot path. The
`composite_<size>` method is how we'll be reading large slabs from the
netty byte buffer. We could probably do better if we relied on the sizes
of the buffers being even, but we don't presently do that in the
composite bytes array. The different sizes following `composite` show
that the performance is dominated by the number of slabs in the
composite buffer. `1mb` looks like the largest buffer netty uses.
`256kb` is the smallest. The wild number of bytes intentionally doesn't
line the int up on sensible values. I don't think we'll use sizes like
that but it looks like the performance doesn't make a huge difference.
We're dominated by the buffer choice.
Nik Everett 3 жил өмнө
parent
commit
7ebb09b9f3

+ 11 - 10
benchmarks/README.md

@@ -81,19 +81,20 @@ To get realistic results, you should exercise care when running benchmarks. Here
 
 NOTE: Linux only. Sorry Mac and Windows.
 
-Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and FCML's `hsdis`.
-`perf` is generally available via `apg-get install perf` or `pacman -S perf`. FCML is a little more involved. This worked
+Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`.
+`perf` is generally available via `apg-get install perf` or `pacman -S perf`. `hsdis` you'll want to compile from source. is a little more involved. This worked
 on 2020-08-01:
 
 ```
-wget https://github.com/swojtasiak/fcml-lib/releases/download/v1.2.2/fcml-1.2.2.tar.gz
-tar xf fcml*
-cd fcml*
-./configure
-make
-cd example/hsdis
-make
-sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so
+git clone git@github.com:openjdk/jdk.git
+cd jdk
+git checkout jdk-17-ga
+cd src/utils/hsdis
+# Get a known good binutils
+wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz
+tar xf binutils-2.35.tar.gz
+make BINUTILS=binutils-2.35 ARCH=amd64
+sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/
 ```
 
 If you want to disassemble a single method do something like this:

+ 98 - 0
benchmarks/src/main/java/org/elasticsearch/benchmark/common/util/IntArrayBenchmark.java

@@ -0,0 +1,98 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.benchmark.common.util;
+
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.bytes.CompositeBytesReference;
+import org.elasticsearch.common.bytes.PagedBytesReference;
+import org.elasticsearch.common.bytes.ReleasableBytesReference;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.IntArray;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OperationsPerInvocation;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+@Warmup(iterations = 5)
+@Measurement(iterations = 7)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Fork(1)
+@OperationsPerInvocation(2621440)
+public class IntArrayBenchmark {
+    static final int SIZE = IntArrayBenchmark.class.getAnnotation(OperationsPerInvocation.class).value();
+
+    @Param({ "array", "paged_bytes_array", "composite_256kb", "composite_262344b", "composite_1mb" })
+    private String type;
+
+    private IntArray read;
+
+    @Setup
+    public void init() throws IOException {
+        IntArray ints = BigArrays.NON_RECYCLING_INSTANCE.newIntArray(SIZE);
+        for (int i = 0; i < SIZE; i++) {
+            ints.set(i, i);
+        }
+        BytesStreamOutput out = new BytesStreamOutput();
+        ints.writeTo(out);
+        read = IntArray.readFrom(new ReleasableBytesReference(bytesImpl(out.bytes()), () -> {}).streamInput());
+    }
+
+    private BytesReference bytesImpl(BytesReference bytes) {
+        if (type.equals("array")) {
+            return new BytesArray(bytes.toBytesRef());
+        }
+        if (type.equals("paged_bytes_array")) {
+            if (bytes instanceof PagedBytesReference == false) {
+                throw new AssertionError("expected PagedBytesReference but saw [" + bytes.getClass() + "]");
+            }
+            return bytes;
+        }
+        if (type.startsWith("composite_")) {
+            int size = Math.toIntExact(ByteSizeValue.parseBytesSizeValue(type.substring("composite_".length()), "type").getBytes());
+            List<BytesReference> references = new ArrayList<>();
+            for (int from = 0; from < bytes.length(); from += size) {
+                int sliceSize = Math.min(size, bytes.length() - from);
+                references.add(new BytesArray(bytes.slice(from, Math.min(from + size, sliceSize)).toBytesRef()));
+            }
+            BytesReference ref = CompositeBytesReference.of(references.toArray(BytesReference[]::new));
+            if (ref instanceof CompositeBytesReference == false) {
+                throw new AssertionError("expected CompositeBytesReference but saw [" + bytes.getClass() + "]");
+            }
+            return ref;
+        }
+        throw new IllegalArgumentException("unsupported [type] " + type);
+    }
+
+    @Benchmark
+    public long read() {
+        int res = 0;
+        for (int i = 0; i < SIZE; i++) {
+            res = res ^ read.get(i);
+        }
+        return res;
+    }
+}

+ 6 - 0
server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java

@@ -10,6 +10,7 @@ package org.elasticsearch.common.bytes;
 
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.util.ByteUtils;
 
 import java.io.IOException;
 import java.io.OutputStream;
@@ -130,4 +131,9 @@ public final class BytesArray extends AbstractBytesReference {
     public void writeTo(OutputStream os) throws IOException {
         os.write(bytes, offset, length);
     }
+
+    @Override
+    public int getIntLE(int index) {
+        return ByteUtils.readIntLE(bytes, offset + index);
+    }
 }

+ 12 - 0
server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java

@@ -225,4 +225,16 @@ public final class CompositeBytesReference extends AbstractBytesReference {
     public long ramBytesUsed() {
         return ramBytesUsed;
     }
+
+    @Override
+    public int getIntLE(int index) {
+        int i = getOffsetIndex(index);
+        int idx = index - offsets[i];
+        int end = idx + 4;
+        BytesReference wholeIntLivesHere = references[i];
+        if (end <= wholeIntLivesHere.length()) {
+            return wholeIntLivesHere.getIntLE(idx);
+        }
+        return super.getIntLE(index);
+    }
 }

+ 15 - 0
server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java

@@ -12,6 +12,8 @@ import org.hamcrest.Matchers;
 
 import java.io.IOException;
 
+import static org.hamcrest.Matchers.equalTo;
+
 public class BytesArrayTests extends AbstractBytesReferenceTestCase {
 
     @Override
@@ -55,4 +57,17 @@ public class BytesArrayTests extends AbstractBytesReferenceTestCase {
         BytesArray pbr = (BytesArray) newBytesReferenceWithOffsetOfZero(length);
         assertEquals(0, pbr.arrayOffset());
     }
+
+    public void testGetIntLE() {
+        BytesReference ref = new BytesArray(new byte[] { 0x00, 0x12, 0x10, 0x12, 0x00, 0x01 }, 1, 5);
+        assertThat(ref.getIntLE(0), equalTo(0x00121012));
+        assertThat(ref.getIntLE(1), equalTo(0x01001210));
+        Exception e = expectThrows(ArrayIndexOutOfBoundsException.class, () -> ref.getIntLE(2));
+        assertThat(e.getMessage(), equalTo("Index 3 out of bounds for length 3"));
+        /*
+         * Wait. 3!? The array has length 6. Well, the var handle stuff
+         * for arrays just subtracts three - because that's one more than
+         * the number of bytes in an int. Get it? I'm not sure I do either....
+         */
+    }
 }

+ 14 - 0
server/src/test/java/org/elasticsearch/common/bytes/CompositeBytesReferenceTests.java

@@ -149,4 +149,18 @@ public class CompositeBytesReferenceTests extends AbstractBytesReferenceTestCase
             equalTo("CompositeBytesReference cannot hold more than 2GB")
         );
     }
+
+    public void testGetIntLE() {
+        BytesReference[] refs = new BytesReference[] {
+            new BytesArray(new byte[] { 0x12, 0x10, 0x12, 0x00 }),
+            new BytesArray(new byte[] { 0x01, 0x02, 0x03, 0x04 }) };
+        BytesReference comp = CompositeBytesReference.of(refs);
+        assertThat(comp.getIntLE(0), equalTo(0x00121012));
+        assertThat(comp.getIntLE(1), equalTo(0x01001210));
+        assertThat(comp.getIntLE(2), equalTo(0x02010012));
+        assertThat(comp.getIntLE(3), equalTo(0x03020100));
+        assertThat(comp.getIntLE(4), equalTo(0x04030201));
+        Exception e = expectThrows(ArrayIndexOutOfBoundsException.class, () -> comp.getIntLE(5));
+        assertThat(e.getMessage(), equalTo("Index 4 out of bounds for length 4"));
+    }
 }