Bläddra i källkod

Add a mechanism for zero-IO creation of small blobstore backed `IndexInput` (#106794)

There are many scenarios where we create very small slices (smaller
than the buffer size) from inputs that already have these bytes buffered
(BKDReader#packedIndex, for example).
We can save considerable memory, as well as potential IO to disk or,
worse yet, to the blob store, by just slicing the buffer when possible.
Outside of the case of slicing and never reading from the slice,
this should always save memory.
Armin Braun 1 år sedan
förälder
incheckning
9426d8bc1d

+ 24 - 0
x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/common/BlobCacheBufferedIndexInput.java

@@ -11,6 +11,8 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.elasticsearch.common.io.stream.ByteBufferStreamInput;
+import org.elasticsearch.common.lucene.store.ByteArrayIndexInput;
+import org.elasticsearch.core.Nullable;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -319,6 +321,28 @@ public abstract class BlobCacheBufferedIndexInput extends IndexInput implements
         }
     }
 
+    /**
+     * Tries to slice {@code sliceLength} bytes starting at {@code sliceOffset} out of the bytes currently held in this input's buffer.
+     * If the buffer fully contains that range, its bytes are copied to a new byte array (never larger than the buffer) and an array backed
+     * input is returned, which is potentially more memory efficient than a new {@link BlobCacheBufferedIndexInput} slice and prevents
+     * further reads from the input wrapped by this instance. A negative or larger-than-buffer length yields {@code null} instead.
+     *
+     * @param name slice name
+     * @param sliceOffset slice offset
+     * @param sliceLength slice length
+     * @return a byte array backed index input if slicing directly from the buffer worked or {@code null} otherwise
+     */
+    @Nullable
+    protected final IndexInput trySliceBuffer(String name, long sliceOffset, long sliceLength) {
+        final boolean fitsBuffer = sliceLength >= 0 && sliceLength <= buffer.limit(); // callers may not have validated bounds yet
+        if (fitsBuffer && ByteRange.of(bufferStart, bufferStart + buffer.limit()).contains(sliceOffset, sliceOffset + sliceLength)) {
+            final byte[] bytes = new byte[(int) sliceLength];
+            buffer.get(Math.toIntExact(sliceOffset - bufferStart), bytes, 0, bytes.length);
+            return new ByteArrayIndexInput(name, bytes);
+        }
+        return null;
+    }
+
     /**
      * Expert: implements seek. Sets current position in this file, where the next {@link
      * #readInternal(ByteBuffer)} will occur.

+ 4 - 0
x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/DirectBlobContainerIndexInput.java

@@ -300,6 +300,10 @@ public final class DirectBlobContainerIndexInput extends BlobCacheBufferedIndexI
     @Override
     public IndexInput slice(String sliceName, long offset, long length) throws IOException {
         BlobCacheUtils.ensureSlice(sliceName, offset, length, this);
+        var bufferSlice = trySliceBuffer(sliceName, offset, length);
+        if (bufferSlice != null) {
+            return bufferSlice;
+        }
         final DirectBlobContainerIndexInput slice = new DirectBlobContainerIndexInput(
             sliceName,
             blobContainer,

+ 4 - 0
x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/MetadataCachingIndexInput.java

@@ -674,6 +674,10 @@ public abstract class MetadataCachingIndexInput extends BlobCacheBufferedIndexIn
 
     @Override
     public IndexInput slice(String sliceName, long sliceOffset, long sliceLength) {
+        var bufferSlice = trySliceBuffer(sliceName, sliceOffset, sliceLength);
+        if (bufferSlice != null) {
+            return bufferSlice;
+        }
         BlobCacheUtils.ensureSlice(sliceName, sliceOffset, sliceLength, this);
 
         // Are we creating a slice from a CFS file?