Browse Source

Avoid eagerly loading StoredFieldsReader in fetch phase (#83693)

Every time we create a hit document, we create a new SourceLookup and call
setSegmentAndDocument. This in turn creates a new StoredFieldsReader, which is
pretty expensive. In scenarios where you are retrieving a lot of hits, this can
add significant overhead. Prior to version 7.11, we did not create a new
SourceLookup per hit, so this is a performance regression.

This PR updates setSegmentAndDocument to avoid eagerly creating a
new StoredFieldsReader (through StoredFieldsReader#getMergeInstance).
Julie Tibshirani 3 years ago
parent
commit
4e28da43b9

+ 6 - 0
docs/changelog/83693.yaml

@@ -0,0 +1,6 @@
+pr: 83693
+summary: Avoid eagerly loading `StoredFieldsReader` in fetch phase
+area: Search
+type: bug
+issues:
+ - 82777

+ 29 - 0
libs/core/src/main/java/org/elasticsearch/core/MemoizedSupplier.java

@@ -0,0 +1,29 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.core;
+
+import java.util.function.Supplier;
+
+public class MemoizedSupplier<T> implements Supplier<T> {
+    private Supplier<T> supplier;
+    private T value;
+
+    public MemoizedSupplier(Supplier<T> supplier) {
+        this.supplier = supplier;
+    }
+
+    @Override
+    public T get() {
+        if (supplier != null) {
+            value = supplier.get();
+            supplier = null;
+        }
+        return value;
+    }
+}

+ 15 - 8
server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java

@@ -7,6 +7,7 @@
  */
 package org.elasticsearch.search.lookup;
 
+import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.elasticsearch.ElasticsearchParseException;
@@ -15,6 +16,7 @@ import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.core.MemoizedSupplier;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
@@ -26,13 +28,14 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Supplier;
 
 import static java.util.Collections.emptyMap;
 
 public class SourceLookup implements Map<String, Object> {
 
     private LeafReader reader;
-    CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
+    private CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
 
     private int docId = -1;
 
@@ -104,19 +107,23 @@ public class SourceLookup implements Map<String, Object> {
     }
 
     public void setSegmentAndDocument(LeafReaderContext context, int docId) {
+        // if we are called with the same document, don't invalidate source
         if (this.reader == context.reader() && this.docId == docId) {
-            // if we are called with the same document, don't invalidate source
             return;
         }
+
+        // only reset reader and fieldReader when reader changes
         if (this.reader != context.reader()) {
             this.reader = context.reader();
-            // only reset reader and fieldReader when reader changes
+
+            // All the docs to fetch are adjacent but Lucene stored fields are optimized
+            // for random access and don't optimize for sequential access - except for merging.
+            // So we do a little hack here and pretend we're going to do merges in order to
+            // get better sequential access.
             if (context.reader()instanceof SequentialStoredFieldsLeafReader lf) {
-                // All the docs to fetch are adjacent but Lucene stored fields are optimized
-                // for random access and don't optimize for sequential access - except for merging.
-                // So we do a little hack here and pretend we're going to do merges in order to
-                // get better sequential access.
-                fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
+                // Avoid eagerly loading the stored fields reader, since this can be expensive
+                Supplier<StoredFieldsReader> supplier = new MemoizedSupplier<>(lf::getSequentialStoredFieldsReader);
+                fieldReader = (d, v) -> supplier.get().visitDocument(d, v);
             } else {
                 fieldReader = context.reader()::document;
             }

+ 78 - 0
server/src/test/java/org/elasticsearch/search/lookup/SourceLookupTests.java

@@ -0,0 +1,78 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.search.lookup;
+
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.XContentFactory;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.instanceOf;
+
+public class SourceLookupTests extends ESTestCase {
+
+    public void testSetSegmentAndDocument() throws IOException {
+        try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
+            Document doc = new Document();
+            doc.add(new StringField("field", "value", Field.Store.YES));
+            iw.addDocument(doc);
+
+            try (IndexReader reader = iw.getReader()) {
+                LeafReaderContext readerContext = reader.leaves().get(0);
+
+                SourceLookup sourceLookup = new SourceLookup();
+                sourceLookup.setSegmentAndDocument(readerContext, 42);
+                sourceLookup.setSource(
+                    BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("field", "value").endObject())
+                );
+                assertNotNull(sourceLookup.internalSourceRef());
+
+                // Source should be preserved if we pass in the same reader and document
+                sourceLookup.setSegmentAndDocument(readerContext, 42);
+                assertNotNull(sourceLookup.internalSourceRef());
+
+                // Check that the stored fields reader is not loaded eagerly
+                LeafReader throwingReader = new SequentialStoredFieldsLeafReader(readerContext.reader()) {
+                    @Override
+                    protected StoredFieldsReader doGetSequentialStoredFieldsReader(StoredFieldsReader reader) {
+                        throw new UnsupportedOperationException("attempted to load stored fields reader");
+                    }
+
+                    @Override
+                    public CacheHelper getReaderCacheHelper() {
+                        return in.getReaderCacheHelper();
+                    }
+
+                    @Override
+                    public CacheHelper getCoreCacheHelper() {
+                        return in.getCoreCacheHelper();
+                    }
+                };
+
+                sourceLookup.setSegmentAndDocument(throwingReader.getContext(), 0);
+                ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, sourceLookup::source);
+                assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
+                assertThat(e.getCause().getMessage(), containsString("attempted to load stored fields reader"));
+            }
+        }
+    }
+}