浏览代码

Reduce memory usage of match all bitset (#92777)

By default, Elasticsearch uses up to 1 bit per document to store the set
of root documents when the index mapping has nested fields. This PR
introduces a special BitSet using less memory for match_all filters.
This optimization is only triggered when the index mapping has a nested
field, but that field never exists in documents.
Nhat Nguyen 2 年之前
父节点
当前提交
fa58477029

+ 5 - 0
docs/changelog/92777.yaml

@@ -0,0 +1,5 @@
+pr: 92777
+summary: Reduce memory usage of match all bitset
+area: Search
+type: enhancement
+issues: []

+ 2 - 1
server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java

@@ -43,6 +43,7 @@ import org.elasticsearch.index.mapper.NestedLookup;
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.index.shard.ShardUtils;
+import org.elasticsearch.lucene.util.BitSets;
 import org.elasticsearch.threadpool.ThreadPool;
 
 import java.io.Closeable;
@@ -104,7 +105,7 @@ public final class BitsetFilterCache
         if (s == null) {
             return null;
         } else {
-            return BitSet.of(s.iterator(), context.reader().maxDoc());
+            return BitSets.of(s.iterator(), context.reader().maxDoc());
         }
     }
 

+ 31 - 0
server/src/main/java/org/elasticsearch/lucene/util/BitSets.java

@@ -0,0 +1,31 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.lucene.util;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSet;
+
+import java.io.IOException;
+
+public final class BitSets {
+    private BitSets() {}
+
+    /**
+     * Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}. If the iterator matches all documents,
+     * then this method will wrap the returned Bitset as {@link MatchAllBitSet} to reduce memory usage.
+     */
+    public static BitSet of(DocIdSetIterator iter, int maxDocs) throws IOException {
+        final BitSet bitSet = BitSet.of(iter, maxDocs);
+        if (bitSet.cardinality() == maxDocs) {
+            return new MatchAllBitSet(maxDocs);
+        } else {
+            return bitSet;
+        }
+    }
+}

+ 90 - 0
server/src/main/java/org/elasticsearch/lucene/util/MatchAllBitSet.java

@@ -0,0 +1,90 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.lucene.util;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+
+/**
+ * An optimized implementation of {@link BitSet} that matches all documents to reduce memory usage.
+ */
+public final class MatchAllBitSet extends BitSet {
+    private static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MatchAllBitSet.class);
+
+    private final int numBits;
+
+    public MatchAllBitSet(int numBits) {
+        this.numBits = numBits;
+    }
+
+    @Override
+    public void set(int i) {
+
+    }
+
+    @Override
+    public boolean getAndSet(int i) {
+        return true;
+    }
+
+    @Override
+    public void clear(int i) {
+        assert false : "MatchAllBitSet doesn't support clear";
+        throw new UnsupportedOperationException("MatchAllBitSet doesn't support clear");
+    }
+
+    @Override
+    public void clear(int startIndex, int endIndex) {
+        assert false : "MatchAllBitSet doesn't support clear";
+        throw new UnsupportedOperationException("MatchAllBitSet doesn't support clear");
+    }
+
+    @Override
+    public int cardinality() {
+        return numBits;
+    }
+
+    @Override
+    public int approximateCardinality() {
+        return numBits;
+    }
+
+    @Override
+    public int prevSetBit(int index) {
+        return index;
+    }
+
+    @Override
+    public int nextSetBit(int index) {
+        return index;
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        return RAM_BYTES_USED;
+    }
+
+    @Override
+    public boolean get(int index) {
+        return true;
+    }
+
+    @Override
+    public int length() {
+        return numBits;
+    }
+
+    @Override
+    public void or(DocIdSetIterator iter) throws IOException {
+
+    }
+}

+ 49 - 0
server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java

@@ -11,6 +11,7 @@ package org.elasticsearch.index.cache.bitset;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -20,6 +21,7 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.join.BitSetProducer;
 import org.apache.lucene.store.ByteBuffersDirectory;
@@ -31,6 +33,7 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.IOUtils;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.shard.ShardId;
+import org.elasticsearch.lucene.util.MatchAllBitSet;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
 
@@ -39,6 +42,10 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.lessThan;
 
 public class BitSetFilterCacheTests extends ESTestCase {
 
@@ -168,6 +175,48 @@ public class BitSetFilterCacheTests extends ESTestCase {
         assertEquals(0, stats.get());
     }
 
+    public void testStats() throws IOException {
+        Directory directory = newDirectory();
+        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig());
+        int numDocs = randomIntBetween(2000, 5000);
+        for (int i = 0; i < numDocs; i++) {
+            Document d = new Document();
+            d.add(new LongPoint("f", i));
+            writer.addDocument(d);
+        }
+        writer.commit();
+        writer.forceMerge(1);
+        IndexReader reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("test", "_na_", 0));
+        assertThat(reader.leaves(), hasSize(1));
+        assertThat(reader.numDocs(), equalTo(numDocs));
+
+        final AtomicLong stats = new AtomicLong();
+        final BitsetFilterCache cache = new BitsetFilterCache(INDEX_SETTINGS, new BitsetFilterCache.Listener() {
+            @Override
+            public void onCache(ShardId shardId, Accountable accountable) {
+                stats.addAndGet(accountable.ramBytesUsed());
+            }
+
+            @Override
+            public void onRemoval(ShardId shardId, Accountable accountable) {
+                stats.addAndGet(-accountable.ramBytesUsed());
+            }
+        });
+        // match all
+        Query matchAll = randomBoolean() ? LongPoint.newRangeQuery("f", 0, numDocs + between(0, 1000)) : new MatchAllDocsQuery();
+        BitSetProducer bitSetProducer = cache.getBitSetProducer(matchAll);
+        BitSet bitset = bitSetProducer.getBitSet(reader.leaves().get(0));
+        assertThat(bitset, instanceOf(MatchAllBitSet.class));
+        long usedBytes = stats.get();
+        assertThat(usedBytes, lessThan(32L));
+        // range
+        bitSetProducer = cache.getBitSetProducer(LongPoint.newRangeQuery("f", 0, between(1000, 2000)));
+        bitSetProducer.getBitSet(reader.leaves().get(0));
+        usedBytes = stats.get() - usedBytes;
+        assertThat(usedBytes, greaterThan(256L));
+        IOUtils.close(cache, reader, writer, directory);
+    }
+
     public void testSetNullListener() {
         try {
             new BitsetFilterCache(INDEX_SETTINGS, null);

+ 58 - 0
server/src/test/java/org/elasticsearch/lucene/util/BitSetsTests.java

@@ -0,0 +1,58 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.lucene.util;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.BitSetIterator;
+import org.apache.lucene.util.FixedBitSet;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
+
+public class BitSetsTests extends ESTestCase {
+
+    public void testRandomBitset() throws Exception {
+        int maxDocs = randomIntBetween(1, 1024);
+        int numDocs = 0;
+        FixedBitSet matches = new FixedBitSet(maxDocs);
+        for (int i = 0; i < maxDocs; i++) {
+            if (numDocs < maxDocs && randomBoolean()) {
+                numDocs++;
+                matches.set(i);
+            }
+        }
+        DocIdSetIterator it = new BitSetIterator(matches, randomIntBetween(0, numDocs));
+        BitSet bitSet = BitSets.of(it, maxDocs);
+        assertThat(bitSet.cardinality(), equalTo(numDocs));
+        assertThat(bitSet.length(), equalTo(maxDocs));
+        for (int i = 0; i < maxDocs; i++) {
+            assertThat(bitSet.get(i), equalTo(matches.get(i)));
+            assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i)));
+            assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i)));
+        }
+    }
+
+    public void testMatchAllBitSet() throws Exception {
+        int maxDocs = randomIntBetween(1, 128);
+        FixedBitSet matches = new FixedBitSet(maxDocs);
+        for (int i = 0; i < maxDocs; i++) {
+            matches.set(i);
+        }
+        DocIdSetIterator it = new BitSetIterator(matches, randomNonNegativeLong());
+        BitSet bitSet = BitSets.of(it, maxDocs);
+        assertThat(bitSet, instanceOf(MatchAllBitSet.class));
+        for (int i = 0; i < maxDocs; i++) {
+            assertTrue(bitSet.get(i));
+            assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i)));
+            assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i)));
+        }
+    }
+}

+ 4 - 12
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java

@@ -14,7 +14,6 @@ import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
@@ -35,6 +34,8 @@ import org.elasticsearch.common.util.concurrent.ReleasableLock;
 import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.lucene.util.BitSets;
+import org.elasticsearch.lucene.util.MatchAllBitSet;
 import org.elasticsearch.threadpool.ThreadPool;
 
 import java.io.Closeable;
@@ -277,14 +278,14 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
         searcher.setQueryCache(null);
         final Query rewrittenQuery = searcher.rewrite(query);
         if (isEffectiveMatchAllDocsQuery(rewrittenQuery)) {
-            return new MatchAllRoleBitSet(context.reader().maxDoc());
+            return new MatchAllBitSet(context.reader().maxDoc());
         }
         final Weight weight = searcher.createWeight(rewrittenQuery, ScoreMode.COMPLETE_NO_SCORES, 1f);
         final Scorer s = weight.scorer(context);
         if (s == null) {
             return null;
         } else {
-            return bitSetFromDocIterator(s.iterator(), context.reader().maxDoc());
+            return BitSets.of(s.iterator(), context.reader().maxDoc());
         }
     }
 
@@ -380,13 +381,4 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
         });
     }
 
-    static BitSet bitSetFromDocIterator(DocIdSetIterator iter, int maxDoc) throws IOException {
-        final BitSet set = BitSet.of(iter, maxDoc);
-        if (set.cardinality() == maxDoc) {
-            return new MatchAllRoleBitSet(maxDoc);
-        } else {
-            return set;
-        }
-    }
-
 }

+ 3 - 2
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReader.java

@@ -24,6 +24,7 @@ import org.elasticsearch.common.cache.CacheBuilder;
 import org.elasticsearch.common.logging.LoggerMessageFormat;
 import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
 import org.elasticsearch.lucene.util.CombinedBitSet;
+import org.elasticsearch.lucene.util.MatchAllBitSet;
 import org.elasticsearch.transport.Transports;
 
 import java.io.IOException;
@@ -59,7 +60,7 @@ public final class DocumentSubsetReader extends SequentialStoredFieldsLeafReader
         final Bits liveDocs = reader.getLiveDocs();
         if (roleQueryBits == null) {
             return 0;
-        } else if (roleQueryBits instanceof MatchAllRoleBitSet) {
+        } else if (roleQueryBits instanceof MatchAllBitSet) {
             return reader.numDocs();
         } else if (liveDocs == null) {
             // slow
@@ -197,7 +198,7 @@ public final class DocumentSubsetReader extends SequentialStoredFieldsLeafReader
             // If we would return a <code>null</code> liveDocs then that would mean that no docs are marked as deleted,
             // but that isn't the case. No docs match with the role query and therefore all docs are marked as deleted
             return new Bits.MatchNoBits(in.maxDoc());
-        } else if (roleQueryBits instanceof MatchAllRoleBitSet) {
+        } else if (roleQueryBits instanceof MatchAllBitSet) {
             return actualLiveDocs;
         } else if (actualLiveDocs == null) {
             return roleQueryBits;

+ 0 - 76
x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/MatchAllRoleBitSet.java

@@ -1,76 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-package org.elasticsearch.xpack.core.security.authz.accesscontrol;
-
-import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.RamUsageEstimator;
-
-final class MatchAllRoleBitSet extends BitSet {
-    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MatchAllRoleBitSet.class);
-    private final int length;
-
-    MatchAllRoleBitSet(int length) {
-        this.length = length;
-    }
-
-    @Override
-    public void set(int i) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public boolean getAndSet(int i) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public void clear(int i) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public void clear(int startIndex, int endIndex) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public int cardinality() {
-        return length();
-    }
-
-    @Override
-    public int approximateCardinality() {
-        return length();
-    }
-
-    @Override
-    public int prevSetBit(int index) {
-        return index;
-    }
-
-    @Override
-    public int nextSetBit(int index) {
-        return index;
-    }
-
-    @Override
-    public long ramBytesUsed() {
-        return BASE_RAM_BYTES_USED;
-    }
-
-    @Override
-    public boolean get(int index) {
-        assert 0 <= index && index < this.length();
-        return true;
-    }
-
-    @Override
-    public int length() {
-        return length;
-    }
-}

+ 0 - 41
x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java

@@ -22,15 +22,12 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.BitSetIterator;
-import org.apache.lucene.util.FixedBitSet;
 import org.elasticsearch.client.internal.Client;
 import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.logging.Loggers;
@@ -71,7 +68,6 @@ import java.util.concurrent.atomic.AtomicReference;
 import static java.util.Collections.emptyList;
 import static java.util.Collections.emptyMap;
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
 import static org.hamcrest.Matchers.notNullValue;
@@ -533,43 +529,6 @@ public class DocumentSubsetBitsetCacheTests extends ESTestCase {
         }
     }
 
-    public void testRoleBitSets() throws Exception {
-        int maxDocs = randomIntBetween(1, 1024);
-        int numDocs = 0;
-        FixedBitSet matches = new FixedBitSet(maxDocs);
-        for (int i = 0; i < maxDocs; i++) {
-            if (numDocs < maxDocs && randomBoolean()) {
-                numDocs++;
-                matches.set(i);
-            }
-        }
-        DocIdSetIterator it = new BitSetIterator(matches, randomIntBetween(0, numDocs));
-        BitSet bitSet = DocumentSubsetBitsetCache.bitSetFromDocIterator(it, maxDocs);
-        assertThat(bitSet.cardinality(), equalTo(numDocs));
-        assertThat(bitSet.length(), equalTo(maxDocs));
-        for (int i = 0; i < maxDocs; i++) {
-            assertThat(bitSet.get(i), equalTo(matches.get(i)));
-            assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i)));
-            assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i)));
-        }
-    }
-
-    public void testMatchAllRoleBitSet() throws Exception {
-        int maxDocs = randomIntBetween(1, 128);
-        FixedBitSet matches = new FixedBitSet(maxDocs);
-        for (int i = 0; i < maxDocs; i++) {
-            matches.set(i);
-        }
-        DocIdSetIterator it = new BitSetIterator(matches, randomNonNegativeLong());
-        BitSet bitSet = DocumentSubsetBitsetCache.bitSetFromDocIterator(it, maxDocs);
-        assertThat(bitSet, instanceOf(MatchAllRoleBitSet.class));
-        for (int i = 0; i < maxDocs; i++) {
-            assertTrue(bitSet.get(i));
-            assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i)));
-            assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i)));
-        }
-    }
-
     public void testEquivalentMatchAllDocsQuery() {
         assertTrue(DocumentSubsetBitsetCache.isEffectiveMatchAllDocsQuery(new MatchAllDocsQuery()));
         assertTrue(DocumentSubsetBitsetCache.isEffectiveMatchAllDocsQuery(new ConstantScoreQuery(new MatchAllDocsQuery())));