|
@@ -42,29 +42,51 @@ import java.util.Set;
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
import java.util.concurrent.ExecutionException;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
+import java.util.concurrent.atomic.AtomicLong;
|
|
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|
|
|
|
|
/**
|
|
|
* This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}.
|
|
|
* It is bounded by memory size and access time.
|
|
|
*
|
|
|
+ * DLS uses {@link BitSet} instances to track which documents should be visible to the user ("live") and which should not ("dead").
|
|
|
+ * This means that there is a bit for each document in a Lucene index (ES shard).
|
|
|
+ * Consequently, an index with 10 million document will use more than 1Mb of bitset memory for every unique DLS query, and an index
|
|
|
+ * with 1 billion documents will use more than 100Mb of memory per DLS query.
|
|
|
+ * Because DLS supports templating queries based on user metadata, there may be many distinct queries in use for each index, even if
|
|
|
+ * there is only a single active role.
|
|
|
+ *
|
|
|
+ * The primary benefit of the cache is to avoid recalculating the "live docs" (visible documents) when a user performs multiple
|
|
|
+ * consecutive queries across one or more large indices. Given the memory examples above, the cache is only useful if it can hold at
|
|
|
+ * least 1 large (100Mb or more ) {@code BitSet} during a user's active session, and ideally should be capable of support multiple
|
|
|
+ * simultaneous users with distinct DLS queries.
|
|
|
+ *
|
|
|
+ * For this reason the default memory usage (weight) for the cache set to 10% of JVM heap ({@link #CACHE_SIZE_SETTING}), so that it
|
|
|
+ * automatically scales with the size of the Elasticsearch deployment, and can provide benefit to most use cases without needing
|
|
|
+ * customisation. On a 32Gb heap, a 10% cache would be 3.2Gb which is large enough to store BitSets representing 25 billion docs.
|
|
|
+ *
|
|
|
+ * However, because queries can be templated by user metadata and that metadata can change frequently, it is common for the
|
|
|
+ * effetively lifetime of a single DLS query to be relatively short. We do not want to sacrifice 10% of heap to a cache that is storing
|
|
|
+ * BitSets that are not longer needed, so we set the TTL on this cache to be 2 hours ({@link #CACHE_TTL_SETTING}). This time has been
|
|
|
+ * chosen so that it will retain BitSets that are in active use during a user's session, but not be an ongoing drain on memory.
|
|
|
+ *
|
|
|
* @see org.elasticsearch.index.cache.bitset.BitsetFilterCache
|
|
|
*/
|
|
|
public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListener, Closeable, Accountable {
|
|
|
|
|
|
/**
|
|
|
- * The TTL defaults to 1 week. We depend on the {@code max_bytes} setting to keep the cache to a sensible size, by evicting LRU
|
|
|
- * entries, however there is benefit in reclaiming memory by expiring bitsets that have not be used for some period of time.
|
|
|
- * Because {@link org.elasticsearch.xpack.core.security.authz.permission.IndicesPermission.Group#query} can be templated, it is
|
|
|
- * not uncommon for a query to only be used for a relatively short period of time (e.g. because a user's metadata changed, or because
|
|
|
- * that user is an infrequent user of Elasticsearch). This access time expiry helps free up memory in those circumstances even if the
|
|
|
- * cache is never filled.
|
|
|
+ * The TTL defaults to 2 hours. We default to a large cache size ({@link #CACHE_SIZE_SETTING}), and aggressively
|
|
|
+ * expire unused entries so that the cache does not hold on to memory unnecessarily.
|
|
|
*/
|
|
|
static final Setting<TimeValue> CACHE_TTL_SETTING =
|
|
|
- Setting.timeSetting("xpack.security.dls.bitset.cache.ttl", TimeValue.timeValueHours(24 * 7), Property.NodeScope);
|
|
|
+ Setting.timeSetting("xpack.security.dls.bitset.cache.ttl", TimeValue.timeValueHours(2), Property.NodeScope);
|
|
|
|
|
|
- static final Setting<ByteSizeValue> CACHE_SIZE_SETTING = Setting.byteSizeSetting("xpack.security.dls.bitset.cache.size",
|
|
|
- new ByteSizeValue(50, ByteSizeUnit.MB), Property.NodeScope);
|
|
|
+ /**
|
|
|
+ * The size defaults to 10% of heap so that it automatically scales up with larger node size
|
|
|
+ */
|
|
|
+ static final Setting<ByteSizeValue> CACHE_SIZE_SETTING = Setting.memorySizeSetting("xpack.security.dls.bitset.cache.size",
|
|
|
+ "10%", Property.NodeScope);
|
|
|
|
|
|
private static final BitSet NULL_MARKER = new FixedBitSet(0);
|
|
|
|
|
@@ -82,8 +104,10 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
|
|
|
private final ReleasableLock cacheModificationLock;
|
|
|
private final ExecutorService cleanupExecutor;
|
|
|
|
|
|
+ private final long maxWeightBytes;
|
|
|
private final Cache<BitsetCacheKey, BitSet> bitsetCache;
|
|
|
private final Map<IndexReader.CacheKey, Set<BitsetCacheKey>> keysByIndex;
|
|
|
+ private final AtomicLong cacheFullWarningTime;
|
|
|
|
|
|
public DocumentSubsetBitsetCache(Settings settings, ThreadPool threadPool) {
|
|
|
this(settings, threadPool.executor(ThreadPool.Names.GENERIC));
|
|
@@ -103,15 +127,16 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
|
|
|
this.cleanupExecutor = cleanupExecutor;
|
|
|
|
|
|
final TimeValue ttl = CACHE_TTL_SETTING.get(settings);
|
|
|
- final ByteSizeValue size = CACHE_SIZE_SETTING.get(settings);
|
|
|
+ this.maxWeightBytes = CACHE_SIZE_SETTING.get(settings).getBytes();
|
|
|
this.bitsetCache = CacheBuilder.<BitsetCacheKey, BitSet>builder()
|
|
|
.setExpireAfterAccess(ttl)
|
|
|
- .setMaximumWeight(size.getBytes())
|
|
|
+ .setMaximumWeight(maxWeightBytes)
|
|
|
.weigher((key, bitSet) -> bitSet == NULL_MARKER ? 0 : bitSet.ramBytesUsed())
|
|
|
.removalListener(this::onCacheEviction)
|
|
|
.build();
|
|
|
|
|
|
this.keysByIndex = new ConcurrentHashMap<>();
|
|
|
+ this.cacheFullWarningTime = new AtomicLong(0);
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -211,7 +236,17 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
|
|
|
// A cache loader is not allowed to return null, return a marker object instead.
|
|
|
return NULL_MARKER;
|
|
|
} else {
|
|
|
- return BitSet.of(s.iterator(), context.reader().maxDoc());
|
|
|
+ final BitSet bs = BitSet.of(s.iterator(), context.reader().maxDoc());
|
|
|
+ final long bitSetBytes = bs.ramBytesUsed();
|
|
|
+ if (bitSetBytes > this.maxWeightBytes) {
|
|
|
+ logger.warn("built a DLS BitSet that uses [{}] bytes; the DLS BitSet cache has a maximum size of [{}] bytes;" +
|
|
|
+ " this object cannot be cached and will need to be rebuilt for each use;" +
|
|
|
+ " consider increasing the value of [{}]",
|
|
|
+ bitSetBytes, maxWeightBytes, CACHE_SIZE_SETTING.getKey());
|
|
|
+ } else if (bitSetBytes + bitsetCache.weight() > maxWeightBytes) {
|
|
|
+ maybeLogCacheFullWarning();
|
|
|
+ }
|
|
|
+ return bs;
|
|
|
}
|
|
|
});
|
|
|
if (bitSet == NULL_MARKER) {
|
|
@@ -222,6 +257,20 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ private void maybeLogCacheFullWarning() {
|
|
|
+ final long nextLogTime = cacheFullWarningTime.get();
|
|
|
+ final long now = System.currentTimeMillis();
|
|
|
+ if (nextLogTime > now) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ final long nextCheck = now + TimeUnit.MINUTES.toMillis(30);
|
|
|
+ if (cacheFullWarningTime.compareAndSet(nextLogTime, nextCheck)) {
|
|
|
+ logger.info(
|
|
|
+ "the Document Level Security BitSet cache is full which may impact performance; consider increasing the value of [{}]",
|
|
|
+ CACHE_SIZE_SETTING.getKey());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public static List<Setting<?>> getSettings() {
|
|
|
return List.of(CACHE_TTL_SETTING, CACHE_SIZE_SETTING);
|
|
|
}
|