Jelajahi Sumber

[8.19] Bypass MMap arena grouping as this has caused issues with too many regions being mapped (#135012) (#135130)

* Bypass MMap arena grouping as this has caused issues with too many regions being mapped (#135012)

There is a JDK issue where closing sharedArenas from many threads can
significantly harm performance.

This ref-counting of shared arenas was designed as a way to get around
this performance issue. However, we have noticed a significant increase
in leaks and issues with mmap regions since this change.

https://bugs.openjdk.org/browse/JDK-8335480 should have helped the
performance impact of closing shared arenas (though possibly not fully
mitigated it).

I am proposing we turn off the grouping, as it appears (at least to me)
not to be worth it.

I am willing to back down if we think other fixes should be done.

I also suggest this gets backported to 9.1, 8.19, and is merged into 9.2

(cherry picked from commit 2672cd0088a3b4f13be84b0c150912de8029f9cb)

* [CI] Auto commit changes from spotless

* fixing compilation

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Benjamin Trent 2 minggu lalu
induk
melakukan
ccd582fe37

+ 5 - 0
distribution/src/config/jvm.options

@@ -62,6 +62,11 @@
 23:-XX:CompileCommand=dontinline,java/lang/invoke/MethodHandle.setAsTypeCache
 23:-XX:CompileCommand=dontinline,java/lang/invoke/MethodHandle.asTypeUncached
 
+# Lucene provides a mechanism for shared mmapped arenas to be referenced between multiple threads.
+# This is to get around potential performance issues when closing shared arenas on many threads.
+# The value is set to 1 by default, which disables this feature.
+-Dorg.apache.lucene.store.MMapDirectory.sharedArenaMaxPermits=1
+
 ## heap dumps
 
 # generate a heap dump when an allocation from the Java heap fails; heap dumps

+ 6 - 0
docs/changelog/135012.yaml

@@ -0,0 +1,6 @@
+pr: 135012
+summary: Bypass MMap arena grouping as this has caused issues with too many regions
+  being mapped
+area: "Engine"
+type: bug
+issues: []

+ 2 - 5
plugins/store-smb/src/main/java/org/elasticsearch/index/store/smb/SmbMmapFsDirectoryFactory.java

@@ -24,12 +24,9 @@ public final class SmbMmapFsDirectoryFactory extends FsDirectoryFactory {
 
     @Override
     protected Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException {
+        MMapDirectory mMapDirectory = adjustSharedArenaGrouping(new MMapDirectory(location, lockFactory));
         return new SmbDirectoryWrapper(
-            setPreload(
-                new MMapDirectory(location, lockFactory),
-                lockFactory,
-                new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_PRE_LOAD_SETTING))
-            )
+            setPreload(mMapDirectory, lockFactory, new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_PRE_LOAD_SETTING)))
         );
     }
 }

+ 29 - 1
server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java

@@ -27,6 +27,8 @@ import org.elasticsearch.core.IOUtils;
 import org.elasticsearch.index.IndexModule;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.shard.ShardPath;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
 import org.elasticsearch.plugins.IndexStorePlugin;
 
 import java.io.IOException;
@@ -35,7 +37,24 @@ import java.nio.file.Path;
 import java.util.HashSet;
 import java.util.Set;
 
+import static org.apache.lucene.store.MMapDirectory.SHARED_ARENA_MAX_PERMITS_SYSPROP;
+
 public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory {
+    private static final Logger Log = LogManager.getLogger(FsDirectoryFactory.class);
+
+    private static final int sharedArenaMaxPermits;
+    static {
+        String prop = System.getProperty(SHARED_ARENA_MAX_PERMITS_SYSPROP);
+        int value = 1;
+        if (prop != null) {
+            try {
+                value = Integer.parseInt(prop); // ensure it's a valid integer
+            } catch (NumberFormatException e) {
+                Log.warn(() -> "unable to parse system property [" + SHARED_ARENA_MAX_PERMITS_SYSPROP + "] with value [" + prop + "]", e);
+            }
+        }
+        sharedArenaMaxPermits = value; // default to 1
+    }
 
     private static final FeatureFlag MADV_RANDOM_FEATURE_FLAG = new FeatureFlag("madv_random");
 
@@ -70,6 +89,7 @@ public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory {
                 // Use Lucene defaults
                 final FSDirectory primaryDirectory = FSDirectory.open(location, lockFactory);
                 if (primaryDirectory instanceof MMapDirectory mMapDirectory) {
+                    mMapDirectory = adjustSharedArenaGrouping(mMapDirectory);
                     Directory dir = new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions));
                     if (MADV_RANDOM_FEATURE_FLAG.isEnabled() == false) {
                         dir = disableRandomAdvice(dir);
@@ -79,7 +99,8 @@ public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory {
                     return primaryDirectory;
                 }
             case MMAPFS:
-                Directory dir = setPreload(new MMapDirectory(location, lockFactory), lockFactory, preLoadExtensions);
+                MMapDirectory mMapDirectory = adjustSharedArenaGrouping(new MMapDirectory(location, lockFactory));
+                Directory dir = setPreload(mMapDirectory, lockFactory, preLoadExtensions);
                 if (MADV_RANDOM_FEATURE_FLAG.isEnabled() == false) {
                     dir = disableRandomAdvice(dir);
                 }
@@ -105,6 +126,13 @@ public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory {
         return mMapDirectory;
     }
 
+    public MMapDirectory adjustSharedArenaGrouping(MMapDirectory mMapDirectory) {
+        if (sharedArenaMaxPermits <= 1) {
+            mMapDirectory.setGroupingFunction(MMapDirectory.NO_GROUPING);
+        }
+        return mMapDirectory;
+    }
+
     /**
      * Return a {@link FilterDirectory} around the provided {@link Directory} that forcefully disables {@link IOContext#RANDOM random
      * access}.