
Capture thread dump on ShardLockObtainFailedException (#93458)

We sometimes see a `ShardLockObtainFailedException` when a shard fails
to shut down as quickly as we expect, often because a node left and
rejoined the cluster. Sometimes this is because the shard is held open by
ongoing scrolls or PITs, but other times it may be because the shutdown
process itself is too slow. With this commit we add the ability to
capture and log a thread dump at the time of the failure, giving us more
information about where the shutdown process might be running slowly.

Relates #93226
David Turner, 2 years ago
parent commit 4c68382065
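
For context, a thread dump here means a snapshot of the stack traces of all live JVM threads. The change below captures it with Elasticsearch's own `HotThreads` helper; purely as an illustrative sketch (not the mechanism this commit uses), a similar snapshot can be taken with the standard JDK `ThreadMXBean` API:

[source,java]
----
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;

public class ThreadDumpSketch {
    public static String dumpAllThreads() {
        ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
        StringBuilder dump = new StringBuilder();
        // collect stack traces and lock information for every live thread
        for (ThreadInfo info : threadMXBean.dumpAllThreads(true, true)) {
            dump.append(info); // ThreadInfo#toString renders a readable (possibly truncated) trace
        }
        return dump.toString();
    }
}
----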

+ 36 - 0
docs/reference/modules/discovery/fault-detection.asciidoc

@@ -373,3 +373,39 @@ checks are `transport_worker` and `cluster_coordination` threads, for which
 there should never be a long wait. There may also be evidence of long waits for
 threads in the {es} logs. Refer to <<modules-network-threading-model>> for more
 information.
+
+===== Diagnosing `ShardLockObtainFailedException` failures
+
+If a node leaves and rejoins the cluster then {es} will usually shut down and
+re-initialize its shards. If the shards do not shut down quickly enough then
+{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`.
+
+To gather more information about the reason for shards shutting down slowly,
+configure the following logger:
+
+[source,yaml]
+----
+logger.org.elasticsearch.env.NodeEnvironment: DEBUG
+----
+
+When this logger is enabled, {es} will attempt to run the
+<<cluster-nodes-hot-threads>> API whenever it encounters a
+`ShardLockObtainFailedException`. The results are compressed, encoded, and
+split into chunks to avoid truncation:
+
+[source,text]
+----
+[DEBUG][o.e.e.NodeEnvironment    ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x...
+[DEBUG][o.e.e.NodeEnvironment    ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV...
+[DEBUG][o.e.e.NodeEnvironment    ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+...
+[DEBUG][o.e.e.NodeEnvironment    ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN...
+[DEBUG][o.e.e.NodeEnvironment    ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines)
+----
+
+To reconstruct the output, base64-decode the data and decompress it using
+`gzip`. For instance, on Unix-like systems:
+
+[source,sh]
+----
+cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress
+----
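
As an alternative to the shell pipeline documented above, the reconstruction can also be done programmatically. The following is a minimal, hypothetical Java sketch (the class and method names are illustrative, not part of Elasticsearch); it assumes you have already concatenated the base64 payloads from the `[part N]` log lines into a single string:

[source,java]
----
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPInputStream;

public class ShardLockLogDecoder {
    public static String decode(String base64Payload) throws IOException {
        // undo the base64 encoding (the MIME decoder tolerates stray whitespace and newlines)
        byte[] compressed = Base64.getMimeDecoder().decode(base64Payload);
        // then gunzip the result to recover the original hot-threads report
        try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
            return new String(in.readAllBytes(), StandardCharsets.UTF_8);
        }
    }
}
----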

+ 1 - 0
server/src/main/java/org/elasticsearch/common/ReferenceDocs.java

@@ -33,6 +33,7 @@ public enum ReferenceDocs {
     DISCOVERY_TROUBLESHOOTING,
     UNSTABLE_CLUSTER_TROUBLESHOOTING,
     LAGGING_NODE_TROUBLESHOOTING,
+    SHARD_LOCK_TROUBLESHOOTING,
     CONCURRENT_REPOSITORY_WRITERS,
     ARCHIVE_INDICES,
     // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner

+ 46 - 11
server/src/main/java/org/elasticsearch/env/NodeEnvironment.java

@@ -8,6 +8,7 @@
 
 package org.elasticsearch.env;
 
+import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.util.Strings;
@@ -25,8 +26,10 @@ import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodeRole;
 import org.elasticsearch.common.Randomness;
+import org.elasticsearch.common.ReferenceDocs;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.io.FileSystemUtils;
+import org.elasticsearch.common.logging.ChunkedLoggingStream;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Setting.Property;
 import org.elasticsearch.common.settings.Settings;
@@ -49,12 +52,15 @@ import org.elasticsearch.index.shard.ShardPath;
 import org.elasticsearch.index.store.FsDirectoryFactory;
 import org.elasticsearch.monitor.fs.FsInfo;
 import org.elasticsearch.monitor.fs.FsProbe;
+import org.elasticsearch.monitor.jvm.HotThreads;
 import org.elasticsearch.monitor.jvm.JvmInfo;
 import org.elasticsearch.xcontent.NamedXContentRegistry;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.AtomicMoveNotSupportedException;
 import java.nio.file.DirectoryStream;
 import java.nio.file.FileStore;
@@ -926,6 +932,38 @@ public final class NodeEnvironment implements Closeable {
         }
     }
 
+    // throttle the hot-threads calls: no more than one per minute
+    private final Semaphore shardLockHotThreadsPermit = new Semaphore(1);
+    private long nextShardLockHotThreadsNanos = Long.MIN_VALUE;
+
+    private void maybeLogThreadDump(ShardId shardId, String message) {
+        if (logger.isDebugEnabled() == false) {
+            return;
+        }
+
+        final var prefix = format("hot threads while failing to obtain shard lock for %s: %s", shardId, message);
+        if (shardLockHotThreadsPermit.tryAcquire()) {
+            try {
+                final var now = System.nanoTime();
+                if (now <= nextShardLockHotThreadsNanos) {
+                    return;
+                }
+                nextShardLockHotThreadsNanos = now + TimeUnit.SECONDS.toNanos(60);
+                final var hotThreads = new HotThreads().busiestThreads(500).ignoreIdleThreads(false).detect();
+                try (
+                    var stream = ChunkedLoggingStream.create(logger, Level.DEBUG, prefix, ReferenceDocs.SHARD_LOCK_TROUBLESHOOTING);
+                    var writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8)
+                ) {
+                    writer.write(hotThreads);
+                }
+            } catch (Exception e) {
+                logger.error(format("could not obtain %s", prefix), e);
+            } finally {
+                shardLockHotThreadsPermit.release();
+            }
+        }
+    }
+
     private final class InternalShardLock {
         /*
          * This class holds a mutex for exclusive access and timeout / wait semantics
@@ -975,18 +1013,15 @@ public final class NodeEnvironment implements Closeable {
                     setDetails(details);
                 } else {
                     final Tuple<Long, String> lockDetails = this.lockDetails; // single volatile read
-                    throw new ShardLockObtainFailedException(
-                        shardId,
-                        "obtaining shard lock for ["
-                            + details
-                            + "] timed out after ["
-                            + timeoutInMillis
-                            + "ms], lock already held for ["
-                            + lockDetails.v2()
-                            + "] with age ["
-                            + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1())
-                            + "ms]"
+                    final var message = format(
+                        "obtaining shard lock for [%s] timed out after [%dms], lock already held for [%s] with age [%dms]",
+                        details,
+                        timeoutInMillis,
+                        lockDetails.v2(),
+                        TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1())
                     );
+                    maybeLogThreadDump(shardId, message);
+                    throw new ShardLockObtainFailedException(shardId, message);
                 }
             } catch (InterruptedException e) {
                 Thread.currentThread().interrupt();
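
`ChunkedLoggingStream` is what compresses, encodes, and splits the captured output across the `[part N]` log lines shown in the docs above. The following is only a conceptual sketch of that idea using plain JDK classes, not the actual `ChunkedLoggingStream` implementation: gzip-compress the text, base64-encode it, and slice the result into log-line-sized chunks.

[source,java]
----
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.zip.GZIPOutputStream;

class ChunkedEncodingSketch {
    static List<String> encode(String text, int chunkSize) throws IOException {
        // gzip-compress the raw text
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        try (GZIPOutputStream gzip = new GZIPOutputStream(compressed)) {
            gzip.write(text.getBytes(StandardCharsets.UTF_8));
        }
        // base64-encode the compressed bytes and split them into fixed-size chunks,
        // each of which would be emitted as its own "[part N]" log line
        String encoded = Base64.getEncoder().encodeToString(compressed.toByteArray());
        List<String> chunks = new ArrayList<>();
        for (int i = 0; i < encoded.length(); i += chunkSize) {
            chunks.add(encoded.substring(i, Math.min(encoded.length(), i + chunkSize)));
        }
        return chunks;
    }
}
----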

+ 1 - 0
server/src/main/resources/org/elasticsearch/common/reference-docs-links.json

@@ -3,6 +3,7 @@
   "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html",
   "UNSTABLE_CLUSTER_TROUBLESHOOTING": "cluster-fault-detection.html#cluster-fault-detection-troubleshooting",
   "LAGGING_NODE_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_lagging_nodes",
+  "SHARD_LOCK_TROUBLESHOOTING": "cluster-fault-detection.html#_diagnosing_shardlockobtainfailedexception_failures",
   "CONCURRENT_REPOSITORY_WRITERS": "add-repository.html",
   "ARCHIVE_INDICES": "archive-indices.html"
 }

+ 54 - 32
server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java

@@ -7,6 +7,7 @@
  */
 package org.elasticsearch.env;
 
+import org.apache.logging.log4j.Level;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
@@ -34,7 +35,9 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.test.IndexSettingsModule;
+import org.elasticsearch.test.MockLogAppender;
 import org.elasticsearch.test.NodeRoles;
+import org.elasticsearch.test.junit.annotations.TestLogging;
 
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -114,44 +117,63 @@ public class NodeEnvironmentTests extends ESTestCase {
         }
     }
 
+    // using a literal string here because the logger is mentioned in the docs, and therefore must only be changed with care
+    private static final String NODE_ENVIRONMENT_LOGGER_NAME = "org.elasticsearch.env.NodeEnvironment";
+
+    @TestLogging(reason = "test includes assertions about DEBUG logging", value = NODE_ENVIRONMENT_LOGGER_NAME + ":DEBUG")
     public void testShardLock() throws Exception {
-        final NodeEnvironment env = newNodeEnvironment();
+        try (var env = newNodeEnvironment()) {
 
-        Index index = new Index("foo", "fooUUID");
-        ShardLock fooLock = env.shardLock(new ShardId(index, 0), "1");
-        assertEquals(new ShardId(index, 0), fooLock.getShardId());
+            Index index = new Index("foo", "fooUUID");
 
-        try {
-            env.shardLock(new ShardId(index, 0), "2");
-            fail("shard is locked");
-        } catch (ShardLockObtainFailedException ex) {
-            // expected
-        }
-        for (Path path : env.indexPaths(index)) {
-            Files.createDirectories(path.resolve("0"));
-            Files.createDirectories(path.resolve("1"));
-        }
-        try {
-            env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10));
-            fail("shard 0 is locked");
-        } catch (ShardLockObtainFailedException ex) {
-            // expected
-        }
+            var appender = new MockLogAppender();
+            appender.addExpectation(
+                new MockLogAppender.SeenEventExpectation(
+                    "hot threads logging",
+                    NODE_ENVIRONMENT_LOGGER_NAME,
+                    Level.DEBUG,
+                    "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [2] timed out after *"
+                )
+            );
+            appender.addExpectation(
+                new MockLogAppender.UnseenEventExpectation(
+                    "second attempt should be suppressed due to throttling",
+                    NODE_ENVIRONMENT_LOGGER_NAME,
+                    Level.DEBUG,
+                    "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [3] timed out after *"
+                )
+            );
 
-        fooLock.close();
-        // can lock again?
-        env.shardLock(new ShardId(index, 0), "4").close();
+            try (var ignored = appender.capturing(NodeEnvironment.class); var lock = env.shardLock(new ShardId(index, 0), "1")) {
+                assertEquals(new ShardId(index, 0), lock.getShardId());
 
-        List<ShardLock> locks = env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10));
-        try {
-            env.shardLock(new ShardId(index, 0), "6");
-            fail("shard is locked");
-        } catch (ShardLockObtainFailedException ex) {
-            // expected
+                expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "2"));
+
+                for (Path path : env.indexPaths(index)) {
+                    Files.createDirectories(path.resolve("0"));
+                    Files.createDirectories(path.resolve("1"));
+                }
+                expectThrows(
+                    ShardLockObtainFailedException.class,
+                    () -> env.lockAllForIndex(index, idxSettings, "3", randomIntBetween(0, 10))
+                );
+
+                appender.assertAllExpectationsMatched();
+            }
+
+            // can lock again?
+            env.shardLock(new ShardId(index, 0), "4").close();
+
+            List<ShardLock> locks = new ArrayList<>();
+            try {
+                locks.addAll(env.lockAllForIndex(index, idxSettings, "5", randomIntBetween(0, 10)));
+                expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "6"));
+            } finally {
+                IOUtils.close(locks);
+            }
+
+            assertTrue("LockedShards: " + env.lockedShards(), env.lockedShards().isEmpty());
         }
-        IOUtils.close(locks);
-        assertTrue("LockedShards: " + env.lockedShards(), env.lockedShards().isEmpty());
-        env.close();
     }
 
     public void testAvailableIndexFolders() throws Exception {