Kaynağa Gözat

Make PeerFinder log messages happier (#83222)

Since #73128 a sufficiently old `PeerFinder` will report all exceptions
encountered during discovery to help diagnose cluster formation
problems. We throw exceptions on genuine connection failures, but we
also throw exceptions if the discovered node is the local node or is
master-ineligible because these nodes are no use in discovery. We report
all such exceptions as failures:

    [instance-0000000001]
        address [10.0.0.1:12345], node [null], requesting [false]
        connection failed:
            [instance-0000000002][10.0.0.1:12345]
            non-master-eligible node found

Experience shows that users often have master-ineligible nodes in their
discovery config so will see these messages frequently if the cluster
cannot form, and may interpret the `connection failed` as the source of
the problems even though they're benign.

This commit adjusts the language in these messages to be more balanced,
replacing `connection failed` with `discovery result`, including the
phrase `successfully discovered` in the exception messsage, and giving
advice on how to suppress the message.
David Turner 3 yıl önce
ebeveyn
işleme
f380ada083

+ 5 - 0
docs/changelog/83222.yaml

@@ -0,0 +1,5 @@
+pr: 83222
+summary: Make `PeerFinder` log messages happier
+area: Cluster Coordination
+type: enhancement
+issues: []

+ 28 - 2
server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java

@@ -30,6 +30,8 @@ import org.elasticsearch.transport.ConnectionProfile;
 import org.elasticsearch.transport.TransportRequestOptions.Type;
 import org.elasticsearch.transport.TransportService;
 
+import java.util.Locale;
+
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.emptySet;
 
@@ -110,9 +112,33 @@ public class HandshakingTransportAddressConnector implements TransportAddressCon
                                     IOUtils.closeWhileHandlingException(connection);
 
                                     if (remoteNode.equals(transportService.getLocalNode())) {
-                                        listener.onFailure(new ConnectTransportException(remoteNode, "local node found"));
+                                        listener.onFailure(
+                                            new ConnectTransportException(
+                                                remoteNode,
+                                                String.format(
+                                                    Locale.ROOT,
+                                                    "successfully discovered local node %s at [%s]",
+                                                    remoteNode.descriptionWithoutAttributes(),
+                                                    transportAddress
+                                                )
+                                            )
+                                        );
                                     } else if (remoteNode.isMasterNode() == false) {
-                                        listener.onFailure(new ConnectTransportException(remoteNode, "non-master-eligible node found"));
+                                        listener.onFailure(
+                                            new ConnectTransportException(
+                                                remoteNode,
+                                                String.format(
+                                                    Locale.ROOT,
+                                                    """
+                                                        successfully discovered master-ineligible node %s at [%s]; to suppress this \
+                                                        message, remove address [%s] from your discovery configuration or ensure that \
+                                                        traffic to this address is routed only to master-eligible nodes""",
+                                                    remoteNode.descriptionWithoutAttributes(),
+                                                    transportAddress,
+                                                    transportAddress
+                                                )
+                                            )
+                                        );
                                     } else {
                                         transportService.connectToNode(remoteNode, new ActionListener<>() {
                                             @Override

+ 3 - 3
server/src/main/java/org/elasticsearch/discovery/PeerFinder.java

@@ -413,7 +413,7 @@ public abstract class PeerFinder {
                     if (verboseFailureLogging) {
                         if (logger.isDebugEnabled()) {
                             // log message at level WARN, but since DEBUG logging is enabled we include the full stack trace
-                            logger.warn(new ParameterizedMessage("{} connection failed", Peer.this), e);
+                            logger.warn(new ParameterizedMessage("{} discovery result", Peer.this), e);
                         } else {
                             final StringBuilder messageBuilder = new StringBuilder();
                             Throwable cause = e;
@@ -424,10 +424,10 @@ public abstract class PeerFinder {
                             final String message = messageBuilder.length() < 1024
                                 ? messageBuilder.toString()
                                 : (messageBuilder.substring(0, 1023) + "...");
-                            logger.warn("{} connection failed{}", Peer.this, message);
+                            logger.warn("{} discovery result{}", Peer.this, message);
                         }
                     } else {
-                        logger.debug(new ParameterizedMessage("{} connection failed", Peer.this), e);
+                        logger.debug(new ParameterizedMessage("{} discovery result", Peer.this), e);
                     }
                     synchronized (mutex) {
                         assert probeConnectionResult.get() == null

+ 24 - 7
server/src/test/java/org/elasticsearch/discovery/HandshakingTransportAddressConnectorTests.java

@@ -46,6 +46,8 @@ import static org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING;
 import static org.elasticsearch.discovery.HandshakingTransportAddressConnector.PROBE_CONNECT_TIMEOUT_SETTING;
 import static org.elasticsearch.discovery.HandshakingTransportAddressConnector.PROBE_HANDSHAKE_TIMEOUT_SETTING;
 import static org.elasticsearch.node.Node.NODE_NAME_SETTING;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.oneOf;
@@ -172,7 +174,7 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
         try {
             handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener);
-            failureListener.assertFailure();
+            assertThat(failureListener.getFailureMessage(), containsString("simulated"));
             mockAppender.assertAllExpectationsMatched();
         } finally {
             Loggers.removeAppender(targetLogger, mockAppender);
@@ -187,7 +189,16 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
         FailureListener failureListener = new FailureListener();
         handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener);
-        failureListener.assertFailure();
+        assertThat(
+            failureListener.getFailureMessage(),
+            allOf(
+                containsString("successfully discovered master-ineligible node"),
+                containsString(remoteNode.descriptionWithoutAttributes()),
+                containsString("to suppress this message"),
+                containsString("remove address [" + discoveryAddress + "] from your discovery configuration"),
+                containsString("ensure that traffic to this address is routed only to master-eligible nodes")
+            )
+        );
     }
 
     public void testDoesNotConnectToLocalNode() throws Exception {
@@ -197,7 +208,7 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
         FailureListener failureListener = new FailureListener();
         handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener);
-        failureListener.assertFailure();
+        assertThat(failureListener.getFailureMessage(), containsString("successfully discovered local node"));
     }
 
     public void testDoesNotConnectToDifferentCluster() throws InterruptedException {
@@ -207,7 +218,10 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
         FailureListener failureListener = new FailureListener();
         handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener);
-        failureListener.assertFailure();
+        assertThat(
+            failureListener.getFailureMessage(),
+            containsString("remote cluster name [another-cluster] does not match local cluster name [local-cluster]")
+        );
     }
 
     public void testTimeoutDefaults() {
@@ -223,7 +237,7 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
         FailureListener failureListener = new FailureListener();
         handshakingTransportAddressConnector.connectToRemoteMasterNode(discoveryAddress, failureListener);
-        failureListener.assertFailure();
+        assertThat(failureListener.getFailureMessage(), containsString("timed out"));
     }
 
     private TransportAddress getDiscoveryAddress() {
@@ -232,6 +246,7 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
     private static class FailureListener implements ActionListener<ProbeConnectionResult> {
         final CountDownLatch completionLatch = new CountDownLatch(1);
+        String message;
 
         @Override
         public void onResponse(ProbeConnectionResult connectResult) {
@@ -240,11 +255,13 @@ public class HandshakingTransportAddressConnectorTests extends ESTestCase {
 
         @Override
         public void onFailure(Exception e) {
+            message = e.getMessage();
             completionLatch.countDown();
         }
 
-        void assertFailure() throws InterruptedException {
-            assertTrue(completionLatch.await(15, TimeUnit.SECONDS));
+        String getFailureMessage() throws InterruptedException {
+            assertTrue("timed out waiting for listener to complete", completionLatch.await(15, TimeUnit.SECONDS));
+            return message;
         }
     }
 }

+ 7 - 7
server/src/test/java/org/elasticsearch/discovery/PeerFinderTests.java

@@ -792,10 +792,10 @@ public class PeerFinderTests extends ESTestCase {
 
             appender.addExpectation(
                 new MockLogAppender.SeenEventExpectation(
-                    "connection failed",
+                    "discovery result",
                     "org.elasticsearch.discovery.PeerFinder",
                     Level.WARN,
-                    "address [" + otherNode.getAddress() + "]* connection failed: cannot connect to*"
+                    "address [" + otherNode.getAddress() + "]* discovery result: cannot connect to*"
                 ) {
                     @Override
                     public boolean innerMatch(LogEvent event) {
@@ -816,7 +816,7 @@ public class PeerFinderTests extends ESTestCase {
     }
 
     @TestLogging(reason = "testing logging at DEBUG level", value = "org.elasticsearch.discovery:DEBUG")
-    public void testLogsStackTraceInConnectionFailedMessages() throws IllegalAccessException {
+    public void testLogsStackTraceInDiscoveryResultMessages() throws IllegalAccessException {
         final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list");
 
         providedAddresses.add(otherNode.getAddress());
@@ -832,10 +832,10 @@ public class PeerFinderTests extends ESTestCase {
             Loggers.addAppender(LogManager.getLogger("org.elasticsearch.discovery.PeerFinder"), appender);
             appender.addExpectation(
                 new MockLogAppender.SeenEventExpectation(
-                    "connection failed",
+                    "discovery result",
                     "org.elasticsearch.discovery.PeerFinder",
                     Level.DEBUG,
-                    "address [" + otherNode.getAddress() + "]* connection failed*"
+                    "address [" + otherNode.getAddress() + "]* discovery result*"
                 ) {
                     @Override
                     public boolean innerMatch(LogEvent event) {
@@ -850,10 +850,10 @@ public class PeerFinderTests extends ESTestCase {
 
             appender.addExpectation(
                 new MockLogAppender.SeenEventExpectation(
-                    "connection failed",
+                    "discovery result",
                     "org.elasticsearch.discovery.PeerFinder",
                     Level.WARN,
-                    "address [" + otherNode.getAddress() + "]* connection failed*"
+                    "address [" + otherNode.getAddress() + "]* discovery result*"
                 ) {
                     @Override
                     public boolean innerMatch(LogEvent event) {