Browse Source

better logs around peer connecting / disconnecting

Alex Cheema 9 months ago
parent
commit
8cb678e795
3 changed files with 27 additions and 4 deletions
  1. 1 1
      .circleci/config.yml
  2. 5 1
      exo/networking/udp_discovery.py
  3. 21 2
      exo/orchestration/standard_node.py

+ 1 - 1
.circleci/config.yml

@@ -144,7 +144,7 @@ jobs:
             PID2=$!
             sleep 10
             kill $PID1 $PID2
-            if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
+            if grep -q "Successfully connected peers: \['node2@.*:.*'\]" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Successfully connected peers: \['node1@.*:.*'\]" output2.log && ! grep -q "Failed to connect peers:" output2.log; then
               echo "Test passed: Both instances discovered each other"
               exit 0
             else

+ 5 - 1
exo/networking/udp_discovery.py

@@ -87,7 +87,11 @@ class UDPDiscovery(Discovery):
         print(traceback.format_exc())
       finally:
         if transport:
-          transport.close()
+          try:
+            transport.close()
+          except Exception as e:  # bind the error so the debug print below can format it; a bare except would also trap CancelledError/KeyboardInterrupt
+            if DEBUG_DISCOVERY >= 2: print(f"Error closing transport: {e}")
+            if DEBUG_DISCOVERY >= 2: traceback.print_exc()
         await asyncio.sleep(self.broadcast_interval)
 
   async def on_listen_message(self, data, addr):

+ 21 - 2
exo/orchestration/standard_node.py

@@ -294,28 +294,47 @@ class StandardNode(Node):
     peers_to_disconnect = [peer for peer in peers_removed if await peer.is_connected()]
     peers_to_connect = [peer for peer in peers_added + peers_updated + peers_unchanged if not await peer.is_connected()]
 
-    print(f"{peers_added=} {peers_removed=} {peers_updated=} {peers_unchanged=} {peers_to_disconnect=} {peers_to_connect=}")
+    def _pretty(peers: List[PeerHandle]) -> List[str]:  # render each peer as an "id@addr" string for the summary log lines
+      return [f"{peer.id()}@{peer.addr()}" for peer in peers]
+    if DEBUG >= 2: print(f"update_peers: added={peers_added} removed={peers_removed} updated={peers_updated} unchanged={peers_unchanged} to_disconnect={peers_to_disconnect} to_connect={peers_to_connect}")
 
     async def disconnect_with_timeout(peer, timeout=5):
       try:
         await asyncio.wait_for(peer.disconnect(), timeout)
+        return True  # peer.disconnect() finished within `timeout` seconds
       except Exception as e:
         print(f"Error disconnecting peer {peer.id()}@{peer.addr()}: {e}")
         traceback.print_exc()
+        return False  # failure (including a wait_for timeout) was logged above; report it instead of raising
 
     async def connect_with_timeout(peer, timeout=5):
       try:
         await asyncio.wait_for(peer.connect(), timeout)
+        return True  # peer.connect() finished within `timeout` seconds
       except Exception as e:
         print(f"Error connecting peer {peer.id()}@{peer.addr()}: {e}")
         traceback.print_exc()
+        return False  # failure (including a wait_for timeout) was logged above; report it instead of raising
 
-    await asyncio.gather(
+    disconnect_results = await asyncio.gather(
       *(disconnect_with_timeout(peer) for peer in peers_to_disconnect),
+      return_exceptions=True
+    )
+    connect_results = await asyncio.gather(
       *(connect_with_timeout(peer) for peer in peers_to_connect),
       return_exceptions=True
     )
 
+    successful_disconnects = [peer for peer, result in zip(peers_to_disconnect, disconnect_results) if result is True]
+    failed_disconnects = [peer for peer, result in zip(peers_to_disconnect, disconnect_results) if result is False]
+    successful_connects = [peer for peer, result in zip(peers_to_connect, connect_results) if result is True]
+    failed_connects = [peer for peer, result in zip(peers_to_connect, connect_results) if result is False]
+    if DEBUG >= 1:
+      if successful_disconnects: print(f"Successfully disconnected peers: {_pretty(successful_disconnects)}")
+      if failed_disconnects: print(f"Failed to disconnect peers: {_pretty(failed_disconnects)}")
+      if successful_connects: print(f"Successfully connected peers: {_pretty(successful_connects)}")
+      if failed_connects: print(f"Failed to connect peers: {_pretty(failed_connects)}")
+
     self.peers = next_peers
     return len(peers_to_connect) > 0 or len(peers_to_disconnect) > 0