Jelajahi Sumber

discovery should not include unhealthy peers

Alex Cheema 7 bulan lalu
induk
melakukan
b1cf1085be
1 mengubah file dengan 12 tambahan dan 3 penghapusan
  1. 12 3
      exo/networking/tailscale_discovery.py

+ 12 - 3
exo/networking/tailscale_discovery.py

@@ -87,13 +87,22 @@ class TailscaleDiscovery(Discovery):
             continue
 
           if peer_id not in self.known_peers or self.known_peers[peer_id][0].addr() != f"{peer_host}:{peer_port}":
+            new_peer_handle = self.create_peer_handle(peer_id, f"{peer_host}:{peer_port}", device_capabilities)
+            if not await new_peer_handle.health_check():
+              if DEBUG >= 1: print(f"Peer {peer_id} at {peer_host}:{peer_port} is not healthy. Skipping.")
+              continue
+
             if DEBUG >= 1: print(f"Adding {peer_id=} at {peer_host}:{peer_port}. Replace existing peer_id: {peer_id in self.known_peers}")
             self.known_peers[peer_id] = (
-              self.create_peer_handle(peer_id, f"{peer_host}:{peer_port}", device_capabilities),
+              new_peer_handle,
               current_time,
               current_time,
             )
           else:
+            if not await self.known_peers[peer_id][0].health_check():
+              if DEBUG >= 1: print(f"Peer {peer_id} at {peer_host}:{peer_port} is not healthy. Deleting.")
+              del self.known_peers[peer_id]
+              continue
             self.known_peers[peer_id] = (self.known_peers[peer_id][0], self.known_peers[peer_id][1], current_time)
 
       except Exception as e:
@@ -126,9 +135,9 @@ class TailscaleDiscovery(Discovery):
         current_time = time.time()
         peers_to_remove = [
           peer_handle.id() for peer_handle, connected_at, last_seen in self.known_peers.values()
-          if (not await peer_handle.is_connected() and current_time - connected_at > self.discovery_timeout) or current_time - last_seen > self.discovery_timeout
+          if (not await peer_handle.is_connected() and current_time - connected_at > self.discovery_timeout) or current_time - last_seen > self.discovery_timeout or not await peer_handle.health_check()
         ]
-        if DEBUG_DISCOVERY >= 2: print("Peer statuses:", {peer_handle.id(): f"is_connected={await peer_handle.is_connected()}, {connected_at=}, {last_seen=}" for peer_handle, connected_at, last_seen in self.known_peers.values()})
+        if DEBUG_DISCOVERY >= 2: print("Peer statuses:", {peer_handle.id(): f"is_connected={await peer_handle.is_connected()}, {connected_at=}, {last_seen=}, health_check={await peer_handle.health_check()}" for peer_handle, connected_at, last_seen in self.known_peers.values()})
         for peer_id in peers_to_remove:
           if peer_id in self.known_peers: del self.known_peers[peer_id]
           if DEBUG_DISCOVERY >= 2: print(f"Removed peer {peer_id} due to inactivity.")