Ver código fonte

Merge pull request #398 from exo-explore/fix_flops_parsing

Fix flops parsing: build DeviceFlops (fp16, fp32, int8) from the collected topology response
Alex Cheema 6 meses atrás
pai
commit
50a1b171f6

+ 2 - 2
exo/networking/grpc/grpc_peer_handle.py

@@ -9,7 +9,7 @@ from . import node_service_pb2_grpc
 from ..peer_handle import PeerHandle
 from exo.inference.shard import Shard
 from exo.topology.topology import Topology
-from exo.topology.device_capabilities import DeviceCapabilities
+from exo.topology.device_capabilities import DeviceCapabilities, DeviceFlops
 from exo.helpers import DEBUG
 
 
@@ -117,7 +117,7 @@ class GRPCPeerHandle(PeerHandle):
     response = await self.stub.CollectTopology(request)
     topology = Topology()
     for node_id, capabilities in response.nodes.items():
-      device_capabilities = DeviceCapabilities(model=capabilities.model, chip=capabilities.chip, memory=capabilities.memory, flops=capabilities.flops)
+      device_capabilities = DeviceCapabilities(model=capabilities.model, chip=capabilities.chip, memory=capabilities.memory, flops=DeviceFlops(fp16=capabilities.flops.fp16, fp32=capabilities.flops.fp32, int8=capabilities.flops.int8))
       topology.update_node(node_id, device_capabilities)
     for node_id, peers in response.peer_graph.items():
       for peer_id in peers.peer_ids:

+ 1 - 0
exo/orchestration/standard_node.py

@@ -422,6 +422,7 @@ class StandardNode(Node):
         self.topology.merge(other_topology)
       except Exception as e:
         print(f"Error collecting topology from {peer.id()}: {e}")
+        traceback.print_exc()
 
     next_topology.active_node_id = self.topology.active_node_id  # this is not so clean.
     self.topology = next_topology