grpc_peer_handle.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. import grpc
  2. import numpy as np
  3. import asyncio
  4. from typing import Optional, Tuple, List
  5. from . import node_service_pb2
  6. from . import node_service_pb2_grpc
  7. from ..peer_handle import PeerHandle
  8. from exo.inference.shard import Shard
  9. from exo.topology.topology import Topology
  10. from exo.topology.device_capabilities import DeviceCapabilities, DeviceFlops
  11. from exo.helpers import DEBUG
  12. class GRPCPeerHandle(PeerHandle):
  13. def __init__(self, _id: str, address: str, desc: str, device_capabilities: DeviceCapabilities):
  14. self._id = _id
  15. self.address = address
  16. self.desc = desc
  17. self._device_capabilities = device_capabilities
  18. self.channel = None
  19. self.stub = None
  20. def id(self) -> str:
  21. return self._id
  22. def addr(self) -> str:
  23. return self.address
  24. def description(self) -> str:
  25. return self.desc
  26. def device_capabilities(self) -> DeviceCapabilities:
  27. return self._device_capabilities
  28. async def connect(self):
  29. if self.channel is None:
  30. self.channel = grpc.aio.insecure_channel(self.address, options=[
  31. ("grpc.max_metadata_size", 32*1024*1024),
  32. ('grpc.max_receive_message_length', 32*1024*1024),
  33. ('grpc.max_send_message_length', 32*1024*1024)
  34. ])
  35. self.stub = node_service_pb2_grpc.NodeServiceStub(self.channel)
  36. await self.channel.channel_ready()
  37. async def is_connected(self) -> bool:
  38. return self.channel is not None and self.channel.get_state() == grpc.ChannelConnectivity.READY
  39. async def disconnect(self):
  40. if self.channel:
  41. await self.channel.close()
  42. self.channel = None
  43. self.stub = None
  44. async def _ensure_connected(self):
  45. if not await self.is_connected(): await asyncio.wait_for(self.connect(), timeout=5)
  46. async def health_check(self) -> bool:
  47. try:
  48. await self._ensure_connected()
  49. request = node_service_pb2.HealthCheckRequest()
  50. response = await asyncio.wait_for(self.stub.HealthCheck(request), timeout=5)
  51. return response.is_healthy
  52. except asyncio.TimeoutError:
  53. return False
  54. except Exception:
  55. if DEBUG >= 4:
  56. print(f"Health check failed for {self._id}@{self.address}.")
  57. import traceback
  58. traceback.print_exc()
  59. return False
  60. async def send_prompt(self, shard: Shard, prompt: str, request_id: Optional[str] = None) -> Optional[np.array]:
  61. request = node_service_pb2.PromptRequest(
  62. prompt=prompt,
  63. shard=node_service_pb2.Shard(
  64. model_id=shard.model_id,
  65. start_layer=shard.start_layer,
  66. end_layer=shard.end_layer,
  67. n_layers=shard.n_layers,
  68. ),
  69. request_id=request_id,
  70. )
  71. response = await self.stub.SendPrompt(request)
  72. if not response.tensor_data or not response.shape or not response.dtype:
  73. return None
  74. return np.frombuffer(response.tensor_data, dtype=np.dtype(response.dtype)).reshape(response.shape)
  75. async def send_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Optional[str] = None) -> Optional[np.array]:
  76. request = node_service_pb2.TensorRequest(
  77. shard=node_service_pb2.Shard(
  78. model_id=shard.model_id,
  79. start_layer=shard.start_layer,
  80. end_layer=shard.end_layer,
  81. n_layers=shard.n_layers,
  82. ),
  83. tensor=node_service_pb2.Tensor(tensor_data=tensor.tobytes(), shape=tensor.shape, dtype=str(tensor.dtype)),
  84. request_id=request_id,
  85. )
  86. response = await self.stub.SendTensor(request)
  87. if not response.tensor_data or not response.shape or not response.dtype:
  88. return None
  89. return np.frombuffer(response.tensor_data, dtype=np.dtype(response.dtype)).reshape(response.shape)
  90. async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]:
  91. request = node_service_pb2.GetInferenceResultRequest(request_id=request_id)
  92. response = await self.stub.GetInferenceResult(request)
  93. if response.tensor is None:
  94. return None, response.is_finished
  95. return (
  96. np.frombuffer(response.tensor.tensor_data, dtype=np.dtype(response.tensor.dtype)).reshape(response.tensor.shape),
  97. response.is_finished,
  98. )
  99. async def collect_topology(self, my_node_id: str, visited: set[str], max_depth: int) -> Topology:
  100. request = node_service_pb2.CollectTopologyRequest(visited=visited, max_depth=max_depth)
  101. response = await self.stub.CollectTopology(request)
  102. topology = Topology()
  103. for node_id, capabilities in response.nodes.items():
  104. if node_id == my_node_id: continue
  105. device_capabilities = DeviceCapabilities(
  106. model=capabilities.model,
  107. chip=capabilities.chip,
  108. memory=capabilities.memory,
  109. flops=DeviceFlops(fp16=capabilities.flops.fp16, fp32=capabilities.flops.fp32, int8=capabilities.flops.int8)
  110. )
  111. topology.update_node(node_id, device_capabilities)
  112. for node_id, peer_connections in response.peer_graph.items():
  113. if node_id == my_node_id: continue
  114. for conn in peer_connections.connections:
  115. topology.add_edge(node_id, conn.to_id, conn.description)
  116. return topology
  117. async def send_result(self, request_id: str, result: List[int], is_finished: bool) -> None:
  118. request = node_service_pb2.SendResultRequest(request_id=request_id, result=result, is_finished=is_finished)
  119. await self.stub.SendResult(request)
  120. async def send_opaque_status(self, request_id: str, status: str) -> None:
  121. request = node_service_pb2.SendOpaqueStatusRequest(request_id=request_id, status=status)
  122. await self.stub.SendOpaqueStatus(request)