standard_node.py

import asyncio
import traceback
import uuid
from typing import Callable, Dict, List, Optional, Tuple

import numpy as np

from networking import Discovery, PeerHandle, Server
from inference.inference_engine import InferenceEngine, Shard
from topology.topology import Topology
from topology.device_capabilities import device_capabilities
from topology.partitioning_strategy import PartitioningStrategy, Partition
from .node import Node


class StandardNode(Node):
    def __init__(
        self,
        id: str,
        server: Server,
        inference_engine: InferenceEngine,
        discovery: Discovery,
        partitioning_strategy: Optional[PartitioningStrategy] = None,
        on_token: Optional[Callable[[List[int]], None]] = None,
        max_generate_tokens: int = 50,
    ):
        self.id = id
        self.inference_engine = inference_engine
        self.server = server
        self.discovery = discovery
        self.partitioning_strategy = partitioning_strategy
        self.peers: List[PeerHandle] = []  # was {}; the annotation says List and the code iterates it as one
        self.topology: Topology = Topology()
        self.device_capabilities = device_capabilities()
        # request_id -> (token ids generated so far, whether generation has finished)
        self.buffered_token_output: Dict[str, Tuple[List[int], bool]] = {}
        self.on_token = on_token
        self.max_generate_tokens = max_generate_tokens
    async def start(self, wait_for_peers: int = 0) -> None:
        await self.server.start()
        await self.discovery.start()
        await self.update_peers(wait_for_peers)
        await self.collect_topology()
        print(f"Collected topology: {self.topology}")
        asyncio.create_task(self.periodic_topology_collection(5))

    async def stop(self) -> None:
        await self.discovery.stop()
        await self.server.stop()
    async def process_prompt(self, shard: Shard, prompt: str, request_id: Optional[str] = None) -> Optional[np.ndarray]:
        if request_id is None:
            request_id = str(uuid.uuid4())
        if request_id not in self.buffered_token_output:
            self.buffered_token_output[request_id] = ([], False)
        print(f"[{request_id}] process prompt: {shard}, {prompt}")
        result, is_finished = await self.inference_engine.infer_prompt(self.get_current_shard(shard), prompt)
        self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], is_finished)
        if result.size == 1:  # we got a new token out
            self.buffered_token_output[request_id][0].append(result.item())
            if self.on_token:
                self.on_token(self.buffered_token_output[request_id][0])
        print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(self.buffered_token_output[request_id][0])}")
        # The original compared len() of the (tokens, finished) tuple, which is always 2;
        # the token list itself is what should be bounded by max_generate_tokens.
        if not is_finished and len(self.buffered_token_output[request_id][0]) < self.max_generate_tokens:
            asyncio.create_task(self.forward_tensor_to_next_shard(shard, result, request_id))
        return np.array(self.buffered_token_output[request_id][0]) if len(self.buffered_token_output[request_id][0]) > 0 else None
    async def process_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Optional[str] = None) -> Optional[np.ndarray]:
        if request_id is None:
            request_id = str(uuid.uuid4())
        if request_id not in self.buffered_token_output:
            self.buffered_token_output[request_id] = ([], False)
        try:
            print(f"[{request_id}] process_tensor: {shard}, {tensor}")
            result, is_finished = await self.inference_engine.infer_tensor(self.get_current_shard(shard), tensor)
            self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], is_finished)
            if result.size == 1:  # we got a new token out
                self.buffered_token_output[request_id][0].append(result.item())
                if self.on_token:
                    self.on_token(self.buffered_token_output[request_id][0])
            print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(self.buffered_token_output[request_id][0])}")
            if not is_finished and len(self.buffered_token_output[request_id][0]) < self.max_generate_tokens:
                asyncio.create_task(self.forward_tensor_to_next_shard(shard, result, request_id))
            return np.array(self.buffered_token_output[request_id][0]) if len(self.buffered_token_output[request_id][0]) > 0 else None
        except Exception as e:
            print(f"Error processing tensor for shard {shard}: {e}")
            traceback.print_exc()
            return None
    async def forward_tensor_to_next_shard(self, shard: Shard, tensor: np.ndarray, request_id: str) -> None:
        if not self.partitioning_strategy:
            print("No partitioning strategy found. Skipping forward.")
            return

        partitions = self.partitioning_strategy.partition(self.topology)
        current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None)
        print(f"Current partition index: {current_partition_index}")
        if current_partition_index is not None:
            # Partitions form a ring: the last partition wraps back to the first.
            next_partition_index = (current_partition_index + 1) % len(partitions)
            next_partition: Partition = partitions[next_partition_index]
            print(f"Computed next from: {shard}, {self.topology}. Next partition: {next_partition}")

            if next_partition.node_id == self.id:
                await self.process_tensor(shard, tensor, request_id)
                return

            target_peer = next((p for p in self.peers if p.id() == next_partition.node_id), None)
            if not target_peer:
                raise ValueError(f"Peer for {next_partition} not found")

            # Map the next partition's fractional bounds onto concrete layer indices.
            start_layer = int(next_partition.start * shard.n_layers)
            end_layer = int(next_partition.end * shard.n_layers) - 1
            next_shard = Shard(shard.model_id, start_layer, end_layer, shard.n_layers)
            print(f"Sending tensor to {target_peer.id()} for shard: {next_shard}: {tensor}")
            await target_peer.send_tensor(next_shard, tensor, request_id)
    def get_current_shard(self, shard: Shard) -> Shard:
        partitions = self.partitioning_strategy.partition(self.topology)
        current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None)
        if current_partition_index is None:
            raise ValueError(f"No current partition found for node: {self.id}")

        current_partition = partitions[current_partition_index]
        start_layer = int(current_partition.start * shard.n_layers)
        end_layer = int(current_partition.end * shard.n_layers) - 1
        return Shard(shard.model_id, start_layer, end_layer, shard.n_layers)
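
    # Worked example of the fractional-bounds mapping above (and the identical
    # arithmetic in forward_tensor_to_next_shard); a sketch assuming a
    # hypothetical 32-layer model split evenly across two nodes:
    #   partition covering [0.0, 0.5) -> layers int(0.0 * 32) .. int(0.5 * 32) - 1 = 0..15
    #   partition covering [0.5, 1.0) -> layers int(0.5 * 32) .. int(1.0 * 32) - 1 = 16..31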
    async def reset_shard(self, shard: Shard) -> None:
        print(f"Resetting shard: {shard}")
        self.buffered_token_output = {}
        await self.inference_engine.reset_shard(self.get_current_shard(shard))
    async def update_peers(self, wait_for_peers: int = 0) -> None:
        self.peers = await self.discovery.discover_peers(wait_for_peers)
        print(f"Starting with the following peers: {self.peers}")
        print("Connecting to new peers...")
        for peer in self.peers:
            is_connected = await peer.is_connected()
            print(f"Connected to {peer.id()}: {is_connected}")
            if not is_connected:
                await peer.connect()
                print(f"Connected to peer {peer.id()}")
    async def collect_topology(self, max_depth: int = 4) -> Topology:
        self.topology.update_node(self.id, self.device_capabilities)
        for peer in self.peers:
            self.topology.update_node(peer.id(), peer.device_capabilities())
            self.topology.add_edge(self.id, peer.id())
            # Recursively merge each peer's view of the network, bounded by max_depth.
            if max_depth > 0:
                try:
                    other_topology = await peer.collect_topology(max_depth=max_depth - 1)
                    print(f"Collected topology from: {peer.id()}: {other_topology}")
                    self.topology.merge(other_topology)
                except Exception as e:
                    print(f"Error collecting topology from {peer.id()}: {e}")
        return self.topology
    async def periodic_topology_collection(self, interval: int):
        while True:
            await asyncio.sleep(interval)
            try:
                await self.update_peers()
                await self.collect_topology()
            except Exception as e:
                print(f"Error collecting topology: {e}")

            print("Topology collection task executed.")
            print(f"Current topology: {self.topology}")
    async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]:
        if request_id not in self.buffered_token_output:
            return None, False
        return np.array(self.buffered_token_output[request_id][0]), self.buffered_token_output[request_id][1]
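

# Hedged usage sketch (not part of the original module). It assumes concrete
# Server, Discovery, InferenceEngine and PartitioningStrategy implementations
# exist elsewhere in the repo; `my_server`, `my_discovery`, `my_engine` and
# `my_strategy` below are placeholders, not real project names. Only the
# StandardNode constructor arguments come from this file.
#
#   async def main():
#       node = StandardNode(
#           id=str(uuid.uuid4()),
#           server=my_server,
#           inference_engine=my_engine,
#           discovery=my_discovery,
#           partitioning_strategy=my_strategy,
#           on_token=lambda tokens: print(f"tokens so far: {tokens}"),
#       )
#       await node.start(wait_for_peers=1)  # wait until one peer is discovered
#
#   asyncio.run(main())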