|
@@ -92,6 +92,9 @@ class TinygradDynamicShardInferenceEngine(InferenceEngine):
|
|
|
tokens = await asyncio.get_running_loop().run_in_executor(self.executor, self.tokenizer.decode, tokens)
|
|
|
return tokens
|
|
|
|
|
|
+ async def load_checkpoint(self, shard: Shard, path: str):
|
|
|
+ """Checkpoint-loading entry point for `shard`; currently only awaits `ensure_shard(shard)`.
+
+ NOTE(review): `path` is accepted but never read in this body, and nothing
+ is returned. Presumably the step that actually reads weights from `path`
+ is still to be added -- confirm before relying on this method.
+ """
+ # Same first step as infer_tensor: await ensure_shard(shard) before doing anything else.
+ await self.ensure_shard(shard)
|
|
|
+
|
|
|
async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray) -> np.ndarray:
|
|
|
await self.ensure_shard(shard)
|
|
|
def wrap_infer():
|