Parcourir la source

todo for speculative model

Alex Cheema il y a 1 an
Parent
commit
57215041a0
1 fichier modifié avec 1 ajout et 1 suppression
  1. 1 1
      exo/inference/mlx/sharded_model.py

+ 1 - 1
exo/inference/mlx/sharded_model.py

@@ -8,7 +8,7 @@ from mlx_lm.sample_utils import top_p_sampling
 
 from ..shard import Shard
 
-
+# TODO: support a speculative model so we can parallelise compute across devices
 class StatefulShardedModel:
   def __init__(self, shard: Shard, model: nn.Module, max_kv_size: int = 1024, max_caches: int = 2):
     self.shard = shard