Browse Source

Add TODO for speculative model support

Alex Cheema 10 months ago
parent
commit
57215041a0
1 changed file with 1 addition and 1 deletion
  1. 1 1
      exo/inference/mlx/sharded_model.py

+ 1 - 1
exo/inference/mlx/sharded_model.py

@@ -8,7 +8,7 @@ from mlx_lm.sample_utils import top_p_sampling
 
 from ..shard import Shard
 
-
+# TODO: support a speculative model so we can parallelise compute across devices
 class StatefulShardedModel:
   def __init__(self, shard: Shard, model: nn.Module, max_kv_size: int = 1024, max_caches: int = 2):
     self.shard = shard