|
@@ -15,12 +15,12 @@ import base64
|
|
|
|
|
|
import mlx.core as mx
|
|
import mlx.core as mx
|
|
import mlx.nn as nn
|
|
import mlx.nn as nn
|
|
-from transformers import AutoProcessor
|
|
|
|
|
|
|
|
from mlx_lm.tokenizer_utils import load_tokenizer, TokenizerWrapper
|
|
from mlx_lm.tokenizer_utils import load_tokenizer, TokenizerWrapper
|
|
from mlx_lm.tuner.utils import apply_lora_layers
|
|
from mlx_lm.tuner.utils import apply_lora_layers
|
|
|
|
|
|
from exo import DEBUG
|
|
from exo import DEBUG
|
|
|
|
+from exo.inference.tokenizers import resolve_tokenizer
|
|
from ..shard import Shard
|
|
from ..shard import Shard
|
|
|
|
|
|
|
|
|
|
@@ -171,15 +171,8 @@ async def load_shard(
|
|
model = apply_lora_layers(model, adapter_path)
|
|
model = apply_lora_layers(model, adapter_path)
|
|
model.eval()
|
|
model.eval()
|
|
|
|
|
|
- # TODO: figure out a generic solution
|
|
|
|
- if model.model_type == "llava":
|
|
|
|
- processor = AutoProcessor.from_pretrained(model_path)
|
|
|
|
- processor.eos_token_id = processor.tokenizer.eos_token_id
|
|
|
|
- processor.encode = processor.tokenizer.encode
|
|
|
|
- return model, processor
|
|
|
|
- else:
|
|
|
|
- tokenizer = load_tokenizer(model_path, tokenizer_config)
|
|
|
|
- return model, tokenizer
|
|
|
|
|
|
+ tokenizer = await resolve_tokenizer(model_path)
|
|
|
|
+ return model, tokenizer
|
|
|
|
|
|
|
|
|
|
async def get_image_from_str(_image_str: str):
|
|
async def get_image_from_str(_image_str: str):
|