Quellcode durchsuchen

only use_fast tokenizer for Mistral Large until this inconsistency bug is fixed #171

Alex Cheema vor 11 Monaten
Ursprung
Commit
8cad0e1849
1 geänderte Datei mit 1 neuen und 1 gelöschten Zeile
  1. 1 1
      exo/inference/tokenizers.py

+ 1 - 1
exo/inference/tokenizers.py

@@ -6,7 +6,7 @@ from exo.helpers import DEBUG
 async def resolve_tokenizer(model_id: str):
   try:
     if DEBUG >= 4: print(f"Trying AutoProcessor for {model_id}")
-    processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
+    processor = AutoProcessor.from_pretrained(model_id, use_fast=True if "Mistral-Large" in model_id else False)
     if not hasattr(processor, 'eos_token_id'):
       processor.eos_token_id = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).eos_token_id
     if not hasattr(processor, 'encode'):