Merge pull request #684 from divinity76/patch-1

workaround f16 cast ambiguity
Alex Cheema 2 months ago
commit de99da7c75
1 changed file with 1 addition and 1 deletion

+ 1 - 1
exo/inference/tinygrad/models/llama.py

@@ -322,6 +322,6 @@ def fix_bf16(weights: Dict[Any, Tensor]):
     }
   if getenv("SUPPORT_BF16", 1):
     # TODO: without casting to float16, 70B llama OOM on tinybox.
-    return {k: v.cast(dtypes.float16) if v.dtype == dtypes.bfloat16 else v for k, v in weights.items()}
+    return {k: v.cast(dtypes.float32).cast(dtypes.float16) if v.dtype == dtypes.bfloat16 else v for k, v in weights.items()}
   # TODO: check if device supports bf16
   return {k: v.llvm_bf16_cast(dtypes.half).to(v.device) if v.dtype == dtypes.bfloat16 else v for k, v in weights.items()}
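
The replacement routes bfloat16 through float32 before narrowing to float16, so the backend never has to lower a direct bf16 -> f16 cast, which is the "f16 cast ambiguity" the commit title refers to. A minimal sketch of the same pattern outside the patch, assuming tinygrad's Tensor.cast API (the helper name is hypothetical, not part of this commit):

from tinygrad import Tensor, dtypes

def bf16_to_f16_via_f32(t: Tensor) -> Tensor:
  # Widen to float32 first, then narrow to float16; this two-step route
  # sidesteps the direct bf16 -> f16 cast that the commit works around.
  return t.cast(dtypes.float32).cast(dtypes.float16)

if __name__ == "__main__":
  # Hypothetical usage: values representable in bf16 survive the round trip.
  w = Tensor([1.5, 2.25, 3.0]).cast(dtypes.bfloat16)
  print(bf16_to_f16_via_f32(w).numpy())  # float16 output, same magnitudes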