# ops_llvm.py (listed size: 2.2 KB)
  1. from __future__ import annotations
  2. import ctypes, functools
  3. from typing import Tuple
  4. from tinygrad.device import Compiled, Compiler, MallocAllocator
  5. from tinygrad.helpers import DEBUG, cpu_time_execution, cpu_objdump
  6. from tinygrad.renderer.llvmir import LLVMRenderer
  7. import llvmlite.binding as llvm
  8. class LLVMCompiler(Compiler):
  9. def __init__(self, device:LLVMDevice):
  10. self.device = device
  11. super().__init__("compile_llvm")
  12. def compile(self, src:str) -> bytes:
  13. mod = llvm.parse_assembly(src)
  14. mod.verify()
  15. self.device.optimizer.run(mod)
  16. if DEBUG >= 5: print(self.device.target_machine.emit_assembly(mod))
  17. return self.device.target_machine.emit_object(mod)
  18. class LLVMProgram:
  19. def __init__(self, device:LLVMDevice, name:str, lib:bytes):
  20. if DEBUG >= 6: cpu_objdump(lib)
  21. self.name, self.lib = name, lib
  22. device.engine.add_object_file(llvm.object_file.ObjectFileRef.from_data(lib))
  23. self.fxn = device.engine.get_function_address(name)
  24. def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
  25. if not hasattr(self, 'cfunc'):
  26. self.cfunc = ctypes.CFUNCTYPE(ctypes.c_int, *([ctypes.c_void_p]*len(bufs)), *([ctypes.c_int32]*len(vals)))(self.fxn)
  27. return cpu_time_execution(lambda: self.cfunc(*bufs, *vals), enable=wait)
  28. class LLVMDevice(Compiled):
  29. def __init__(self, device:str):
  30. llvm.initialize()
  31. llvm.initialize_native_target()
  32. llvm.initialize_native_asmprinter()
  33. llvm.initialize_native_asmparser()
  34. self.optimizer: llvm.passmanagers.ModulePassManager = llvm.create_module_pass_manager()
  35. # this opt actually can change things. ex: opt=3 means no FMA, opt=2 means FMA
  36. self.target_machine: llvm.targets.TargetMachine = llvm.Target.from_triple(llvm.get_process_triple()).create_target_machine(opt=2)
  37. self.target_machine.add_analysis_passes(self.optimizer)
  38. self.target_machine.set_asm_verbosity(True)
  39. backing_mod = llvm.parse_assembly(str())
  40. backing_mod.triple = llvm.get_process_triple()
  41. self.engine: llvm.executionengine.ExecutionEngine = llvm.create_mcjit_compiler(backing_mod, self.target_machine)
  42. super().__init__(device, MallocAllocator, LLVMRenderer(), LLVMCompiler(self), functools.partial(LLVMProgram, self))