ops_clang.py 1.4 KB

12345678910111213141516171819202122232425262728
  1. import ctypes, subprocess, pathlib, tempfile
  2. from tinygrad.device import Compiled, Compiler, MallocAllocator
  3. from tinygrad.helpers import cpu_time_execution, DEBUG, cpu_objdump
  4. from tinygrad.renderer.cstyle import ClangRenderer
  class ClangCompiler(Compiler):
    """Compiler that turns C source text into shared-library bytes by invoking `clang`."""
    def compile(self, src:str) -> bytes:
      """Compile `src` with clang and return the contents of the produced shared object.

      The source is UTF-8 encoded and fed to clang on stdin (the `-` argument); the
      result is written to a temporary file and read back.
      `subprocess.check_output` raises `CalledProcessError` if clang exits non-zero.
      """
      # TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here
      with tempfile.NamedTemporaryFile(delete=True) as output_file:
        out_path = str(output_file.name)
        clang_argv = ['clang', '-include', 'tgmath.h', '-shared', '-march=native', '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-']
        clang_argv += ['-o', out_path]
        subprocess.check_output(clang_argv, input=src.encode('utf-8'))
        # read the result while still inside the `with`; the temp file is deleted on exit
        compiled = pathlib.Path(out_path).read_bytes()
        return compiled
  class ClangProgram:
    """Callable wrapper around one named symbol of a compiled shared library."""
    def __init__(self, name:str, lib:bytes):
      """Load `lib` (shared-library bytes) through ctypes and look up the symbol `name`.

      When DEBUG >= 6 the library bytes are first passed to `cpu_objdump`.
      """
      if DEBUG >= 6: cpu_objdump(lib)
      self.name = name
      self.lib = lib
      # write to disk so we can load it
      with tempfile.NamedTemporaryFile(delete=True) as cached_file_path:
        lib_path = str(cached_file_path.name)
        pathlib.Path(lib_path).write_bytes(lib)
        handle = ctypes.CDLL(lib_path)
        self.fxn = handle[name]
    def __call__(self, *bufs, vals=(), wait=False):
      """Invoke the loaded function with `bufs` followed by `vals`.

      The call is routed through `cpu_time_execution` with `enable=wait`
      (presumably returns the measured time when enabled -- confirm in tinygrad.helpers).
      """
      def launch(): return self.fxn(*bufs, *vals)
      return cpu_time_execution(launch, enable=wait)
  class ClangDevice(Compiled):
    """Compiled device wired up with the clang renderer, compiler, program runner and graph."""
    def __init__(self, device:str):
      """Initialise the `Compiled` base for `device` with the clang-specific components."""
      # imported here rather than at module top (presumably to avoid an import cycle -- confirm)
      from tinygrad.runtime.graph.clang import ClangGraph
      renderer = ClangRenderer()
      compiler = ClangCompiler("compile_clang")
      super().__init__(device, MallocAllocator, renderer, compiler, ClangProgram, ClangGraph)