external_test_hip_compile.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. import time, unittest
  2. from tinygrad.runtime.support.hip_comgr import compile_hip
  3. from tinygrad import Tensor
  4. from tinygrad.device import Device
  5. from tinygrad.engine.schedule import create_schedule
  6. from tinygrad.codegen.kernel import Kernel
  7. class TestHIPCompileSpeed(unittest.TestCase):
  8. @unittest.skipIf(Device.DEFAULT != "HIP", "only run on HIP")
  9. def test_hip_compile(self):
  10. a, b = Tensor([1,2,3,4,5]), Tensor([1,2,3,4,5])
  11. out = a + b
  12. lin = Kernel(create_schedule([out.lazydata])[-1].ast[0])
  13. lin.linearize()
  14. reference = """
  15. #include <hip/hip_common.h>
  16. typedef long unsigned int size_t;
  17. extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
  18. extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
  19. extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
  20. extern "C" __attribute__((global))void {name}(int* data0, const int* data1, const int* data2) {{
  21. int gidx0 = __ockl_get_group_id(0); /* 5 */
  22. int val0 = data1[gidx0];
  23. int val1 = data2[gidx0];
  24. data0[gidx0] = (val0+val1);
  25. }}
  26. """
  27. def time_compile(code):
  28. st = time.perf_counter()
  29. compile_hip(code)
  30. return (time.perf_counter() - st) * 1000
  31. tinygrad_tm = min([time_compile(Device[Device.DEFAULT].renderer.render(f"test{i}", lin.uops)) for i in range(10)])
  32. ref_tm = min([time_compile(reference.format(name=f"test{i}")) for i in range(10)])
  33. print(f"tinygrad {tinygrad_tm:6.2f} ms")
  34. print(f"reference {ref_tm:6.2f} ms")
  35. assert (tinygrad_tm - ref_tm) <= 10