external_benchmark_hip_compile.py 1.1 KB

12345678910111213141516171819202122232425262728293031
  1. import random, os
  2. from tinygrad.helpers import Timing
  3. from tinygrad.runtime.ops_hip import compile_hip, HIPDevice
  4. from tinygrad.runtime.ops_gpu import compile_cl, CLDevice
  5. # OMP_NUM_THREADS=1 strace -tt -f -e trace=file python3 test/external/external_benchmark_hip_compile.py
  6. # AMD_COMGR_REDIRECT_LOGS=stdout AMD_COMGR_EMIT_VERBOSE_LOGS=1 python3 test/external/external_benchmark_hip_compile.py
  7. # issue is in https://github.com/ROCm-Developer-Tools/clr/
  8. if __name__ == "__main__":
  9. HIPDevice()
  10. CLDevice()
  11. # warmup
  12. name = "none"+str(random.randint(0, 1000000))
  13. compile_cl.__wrapped__(f"void {name}() {{}}")
  14. print("compile cl warmed up")
  15. compile_hip.__wrapped__(f"void {name}() {{}}")
  16. print("compile hip warmed up")
  17. print("**** benchmark ****")
  18. name = "none"+str(random.randint(0, 1000000))
  19. # this uses AMD_COMGR_ACTION_COMPILE_SOURCE_TO_BC, then it links the lib on the next step
  20. with Timing("compile cl: "): compile_cl.__wrapped__(f"void {name}() {{}}")
  21. # this uses AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC, much slower
  22. with Timing("compile hip: "): compile_hip.__wrapped__(f"void {name}() {{}}")
  23. os._exit(0)