1
0

test_copy_speed.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. import unittest
  2. from tinygrad import Tensor
  3. from tinygrad import Device
  4. from tinygrad.helpers import Timing, CI, OSX
  5. import multiprocessing.shared_memory as shared_memory
  6. N = 4096 if CI else 16384
  7. class TestCopySpeed(unittest.TestCase):
  8. @classmethod
  9. def setUpClass(cls): Device[Device.DEFAULT].synchronize()
  10. def testCopySHMtoDefault(self):
  11. s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4)
  12. s.close()
  13. if CI and not OSX:
  14. t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize()
  15. else:
  16. t = Tensor.empty(N, N, device="disk:shm:test_X").realize()
  17. for _ in range(3):
  18. with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
  19. with Timing("queue: "):
  20. t.to(Device.DEFAULT).realize()
  21. Device[Device.DEFAULT].synchronize()
  22. s.unlink()
  23. def testCopyCPUtoDefault(self):
  24. t = Tensor.rand(N, N, device="clang").realize()
  25. print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
  26. for _ in range(3):
  27. with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
  28. with Timing("queue: "):
  29. t.to(Device.DEFAULT).realize()
  30. Device[Device.DEFAULT].synchronize()
  31. def testCopyCPUtoDefaultFresh(self):
  32. print("fresh copy")
  33. for _ in range(3):
  34. t = Tensor.rand(N, N, device="clang").realize()
  35. with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"): # noqa: F821
  36. with Timing("queue: "):
  37. t.to(Device.DEFAULT).realize()
  38. Device[Device.DEFAULT].synchronize()
  39. del t
  40. def testCopyDefaulttoCPU(self):
  41. t = Tensor.rand(N, N).realize()
  42. print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
  43. for _ in range(3):
  44. with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
  45. t.to('clang').realize()
  46. @unittest.skipIf(CI, "CI doesn't have 6 GPUs")
  47. @unittest.skipIf(Device.DEFAULT != "GPU", "only test this on GPU")
  48. def testCopyCPUto6GPUs(self):
  49. from tinygrad.runtime.ops_gpu import CLDevice
  50. if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
  51. t = Tensor.rand(N, N, device="clang").realize()
  52. print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
  53. for _ in range(3):
  54. with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):
  55. with Timing("queue: "):
  56. for g in range(6):
  57. t.to(f"gpu:{g}").realize()
  58. Device["gpu"].synchronize()
  59. if __name__ == '__main__':
  60. unittest.main()