# ring_copy.py (472 B)
  1. from tinygrad import Tensor, Device, GlobalCounters
  2. from tinygrad.helpers import Timing
  3. N = 512
  4. GPUS = 5
  5. ds = tuple([f"{Device.DEFAULT}:{i+1}" for i in range(GPUS)])
  6. t = [Tensor.ones(N, N, N, device=d).contiguous().realize() for d in ds]
  7. for _ in range(10):
  8. GlobalCounters.reset()
  9. with Timing():
  10. for ti in t:
  11. ti.to_(ds[(ds.index(ti.device)+1+len(ds))%len(ds)])
  12. # ti.to_(ds[(ds.index(ti.device)-1+len(ds))%len(ds)]) # reversed order
  13. ti.realize()