# tf_gemm.py — TensorFlow square-matmul (GEMM) micro-benchmark
  1. import time
  2. import tensorflow as tf
  3. gpus = tf.config.list_physical_devices('GPU')
  4. if gpus:
  5. try:
  6. # Currently, memory growth needs to be the same across GPUs
  7. for gpu in gpus:
  8. tf.config.experimental.set_memory_growth(gpu, True)
  9. logical_gpus = tf.config.list_logical_devices('GPU')
  10. print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  11. except RuntimeError as e:
  12. # Memory growth must be set before GPUs have been initialized
  13. print(e)
  14. for dtype in [tf.float16, tf.float32]:
  15. for N in [256, 512, 1024, 2048, 4096, 8192]:
  16. FLOPS = N*N*N*2
  17. b = tf.random.uniform((N, N), dtype=dtype)
  18. c = tf.random.uniform((N, N), dtype=dtype)
  19. b = tf.Variable(b)
  20. c = tf.Variable(c)
  21. def tf_prog(b, c):
  22. st = time.perf_counter()
  23. a = tf.matmul(b, c)
  24. tf.debugging.check_numerics(a, "Nan or Inf in result") # Ensures that the calculation is done.
  25. return time.perf_counter() - st
  26. tm = min([tf_prog(b, c) for _ in range(20)])
  27. print(f"{N*N:10d} {tm*1e6:9.2f} us, would be {FLOPS*1e-9/tm:9.2f} GFLOPS {N:4d}x{N:4d}x{N:4d} matmul in {dtype}")