# benchmark_matmul.py — times a chain of NxN matmuls (exported to ONNX) with OpenVINO and reports GFLOPS.
import time

onnx_path = "/tmp/my.onnx"  # path the ONNX model is read from (written there when the export section below is enabled)
N = 2048   # square matrix dimension of each Linear/MatMul layer
CNT = 400  # number of chained matmul+relu layers baked into the exported model (used by the GFLOPS formula)
  5. """
  6. import torch
  7. import torch.nn as nn
  8. #dtype = torch.bfloat16
  9. dtype = torch.float32
  10. class MatMul(nn.Module):
  11. def __init__(self):
  12. super().__init__()
  13. self.a = nn.Linear(N, N, bias=False)
  14. def forward(self, x):
  15. x = x.to(dtype)
  16. for i in range(CNT): x = self.a(x).relu()
  17. return x.to(torch.float32)
  18. torch_model = MatMul().to(dtype)
  19. torch.onnx.export(torch_model, torch.randn(N, N), onnx_path)
  20. """
  21. """
  22. import onnx
  23. from tinygrad.tensor import Tensor
  24. from extra.onnx import get_run_onnx
  25. out = get_run_onnx(onnx.load(onnx_path))({"onnx::MatMul_0": Tensor.zeros(N, N)})
  26. for x in out.values(): x.realize()
  27. """
from openvino.runtime import Core

# List the OpenVINO devices available on this machine (informational only —
# the compile target below is hardcoded, not taken from this loop).
core = Core()
devices = core.available_devices
for device in devices:
  device_name = core.get_property(device, "FULL_DEVICE_NAME")
  print(f"{device}: {device_name}")

# Compile the ONNX model for the first GPU.
# NOTE(review): 'GPU.0' is hardcoded — presumably this fails on machines without
# an OpenVINO GPU device; confirm before running elsewhere.
model = core.read_model(onnx_path)
compiled_model = core.compile_model(model, device_name='GPU.0')
print(compiled_model)

# Fill every input tensor with the constant 2 (input values don't affect timing).
ireq = compiled_model.create_infer_request()
for model_input in compiled_model.inputs:
  tensor = ireq.get_tensor(model_input)
  tensor.data[:] = 2
print(tensor)  # prints only the last input tensor (Python loop variable outlives the loop)

# Warm up with two untimed inferences so first-run overhead doesn't pollute the measurement.
print("request")
ireq.infer()
ireq.infer()
print("did one")

# Timed runs: each inference executes CNT chained NxN matmuls; a matmul is
# 2*N^3 FLOPs (multiply + add), hence CNT*N*N*N*2 FLOPs per inference.
REPS = 20
st = time.perf_counter()
for i in range(REPS): ireq.infer()
et = time.perf_counter() - st
print(f"{et*1000:.2f} ms {(CNT*N*N*N*REPS*2/et)*1e-9:.2f} GFLOPS")