# test_onnx.py — end-to-end ONNX model tests for tinygrad (openpilot supercombo,
# efficientnet, shufflenet, resnet), plus a torch cross-check reference runner.
  1. #!/usr/bin/env python
  2. import os
  3. import time
  4. import unittest
  5. import numpy as np
  6. import onnx
  7. from extra.onnx import get_run_onnx
  8. from tinygrad.tensor import Tensor
  9. from tinygrad.helpers import CI, fetch, temp
  10. def run_onnx_torch(onnx_model, inputs):
  11. import torch
  12. from onnx2torch import convert
  13. torch_model = convert(onnx_model).float()
  14. with torch.no_grad():
  15. torch_out = torch_model(*[torch.tensor(x) for x in inputs.values()])
  16. return torch_out
# Pinned openpilot release so the downloaded supercombo model is stable across runs.
OPENPILOT_MODEL = "https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx"
# Fixed seed: the tests below build their model inputs from np.random, so this
# keeps runs (and the torch-vs-tinygrad comparison) reproducible.
np.random.seed(1337)
class TestOnnxModel(unittest.TestCase):
  """End-to-end tests that load real ONNX models and run them through tinygrad's
  ONNX frontend (``get_run_onnx``), with timing, profiling, and a torch cross-check."""

  def test_benchmark_openpilot_model(self):
    """Benchmark the openpilot supercombo model: 7 timed runs, then (outside CI)
    one extra run under cProfile with a flamegraph dumped to a temp file."""
    onnx_model = onnx.load(fetch(OPENPILOT_MODEL))
    run_onnx = get_run_onnx(onnx_model)
    def get_inputs():
      # Fresh random inputs each run. Shapes are the supercombo input spec as
      # hard-coded here; image inputs are random, most others zeros.
      np_inputs = {
        "input_imgs": np.random.randn(*(1, 12, 128, 256)),
        "big_input_imgs": np.random.randn(*(1, 12, 128, 256)),
        "desire": np.zeros((1, 100, 8)),
        "traffic_convention": np.array([[1., 0.]]),
        "nav_features": np.zeros((1, 256)),
        "features_buffer": np.zeros((1, 99, 128)),
      }
      # The model runs in float32; no grads needed for inference.
      inputs = {k:Tensor(v.astype(np.float32), requires_grad=False) for k,v in np_inputs.items()}
      return inputs

    # Three timing windows per run: graph construction (st->mt), lazy-graph
    # realization (mt->mt2), and copy back to numpy / device queue drain (mt2->et).
    for _ in range(7):
      inputs = get_inputs()
      st = time.monotonic()
      tinygrad_out = run_onnx(inputs)['outputs']
      mt = time.monotonic()
      tinygrad_out.realize()
      mt2 = time.monotonic()
      tinygrad_out = tinygrad_out.numpy()
      et = time.monotonic()
      if not CI:
        print(f"ran openpilot model in {(et-st)*1000.0:.2f} ms, waited {(mt2-mt)*1000.0:.2f} ms for realize, {(et-mt2)*1000.0:.2f} ms for GPU queue")

    # Outside CI: profile one additional full run and dump stats + flamegraph.
    if not CI:
      import cProfile
      import pstats
      inputs = get_inputs()
      pr = cProfile.Profile(timer=time.perf_counter_ns, timeunit=1e-6)
      pr.enable()
    # NOTE(review): this run is deliberately unguarded — in CI it executes once
    # more without profiling, reusing `inputs` from the last loop iteration.
    tinygrad_out = run_onnx(inputs)['outputs']
    tinygrad_out.realize()
    tinygrad_out = tinygrad_out.numpy()
    if not CI:
      pr.disable()
      stats = pstats.Stats(pr)
      stats.dump_stats(temp("net.prof"))
      # Best-effort flamegraph; requires the `flameprof` tool on PATH.
      os.system(f"flameprof {temp('net.prof')} > {temp('prof.svg')}")
      ps = stats.sort_stats(pstats.SortKey.TIME)
      ps.print_stats(30)

  def test_openpilot_model(self):
    """Run supercombo once in tinygrad, then in torch via run_onnx_torch, and
    assert the outputs agree within atol=1e-4 / rtol=1e-2."""
    onnx_model = onnx.load(fetch(OPENPILOT_MODEL))
    run_onnx = get_run_onnx(onnx_model)
    print("got run_onnx")
    inputs = {
      "input_imgs": np.random.randn(*(1, 12, 128, 256)),
      "big_input_imgs": np.random.randn(*(1, 12, 128, 256)),
      "desire": np.zeros((1, 100, 8)),
      "traffic_convention": np.array([[1., 0.]]),
      "nav_features": np.zeros((1, 256)),
      "features_buffer": np.zeros((1, 99, 128)),
    }
    # Plain float32 numpy arrays here (not Tensors): run_onnx accepts them, and
    # run_onnx_torch below needs the same numpy dict.
    inputs = {k:v.astype(np.float32) for k,v in inputs.items()}

    st = time.monotonic()
    print("****** run onnx ******")
    tinygrad_out = run_onnx(inputs)['outputs']
    mt = time.monotonic()
    print("****** realize ******")
    tinygrad_out.realize()
    mt2 = time.monotonic()
    tinygrad_out = tinygrad_out.numpy()
    et = time.monotonic()
    print(f"ran openpilot model in {(et-st)*1000.0:.2f} ms, waited {(mt2-mt)*1000.0:.2f} ms for realize, {(et-mt2)*1000.0:.2f} ms for GPU queue")

    # Disable tinygrad grad tracking around the torch reference run.
    # NOTE(review): not try/finally-protected — an exception in the torch run
    # would leave the global Tensor.no_grad flag set.
    Tensor.no_grad = True
    torch_out = run_onnx_torch(onnx_model, inputs).numpy()
    Tensor.no_grad = False
    print(tinygrad_out, torch_out)
    np.testing.assert_allclose(torch_out, tinygrad_out, atol=1e-4, rtol=1e-2)

  @unittest.skip("slow")
  def test_efficientnet(self):
    # "images:0" input name + new-style preprocessing (see _test_model).
    input_name, input_new = "images:0", True
    self._test_model(
      fetch("https://github.com/onnx/models/raw/main/validated/vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.onnx"),
      input_name, input_new)

  def test_shufflenet(self):
    # Caffe2-style input name + old-style preprocessing.
    input_name, input_new = "gpu_0/data_0", False
    self._test_model(
      fetch("https://github.com/onnx/models/raw/main/validated/vision/classification/shufflenet/model/shufflenet-9.onnx"),
      input_name, input_new)

  @unittest.skip("test is very slow")
  def test_resnet(self):
    # NOTE: many onnx models can't be run right now due to max pool with strides != kernel_size
    input_name, input_new = "data", False
    self._test_model(
      fetch("https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet18-v2-7.onnx"),
      input_name, input_new)

  def _test_model(self, fn, input_name, input_new, debug=False):
    """Load the ONNX classifier at *fn* and check it labels the shared chicken
    and car test images sensibly.

    input_name: name of the model's single image input.
    input_new:  passed through to preprocess() to pick the preprocessing style.
    """
    onnx_model = onnx.load(fn)
    print("onnx loaded")
    # Reuse the test images, preprocessing, and ImageNet label list from the
    # efficientnet test module.
    from test.models.test_efficientnet import chicken_img, car_img, preprocess, _LABELS
    run_onnx = get_run_onnx(onnx_model)
    def run(img):
      inputs = {input_name: preprocess(img, new=input_new)}
      # Classifier assumed to have a single output: take the first value.
      tinygrad_out = list(run_onnx(inputs, debug=debug).values())[0].numpy()
      return tinygrad_out.argmax()
    cls = run(chicken_img)
    print(cls, _LABELS[cls])
    assert _LABELS[cls] == "hen" or _LABELS[cls] == "cock"
    cls = run(car_img)
    print(cls, _LABELS[cls])
    # Accept any car-ish ImageNet class rather than one exact label.
    assert "car" in _LABELS[cls] or _LABELS[cls] == "convertible"
# Allow running this file directly: python test_onnx.py
if __name__ == "__main__":
  unittest.main()