test_specific_conv.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import unittest
  2. from tinygrad.helpers import CI
  3. from tinygrad import Tensor, Device, dtypes
  4. from test.helpers import is_dtype_supported
  5. # similar to test/external/external_test_gpu_ast.py, but universal
  @unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and CI, "slow on CUDA CI")
  class TestSpecific(unittest.TestCase):
      """Build and realize a handful of specific conv2d / matmul shapes.

      None of the tests assert on numerical output: each one constructs the
      expression and calls ``realize()``, so a test passes when realization
      completes without raising.
      """

      # from openpilot

      # 1x1 6 <- 24
      def test_1x1_6_24(self):
          activations = Tensor.randn(1, 24*4, 32, 64)
          kernel = Tensor.randn(6*4, 24*4, 1, 1)
          # Move the channel axis last before regrouping into trailing groups of 4.
          channels_last = activations.conv2d(kernel).permute(0, 2, 3, 1)
          channels_last.reshape(32, 384, 4).contiguous().realize()

      def test_vec_mul(self):
          # this forces it to be an image...
          ones_row = Tensor.ones(1, 512, 4).contiguous().reshape(1, 2048)
          weights = Tensor.randn(2048, 512)
          product = ones_row @ weights
          product.reshape(1, 128, 4).contiguous().realize()

      @unittest.skipUnless(is_dtype_supported(dtypes.float16), "need float16 support")
      def test_big_vec_mul(self):
          # from LLaMA
          #  0 buffer<4096, dtypes.float>     [View((1024, 1, 1, 4), (4, 0, 0, 1), 0, None)]
          #  1 buffer<4096, dtypes.float>     [View((1024, 1024, 4, 4), (0, 4, 1, 0), 0, None)]
          #  2 buffer<16777216, dtypes.half>  [View((1024, 1024, 4, 4), (16384, 4, 1, 4096), 0, None)]
          # Both operands are realized up front, then the product is realized separately.
          vec = Tensor.randn(4096).realize()
          weights = Tensor.randn(4096, 4096, dtype=dtypes.float16).realize()
          product = vec @ weights.T
          product.realize()

      # from https://dl.acm.org/doi/pdf/10.1145/3495243.3517020

      # ~260 GFLOPS on Adreno 640, should be 260*(720/890)*(596/710) = 176.5 on downclocked 630
      # we get 170
      def test_1x1_28_28(self):
          activations = Tensor.randn(1, 256, 28, 28)
          kernel = Tensor.randn(256, 256, 1, 1)
          channels_last = activations.conv2d(kernel).permute(0, 2, 3, 1)
          channels_last.reshape(28, 28*256//4, 4).contiguous().realize()

      # 132 GFLOPS on Adreno 640, should be 132*(720/890)*(596/710) = 90 on downclocked 630
      # gets 54 with broken opt, 74 without opt, and 146 if we pad and opt 3!
      def test_3x3_28_28_stride_2(self):
          activations = Tensor.randn(1, 288, 36, 36)
          kernel = Tensor.randn(384, 288, 3, 3)
          # 36x36 input, 3x3 kernel, stride 2, no padding -> 17x17 output.
          channels_last = activations.conv2d(kernel, stride=2).permute(0, 2, 3, 1)
          channels_last.reshape(17, 17*384//4, 4).contiguous().realize()

      def test_3x3_28_28_stride_2_padded(self):
          activations = Tensor.randn(1, 288, 36, 36)
          kernel = Tensor.randn(384, 288, 3, 3)
          # Same conv as the unpadded variant, but padding=1 yields an 18x18 output.
          channels_last = activations.conv2d(kernel, stride=2, padding=1).permute(0, 2, 3, 1)
          channels_last.reshape(18, 18*384//4, 4).contiguous().realize()
  # When this file is executed directly (rather than imported by a test
  # runner), hand control to unittest's command-line entry point.
  if __name__ == "__main__":
      unittest.main()