# test_ops.py — tinygrad vs torch op-level comparison tests
# (line-number gutter residue from extraction removed)
  1. import time, math, unittest
  2. import numpy as np
  3. import torch
  4. from tinygrad.helpers import getenv, IMAGE, DEBUG, CI
  5. from tinygrad import Tensor, Device, dtypes
  6. from tinygrad.tensor import _to_np_dtype
  7. if CI:
  8. import warnings
  9. warnings.filterwarnings("ignore", message="Non-empty compiler output encountered")
  10. FORWARD_ONLY = getenv("FORWARD_ONLY", 0)
  11. PRINT_TENSORS = getenv("PRINT_TENSORS", 0)
def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, grad_atol=1e-4, grad_rtol=1e-3,
                   forward_only=False, vals=None, low=-2, high=2):
  """Run torch_fxn and tinygrad_fxn on identical inputs and compare results.

  shps: list of input shapes for random data, or None to use explicit `vals`.
  tinygrad_fxn: defaults to torch_fxn (same callable applied to tinygrad Tensors).
  atol/rtol: forward-pass tolerances; grad_atol/grad_rtol: backward-pass tolerances.
  forward_only: skip the backward comparison (also forced globally by FORWARD_ONLY).
  low/high: range of the uniform random input data.
  Raises Exception (wrapping the original error) on any shape/dtype/value mismatch.
  """
  if tinygrad_fxn is None: tinygrad_fxn = torch_fxn
  ts, tst = prepare_test_op(low, high, shps, vals, forward_only)
  # time the torch forward pass
  st = time.monotonic()
  out = torch_fxn(*ts)
  torch_fp = time.monotonic() - st
  # move inputs to a different device, test the device of intermediate tensors are correct
  if mt:=getenv("MOVE_TENSOR", ""):
    for t in tst: t.to_(mt)
  # time the tinygrad forward pass (realize forces actual execution)
  st = time.monotonic()
  ret = tinygrad_fxn(*tst).realize()
  tinygrad_fp = time.monotonic() - st
  def compare(s, tinygrad_output, torch_output, atol, rtol):
    # shape and dtype must match exactly; values only within tolerance
    if PRINT_TENSORS: print(s, tinygrad_output, torch_output)
    try:
      assert tinygrad_output.shape == torch_output.shape, f"shape mismatch: tinygrad={tinygrad_output.shape} | torch={torch_output.shape}"
      assert tinygrad_output.dtype == torch_output.dtype, f"dtype mismatch: tinygrad={tinygrad_output.dtype} | torch={torch_output.dtype}"
      np.testing.assert_allclose(tinygrad_output, torch_output, atol=atol, rtol=rtol)
    except Exception as e:
      raise Exception(f"{s} failed shape {tinygrad_output.shape}: {e}")
  if DEBUG >= 6:
    np.set_printoptions(linewidth=200, suppress=True)
    print(ret.numpy())
    print(out.detach().numpy())
  compare("forward pass", ret.numpy(), out.detach().numpy(), atol=atol, rtol=rtol)
  torch_fbp, tinygrad_fbp = np.nan, np.nan
  if not forward_only and not FORWARD_ONLY:
    # backward through the same scalar loss ((x+1)^2).mean() on both sides
    st = time.monotonic()
    (out+1).square().mean().backward()
    torch_fbp = time.monotonic() - st
    st = time.monotonic()
    (ret+1).square().mean().backward()
    for tt in tst: tt.grad.realize()
    tinygrad_fbp = time.monotonic() - st
    # compare per-input gradients with the (looser) grad tolerances
    for i, (t, tt) in enumerate(zip(ts, tst)):
      compare(f"backward pass tensor {i}", tt.grad.numpy(), t.grad.detach().numpy(), atol=grad_atol, rtol=grad_rtol)
  if not CI:
    print("\ntesting %40r torch/tinygrad fp: %.2f / %.2f ms bp: %.2f / %.2f ms " % \
      (shps, torch_fp*1000, tinygrad_fp*1000, torch_fbp*1000, tinygrad_fbp*1000), end="")
  52. def prepare_test_op(low, high, shps, vals, forward_only=False):
  53. if shps is None:
  54. ts = [torch.tensor(x, requires_grad=(not forward_only)) for x in vals]
  55. else:
  56. np.random.seed(0)
  57. np_data = [np.random.uniform(low=low, high=high, size=size).astype(_to_np_dtype(dtypes.default_float)) for size in shps]
  58. ts = [torch.tensor(data, requires_grad=(not forward_only)) for data in np_data]
  59. tst = [Tensor(x.detach().numpy(), requires_grad=(not forward_only and not FORWARD_ONLY)) for x in ts]
  60. return ts, tst
  61. class TestOps(unittest.TestCase):
  def helper_test_exception(self, shps, torch_fxn, tinygrad_fxn, expected, exact=False, vals=None, low=-1.5, high=1.5):
    """Assert both torch_fxn and tinygrad_fxn raise `expected` on the same inputs."""
    if getenv("CUDACPU") or (getenv("MOCKGPU") and Device.DEFAULT == "NV"):
      self.skipTest('helper_test_exception fails in CUDACPU')
    ts, tst = prepare_test_op(low, high, shps, vals)
    with self.assertRaises(expected) as torch_cm:
      torch_fxn(*ts)
    with self.assertRaises(expected) as tinygrad_cm:
      tinygrad_fxn(*tst)
    # optionally require the exception messages to match exactly
    if exact:
      self.assertEqual(str(torch_cm.exception), str(tinygrad_cm.exception))
    if not CI: print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="")
  def test_full_like(self):
    """full_like takes shape and dtype from the template tensor (float32 and int32)."""
    for tiny_dtype, torch_dtype in [(dtypes.float32, torch.float32), (dtypes.int32, torch.int32)]:
      a = Tensor([[1,2,3],[4,5,6]], dtype=tiny_dtype)
      b = torch.tensor([[1,2,3],[4,5,6]], dtype=torch_dtype)
      helper_test_op([], lambda: torch.full_like(b, 4), lambda: Tensor.full_like(a, 4), forward_only=True)
  def test_full(self):
    """Tensor.full with an int fill value matches torch.full(..., dtype=int32)."""
    helper_test_op([], lambda: torch.full((45,65), 4, dtype=torch.int32), lambda: Tensor.full((45,65), 4), forward_only=True)
  def test_zeros(self):
    """zeros accepts varargs dims, a shape list, and an empty shape (0-d)."""
    for shape_args in [(45, 65), ([45, 65],), ([],)]:
      helper_test_op([], lambda a=shape_args: torch.zeros(*a), lambda a=shape_args: Tensor.zeros(*a), forward_only=True)
  def test_zeros_like(self):
    """zeros_like takes shape and dtype from the template tensor (float32 and int32)."""
    for tiny_dtype, torch_dtype in [(dtypes.float32, torch.float32), (dtypes.int32, torch.int32)]:
      a = Tensor([[1,2,3],[4,5,6]], dtype=tiny_dtype)
      b = torch.tensor([[1,2,3],[4,5,6]], dtype=torch_dtype)
      helper_test_op([], lambda: torch.zeros_like(b), lambda: Tensor.zeros_like(a), forward_only=True)
  def test_empty_0(self):
    """empty() contents are arbitrary, so *0/0 maps every element to NaN for a stable compare."""
    helper_test_op([], lambda: torch.empty(45,65)*0/0, lambda: Tensor.empty(45,65)*0/0, forward_only=True)
  def test_ones(self):
    """ones accepts varargs dims, a shape list, and an empty shape (0-d)."""
    for shape_args in [(45, 65), ([45, 65],), ([],)]:
      helper_test_op([], lambda a=shape_args: torch.ones(*a), lambda a=shape_args: Tensor.ones(*a), forward_only=True)
  def test_ones_like(self):
    """ones_like takes shape and dtype from the template tensor (float32 and int32)."""
    for tiny_dtype, torch_dtype in [(dtypes.float32, torch.float32), (dtypes.int32, torch.int32)]:
      a = Tensor([[1,2,3],[4,5,6]], dtype=tiny_dtype)
      b = torch.tensor([[1,2,3],[4,5,6]], dtype=torch_dtype)
      helper_test_op([], lambda: torch.ones_like(b), lambda: Tensor.ones_like(a), forward_only=True)
  def test_eye(self):
    """Identity matrices: square, rectangular (both orientations), 1x1, and empty."""
    for args in [(10,), (3, 5), (5, 3), (1,), (0,)]:
      helper_test_op([], lambda a=args: torch.eye(*a), lambda a=args: Tensor.eye(*a), forward_only=True)
  def test_split(self):
    """split with int sizes, size lists, and explicit dims, including an uneven and a large split."""
    def make_pair(s):
      # same arange data on both sides; torch pinned to int32 to match tinygrad's int default
      return torch.arange(math.prod(s), dtype=torch.int32).reshape(s), Tensor.arange(math.prod(s)).reshape(s)
    cases = [
      (make_pair((10,)), 5, {}),
      (make_pair((10,)), [1,4,5], {}),
      (make_pair((10,)), 3, {}),
      (make_pair((3,4,)), 1, {}),
      (make_pair((3,4,)), 1, {'dim':1}),
      (make_pair((4,4,)), [2,2], {}),
      (make_pair((4,4,)), [2,2], {'dim':1}),
      (make_pair((10000,)), 2500, {}),
    ]
    for (tor, ten), sizes, kwargs in cases:
      tor_splits, ten_splits = tor.split(sizes, **kwargs), ten.split(sizes, **kwargs)
      assert len(tor_splits) == len(ten_splits)
      for tor_chunk, ten_chunk in zip(tor_splits, ten_splits):
        helper_test_op([], lambda: tor_chunk, lambda: ten_chunk, forward_only=True)
  def test_chunk(self):
    """chunk across positive and negative dims, with uneven and over-sized chunk counts.

    The original body was four copy-pasted stanzas differing only in the repeat
    shape, chunk count, and dim; they are folded into one parameterized loop.
    """
    def _check(tor_chunks, ten_chunks):
      # both libraries must produce the same number of chunks with equal contents
      assert len(tor_chunks) == len(ten_chunks)
      for i in range(len(tor_chunks)):
        helper_test_op([], lambda: tor_chunks[i], lambda: ten_chunks[i], forward_only=True)
    # (repeat shape, number of chunks, dim) — torch.repeat takes varargs, tinygrad takes a tuple
    for repeats, chunks, dim in [((8, 1), 6, 1), ((8, 1), 6, 0), ((8, 1), 3, -1), ((8, 3, 3), 3, -2)]:
      tor = torch.arange(13, dtype=torch.int32).repeat(*repeats).chunk(chunks, dim)
      ten = Tensor.arange(13).repeat(repeats).chunk(chunks, dim)
      _check(tor, ten)
  def test_arange(self):
    """arange over int ranges (compared as int32) and float ranges, with pos/neg steps."""
    for args in [(10,), (36,), (5, 10, 3), (10, 5, -3), (11, 5, -3), (1, 78, 2)]:
      helper_test_op([], lambda a=args: torch.arange(*a, dtype=torch.int32), lambda a=args: Tensor.arange(*a), forward_only=True)
    for args in [(5.5, 175.5, 2.5), (-30.2, -0.3, 0.75), (-50.3, -380.2, -2.25)]:
      helper_test_op([], lambda a=args: torch.arange(*a), lambda a=args: Tensor.arange(*a), forward_only=True)
  def test_arange_big(self):
    """A larger arange (256 elements)."""
    helper_test_op([], lambda: torch.arange(256, dtype=torch.int32), lambda: Tensor.arange(256), forward_only=True)
  def test_sum_fake(self):
    """Sum over a size-1 axis (a degenerate reduce)."""
    helper_test_op([(256, 1)], lambda x: x.sum(axis=1))
  def test_sum_collapse(self):
    """Sum of a constant ones tensor (a collapsible reduce, per the test name)."""
    helper_test_op([], lambda: torch.ones(256,256).sum(axis=1), lambda: Tensor.ones(256,256).sum(axis=1), forward_only=True)
  def test_sum_collapse_neg(self):
    """Same collapse case as test_sum_collapse but with negated ones."""
    helper_test_op([], lambda: (-torch.ones(3,3)).sum(axis=1), lambda: (-Tensor.ones(3,3)).sum(axis=1), forward_only=True)
  def test_sum_pad_collapse(self):
    """Row-sum of a ones tensor zero-padded along the summed axis."""
    helper_test_op([], lambda: torch.nn.functional.pad(torch.ones(256,256), pad=(0,64,0,0)).sum(axis=1),
                       lambda: Tensor.ones(256,256).pad(((0,0), (0,64))).sum(axis=1), forward_only=True)
  # this is more complex and won't fold for a while
  def test_sum_cat_collapse(self):
    """Row-sum of ones concatenated with zeros along the summed axis."""
    helper_test_op([], lambda: torch.cat([torch.ones(256,256), torch.zeros(256,64)], dim=1).sum(axis=1),
                       lambda: Tensor.cat(Tensor.ones(256,256), Tensor.zeros(256,64), dim=1).sum(axis=1), forward_only=True)
  def test_max_dont_collapse(self):
    """Axis max over a ones tensor; torch.max(dim) returns (values, indices), tinygrad just values."""
    helper_test_op([], lambda: torch.ones(256,256).max(1)[0], lambda: Tensor.ones(256,256).max(1), forward_only=True)
  def test_where(self):
    """where with scalar branches (int32 result) and with tensor branches over broadcastable shapes."""
    helper_test_op([(100,)],
                   lambda x: torch.where(x > 0.5, 4, 2).type(torch.int32),
                   lambda x: (x > 0.5).where(4, 2), forward_only=True)
    for shapes in [[(8,),(1,),(1,)], [(10,10),(10,),(10,)], [(100,)]*3, [(10,10)]*3]:
      helper_test_op(shapes,
                     lambda x, a, b: torch.where(x > 0.5, a, b),
                     lambda x, a, b: (x > 0.5).where(a, b), forward_only=True)
  def test_where_permute(self):
    """where with scalar branches followed by a permute of the result."""
    helper_test_op([(5, 5)],
                   lambda x: torch.where(x > 0.5, 4, 2).type(torch.int32).permute((1, 0)),
                   lambda x: (x > 0.5).where(4, 2).permute((1, 0)), forward_only=True)
  def _test_cmp(self, fxn, reverse=True):
    """Exercise an elementwise comparison across dtypes, broadcasting, consts, and special floats."""
    # float, int and bool operands
    for vals in ([[0.,1,2], [2.,1,0]], [[0,1,2], [2,1,0]], [[True, True, False], [False,True,False]]):
      helper_test_op(None, fxn, fxn, forward_only=True, vals=vals)
    # broadcasting between matching and mismatched shapes
    for shapes in ([(3, 4, 5), (3, 4, 5)], [(3, 4, 5), (5,)], [(5,), (3, 4, 5)]):
      helper_test_op(shapes, fxn, fxn, forward_only=True)
    # comparison against a python const; reverse order only when the op has one
    helper_test_op(None, lambda x,y: fxn(x,2), lambda x,y: fxn(x,2), forward_only=True, vals=[[0.,1,2], [2.,1,0]])
    if reverse:
      helper_test_op(None, lambda x,y: fxn(2,y), lambda x,y: fxn(2,y), forward_only=True, vals=[[0.,1,2], [2.,1,0]])
    # test special floats # TODO: fix nan
    specials = [0.0, 1.0, -1.0, math.inf, -math.inf]#, math.nan]
    for a in specials:
      for b in specials:
        helper_test_op(None, fxn, fxn, forward_only=True, vals=[[a], [b]])
  def test_cmp_eq(self):
    """Elementwise ==; no reverse form since equality is symmetric."""
    self._test_cmp(lambda x, y: x == y, reverse=False)
  def test_cmp_gt(self):
    """Elementwise >."""
    self._test_cmp(lambda x, y: x > y)
  def test_cmp_ge(self):
    """Elementwise >=."""
    self._test_cmp(lambda x, y: x >= y)
  def test_cmp_lt(self):
    """Elementwise <."""
    self._test_cmp(lambda x, y: x < y)
  def test_cmp_le(self):
    """Elementwise <=."""
    self._test_cmp(lambda x, y: x <= y)
  def test_cmp_ne_backwards(self):
    """backward through a bare != raises in both libraries, but gradients flow
    through x*(x != 0) and agree with torch."""
    ta, tb = torch.ones(4, requires_grad=True), torch.ones(4, requires_grad=True)
    self.assertRaises(RuntimeError, (ta != tb).sum().backward)
    a, b = Tensor.ones(4, requires_grad=True), Tensor.ones(4, requires_grad=True)
    self.assertRaises(RuntimeError, (a != b).sum().backward)
    # comparison used as a mask inside a differentiable expression is allowed
    tt = Tensor.randn(4, requires_grad=True)
    (tt*(tt != 0)).sum().backward()
    t = torch.tensor(tt.numpy(), requires_grad=True)
    (t*(t != 0)).sum().backward()
    np.testing.assert_allclose(t.grad.numpy(), tt.grad.numpy(), rtol=1e-5)
  def test_cmp_lt_backwards(self):
    """backward through a bare < raises in both libraries, but gradients flow
    through x*(x < 0) and agree with torch."""
    ta, tb = torch.ones(4, requires_grad=True), torch.ones(4, requires_grad=True)
    self.assertRaises(RuntimeError, (ta < tb).sum().backward)
    a, b = Tensor.ones(4, requires_grad=True), Tensor.ones(4, requires_grad=True)
    self.assertRaises(RuntimeError, (a < b).sum().backward)
    # comparison used as a mask inside a differentiable expression is allowed
    tt = Tensor.randn(4, requires_grad=True)
    (tt*(tt < 0)).sum().backward()
    t = torch.tensor(tt.numpy(), requires_grad=True)
    (t*(t < 0)).sum().backward()
    np.testing.assert_allclose(t.grad.numpy(), tt.grad.numpy(), rtol=1e-5)
  # TODO: fix backward of these functions
  def test_trunc(self):
    """trunc on 0-d, 2-d, and hand-picked boundary values (forward only)."""
    for shp in ([()], [(45,35)]):
      helper_test_op(shp, lambda x: x.trunc(), forward_only=True)
    helper_test_op(None, lambda x: x.trunc(), vals=[[1.499, 1.5, 1.501, 1.0, 2.1, 0.0, -5.0, -2.499, -2.5, -2.501]], forward_only=True)
  def test_floor(self):
    """floor on 0-d, 2-d, and hand-picked boundary values (forward only)."""
    for shp in ([()], [(45,35)]):
      helper_test_op(shp, lambda x: x.floor(), forward_only=True)
    helper_test_op(None, lambda x: x.floor(), vals=[[1.499, 1.5, 1.501, 1.0, 2.1, 0.0, -5.0, -2.499, -2.5, -2.501]], forward_only=True)
  def test_ceil(self):
    """ceil on 0-d, 2-d, and hand-picked boundary values (forward only)."""
    for shp in ([()], [(45,35)]):
      helper_test_op(shp, lambda x: x.ceil(), forward_only=True)
    helper_test_op(None, lambda x: x.ceil(), vals=[[1.499, 1.5, 1.501, 1.0, 2.1, 0.0, -5.0, -2.499, -2.5, -2.501]], forward_only=True)
  def test_round(self):
    """round on 0-d, 2-d, boundary values, and explicit .5 tie cases (forward only)."""
    for shp in ([()], [(45,35)]):
      helper_test_op(shp, lambda x: x.round(), forward_only=True)
    helper_test_op(None, lambda x: x.round(), vals=[[1.499, 1.5, 1.501, 1.0, 2.1, 0.0, -5.0, -2.499, -2.5, -2.501]], forward_only=True)
    # halfway values must round the same way torch does
    helper_test_op(None, lambda x: x.round(), vals=[[2.5, -1.5]], forward_only=True)
  def test_lerp(self):
    """lerp with tensor weights and with a scalar weight."""
    helper_test_op([(45,35), (45,35), (45,35)], lambda x,y,z: x.lerp(y,z))
    helper_test_op(None, lambda x,y,z: x.lerp(y,z), vals=[[1.,2.,3.], [4.,5.,6.], 0.5])
  def test_tril(self):
    """Lower-triangular: in-range, out-of-range and negative diagonals, batched,
    zero-size dims, and bool inputs."""
    for k in (0, 1, 2, -1, -2):
      helper_test_op([(3,3)], lambda x: x.tril(k))
    for k in (4, 5, 6, -4, -5, -6):
      helper_test_op([(4,5)], lambda x: x.tril(k))
    helper_test_op([(5,3,3)], lambda x: x.tril())
    helper_test_op([(5,0,3)], lambda x: x.tril())
    helper_test_op([(5,3,3)], lambda x: x.tril(1))
    helper_test_op(None, lambda x: x.tril(), vals=[[[True] * 3] * 3], forward_only=True)
  def test_triu(self):
    """Upper-triangular: in-range, out-of-range and negative diagonals, batched,
    zero-size dims, and bool inputs."""
    for k in (0, 1, 2, -1, -2):
      helper_test_op([(3,3)], lambda x: x.triu(k))
    for k in (4, 5, 6, -4, -5, -6):
      helper_test_op([(4,5)], lambda x: x.triu(k))
    helper_test_op([(5,3,3)], lambda x: x.triu())
    helper_test_op([(5,0,3)], lambda x: x.triu())
    helper_test_op([(5,3,3)], lambda x: x.triu(1))
    helper_test_op(None, lambda x: x.triu(), vals=[[[True] * 3] * 3], forward_only=True)
  def test_maximum(self):
    """maximum on matching shapes, 0-d tensors, scalar broadcast, and bools."""
    helper_test_op([(45,65), (45,65)], torch.maximum, Tensor.maximum)
    helper_test_op([(), ()], torch.maximum, Tensor.maximum)
    for vals in ([[1., 0., 3., -4.], 3.], [[1., 0., 3., -4.], [-1., -2., 3., 0.]]):
      helper_test_op(None, torch.maximum, Tensor.maximum, vals=vals)
    for vals in ([[True, False, False], True], [[True, False, False], [True, True, False]]):
      helper_test_op(None, torch.maximum, Tensor.maximum, vals=vals, forward_only=True)
  def test_minimum(self):
    """minimum on matching shapes, 0-d tensors, scalar broadcast, and bools."""
    helper_test_op([(45,65), (45,65)], torch.minimum, Tensor.minimum)
    helper_test_op([(), ()], torch.minimum, Tensor.minimum)
    for vals in ([[1., 0., 3., -4.], 3.], [[1., 0., 3., -4.], [-1., -2., 3., 0.]]):
      helper_test_op(None, torch.minimum, Tensor.minimum, vals=vals)
    for vals in ([[True, False, False], True], [[True, False, False], [True, True, False]]):
      helper_test_op(None, torch.minimum, Tensor.minimum, vals=vals, forward_only=True)
  def test_tiny_add(self):
    """Smallest add case; the original wrote [(3), (3)], which is just [3, 3]."""
    helper_test_op([3, 3], lambda x, y: x + y, Tensor.add, forward_only=True)
  def test_tiny_mul(self):
    """Smallest mul case; the original wrote [(64), (64)], which is just [64, 64]."""
    helper_test_op([64, 64], lambda x, y: x * y, Tensor.mul, forward_only=True)
  def test_add(self):
    """Elementwise add: operator form on 2-d and 0-d, plus the Tensor.add method form."""
    for shapes in ([(45,68), (45,68)], [(), ()]):
      helper_test_op(shapes, lambda x, y: x + y)
    helper_test_op([(45,68), (45,68)], lambda x, y: x + y, Tensor.add)
  def test_add3(self):
    """Chained three-operand add."""
    helper_test_op([(45,65), (45,65), (45,65)], lambda x, y, z: x + y + z)
  def test_broadcasted_add(self):
    """Broadcast add: column vector against matrix, and 0-d against matrix."""
    for shapes in ([(45,65), (45,1)], [(45,65), ()]):
      helper_test_op(shapes, lambda x, y: x + y)
  def test_broadcasted_add_2(self):
    """Broadcast add of a row vector against a matrix."""
    helper_test_op([(45,65), (65,)], lambda x, y: x + y)
  def test_sub(self):
    """Elementwise sub: operator form on 2-d and 0-d, plus the Tensor.sub method form."""
    for shapes in ([(45,65), (45,65)], [(), ()]):
      helper_test_op(shapes, lambda x, y: x - y)
    helper_test_op([(45,65), (45,65)], lambda x, y: x - y, Tensor.sub)
  def test_scalar_sub(self):
    """Subtract a python int from 2-d and 0-d tensors."""
    for shp in ([(45,65)], [()]):
      helper_test_op(shp, lambda x: x - 2)
  def test_scalar_rsub(self):
    """Reverse subtraction: python int minus tensor, 2-d and 0-d."""
    for shp in ([(45,65)], [()]):
      helper_test_op(shp, lambda x: 2 - x)
  def test_neg(self):
    """Negation via unary minus and via .neg(), including 0-d."""
    for fxn in (lambda x: -x, lambda x: x.neg()):
      helper_test_op([(45,65)], fxn)
    helper_test_op([()], lambda x: x.neg())
  def test_logical_not(self):
    """logical_not on bool and on float inputs (forward only)."""
    for vals in ([[True, False, True]], [[1.,2.,0.,0.5]]):
      helper_test_op(None, torch.logical_not, Tensor.logical_not, vals=vals, forward_only=True)
  def test_mul(self):
    """Elementwise mul: operator form on 2-d and 0-d, plus the Tensor.mul method form."""
    for shapes in ([(64,64), (64,64)], [(), ()]):
      helper_test_op(shapes, lambda x, y: x * y)
    helper_test_op([(64,64), (64,64)], lambda x, y: x * y, Tensor.mul)
  def test_scalar_mul(self):
    """Multiply by python consts in both operand orders, 2-d and 0-d."""
    for fxn in (lambda x: x*2, lambda x: x*-1, lambda x: 255*x, lambda x: 2*x):
      helper_test_op([(45,65)], fxn)
    helper_test_op([()], lambda x: x*2)
    helper_test_op([()], lambda x: 2*x)
  def test_div(self):
    """Elementwise div: operator form on 2-d and 0-d, plus the Tensor.div method form."""
    for shapes in ([(45,65), (45,65)], [(), ()]):
      helper_test_op(shapes, lambda x, y: x / y)
    helper_test_op([(45,65), (45,65)], lambda x, y: x / y, Tensor.div)
  def test_div_int(self):
    """Division of int32 tensors, by a tensor and by a python int (forward only)."""
    helper_test_op(None, lambda x,y: x/y, Tensor.div, forward_only=True, vals=np.array([[5, 6, 7],[1, 2, 3]], dtype=np.int32))
    helper_test_op(None, lambda x: x/2, lambda x: x/2, forward_only=True, vals=np.array([[3, 4, 5]], dtype=np.int32))
  def test_scalar_div(self):
    """Divide by / into python consts, 2-d and 0-d."""
    for fxn in (lambda x: x/255, lambda x: x/1, lambda x: 1/x, lambda x: x/2, lambda x: 2/x):
      helper_test_op([(45,65)], fxn)
    helper_test_op([()], lambda x: x/2)
    helper_test_op([()], lambda x: 2/x)
  def test_mul_naninf(self):
    """Multiplication by inf, -inf, and nan consts."""
    for c in (math.inf, -math.inf, math.nan):
      helper_test_op([(45,65)], lambda x: x*c)
  def test_div_naninf(self):
    """Division with inf, -inf, and nan consts on either side."""
    for c in (math.inf, -math.inf, math.nan):
      helper_test_op([(45,65)], lambda x: x/c)
    for c in (math.inf, -math.inf, math.nan):
      helper_test_op([(45,65)], lambda x: c/x)
  def test_pow_full(self):
    """Tensor-tensor pow via the ** operator and via .pow()."""
    for fxn in (lambda x, y: x**y, lambda x, y: x.pow(y)):
      helper_test_op([(45,65), (45,65)], fxn)
  def test_pow(self):
    """Integer and fractional const exponents, negative bases, 0-d tensors, and pow at 0.0."""
    for exp in (0, 1, 2, 3, -2):
      helper_test_op([(45,65)], lambda x: x**exp)
    helper_test_op([()], lambda x: x**2)
    helper_test_op([()], lambda x: x**-2)
    # Regression tests for https://github.com/tinygrad/tinygrad/issues/1151
    helper_test_op([(45,65)], lambda x: x**3, low=-30, high=-27)
    helper_test_op([()], lambda x: x**3, low=-30, high=-27)
    # Regression tests for https://github.com/tinygrad/tinygrad/issues/1251
    for exp in (0.2, 1.2):
      helper_test_op([(45,65)], lambda x: x**exp, low=-30, high=-27)
      helper_test_op([()], lambda x: x**exp, low=-30, high=-27)
    # fractional exponent at exactly 0.0, including the backward pass
    a, b = Tensor([0.0], requires_grad=True), torch.tensor([0.0], requires_grad=True)
    helper_test_op([], lambda: b**1.1, lambda: a**1.1)
  def test_pow_const(self):
    """Const bases and exponents, including 0-d and forward-only cases with tricky bases."""
    for exp in (1.0, -1.0, 2.0):
      helper_test_op([(45,65)], lambda x: x**exp)
    helper_test_op([(45,65)], lambda x: 1.0**x)
    helper_test_op([(45,65)], lambda x: 2.0**x)
    helper_test_op([()], lambda x: x**2.0)
    helper_test_op([()], lambda x: 2.0**x)
    # TODO: fix backward
    helper_test_op(None, lambda x: 0**x, vals=[[-2.,-1,0,1,2,3]], forward_only=True)
    # TODO: fix backward, should be nan
    helper_test_op(None, lambda x: (-2)**x, vals=[[-2.,-1,0,1,2,3]], forward_only=True)
  def test_sqrt(self):
    """sqrt on 2-d and 0-d tensors."""
    for shp in ([(45,65)], [()]):
      helper_test_op(shp, lambda x: x.sqrt())
  def test_rsqrt(self):
    """rsqrt on 2-d and 0-d tensors."""
    for shp in ([(45,65)], [()]):
      helper_test_op(shp, lambda x: x.rsqrt())
  def test_xor(self):
    """Bitwise xor on int32: tensor^tensor, tensor^const, const^tensor (forward only)."""
    tor = torch.tensor([[1,-8,1],[32,1,6]], dtype=torch.int)
    ten = Tensor([[1,-8,1],[32,1,6]], dtype=dtypes.int32)
    for tor_fxn, ten_fxn in [(lambda: tor^tor, lambda: ten^ten),
                             (lambda: tor^0x1337, lambda: ten^0x1337),
                             (lambda: 0x1337^tor, lambda: 0x1337^ten)]:
      helper_test_op([], tor_fxn, ten_fxn, forward_only=True)
  def test_and(self):
    """Bitwise and on int32: tensor&tensor, tensor&const, const&tensor (forward only)."""
    tor = torch.tensor([[1,-8,1],[32,1,6]], dtype=torch.int)
    ten = Tensor([[1,-8,1],[32,1,6]], dtype=dtypes.int32)
    for tor_fxn, ten_fxn in [(lambda: tor&tor, lambda: ten&ten),
                             (lambda: tor&0x1337, lambda: ten&0x1337),
                             (lambda: 0x1337&tor, lambda: 0x1337&ten)]:
      helper_test_op([], tor_fxn, ten_fxn, forward_only=True)
  def test_or(self):
    """Bitwise or on int32 tensors: tensor|tensor and both orders with a python int."""
    tor = torch.tensor([[1,-8,1],[32,1,6]], dtype=torch.int)
    ten = Tensor([[1,-8,1],[32,1,6]], dtype=dtypes.int32)
    helper_test_op([], lambda: tor|tor, lambda: ten|ten, forward_only=True)
    helper_test_op([], lambda: tor|0x1337, lambda: ten|0x1337, forward_only=True)
    helper_test_op([], lambda: 0x1337|tor, lambda: 0x1337|ten, forward_only=True)
  def test_lshift(self):
    """Left shift on uint32 tensors via <<, __lshift__, and lshift()."""
    # NOTE(review): `1<<31-1` parses as 1<<(31-1) == 2**30; if int32-max was intended
    # it should be (1<<31)-1 — confirm intent before changing.
    data = [[0,1,2],[1<<8,1<<16,1<<31-1]]
    tor = torch.tensor(data, dtype=torch.int)
    ten = Tensor(data, dtype=dtypes.uint32)
    # cast to int32 because torch does not support uint32
    helper_test_op([], lambda: tor << 0, lambda: (ten << 0).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor << 2, lambda: (ten << 2).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor << 31, lambda: (ten << 31).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor.__lshift__(2), lambda: ten.__lshift__(2).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor.bitwise_left_shift(2), lambda: ten.lshift(2).cast(dtypes.int32), forward_only=True)
  def test_rshift(self):
    """Right shift on uint32 tensors via >>, __rshift__, and rshift()."""
    # NOTE(review): `1<<31-1` parses as 1<<(31-1) == 2**30 (see test_lshift) — confirm intent.
    data = [[0,1,2],[1<<8,1<<16,1<<31-1]]
    tor = torch.tensor(data, dtype=torch.int)
    ten = Tensor(data, dtype=dtypes.uint32)
    # cast to int32 because torch does not support uint32
    helper_test_op([], lambda: tor >> 0, lambda: (ten >> 0).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor >> 2, lambda: (ten >> 2).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor >> 31, lambda: (ten >> 31).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor.__rshift__(2), lambda: ten.__rshift__(2).cast(dtypes.int32), forward_only=True)
    helper_test_op([], lambda: tor.bitwise_right_shift(2), lambda: ten.rshift(2).cast(dtypes.int32), forward_only=True)
  def test_sin(self):
    """sin on random, scalar, nan/inf, and large-magnitude inputs (loose tolerance)."""
    helper_test_op([(45,65)], lambda x: x.sin())
    helper_test_op([()], lambda x: x.sin())
    # works on real CUDA but not CUDACPU
    if not (getenv("CUDACPU") or (getenv("MOCKGPU") and Device.DEFAULT == "NV")):
      helper_test_op(None, lambda x: x.sin(), vals=[[math.nan, math.inf, -math.inf]])
      # large magnitudes stress argument reduction, hence the looser tolerances
      helper_test_op(None, lambda x: x.sin(), vals=[[1e1, 1e2, 1e3, 1e4, 1e5, 1e6, -1e1, -1e2, -1e3, -1e4, -1e5, -1e6]],
                     atol=3e-3, rtol=3e-3, grad_atol=3e-3, grad_rtol=3e-3)
  def test_cos(self):
    """cos on random, scalar, and large-magnitude inputs (loose tolerance)."""
    helper_test_op([(45,65)], lambda x: x.cos())
    helper_test_op([()], lambda x: x.cos())
    if not (getenv("CUDACPU") or (getenv("MOCKGPU") and Device.DEFAULT == "NV")):
      # large magnitudes stress argument reduction, hence the looser tolerances
      helper_test_op(None, lambda x: x.cos(), vals=[[1e1, 1e2, 1e3, 1e4, 1e5, 1e6, -1e1, -1e2, -1e3, -1e4, -1e5, -1e6]],
                     atol=3e-3, rtol=3e-3, grad_atol=3e-3, grad_rtol=3e-3)
  def test_tan(self):
    """tan on random, scalar, and large-magnitude inputs."""
    # NOTE: backward has much higher diff with input close to pi/2 and -pi/2
    helper_test_op([(45,65)], lambda x: x.tan(), low=-1.5, high=1.5)
    helper_test_op([(45,65)], lambda x: x.tan(), low=-5, high=5, forward_only=True)
    helper_test_op([()], lambda x: x.tan())
    if not (getenv("CUDACPU") or (getenv("MOCKGPU") and Device.DEFAULT == "NV")):
      # BUGFIX: this previously called x.cos() — copy-paste from test_cos; a tan test
      # must exercise tan on the large-magnitude values (none of which fall near a pole)
      helper_test_op(None, lambda x: x.tan(), vals=[[1e1, 1e2, 1e3, 1e4, 1e5, 1e6, -1e1, -1e2, -1e3, -1e4, -1e5, -1e6]],
                     atol=3e-3, rtol=3e-3, grad_atol=3e-3, grad_rtol=3e-3)
  def test_relu(self):
    """relu on a 2-d tensor and on a 0-d (scalar) tensor."""
    for shape in [(64,64), ()]:
      helper_test_op([shape], lambda x: x.relu())
  def test_relu_exact(self):
    """relu at exactly -1, 0, 1 — pins behavior at the kink."""
    helper_test_op(None, lambda x: x.relu(), vals=[[-1.,0,1]])
  def test_relu_maximum_exact(self):
    """maximum(x, 0) must match relu at the exact kink values."""
    helper_test_op(None, lambda x: torch.maximum(x, torch.zeros_like(x, requires_grad=False)), lambda x: Tensor.maximum(x, 0), vals=[[-1.,0,1]])
  def test_leakyrelu(self):
    """leakyrelu (slope 0.01) vs torch, for 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], lambda x: torch.nn.functional.leaky_relu(x,0.01), Tensor.leakyrelu)
  def test_celu(self):
    """celu for several alpha values, on 2-d and scalar tensors."""
    for val in range(1, 5):
      helper_test_op([(45,65)], lambda x: torch.nn.functional.celu(x,val), lambda x: x.celu(val))
      helper_test_op([()], lambda x: torch.nn.functional.celu(x,val), lambda x: x.celu(val))
  def test_abs(self):
    """abs on a 2-d tensor and on a 0-d (scalar) tensor."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.abs, Tensor.abs)
  def test_abs_exact(self):
    """abs at exactly -1, 0, 1 — pins behavior at the kink."""
    helper_test_op(None, torch.abs, Tensor.abs, vals=[[-1.,0,1]])
  def test_log(self):
    """natural log on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.log, Tensor.log)
  def test_log2(self):
    """base-2 log on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.log2, Tensor.log2)
  def test_exp(self):
    """exp on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.exp, Tensor.exp)
  def test_exp2(self):
    """base-2 exp on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.exp2, Tensor.exp2)
  def test_sign(self):
    """sign on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.sign, Tensor.sign)
  def test_sign_exact(self):
    """sign at exactly -1, 0, 1 — pins sign(0)."""
    helper_test_op(None, torch.sign, Tensor.sign, vals=[[-1.,0,1]])
  def test_softsign(self):
    """softsign on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.nn.functional.softsign, Tensor.softsign)
  def test_softsign_exact(self):
    """softsign at exactly -1, 0, 1."""
    helper_test_op(None, torch.nn.functional.softsign, Tensor.softsign, vals=[[-1.,0,1]])
  def test_sigmoid(self):
    """sigmoid on random inputs plus saturated ranges (±300..400) and a scalar."""
    helper_test_op([(45,65)], torch.sigmoid, Tensor.sigmoid)
    helper_test_op([(45,65)], torch.sigmoid, Tensor.sigmoid, low=300, high=400)
    helper_test_op([(45,65)], torch.sigmoid, Tensor.sigmoid, low=-400, high=-300)
    helper_test_op([()], torch.sigmoid, Tensor.sigmoid)
  def test_softplus(self):
    """softplus on 2-d and scalar tensors with tightened gradient tolerance."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.nn.functional.softplus, Tensor.softplus, grad_atol=1e-6)
  def test_gelu(self):
    """tanh-approximated gelu on random inputs and saturated ranges."""
    helper_test_op([(45,65)], lambda x: torch.nn.functional.gelu(x, approximate="tanh"), Tensor.gelu)
    helper_test_op([(45,65)], lambda x: torch.nn.functional.gelu(x, approximate="tanh"), Tensor.gelu, low=300, high=400)
    helper_test_op([(45,65)], lambda x: torch.nn.functional.gelu(x, approximate="tanh"), Tensor.gelu, low=-400, high=-300)
  def test_quick_gelu(self):
    """quick_gelu (x * sigmoid(1.702x)) on random inputs, saturated ranges, and a scalar."""
    helper_test_op([(45,65)], lambda x: x * torch.sigmoid(1.702 * x), Tensor.quick_gelu)
    helper_test_op([(45,65)], lambda x: x * torch.sigmoid(1.702 * x), Tensor.quick_gelu, low=300, high=400)
    helper_test_op([(45,65)], lambda x: x * torch.sigmoid(1.702 * x), Tensor.quick_gelu, low=-400, high=-300)
    helper_test_op([()], lambda x: x * torch.sigmoid(1.702 * x), Tensor.quick_gelu)
  def test_elu(self):
    """elu with default and custom alpha, plus a scalar tensor."""
    helper_test_op([(45,65)], torch.nn.functional.elu, Tensor.elu)
    helper_test_op([(45,65)], lambda x: torch.nn.functional.elu(x, alpha=0.1), lambda x: Tensor.elu(x, alpha=0.1))
    helper_test_op([()], torch.nn.functional.elu, Tensor.elu)
  def test_relu6(self):
    """relu6 on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.nn.functional.relu6, Tensor.relu6)
  def test_hardswish(self):
    """hardswish on 2-d and scalar tensors with tightened gradient tolerance."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.nn.functional.hardswish, Tensor.hardswish, grad_atol=1e-6)
  def test_mish(self):
    """mish on 2-d and scalar tensors."""
    for shape in [(45,65), ()]:
      helper_test_op([shape], torch.nn.functional.mish, Tensor.mish)
  def test_multinomial(self):
    """multinomial sampling; only shape/dtype are meaningfully checked."""
    # NOTE: this is random, so it has a very large atol
    helper_test_op([(1000,)], lambda x: torch.multinomial(x.clip(0,1), num_samples=1).type(torch.int32),
                   lambda x: Tensor.multinomial(x.clip(0,1)), forward_only=True, atol=1000.)
  def test_small_cumsum(self):
    """cumsum on a small 1-d tensor."""
    helper_test_op([(10)], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
  def test_simple_cumsum(self):
    """cumsum on 1-d sizes around typical kernel split boundaries."""
    helper_test_op([(512)], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
    helper_test_op([(1022)], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
  def test_cumsum(self):
    """cumsum across ranks and axes, plus out-of-range axis errors."""
    helper_test_op([()], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
    self.helper_test_exception([()], lambda x: torch.cumsum(x, dim=1), lambda x: Tensor.cumsum(x, axis=1), expected=IndexError)
    helper_test_op([(20,)], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
    self.helper_test_exception([(20,)], lambda x: torch.cumsum(x, dim=1), lambda x: Tensor.cumsum(x, axis=1), expected=IndexError)
    self.helper_test_exception([(20,)], lambda x: torch.cumsum(x, dim=-2), lambda x: Tensor.cumsum(x, axis=-2), expected=IndexError)
    helper_test_op([(20,30)], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
    helper_test_op([(20,30)], lambda x: torch.cumsum(x, dim=1), lambda x: Tensor.cumsum(x, axis=1))
    helper_test_op([(20,30,40)], lambda x: torch.cumsum(x, dim=2), lambda x: Tensor.cumsum(x, axis=2))
    helper_test_op([(20,30,40)], lambda x: torch.cumsum(x, dim=-1), lambda x: Tensor.cumsum(x, axis=-1))
  def test_cumsum_zero_axis(self):
    """cumsum where some dimension is zero-sized."""
    helper_test_op([(2,0,4)], lambda x: torch.cumsum(x, dim=1), lambda x: Tensor.cumsum(x, axis=1))
    helper_test_op([(0,3)], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
    helper_test_op([(2,3,0)], lambda x: torch.cumsum(x, dim=2), lambda x: Tensor.cumsum(x, axis=2))
  def test_argmax(self):
    """argmax: global and per-axis, with and without keepdim."""
    # check if it returns the first index for multiple occurrences
    self.assertEqual(torch.tensor([2,2]).argmax().numpy(), Tensor([2,2]).argmax().numpy())
    np.testing.assert_equal(Tensor([2,2]).argmax().numpy(), np.array(0))
    np.testing.assert_equal(Tensor([1,2,2]).argmax().numpy(), np.array(1))
    helper_test_op([(10,20)], lambda x: x.argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True)
    helper_test_op([(10,20)], lambda x: x.argmax(0, False).type(torch.int32), lambda x: x.argmax(0, False), forward_only=True)
    helper_test_op([(10,20)], lambda x: x.argmax(1, False).type(torch.int32), lambda x: x.argmax(1, False), forward_only=True)
    helper_test_op([(10,20)], lambda x: x.argmax(1, True).type(torch.int32), lambda x: x.argmax(1, True), forward_only=True)
  def test_argmin(self):
    """argmin: global and per-axis, with and without keepdim."""
    # check if it returns the first index for multiple occurrences
    self.assertEqual(torch.tensor([2, 2]).argmin().numpy(), Tensor([2, 2]).argmin().numpy())
    np.testing.assert_equal(Tensor([2,2]).argmin().numpy(), np.array(0))
    np.testing.assert_equal(Tensor([3,2,2]).argmin().numpy(), np.array(1))
    helper_test_op([(10,20)], lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True)
    helper_test_op([(10,20)], lambda x: x.argmin(0, False).type(torch.int32), lambda x: x.argmin(0, False), forward_only=True)
    helper_test_op([(10,20)], lambda x: x.argmin(1, False).type(torch.int32), lambda x: x.argmin(1, False), forward_only=True)
    helper_test_op([(10,20)], lambda x: x.argmin(1, True).type(torch.int32), lambda x: x.argmin(1, True), forward_only=True)
  def test_einsum(self):
    """einsum against torch for a broad set of subscript patterns."""
    # matrix transpose
    helper_test_op([(150,150)], lambda a: torch.einsum('ij->ji', a), lambda a: Tensor.einsum('ij->ji', a))
    helper_test_op([(150,150)], lambda a: torch.einsum('ij -> ji', a), lambda a: Tensor.einsum('ij -> ji', a))
    helper_test_op([(150,150)], lambda a: torch.einsum('ji', a), lambda a: Tensor.einsum('ji', a))
    helper_test_op([(20,30,40)], lambda a: torch.einsum('jki', a), lambda a: Tensor.einsum('jki', a))
    helper_test_op([(20,30,40)], lambda a: torch.einsum('dog', a), lambda a: Tensor.einsum('dog', a))
    # no -> and empty rhs
    helper_test_op([(20,30),(30,40)], lambda a, b: torch.einsum('ij,jk', a, b), lambda a, b: Tensor.einsum('ij,jk', a, b))
    # sum all elements
    helper_test_op([(20,30,40)], lambda a: torch.einsum('ijk->', a), lambda a: Tensor.einsum('ijk->', a))
    # column sum
    helper_test_op([(50,50)], lambda a: torch.einsum('ij->j', a), lambda a: Tensor.einsum('ij->j', a))
    # row sum
    helper_test_op([(15,15)], lambda a: torch.einsum('ij->i', a), lambda a: Tensor.einsum('ij->i', a))
    # matrix-vector multiplication
    helper_test_op([(15,20), (20,)], lambda a,b: torch.einsum('ik,k->i', a,b), lambda a,b: Tensor.einsum('ik,k->i', a, b))
    # matrix-matrix multiplication
    helper_test_op([(15,20), (20,30)], lambda a,b: torch.einsum('ik,kj->ij', a,b), lambda a,b: Tensor.einsum('ik,kj->ij', a, b))
    # matrix-matrix multiplication, different letter order
    helper_test_op([(15,20), (20,30)], lambda a,b: torch.einsum('jk,ki->ji', a,b), lambda a,b: Tensor.einsum('jk,ki->ji', a, b))
    # dot product
    helper_test_op([(30),(30)], lambda a,b: torch.einsum('i,i->i', [a,b]), lambda a,b: Tensor.einsum('i,i->i', [a,b]))
    # hadamard product
    helper_test_op([(30,40),(30,40)], lambda a,b: torch.einsum('ij,ij->ij', a,b), lambda a,b: Tensor.einsum('ij,ij->ij', a,b))
    # outer product
    helper_test_op([(15,), (15,)], lambda a,b: torch.einsum('i,j->ij', a,b), lambda a,b: Tensor.einsum('i,j->ij',a,b))
    # batch matrix multiplication
    helper_test_op([(10,20,30),(10,30,40)], lambda a,b: torch.einsum('ijk,ikl->ijl', [a, b]), lambda a,b: Tensor.einsum('ijk,ikl->ijl', [a, b]))
    # batch matrix multiplication, result permuted
    helper_test_op([(10,20,25),(10,25,32)], lambda a,b: torch.einsum('ijk,ikl->jil', [a, b]), lambda a,b: Tensor.einsum('ijk,ikl->jil', [a, b]))
    # batch matrix multiplication, result & input permuted
    helper_test_op([(20,10,25),(10,25,32)], lambda a,b: torch.einsum('jik,ikl->jil', [a, b]), lambda a,b: Tensor.einsum('jik,ikl->jil', [a, b]))
    # batch matrix multiplication, result with different letters
    helper_test_op([(10,20,30),(10,30,40)], lambda a,b: torch.einsum('ijk,ika->ija', [a, b]), lambda a,b: Tensor.einsum('ijk,ika->ija', [a, b]))
    # tensor contraction
    helper_test_op([(3,5,8,10),(11,13,5,16,8)], lambda a,b: torch.einsum('pqrs,tuqvr->pstuv', a,b),
                   lambda a,b: Tensor.einsum('pqrs,tuqvr->pstuv', a,b), atol=1e-5)
    # tensor contraction, input permuted
    helper_test_op([(3,8,10,5),(11,5,13,16,8)], lambda a,b: torch.einsum('prsq,tquvr->pstuv', a,b),
                   lambda a,b: Tensor.einsum('prsq,tquvr->pstuv', a,b), atol=1e-5)
    # tensor contraction, result with different letters
    helper_test_op([(3,5,8,10),(11,13,5,16,8)], lambda a,b: torch.einsum('zqrs,tuqvr->zstuv', a,b),
                   lambda a,b: Tensor.einsum('zqrs,tuqvr->zstuv', a,b), atol=1e-5)
    # bilinear transformation
    helper_test_op([(2,3),(5,3,7),(2,7)], lambda a,b,c: torch.einsum('ik,jkl,il->ij', [a,b,c]), lambda a,b,c: Tensor.einsum('ik,jkl,il->ij', [a,b,c]))
    # test ellipsis # TODO: FIXME
    with self.assertRaises(Exception):
      helper_test_op([(16,29,256),(16,29,256)], lambda a,b: torch.einsum('...id, ...jd -> ...ij', [a,b]),
                     lambda a,b: Tensor.einsum('...id, ...jd -> ...ij', [a,b]))
  def test_einsum_shape_check(self):
    """einsum must reject inputs whose shared-letter dimensions disagree."""
    a = Tensor.zeros(3,8,10,5)
    b = Tensor.zeros(11,5,13,16,8)
    with self.assertRaises(AssertionError):
      Tensor.einsum('pqrs,tuqvr->pstuv',a,b)
  def test_einsum_arity_check1(self):
    """einsum must reject more operands than the formula names."""
    a = Tensor.zeros(10,15)
    b = Tensor.zeros(15,20)
    c = Tensor.zeros(20,10)
    with self.assertRaises(AssertionError):
      Tensor.einsum('ij,jk->ij', a,b,c)
  def test_einsum_arity_check2(self):
    """einsum must reject fewer operands than the formula names."""
    a = Tensor.zeros(10,10)
    with self.assertRaises(AssertionError):
      Tensor.einsum('ij,jk->ij', a)
  @unittest.skipIf(IMAGE>0, "no 1d dot for images")
  def test_dot_1d(self):
    """matmul with 1-d operands (vector-vector, vector-matrix, batched) and shape errors."""
    helper_test_op([(65), (65)], lambda x,y: x.matmul(y), Tensor.dot)
    helper_test_op([(65), (65,45)], lambda x,y: x.matmul(y), Tensor.dot)
    helper_test_op([(45,65), (65)], lambda x,y: x.matmul(y), Tensor.dot)
    helper_test_op([(8,45,65), (65)], lambda x,y: x.matmul(y), Tensor.dot)
    helper_test_op([(65), (8,65,45)], lambda x,y: x.matmul(y), Tensor.dot)
    self.helper_test_exception([(4), (1,2)], lambda x, y: x.matmul(y), Tensor.dot, expected=(RuntimeError, AssertionError))
    self.helper_test_exception([(2,1), (4)], lambda x, y: x.matmul(y), Tensor.dot, expected=(RuntimeError, AssertionError))
    self.helper_test_exception([(1), (4)], lambda x, y: x.matmul(y), Tensor.dot, expected=(RuntimeError, AssertionError))
  def test_dot(self):
    """2-d and batched matmul, mismatched shapes, and the 0-d rejection."""
    helper_test_op([(45,65), (65,100)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-5)
    helper_test_op([(8,45,65), (8,65,100)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-5)
    self.helper_test_exception([(2, 4), (1, 3)], lambda x, y: x.matmul(y), Tensor.dot, expected=(RuntimeError, AssertionError))
    self.helper_test_exception([(2, 1), (4, 3)], lambda x, y: x.matmul(y), Tensor.dot, expected=(RuntimeError, AssertionError))
    # matmul of a 0-d tensor must raise
    with self.assertRaises(AssertionError):
      a = Tensor(3.14)
      a.matmul(a)
  def test_mulacc_with_zero_strides(self):
    """multiply-accumulate where operands are broadcast (zero-stride) expands."""
    helper_test_op(
      [],
      lambda: torch.tensor(1.0).reshape((1,1,1)).expand(2,4,3).mul(torch.tensor(1.0).reshape((1,1,1)).expand(2,4,3)).sum(-1),
      lambda: Tensor(1.0).reshape((1,1,1)).expand(2,4,3).mul(Tensor(1.0).reshape((1,1,1)).expand(2,4,3)).sum(-1),
      forward_only=True
    )
    a = [[1.,1.,1.,1.], [1.,1.,1.,1.]]
    b = [1.,1.,1.,1.]
    helper_test_op(
      [],
      lambda: torch.tensor(a).reshape((2,4,1)).expand(2,4,3).mul(torch.tensor(b).reshape((1,4,1)).expand(2,4,3)).sum([0,2]),
      lambda: Tensor(a).reshape((2,4,1)).expand(2,4,3).mul(Tensor(b).reshape((1,4,1)).expand(2,4,3)).sum([0,2]),
      forward_only=True
    )
    helper_test_op(
      [],
      lambda: torch.ones((1,2)).matmul(torch.ones((2,3))), lambda: Tensor.ones((1,2)).dot(Tensor.ones((2,3))),
      forward_only=True
    )
  def test_matmul_simple(self):
    """vector @ small square matrix."""
    helper_test_op([(4), (4,4)], lambda x,y: x.matmul(y), Tensor.dot)
  def test_matmul(self):
    """vector @ non-square matrix."""
    helper_test_op([(64), (64,99)], lambda x,y: x.matmul(y), Tensor.dot)
  @unittest.skipIf(IMAGE>0, "no batched matmul on images")
  def test_matmul_batched(self):
    """vector broadcast against a 4-d batched matrix."""
    helper_test_op([(3), (1,3,3,5)], lambda x,y: x.matmul(y), Tensor.dot)
  @unittest.skipIf(IMAGE>0, "no batched matmul on images")
  def test_matmul_batched_vector(self):
    """2-d operand broadcast against a 4-d batched matrix."""
    helper_test_op([(4,3), (1,3,3,5)], lambda x,y: x.matmul(y), Tensor.dot)
  def test_small_gemm(self):
    """8x8 gemm via the @ operator."""
    helper_test_op([(8,8), (8,8)], lambda x,y: x.matmul(y), lambda x,y: x@y)
  def test_9_gemm(self):
    """9x9 gemm — a non-power-of-two size."""
    helper_test_op([(9,9), (9,9)], lambda x,y: x.matmul(y), lambda x,y: x@y)
  def test_small_gemm_padded(self):
    """gemm of 9x9 inputs zero-padded up to 16x16."""
    helper_test_op([(9,9), (9,9)],
                   lambda x,y: torch.nn.functional.pad(x, (0,7,0,7)).matmul(torch.nn.functional.pad(y, (0,7,0,7))),
                   lambda x,y: x.pad(((0,7),(0,7)))@y.pad(((0,7),(0,7))))
  def test_small_gemm_range(self):
    """gemm with deterministic arange inputs."""
    helper_test_op(None, lambda x,y: x.matmul(y), lambda x,y: x@y, vals=[np.arange(0,64,dtype=np.float32).reshape(8,8),
                                                                         np.arange(64,128,dtype=np.float32).reshape(8,8)])
  def test_small_gemm_eye(self):
    """identity @ identity must be identity."""
    helper_test_op(None, lambda x,y: x.matmul(y), lambda x,y: x@y, vals=[np.eye(8).astype(np.float32), np.eye(8).astype(np.float32)])
  def test_gemm(self):
    """64x64 gemm."""
    helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y), Tensor.dot)
  def test_big_gemm(self):
    """256x256 gemm with relaxed atol for accumulated float error."""
    helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-4)
  @unittest.skipIf(IMAGE>0, "no 0 in shape matmul on images")
  def test_gemm_with_zeros_shape(self):
    """matmul where one or both operands have a zero-sized dimension."""
    helper_test_op([(8,8), (8,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
    helper_test_op([(0,8), (8,8)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
    helper_test_op([(0,8), (8,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
    helper_test_op([(8,0), (0,8)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
    helper_test_op([(0,0), (0,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
    helper_test_op([(0), (0,8)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
    helper_test_op([(0), (0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
  def test_broadcastdot(self):
    """batched lhs @ 2-d rhs, and 0-d lhs rejection."""
    helper_test_op([(10,45,65), (65,45)], lambda x,y: x @ y, Tensor.dot)
    with self.assertRaises(AssertionError):
      a = Tensor(3.14)
      b = Tensor.ones(3,3)
      a @ b
  def test_multidot(self):
    """matmul with one and two leading batch dimensions."""
    helper_test_op([(10,45,65), (10,65,45)], lambda x,y: x @ y, Tensor.dot)
    helper_test_op([(3,3,45,65), (3,3,65,45)], lambda x,y: x @ y, Tensor.dot)
  def test_sum_simple(self):
    """sum of a two-element vector."""
    helper_test_op(None, lambda x: x.sum(), vals=[[1.,1.]])
  def test_sum_full(self):
    """sum over a large 1-d tensor (16384 elements)."""
    helper_test_op([(16384)], lambda x: x.sum())
  def test_sum_relu(self):
    """elementwise -> reduce -> elementwise chain."""
    helper_test_op([(3,4,5)], lambda x: x.relu().sum().relu())
  def test_sum_tiny(self):
    """multi-axis sum on a tiny tensor."""
    helper_test_op([(4,2,2)], lambda x: x.sum(axis=(0,2)))
  def test_sum(self):
    """sum over various axes on 2-d/4-d/0-d tensors, plus out-of-range axis errors."""
    helper_test_op([(45,3)], lambda x: x.sum())
    helper_test_op([(3,4,5,6)], lambda x: x.sum(axis=3))
    helper_test_op([(3,4,5,6)], lambda x: x.sum(axis=(1,3)))
    helper_test_op([(3,4,5,6)], lambda x: x.sum(axis=(0,2)))
    helper_test_op([(3,4,5,6)], lambda x: x.sum(axis=(1,2)))
    helper_test_op([(3,4,5,6)], lambda x: x.sum(axis=1))
    helper_test_op([()], lambda x: x.sum())
    helper_test_op([()], lambda x: x.sum(0))
    helper_test_op([()], lambda x: x.sum(-1))
    helper_test_op([()], lambda x: x.sum(()))
    self.helper_test_exception([(3,4,5,6)], lambda x: x.sum(5), lambda x: x.sum(5), expected=IndexError)
    self.helper_test_exception([()], lambda x: x.sum(1), lambda x: x.sum(1), expected=IndexError)
    self.helper_test_exception([()], lambda x: x.sum((1,)), lambda x: x.sum((1,)), expected=IndexError)
  def test_sum_with_zeros_shape(self):
    """sum over each axis combination of a tensor with a zero-sized dimension."""
    for axes in [(0,), (1,), (0,1)]:
      helper_test_op([(4, 0)], lambda x: x.sum(axis=axes))
  def test_min(self):
    """global min, min chained with elementwise op, and scalar input."""
    helper_test_op([(3,3)], lambda x: x.min())
    helper_test_op([(45,3)], lambda x: x.min())
    helper_test_op([(45,3)], lambda x: x.min().mul(0.5))
    helper_test_op([()], lambda x: x.min())
  def test_max(self):
    """global and per-axis max, including duplicate-max values and a scalar."""
    helper_test_op([(45,3)], lambda x: x.max())
    helper_test_op([(45,3)], lambda x: x.max().mul(0.5))
    helper_test_op(None, lambda x: x.max().mul(0.5), vals=[[[1.0,1.0,0.0,1.0]],])
    # torch max(axis) returns (values, indices); compare values only
    helper_test_op([(3,4,5,6)], lambda x: x.max(axis=1)[0], lambda x: x.max(axis=1))
    helper_test_op([()], lambda x: x.max())
  def test_any(self):
    """any() on random data, all boolean combinations, and a scalar."""
    helper_test_op([(3,4,5,6)], lambda x: x.any(), forward_only=True)
    helper_test_op(None, lambda x: x.any(), vals=[[True, True]], forward_only=True)
    helper_test_op(None, lambda x: x.any(), vals=[[True, False]], forward_only=True)
    helper_test_op(None, lambda x: x.any(), vals=[[False, False]], forward_only=True)
    helper_test_op([()], lambda x: x.any(), forward_only=True)
  def test_any_axis(self):
    """any() over a multi-axis reduction."""
    helper_test_op([(3,4,5,6)], lambda x: x.any(axis=(1,2)), forward_only=True)
  def test_any_zero_axis(self):
    """any() reducing over zero-sized axes (vacuously False)."""
    helper_test_op([(1,0,3,0,5)], lambda x: x.any(axis=(1,3)), forward_only=True)
  def test_all(self):
    """all() on random data, all boolean combinations, and a scalar."""
    helper_test_op([(3,4,5,6)], lambda x: x.all(), forward_only=True)
    helper_test_op(None, lambda x: x.all(), vals=[[True, True]], forward_only=True)
    helper_test_op(None, lambda x: x.all(), vals=[[True, False]], forward_only=True)
    helper_test_op(None, lambda x: x.all(), vals=[[False, False]], forward_only=True)
    helper_test_op([()], lambda x: x.all(), forward_only=True)
  def test_all_axis(self):
    """all() over a multi-axis reduction."""
    helper_test_op([(3,4,5,6)], lambda x: x.all(axis=(1,2)), forward_only=True)
  def test_all_zero_axis(self):
    """all() reducing over zero-sized axes (vacuously True)."""
    helper_test_op([(1,0,3,0,5)], lambda x: x.all(axis=(1,3)), forward_only=True)
  def test_mean(self):
    """global mean on a 4-d tensor and on a scalar tensor."""
    for shape in [(3,4,5,6), ()]:
      helper_test_op([shape], lambda x: x.mean())
  def test_mean_axis(self):
    """mean over a multi-axis reduction."""
    helper_test_op([(3,4,5,6)], lambda x: x.mean(axis=(1,2)))
  def test_mean_zero_axis(self):
    """mean over zero-sized axes (expected nan, matching torch)."""
    helper_test_op([(1,0,3,0,5)], lambda x: x.mean(axis=(1,3)))
  def test_var(self):
    """global variance with various Bessel corrections."""
    helper_test_op([(15, 25, 35)], lambda x: x.var())
    helper_test_op([(15, 25, 35)], lambda x: x.var(correction=0))
    helper_test_op([(15, 25, 35)], lambda x: x.var(correction=5))
    # TODO: fix this
    # helper_test_op([(10, 2)], lambda x: x.var(correction=50))
  def test_var_axis(self):
    """per-axis variance with default and zero correction."""
    helper_test_op([(15, 25, 35)], lambda x: x.var(0))
    helper_test_op([(15, 25, 35)], lambda x: x.var(2))
    helper_test_op([(15, 25, 35)], lambda x: x.var([1, 2]))
    helper_test_op([(15, 25, 35)], lambda x: x.var(0, correction=0))
    helper_test_op([(15, 25, 35)], lambda x: x.var(2, correction=0))
    helper_test_op([(15, 25, 35)], lambda x: x.var([1, 2], correction=0))
  def test_var_zero_in_axis(self):
    """variance reducing over zero-sized axes."""
    helper_test_op([(1,0,3,0,5)], lambda x: x.var(axis=(1,3)))
    helper_test_op([(1,0,3,0,5)], lambda x: x.var(axis=(1,3), correction=0))
    helper_test_op([(1,0,3,0,5)], lambda x: x.var(axis=(1,3), correction=5))
  # TODO: fix backward when correction >= n
  def test_var_one_in_axis(self):
    """variance where a reduced axis has size one (correction can exceed n)."""
    helper_test_op([(1,2,3,1,5)], lambda x: x.var(axis=(0,3)), forward_only=True)
    helper_test_op([(1,2,3,1,5)], lambda x: x.var(axis=(0,3), correction=0))
    helper_test_op([(1,2,3,1,5)], lambda x: x.var(axis=(0,3), correction=5), forward_only=True)
    helper_test_op([(1,2,3,1,5)], lambda x: x.var(axis=(0,4)))
    helper_test_op([(1,2,3,1,5)], lambda x: x.var(axis=(0,4), correction=0))
    helper_test_op([(1,2,3,1,5)], lambda x: x.var(axis=(0,4), correction=5), forward_only=True)
  def test_var_keepdim(self):
    """variance with keepdim=True."""
    helper_test_op([(15, 25, 35)], lambda x: x.var(keepdim=True))
    helper_test_op([(15, 25, 35)], lambda x: x.var(0, keepdim=True, correction=0))
  def test_std(self):
    """global standard deviation with various Bessel corrections."""
    helper_test_op([(15, 25, 35)], lambda x: x.std())
    helper_test_op([(15, 25, 35)], lambda x: x.std(correction=0))
    helper_test_op([(15, 25, 35)], lambda x: x.std(correction=5))
  def test_std_axis(self):
    """per-axis standard deviation with default and zero correction."""
    helper_test_op([(15, 25, 35)], lambda x: x.std(0))
    helper_test_op([(15, 25, 35)], lambda x: x.std(2))
    helper_test_op([(15, 25, 35)], lambda x: x.std([1, 2]))
    helper_test_op([(15, 25, 35)], lambda x: x.std(0, correction=0))
    helper_test_op([(15, 25, 35)], lambda x: x.std(2, correction=0))
    helper_test_op([(15, 25, 35)], lambda x: x.std([1, 2], correction=0))
  def test_std_zero_in_axis(self):
    """standard deviation reducing over zero-sized axes."""
    helper_test_op([(1,0,3,0,5)], lambda x: x.std(axis=(1,3)))
    helper_test_op([(1,0,3,0,5)], lambda x: x.std(axis=(1,3), correction=0))
    helper_test_op([(1,0,3,0,5)], lambda x: x.std(axis=(1,3), correction=5))
  # TODO: fix backward when correction >= n
  def test_std_one_in_axis(self):
    """standard deviation where a reduced axis has size one."""
    helper_test_op([(1,2,3,1,5)], lambda x: x.std(axis=(0,3)), forward_only=True)
    helper_test_op([(1,2,3,1,5)], lambda x: x.std(axis=(0,3), correction=0))
    helper_test_op([(1,2,3,1,5)], lambda x: x.std(axis=(0,3), correction=5), forward_only=True)
    helper_test_op([(1,2,3,1,5)], lambda x: x.std(axis=(0,4)))
    helper_test_op([(1,2,3,1,5)], lambda x: x.std(axis=(0,4), correction=0))
    helper_test_op([(1,2,3,1,5)], lambda x: x.std(axis=(0,4), correction=5))
  def test_std_keepdim(self):
    """standard deviation with keepdim=True."""
    helper_test_op([(15, 25, 35)], lambda x: x.std(keepdim=True))
    helper_test_op([(15, 25, 35)], lambda x: x.std(0, keepdim=True, correction=0))
  def test_softmax(self):
    """softmax vs torch.nn.Softmax on 2-d, 1-d, and 0-d tensors."""
    # exceed per kernel buffer limit with backward
    forward_only = (Device.DEFAULT == "WEBGPU")
    helper_test_op([(45,65)], torch.nn.Softmax(dim=1), Tensor.softmax, atol=1e-7, grad_atol=1e-7, forward_only=forward_only)
    helper_test_op([(45)], torch.nn.Softmax(dim=0), Tensor.softmax, atol=1e-7, grad_atol=1e-7, forward_only=forward_only)
    helper_test_op([()], torch.nn.Softmax(dim=0), Tensor.softmax, atol=1e-7, grad_atol=1e-7, forward_only=forward_only)
    helper_test_op([()], torch.nn.Softmax(dim=-1), Tensor.softmax, atol=1e-7, grad_atol=1e-7, forward_only=forward_only)
  def test_softmax_other_axis(self):
    """softmax along each axis of a 3-d tensor."""
    helper_test_op([(10,10,10)], lambda x: x.softmax(0), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(10,10,10)], lambda x: x.softmax(1), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(10,10,10)], lambda x: x.softmax(2), atol=1e-7, grad_atol=1e-7)
  def test_softmax_argmax(self):
    """argmax of a softmax along both axes (softmax preserves argmax)."""
    helper_test_op([(45,65)], lambda x: x.softmax(0).argmax().type(torch.int32),
                   lambda x: x.softmax(0).argmax(), forward_only=True, atol=1e-7, grad_atol=1e-7)
    helper_test_op([(45,65)], lambda x: x.softmax(1).argmax().type(torch.int32),
                   lambda x: x.softmax(1).argmax(), forward_only=True, atol=1e-7, grad_atol=1e-7)
  def test_log_softmax(self):
    """log_softmax vs torch.nn.LogSoftmax on 2-d, 1-d, and 0-d tensors."""
    helper_test_op([(45,65)], torch.nn.LogSoftmax(dim=1), Tensor.log_softmax, atol=1e-7, grad_atol=1e-7)
    helper_test_op([(45)], torch.nn.LogSoftmax(dim=0), Tensor.log_softmax, atol=1e-7, grad_atol=1e-7)
    helper_test_op([()], torch.nn.LogSoftmax(dim=0), Tensor.log_softmax, atol=1e-7, grad_atol=1e-7)
    helper_test_op([()], torch.nn.LogSoftmax(dim=-1), Tensor.log_softmax, atol=1e-7, grad_atol=1e-7)
  def test_log_softmax_other_axis(self):
    """log_softmax along each axis of a 3-d tensor."""
    helper_test_op([(10,10,10)], lambda x: x.log_softmax(0), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(10,10,10)], lambda x: x.log_softmax(1), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(10,10,10)], lambda x: x.log_softmax(2), atol=1e-7, grad_atol=1e-7)
  def test_logsumexp(self):
    """logsumexp along each axis, with keepdim, and on 1-d/0-d tensors."""
    helper_test_op([(45,65)], lambda x: torch.logsumexp(x, dim=0), lambda x: x.logsumexp(0), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(45,65)], lambda x: torch.logsumexp(x, dim=0, keepdim=True), lambda x: x.logsumexp(0, True), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(45,65)], lambda x: torch.logsumexp(x, dim=1), lambda x: x.logsumexp(1), atol=1e-7, grad_atol=1e-7)
    helper_test_op([(45)], lambda x: torch.logsumexp(x, dim=0), lambda x: x.logsumexp(0), atol=1e-7, grad_atol=1e-7)
    helper_test_op([()], lambda x: torch.logsumexp(x, dim=0), lambda x: x.logsumexp(0), atol=1e-7, grad_atol=1e-7)
    helper_test_op([()], lambda x: torch.logsumexp(x, dim=-1), lambda x: x.logsumexp(-1), atol=1e-7, grad_atol=1e-7)
  def test_sinh(self):
    """sinh on random and extreme (overflowing) inputs."""
    helper_test_op([(45,65)], lambda x: x.sinh(), grad_atol=1e-6)
    # TODO: backward nan instead of inf
    helper_test_op([(45,65)], lambda x: x.sinh(), grad_atol=1e-6, low=-300, high=-297, forward_only=True)
    helper_test_op([(45,65)], lambda x: x.sinh(), grad_atol=1e-6, low=300, high=303, forward_only=True)
  def test_cosh(self):
    # cosh on the default range, then forward-only on extreme ranges where cosh overflows
    helper_test_op([(45,65)], lambda x: x.cosh(), grad_atol=1e-6)
    # TODO: backward nan instead of inf
    helper_test_op([(45,65)], lambda x: x.cosh(), grad_atol=1e-6, low=-300, high=-297, forward_only=True)
    helper_test_op([(45,65)], lambda x: x.cosh(), grad_atol=1e-6, low=300, high=303, forward_only=True)
  def test_tanh(self):
    # tanh saturates, so extreme input ranges stay well-behaved in forward and backward
    helper_test_op([(45,65)], lambda x: x.tanh(), grad_atol=1e-6)
    helper_test_op([(45,65)], lambda x: x.tanh(), grad_atol=1e-6, low=-300, high=-297)
    helper_test_op([(45,65)], lambda x: x.tanh(), grad_atol=1e-6, low=300, high=303)
  def test_hardtanh(self):
    # hardtanh with symmetric clip bounds 10, 15, 20, 25, on matrix and scalar inputs
    for val in range(10, 30, 5):
      helper_test_op([(45,65)], lambda x: torch.nn.functional.hardtanh(x, -val, val), lambda x: x.hardtanh(-val, val), grad_atol=1e-6)
      helper_test_op([()], lambda x: torch.nn.functional.hardtanh(x, -val, val), lambda x: x.hardtanh(-val, val), grad_atol=1e-6)
  def test_asinh(self):
    # asinh on default and extreme ranges
    helper_test_op([(45,65)], lambda x: x.asinh(), grad_atol=1e-6)
    # NOTE: this one has larger atol
    helper_test_op([(45,65)], lambda x: x.asinh(), atol=1e-2, grad_atol=1e-6, low=-300, high=-297)
    helper_test_op([(45,65)], lambda x: x.asinh(), grad_atol=1e-6, low=300, high=303)
  def test_acosh(self):
    # acosh on the default range, then on far-negative (NaN domain) and far-positive ranges
    helper_test_op([(45,65)], lambda x: x.acosh(), grad_atol=1e-6)
    for lo, hi in [(-300, -297), (300, 303)]:
      helper_test_op([(45,65)], lambda x: x.acosh(), grad_atol=1e-6, low=lo, high=hi)
  def test_atanh(self):
    # atanh on the default range, then on out-of-domain ranges (|x| > 1 gives NaN like torch)
    helper_test_op([(45,65)], lambda x: x.atanh(), grad_atol=1e-6)
    for lo, hi in [(-300, -297), (300, 303)]:
      helper_test_op([(45,65)], lambda x: x.atanh(), grad_atol=1e-6, low=lo, high=hi)
  def test_topo_sort(self):
    # (x+x)*x reuses the same node twice, exercising topological sort in backward
    for shape in [(45,65), ()]:
      helper_test_op([shape], lambda x: (x+x)*x, grad_atol=1e-6)
  def test_flip_eye_crash(self):
    # regression test: eye @ flipped-eye matmul, forward only
    helper_test_op([], lambda: (torch.eye(10)@torch.eye(10).flip(0)),
      lambda: (Tensor.eye(10)@Tensor.eye(10).flip(0)), forward_only=True)
  def test_broadcast_full(self):
    # each elementwise binop broadcasting over fully mismatched shapes
    binops = [(torch.add, Tensor.add), (torch.sub, Tensor.sub), (torch.mul, Tensor.mul),
              (torch.div, Tensor.div), (torch.pow, Tensor.pow)]
    for torch_op, tinygrad_op in binops:
      for shapes in [((5,13,24,16), (5,1,24,1)), ((1,3,1,7,1), (2,1,5,1,8))]:
        with self.subTest(op=torch_op.__name__, shapes=shapes):
          # pow is restricted to positive bases to keep values/gradients finite
          extra = {"low": 0, "high": 3} if tinygrad_op is Tensor.pow else {}
          helper_test_op(shapes, torch_op, tinygrad_op, **extra)
  def test_broadcast_simple(self):
    # division broadcast against a column vector and against a scalar
    helper_test_op([(45,65), (45,1)], lambda x,y: x/y)
    helper_test_op([(45,65), ()], lambda x,y: x/y)
  def test_broadcast_partial(self):
    # each elementwise binop broadcasting where only some dims differ
    binops = [(torch.add, Tensor.add), (torch.sub, Tensor.sub), (torch.mul, Tensor.mul),
              (torch.div, Tensor.div), (torch.pow, Tensor.pow)]
    for torch_op, tinygrad_op in binops:
      for shapes in [((1,32,32,32), (1,32,1,1)), ((5,13,24,16,2), (1,13,24,1,1)),
                     ((4,1), (4,5)), ((1,4), (5,4))]:
        with self.subTest(op=torch_op.__name__, shapes=shapes):
          # NOTE: ANE backwards?
          # pow is restricted to positive bases to keep values/gradients finite
          extra = {"low": 0, "high": 3} if tinygrad_op is Tensor.pow else {}
          helper_test_op(shapes, torch_op, tinygrad_op, **extra)
  def test_slice_in_bounds_1dim(self):
    # basic in-bounds slicing of a 1-D tensor, including a negative start
    helper_test_op([(3)], lambda x: x[1:3])
    helper_test_op([(3)], lambda x: x[0:2])
    helper_test_op([(3)], lambda x: x[-2:2])
  def test_slice_on_0dim_tensor(self):
    # x[None] adds a dim to a scalar; integer indexing a 0-dim tensor raises IndexError
    helper_test_op([()], lambda x: x[None])

    with self.assertRaises(IndexError):
      a = Tensor(3.14)
      a[0]
  def test_slice_int_indexing(self):
    # integer indexing with positive, negative, and multi-dim indices
    helper_test_op([(3)], lambda x: x[0])
    helper_test_op([(3)], lambda x: x[2])
    helper_test_op([(3)], lambda x: x[-1])
    helper_test_op([(3)], lambda x: x[-3])
    helper_test_op([(10,10)], lambda x: x[1])
    helper_test_op([(3,3,3)], lambda x: x[1,1,1])
  def test_slice_in_bounds_multidim(self):
    # mixed slices and integer indices across multiple dims
    helper_test_op([(3,3,3)], lambda x: x[1:2])
    helper_test_op([(3,3,3)], lambda x: x[1:2, 2])
    helper_test_op([(3,3,3)], lambda x: x[1:2, 1:2])
    helper_test_op([(3,3,3)], lambda x: x[1:2, 1:2, 0:-1])
  def test_slice_with_none(self):
    # None in an index inserts a new axis (like np.newaxis), combined with slices and ints
    helper_test_op([(3,3,3)], lambda x: x[None])
    helper_test_op([(3,3,3)], lambda x: x[1:2, None])
    helper_test_op([(3,3,3)], lambda x: x[1:2, None, 1:2])
    helper_test_op([(3,3,3)], lambda x: x[1:2, 1:2, None, -1])
    helper_test_op([(3,3,3)], lambda x: x[None, None, 1, None, 2, 0:2])
  def test_slice_with_const_tensor(self):
    # indexing with an int tensor (both lazy-const and contiguous) matches list indexing in torch
    t = Tensor.zeros(1, dtype=dtypes.int)
    helper_test_op([(3,3,3)], lambda x: x[:, [0], :], lambda x: x[:, t, :])
    helper_test_op([(3,3,3)], lambda x: x[:, [0], :], lambda x: x[:, t.contiguous(), :])
  def test_slice_one_endpoint_out_of_bounds(self):
    # out-of-range slice endpoints clamp to the dim size (numpy/torch semantics)
    helper_test_op([(3,3,3)], lambda x: x[0:4])
    helper_test_op([(3,3,3)], lambda x: x[-6:4])
    helper_test_op([(3,3,3)], lambda x: x[1:50])
    helper_test_op([(3,3,3)], lambda x: x[1:50, 1:2, -1])
  def test_slice_stride_gt_one(self):
    # positive strides > 1, mixed with int indexing and None axis insertion
    helper_test_op([(7,5,10)], lambda x: x[::2, ::3, ::4])
    helper_test_op([(7,5,10)], lambda x: x[1:5:2, ::3, ::4])
    helper_test_op([(7,5,10)], lambda x: x[1:5:2, 3, ::4])
    helper_test_op([(7,5,10)], lambda x: x[1:5:2, None, None, 3, None, ::4])
  def test_slice_negative_strides(self):
    # Torch doesn't support slicing with negative steps, so compare against numpy directly
    a = np.random.randn(10, 10, 10).astype(np.float32)
    t = Tensor(a)
    np.testing.assert_allclose(a[::-1], t[::-1].numpy())
    np.testing.assert_allclose(a[::-2], t[::-2].numpy())
    np.testing.assert_allclose(a[:, 2:0:-1], t[:, 2:0:-1].numpy())
    np.testing.assert_allclose(a[:, 2:0:-1, 3:1:-2], t[:, 2:0:-1, 3:1:-2].numpy())
    np.testing.assert_allclose(a[4:0:-3, 2:0:-1, -1:-5:-2], t[4:0:-3, 2:0:-1, -1:-5:-2].numpy())
    # start < stop with a negative step yields an empty dim
    np.testing.assert_allclose(a[2:5:-1, :, :], t[2:5:-1, :, :].numpy()) # shape = (0, 10, 10)
    np.testing.assert_allclose(a[:, 2:5:-1, :], t[:, 2:5:-1, :].numpy()) # shape = (10, 0, 10)
    np.testing.assert_allclose(a[:, :, 2:5:-1], t[:, :, 2:5:-1].numpy()) # shape = (10, 10, 0)
  def test_slice_both_endpoints_out_of_bounds(self):
    # both endpoints past the dim produce an empty result instead of an error
    helper_test_op([(3,3,3)], lambda x: x[5:10])
    helper_test_op([(3,3,3)], lambda x: x[-15:-7])
  def test_slice_start_gt_end(self):
    # start >= end with the default positive step yields an empty (or clamped) result
    helper_test_op([(3,3,3)], lambda x: x[-2:2])
    helper_test_op([(3,3,3)], lambda x: x[-2:-5])
  def test_slice_empty(self):
    # a zero-length slice is valid and matches torch
    helper_test_op([(10,10)], lambda x: x[1:1])
  def test_slice_zero_in_shape(self):
    # slices that produce a 0-sized leading dim still round-trip correctly
    helper_test_op([(10,10)], lambda x: x[1:1]) # result shape = (0, 10)
    helper_test_op([(3,3,3)], lambda x: x[-2:-5]) # result shape = (0, 3, 3)
  def test_slice_errors(self):
    # invalid indexing must raise the same exception class as numpy/torch conventions
    a = Tensor.ones(4, 3)
    b = Tensor(2)
    with self.assertRaisesRegex(IndexError, "too many"): a[1, 77, 77, 77] # IndexError: (finds too many indices before the out of bounds)
    with self.assertRaisesRegex(IndexError, "out of bounds"): a[1, 3] # IndexError: (out of bounds).
    with self.assertRaisesRegex(IndexError, "out of bounds"): a[1, -4]
    with self.assertRaisesRegex(IndexError, "single ellipsis"): a[..., ...] # IndexError: only single ellipsis
    with self.assertRaises(ValueError): a[::0, 1] # no 0 strides
    with self.assertRaises(IndexError): b[:] # slice cannot be applied to a 0-dim tensor
  def test_slice_ellipsis(self):
    # ... expands to full slices for the unspecified middle dims
    helper_test_op([(3,3,3,3)], lambda x: x[..., 0])
    helper_test_op([(3,3,3,3)], lambda x: x[0, ...])
    helper_test_op([(3,3,3,3)], lambda x: x[0, ..., 0])
    helper_test_op([(3,3,3,3)], lambda x: x[0:3, ..., 2:3])
    helper_test_op([(3,3,3,3)], lambda x: x[None, 0:3, ..., 0, None])
  # this was the failure in llama early realizing freqs_cis
  def test_double_slice(self):
    # slicing a sliced tensor (composed views) matches a single equivalent slice
    helper_test_op([(4,4)], lambda x: x[:, 1:2][1:2])
    helper_test_op([(4,4)], lambda x: x[1:3][1:2])
    helper_test_op([(4,4)], lambda x: x[:, 1:2][0:1])
    helper_test_op([(4,4)], lambda x: x[:, 1:2][:, 0:1])
  def test_pad2d(self):
    # pad2d with positive and negative (cropping) padding, default and custom fill value
    helper_test_op([(3,3,3,3)], lambda x: torch.nn.functional.pad(x, (1,2,3,4)), lambda x: x.pad2d(padding=(1,2,3,4)))
    helper_test_op([(3,3,3,3)], lambda x: torch.nn.functional.pad(x, (-1,2,-3,4)), lambda x: x.pad2d(padding=(-1,2,-3,4)))
    helper_test_op([(3,3,3,3)], lambda x: torch.nn.functional.pad(x, (1,2,3,4), value=5), lambda x: x.pad2d(padding=(1,2,3,4),value=5))
    helper_test_op([(3,3,3,3)], lambda x: torch.nn.functional.pad(x, (-1,2,-3,4), value=5), lambda x: x.pad2d(padding=(-1,2,-3,4),value=5))
  def test_pad(self):
    # general pad: tinygrad takes per-dim (before, after) pairs (None = no padding),
    # torch takes a flat last-dim-first tuple; also checks inf fill values
    helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x, (1,2,3,4)),lambda x: x.pad(((3,4),(1,2))))
    helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x, (1,2,3,4), value=5), lambda x: x.pad(((3,4), (1,2)), value=5))
    helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x, (1,2,3,4), value=math.inf), lambda x: x.pad(((3,4), (1,2)), value=math.inf))
    helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x, (1,2,3,4), value=-math.inf), lambda x: x.pad(((3,4), (1,2)), value=-math.inf))
    helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x, (0,0,3,4), value=1), lambda x: x.pad(((3,4), None), value=1))
    helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x, (0,0,0,0), value=1), lambda x: x.pad((None, None), value=1))
  def test_pad_reshape(self):
    # reshape directly after pad (exercises view merging); forward only
    helper_test_op([(1, 2)],
      lambda x: torch.nn.functional.pad(x, (0, 1, 1, 0)).reshape((3, 2)),
      lambda x: x.pad2d((0, 1, 1, 0)).reshape((3, 2)), forward_only=True)
    helper_test_op([(1, 2)],
      lambda x: torch.nn.functional.pad(x, (0, 2, 1, 1)).reshape((4, 3)),
      lambda x: x.pad2d((0, 2, 1, 1)).reshape((4, 3)), forward_only=True)
    helper_test_op([(1, 1, 1, 2)],
      lambda x: torch.nn.functional.pad(x, (0, 4, 2, 2, 1, 2, 0, 2)).reshape((4, 3, 6, 5)),
      lambda x: x.pad(((0, 2), (1, 2), (2, 2), (0, 4))).reshape((4, 3, 6, 5)), forward_only=True)
  @unittest.skipIf(Device.DEFAULT == "WEBGL", "incorrect result")
  def test_pad_slice(self):
    # slicing into padded tensors, for both zero and nonzero fill values,
    # including slices that land entirely inside the padded region
    for value in 0., 3.456:
      helper_test_op([(1)], lambda x: torch.nn.functional.pad(x,(1,0), value=value)[0], lambda x: x.pad(((1,0),), value=value)[0])
      helper_test_op([(4)], lambda x: torch.nn.functional.pad(x,(1,0), value=value)[0], lambda x: x.pad(((1,0),), value=value)[0])
      helper_test_op([(4)], lambda x: torch.nn.functional.pad(x,(3,0), value=value)[0:1], lambda x: x.pad(((3,0),), value=value)[0:1])
      helper_test_op([(4)], lambda x: torch.nn.functional.pad(x,(0,3), value=value)[6], lambda x: x.pad(((0,3),), value=value)[6])
      helper_test_op([(4)], lambda x: torch.nn.functional.pad(x,(0,3), value=value)[4:6], lambda x: x.pad(((0,3),), value=value)[4:6])
      helper_test_op([(5,5)], lambda x: torch.nn.functional.pad(x,(0,0,1,0), value=value)[0], lambda x: x.pad(((1,0),(0,0)), value=value)[0])
      helper_test_op([(2,2)], lambda x: torch.nn.functional.pad(x,(0,1,0,0), value=value)[0,2], lambda x: x.pad(((0,0),(0,1)), value=value)[0,2])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(0,0,1,0), value=value)[0,2], lambda x: x.pad(((1,0),(0,0)), value=value)[0,2])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(0,0,0,2), value=value)[5], lambda x: x.pad(((0,2),(0,0)), value=value)[5])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(0,0,0,2), value=value)[3:5], lambda x: x.pad(((0,2),(0,0)), value=value)[3:5])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(3,0,0,0), value=value)[1,0], lambda x: x.pad(((0,0),(3,0)), value=value)[1,0])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(3,0,0,0), value=value)[1,0:4], lambda x: x.pad(((0,0),(3,0)), value=value)[1,0:4])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(3,4,1,2), value=value)[0], lambda x: x.pad(((1,2),(3,4)), value=value)[0])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(3,4,1,2), value=value)[:,1], lambda x: x.pad(((1,2),(3,4)), value=value)[:,1])
      helper_test_op([(4,4)], lambda x: torch.nn.functional.pad(x,(3,4,1,2), value=value)[:,4], lambda x: x.pad(((1,2),(3,4)), value=value)[:,4])
      helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x,(0,3,0,0), value=value)[:,4:6], lambda x: x.pad(((0,0),(0,3)), value=value)[:,4:6])
      helper_test_op([(3,3)], lambda x: torch.nn.functional.pad(x,(0,1,3,2), value=value)[0:2,:], lambda x: x.pad(((3,2),(0,1)), value=value)[0:2,:])
      helper_test_op([(3,3,3)], lambda x: torch.nn.functional.pad(x,(1,1,0,1,3,2), value=value)[0:2,:,:],
        lambda x: x.pad(((3,2),(0,1),(1,1)), value=value)[0:2,:,:])
      helper_test_op([(3,3,3)], lambda x: torch.nn.functional.pad(x,(1,1,0,1,3,2), value=value)[2:4,:,:],
        lambda x: x.pad(((3,2),(0,1),(1,1)), value=value)[2:4,:,:])
  def test_stack_slice(self):
    # slicing into a stacked tensor; [x]*n stacks the same tensor n times
    helper_test_op([(4)], lambda x: torch.stack([x]*3)[0,:], lambda x: Tensor.stack(*[x]*3)[0,:])
    helper_test_op([(5)], lambda x: torch.stack([x]*3)[0,0], lambda x: Tensor.stack(*[x]*3)[0,0])
    helper_test_op([(4,4)], lambda x: torch.stack([x]*4)[3], lambda x: Tensor.stack(*[x]*4)[3])
  def test_transpose(self):
    # .T on 2-D, and transpose of arbitrary dim pairs on 3-D
    helper_test_op([(3,3)], lambda x: x.T)
    helper_test_op([(3,3,3)], lambda x: x.transpose(1,2))
    helper_test_op([(3,3,3)], lambda x: x.transpose(0,2))
  def test_permute(self):
    # permute with positive/negative axes; invalid permutations must raise RuntimeError
    helper_test_op([(1,2,3,4)], lambda x: x.permute((3,0,2,1)))
    helper_test_op([(3,4,5,6)], lambda x: x.permute((3,2,1,0)))
    helper_test_op([(3,4,5,6)], lambda x: x.permute((-2,-1,1,0)))
    helper_test_op([()], lambda x: x.permute(()))
    self.helper_test_exception([(3,4,5,6)], lambda x: x.permute((0,2)), lambda x: x.permute((0,2)), expected=RuntimeError)
    self.helper_test_exception([(3,4,5,6)], lambda x: x.permute((0,1,2,3,3,3)), lambda x: x.permute((0,1,2,3,3,3)), expected=RuntimeError)
    self.helper_test_exception([(3,4,5,6)], lambda x: x.permute((0,0,1,2,3)), lambda x: x.permute((0,0,1,2,3)), expected=RuntimeError)
  def test_reshape(self):
    # reshape with -1 inference and None (keep original dim); bad shapes must raise
    helper_test_op([(4,3,6,6)], lambda x: x.reshape((12,6,6)))
    helper_test_op([(4,3,6,6)], lambda x: x.reshape((-1,3,6,6)))
    helper_test_op([(4,3,6,6)], lambda x: x.reshape((-1,1,6,6)))
    # None in a tinygrad reshape keeps that dim's size
    helper_test_op([(4,3,6,6)], lambda x: x.reshape((4,3,6,6)), lambda x: x.reshape((None,None,6,6)))
    helper_test_op([()], lambda x: x.reshape(()))
    helper_test_op([(1,)], lambda x: x.reshape(()))
    helper_test_op([()], lambda x: x.reshape((1,)))
    helper_test_op([()], lambda x: x.reshape((1,1,1)))
    self.helper_test_exception([(3,4)], lambda x: x.reshape((-1,-1,2)), lambda x: x.reshape((-1,-1,2)), expected=RuntimeError)
    self.helper_test_exception([(3,4)], lambda x: x.reshape((-1,-1,-1,2)), lambda x: x.reshape((-1,-1,-1,2)), expected=RuntimeError)
    with self.assertRaises(ValueError):
      x = Tensor.ones((4,3,6,6))
      x.reshape([])
  def test_flip(self):
    # flip over single/multiple/negative axes and the empty axis tuple; duplicate axes raise
    helper_test_op([(4,3,6,6)], lambda x: x.flip((0,)))
    helper_test_op([(4,3,6,6)], lambda x: x.flip((0,1)))
    helper_test_op([(4,3,6,6)], lambda x: x.flip((0,1,3)))
    helper_test_op([(4,3,6,6)], lambda x: x.flip((3,)))
    helper_test_op([(4,3,6,6)], lambda x: x.flip((0,1,3)).flip(0))
    helper_test_op([(4,3,6,6)], lambda x: x.flip((-1,)))
    helper_test_op([()], lambda x: x.flip(()))
    helper_test_op([(1,)], lambda x: x.flip(()))
    helper_test_op([(4,3,6,6)], lambda x: x.flip(()))
    self.helper_test_exception([(3,4)], lambda x: x.flip((0,0)), lambda x: x.flip((0,0)), expected=RuntimeError)
    self.helper_test_exception([(3,4)], lambda x: x.flip((1,1)), lambda x: x.flip((1,1)), expected=RuntimeError)
    self.helper_test_exception([(3,4)], lambda x: x.flip((1,-1)), lambda x: x.flip((1,-1)), expected=RuntimeError)
  def test_squeeze(self):
    # squeeze a specific dim, all size-1 dims, and 0-dim tensors; out-of-range dims raise IndexError
    helper_test_op([(1,3,6,6)], lambda x: x.squeeze(0))
    helper_test_op([(4,3,1,6)], lambda x: x.squeeze(1))
    helper_test_op([(4,3,6,6)], lambda x: x.squeeze(3))
    self.helper_test_exception([(4,3,6,6)], lambda x: torch.squeeze(x, 50), lambda x: x.squeeze(dim=50), expected=IndexError)
    self.helper_test_exception([(4,3,6,6)], lambda x: torch.squeeze(x, -50), lambda x: x.squeeze(dim=-50), expected=IndexError)
    helper_test_op([(4,3,6,1)], lambda x: x.squeeze(-1))
    helper_test_op([(4,3,6,6)], lambda x: x.squeeze())
    helper_test_op([(1,3,6,6)], lambda x: x.squeeze())
    helper_test_op([(2,3,1)], lambda x: x.squeeze())
    # 0-dim: squeeze(0) and squeeze(-1) are no-ops, like torch
    helper_test_op([()], lambda x: x.squeeze(-1))
    helper_test_op([()], lambda x: x.squeeze(0))
    helper_test_op([()], lambda x: x.squeeze())
    self.helper_test_exception([()], lambda x: torch.squeeze(x, 10), lambda x: x.squeeze(dim=10), expected=IndexError)
    self.helper_test_exception([()], lambda x: torch.squeeze(x, 1), lambda x: x.squeeze(dim=1), expected=IndexError)
    self.helper_test_exception([()], lambda x: torch.squeeze(x, -2), lambda x: x.squeeze(dim=-2), expected=IndexError)
  def test_unsqueeze(self):
    # unsqueeze at positive, end, and negative positions, and on a scalar
    helper_test_op([(4,3,6,6)], lambda x: x.unsqueeze(0))
    helper_test_op([(4,3,6,6)], lambda x: x.unsqueeze(4))
    helper_test_op([(4,3,6,6)], lambda x: x.unsqueeze(-1))
    helper_test_op([(4,3,6,6)], lambda x: x.unsqueeze(-3))
    helper_test_op([()], lambda x: x.unsqueeze(0))
  def test_flatten(self):
    # flatten with every start_dim, every end_dim, a range, and degenerate shapes
    for axis in range(3):
      helper_test_op([(4,3,6,6)], lambda x: x.flatten(start_dim=axis))
    for axis in range(3):
      helper_test_op([(4,3,6,6)], lambda x: x.flatten(end_dim=axis))
    helper_test_op([(4,3,6,6)], lambda x: x.flatten(start_dim=1, end_dim=3))
    helper_test_op([()], lambda x: x.flatten())
    helper_test_op([(1,)], lambda x: x.flatten())
  def test_unflatten(self):
    # unflatten first, last, and negative dims into given factor shapes
    helper_test_op([(4,3,6,6)], lambda x: x.unflatten(0, (2, 2)))
    helper_test_op([(4,3,6,6)], lambda x: x.unflatten(3, (3, 2)))
    helper_test_op([(4,3,6,6)], lambda x: x.unflatten(-1, (3, 2, 1)))
  def test_detach(self):
    # detach is identity in forward; forward-only since it breaks the grad graph
    helper_test_op([(4,3,6,6)], lambda x: x.detach(), forward_only=True)
    helper_test_op([()], lambda x: x.detach(), forward_only=True)
  def test_expand(self):
    # expand broadcasts size-1 dims (and scalars), may add leading dims and 0-sized dims;
    # shrinking a non-1 dim or dropping dims must raise
    helper_test_op([(4,3,1,6)], lambda x: x.expand((4,3,2,6)))
    helper_test_op([(1,1,1,1)], lambda x: x.expand((4,3,2,6)))
    helper_test_op([(4,3,1,6)], lambda x: x.expand((6,1,4,3,2,6)))
    helper_test_op([(4,3,1,6)], lambda x: x.expand((0,1,4,3,2,6)))
    helper_test_op([(4,3,1,6)], lambda x: x.expand((4,3,0,6)))
    helper_test_op([()], lambda x: x.expand((4,3,2,6)))
    helper_test_op([()], lambda x: x.expand([]))

    with self.assertRaises((ValueError, RuntimeError)): Tensor.ones(4,3,1,6).expand(4,1,1,6)
    with self.assertRaises((ValueError, RuntimeError)): Tensor.ones(4,3,1,6).expand(4,6,1,6)
    with self.assertRaises((ValueError, RuntimeError)): Tensor.ones(4,3,1,6).expand(3,1,6)
    with self.assertRaises((ValueError, RuntimeError)): Tensor.ones(4,3,2,6).expand(4,3,0,6)
  @unittest.skip("very slow")
  def test_sd_big_conv(self):
    # internal shape (1, 1, 512, 62, 62, 512, 3, 3) overflows a int
    helper_test_op([(1,256,64,64), (512,256,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x, w),
      lambda x,w: x.conv2d(w), atol=1e-3)
  @unittest.skip("slow")
  def test_large_bs_conv(self):
    # large batch size can cause OpenCL image to exceed max image height on macOS
    # (or cause the conv kernel to overflow short sampling coords)
    helper_test_op([(4096,3,3,3), (1,3,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x, w),
      lambda x,w: x.conv2d(w), atol=1e-3)
  @unittest.skip("slow")
  def test_large_ic_conv(self):
    # large input channel count can cause OpenCL image to exceed max image width on macOS
    helper_test_op([(1,2048,3,3), (1,2048,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x, w),
      lambda x,w: x.conv2d(w))
  def test_biased_conv2d(self):
    # two chained 1x1 convs with bias, relu in between
    C = 8
    helper_test_op([(1,C,5,5), (C,C,1,1), (C,)],
      lambda x,w,b: torch.nn.functional.conv2d(torch.nn.functional.conv2d(x,w,b).relu(),w,b),
      lambda x,w,b: Tensor.conv2d(x,w,b).relu().conv2d(w,b))
  def test_simple_conv2d(self):
    # basic 3x3 conv + relu, forward and backward vs torch
    helper_test_op([(1,4,9,9), (4,4,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x,w).relu(),
      lambda x,w: Tensor.conv2d(x,w).relu(), grad_rtol=1e-5)
  @unittest.skipIf(IMAGE>0, "no conv3d on images")
  def test_simple_conv3d(self):
    # 3-D conv: tinygrad's conv2d handles the extra spatial dim from the input rank
    helper_test_op([(1,4,9,9,9), (4,4,3,3,3)],
      lambda x,w: torch.nn.functional.conv3d(x,w).relu(),
      lambda x,w: Tensor.conv2d(x,w).relu(), grad_rtol=1e-5)
  @unittest.skipIf(IMAGE>0, "no conv3d on images")
  def test_padded_conv3d(self):
    # 3-D conv with symmetric padding 1 on all six sides
    helper_test_op([(1,4,5,5,5), (4,4,3,3,3)],
      lambda x,w: torch.nn.functional.conv3d(x,w,padding=1).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=[1,1,1,1,1,1]).relu(), grad_rtol=1e-5)
  def test_simple_conv2d_m4(self):
    # conv with channel counts that are multiples of 4 (vectorized kernel path)
    helper_test_op([(1,16,18,18), (16,16,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x,w).relu(),
      lambda x,w: Tensor.conv2d(x,w).relu(), grad_rtol=1e-5)
  def test_simple_conv2d_1x1(self):
    # 1x1 kernel conv (pointwise)
    helper_test_op([(1,4,9,9), (4,4,1,1)],
      lambda x,w: torch.nn.functional.conv2d(x,w).relu(),
      lambda x,w: Tensor.conv2d(x,w).relu(), grad_rtol=1e-5)
  def test_simple_conv2d_1x1_m4(self):
    # pointwise conv with multiple-of-4 channels
    helper_test_op([(1,16,32,32), (16,16,1,1)],
      lambda x,w: torch.nn.functional.conv2d(x,w).relu(),
      lambda x,w: Tensor.conv2d(x,w).relu(), grad_rtol=1e-5)
  def test_nested_conv2d(self):
    # two chained conv+relu stages
    helper_test_op([(1,32,9,9), (32,32,3,3), (32,32,3,3)],
      lambda x,w1,w2: torch.nn.functional.conv2d(torch.nn.functional.conv2d(x,w1).relu(), w2).relu(),
      lambda x,w1,w2: x.conv2d(w1).relu().conv2d(w2).relu())

  # expect reduce nodes == 3
  def test_simple_conv2d_nhwc(self):
    # weights (from tf): filter_height x filter_width x in_channels x out_channels
    # both sides permute NHWC/HWIO to NCHW/OIHW before the conv
    helper_test_op([(2,9,9,10), (3,3,10,20)],
      lambda x,w: torch.nn.functional.conv2d(x.permute(0,3,1,2),w.permute(3,2,0,1)).relu(),
      lambda x,w: Tensor.conv2d(x.permute(0,3,1,2),w.permute(3,2,0,1)).relu(), atol=1e-5, grad_rtol=1e-5)
  def test_simple_conv2d_batched(self):
    # batch size > 1
    helper_test_op([(2,4,9,9), (4,4,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x,w).relu(),
      lambda x,w: Tensor.conv2d(x,w).relu(), grad_rtol=1e-5)
  # conv transpose
  def test_simple_conv_transpose2d(self):
    # basic transposed conv + relu vs torch
    helper_test_op([(2,4,9,9), (4,4,3,3)],
      lambda x,w: torch.nn.functional.conv_transpose2d(x,w).relu(),
      lambda x,w: Tensor.conv_transpose2d(x,w).relu(), grad_rtol=1e-5)
  def test_bias_conv_transpose2d(self):
    # transposed conv with bias
    helper_test_op([(2,4,9,9), (4,4,3,3), (4,)],
      lambda x,w,b: torch.nn.functional.conv_transpose2d(x,w,b).relu(),
      lambda x,w,b: Tensor.conv_transpose2d(x,w,b).relu(), grad_rtol=1e-5)
  def test_grouped_conv_transpose2d(self):
    # transposed conv with groups=2
    helper_test_op([(2,4,9,9), (4,4,3,3)],
      lambda x,w: torch.nn.functional.conv_transpose2d(x,w,groups=2).relu(),
      lambda x,w: Tensor.conv_transpose2d(x,w,groups=2).relu(), grad_rtol=1e-5)
  def test_padded_conv_transpose2d(self):
    # transposed conv over asymmetric-tuple, int, and zero paddings
    for padding in [(1,2), (2,1), 2, 1, 0]:
      helper_test_op([(2,4,9,9), (4,4,3,3)],
        lambda x,w: torch.nn.functional.conv_transpose2d(x,w,padding=padding).relu(),
        lambda x,w: Tensor.conv_transpose2d(x,w,padding=padding).relu(), grad_rtol=1e-5)
  def test_dilated_conv_transpose2d(self):
    # transposed conv over tuple and int dilations
    for dilation in [(1,2), (2,1), 2, 1]:
      helper_test_op([(2,4,9,9), (4,4,3,3)],
        lambda x,w: torch.nn.functional.conv_transpose2d(x,w,dilation=dilation).relu(),
        lambda x,w: Tensor.conv_transpose2d(x,w,dilation=dilation).relu(), grad_rtol=1e-5)
  def test_strided_conv_transpose2d(self):
    # transposed conv over tuple and int strides
    for stride in [(2,1), (1,2), 1]:
      helper_test_op([(2,4,4,5), (4,4,3,3)],
        lambda x,w: torch.nn.functional.conv_transpose2d(x,w, stride=stride).relu(),
        lambda x,w: Tensor.conv_transpose2d(x,w,stride=stride).relu(), grad_rtol=1e-5)
  def test_output_padded_conv_transpose2d(self):
    # transposed conv with output_padding (requires stride > output_padding)
    for output_padding, stride in [((1,1), (2,3)), ((2,1), (3,2))]:
      helper_test_op([(2,4,6,5), (4,4,3,3),(4,)],
        lambda x,w,b: torch.nn.functional.conv_transpose2d(x,w,b,output_padding=output_padding,stride=stride).relu(),
        lambda x,w,b: Tensor.conv_transpose2d(x,w,b,output_padding=output_padding,stride=stride).relu(), grad_rtol=1e-5)
  @unittest.skipIf(IMAGE>0, "no conv3d on images")
  def test_simple_conv_transpose3d(self):
    # 3-D transposed conv: tinygrad's conv_transpose2d handles the extra dim from input rank
    helper_test_op([(2,4,9,9,9), (4,4,3,3,3)],
      lambda x,w: torch.nn.functional.conv_transpose3d(x,w).relu(),
      lambda x,w: Tensor.conv_transpose2d(x,w).relu(), grad_rtol=1e-5)
  @unittest.skipIf((IMAGE>0), "no conv1d on images")
  def test_conv1d(self):
    # sweep of 1-D conv shapes; groups>1 only tested where cin/H allow it
    for bs in [1,8]:
      for cin in [1,3]:
        for H in [1,2,5]:
          for groups in [1,3] if cin == 3 and H == 5 else [1]:
            with self.subTest(batch_size=bs, channels=cin, groups=groups, height=H):
              helper_test_op([(bs,cin,11), (6,cin//groups,H)],
                lambda x,w: torch.nn.functional.conv1d(x,w,groups=groups).relu(),
                lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  @unittest.skipIf(IMAGE>0, "no conv1d on images")
  def test_simple_padding_conv1d(self):
    # tinygrad's padding arg must equal torch's explicit pre-pad
    bs = 6
    cin = 2
    groups = 1
    H = 5
    p = (1,1)
    helper_test_op([(bs,cin,11), (6,cin//groups,H)],
      lambda x,w: torch.nn.functional.conv1d(torch.nn.functional.pad(x, p),w).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=p).relu())
  @unittest.skipIf(IMAGE>0, "no conv1d on images")
  def test_strided_conv1d_simple(self):
    # minimal 1-D conv with stride 2
    bs, H = 2, 3
    helper_test_op([(bs,1,5), (1,1,H)],
      lambda x,w: torch.nn.functional.conv1d(x,w,stride=2).relu(),
      lambda x,w: Tensor.conv2d(x,w,stride=2).relu())
  @unittest.skipIf(IMAGE>0, "no conv1d on images")
  def test_asymmetric_padding_conv1d(self):
    # asymmetric (left != right) padding vs torch's explicit pre-pad
    for p in [(0,1), (2,1), (2,0)]:
      with self.subTest(p):
        for n in [3,4]:
          for k in [2]:
            helper_test_op([(1,1,n), (1,1,k)],
              lambda x,w: torch.nn.functional.conv1d(torch.nn.functional.pad(x, p),w).relu(),
              lambda x,w: Tensor.conv2d(x,w,padding=p).relu())
            # NOTE(review): this call is an exact duplicate of the one above —
            # possibly intentional (re-running a cached kernel); confirm before removing
            helper_test_op([(1,1,n), (1,1,k)],
              lambda x,w: torch.nn.functional.conv1d(torch.nn.functional.pad(x, p),w).relu(),
              lambda x,w: Tensor.conv2d(x,w,padding=p).relu())
  def _test_conv2d(self, bs=1, cin=1):
    # shared sweep over kernel sizes (and groups where cin/H/W allow) for the test_conv2d_* variants
    for H in [1,2,3]:
      for W in [1,2,3,5]:
        for groups in [1,3] if cin == 3 and H == 3 and W == 3 else [1]:
          with self.subTest(batch_size=bs, channels=cin, groups=groups, height=H, width=W):
            helper_test_op([(bs,cin,11,7), (6,cin//groups,H,W)],
              lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
              lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  # conv2d shape sweep, bs=1 cin=3
  def test_conv2d(self): self._test_conv2d(bs=1, cin=3)
  # conv2d shape sweep, bs=4 cin=3
  def test_conv2d_bs_4_cin_3(self): self._test_conv2d(bs=4, cin=3)
  # conv2d shape sweep, bs=1 cin=1
  def test_conv2d_bs_1_cin_1(self): self._test_conv2d(bs=1, cin=1)
  # conv2d shape sweep, bs=4 cin=1
  def test_conv2d_bs_4_cin_1(self): self._test_conv2d(bs=4, cin=1)
  def test_large_input_conv2d(self):
    # large 64x64 input with asymmetric kernel
    bs = 4
    cin = 16
    groups = 1
    H = 5
    W = 2
    helper_test_op([(bs,cin,64,64), (6,cin//groups,H,W)],
      lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
      # needed to relax tolerance on NVIDIA
      lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), atol=1e-4, grad_rtol=1e-5)
  def test_simple_grouped_conv2d(self):
    # smallest grouped conv: 2 groups, 1 output channel per group
    bs = 1
    groups = 2
    rcout = 1
    cin = 2
    helper_test_op([(bs,groups*cin,1,1), (groups*rcout,cin,1,1)],
      lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
      lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  def test_medium_grouped_conv2d(self):
    # grouped conv: 2 groups, 2 output channels per group
    bs = 1
    groups = 2
    rcout = 2
    cin = 2
    helper_test_op([(bs,groups*cin,1,1), (groups*rcout,cin,1,1)],
      lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
      lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  def test_depthwise_conv2d(self):
    # depthwise conv: groups == channels, one filter per channel
    bs = 1
    groups = 32
    rcout = 1
    cin = 1
    helper_test_op([(bs,groups*cin,32,32), (groups*rcout,cin,1,1)],
      lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
      lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  def test_grouped_conv2d(self):
    # general grouped conv with non-trivial batch, groups, and per-group output channels
    bs = 4
    groups = 5
    rcout = 7
    cin = 3
    helper_test_op([(bs,groups*cin,5,5), (groups*rcout,cin,3,3)],
      lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
      lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  def test_fancy_conv2d(self):
    # grouped conv where groups == cin and cout == 1 per group, on a non-square input
    bs = 2
    cin = 3
    cout = 1
    groups = 3
    H,W = 3,3
    helper_test_op([(bs,cin,11,28), (groups*cout,cin//groups,H,W)],
      lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
      lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), grad_rtol=1e-5)
  def test_strided_conv2d_simple(self):
    # minimal stride-2 conv on a near-1-D input
    bs,H,W = 2,3,1
    helper_test_op([(bs,1,5,1), (1,1,H,W)],
      lambda x,w: torch.nn.functional.conv2d(x,w,stride=2).relu(),
      lambda x,w: Tensor.conv2d(x,w,stride=2).relu())
  def test_strided_conv2d(self):
    # int stride and tuple stride; each subTest cross-checks one side's literal
    # against the other side's named stride variable
    bs = 4
    cin = 3
    H,W = 3,3
    with self.subTest(stride := 2):
      helper_test_op([(bs,cin,11,28), (4,cin,H,W)],
        lambda x,w: torch.nn.functional.conv2d(x,w,stride=2).relu(),
        lambda x,w: Tensor.conv2d(x,w,stride=stride).relu())
    with self.subTest(stride := (2,1)):
      helper_test_op([(bs,cin,11,28), (4,cin,H,W)],
        lambda x,w: torch.nn.functional.conv2d(x,w,stride=stride).relu(),
        lambda x,w: Tensor.conv2d(x,w,stride=(2,1)).relu())
  def test_negative_padding_conv2d(self):
    # negative padding crops the input; compare against torch conv on a pre-sliced input
    n,k = 10, 3
    helper_test_op([(1,1,n,n), (1,1,k,k)],
      lambda x,w: torch.nn.functional.conv2d(x[:, :, 1:-1, 1:-1],w).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=-1).relu())
    helper_test_op([(1,1,n,n), (1,1,k,k)],
      lambda x,w: torch.nn.functional.conv2d(x[:, :, 1:, 1:],w).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=(-1,0,-1,0)).relu())
  def test_simple_padding_conv2d(self):
    # fixed input values, 4-sided padding vs torch explicit pre-pad
    p = (1,1,1,1)
    helper_test_op(None,
      lambda x,w: torch.nn.functional.conv2d(torch.nn.functional.pad(x, p),w).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=p).relu(), vals=[[[[[2.,3.]]]], [[[[1.]]]]])
  def test_asymmetric_padding_conv2d(self):
    """Asymmetric 4-tuple padding (left,right,top,bottom) vs torch F.pad + conv2d.

    Fix: the original issued the identical helper_test_op call twice in a row
    inside the innermost loop (copy-paste duplication); one call per
    (p, n, k) combination is kept.
    """
    for p in [(0,1,0,1), (2,1,2,1), (2,0,2,1)]:
      with self.subTest(p):
        for n in [3,4]:
          for k in [2]:
            helper_test_op([(1,1,n,n), (1,1,k,k)],
              lambda x,w: torch.nn.functional.conv2d(torch.nn.functional.pad(x, p),w).relu(),
              lambda x,w: Tensor.conv2d(x,w,padding=p).relu())
  def test_padded_conv2d_p21(self):
    """conv2d with per-axis padding (2,1)."""
    batch, in_ch = 4, 3
    kh, kw, pad = 3, 3, (2,1)
    helper_test_op([(batch,in_ch,11,28), (4,in_ch,kh,kw)],
      lambda x,w: torch.nn.functional.conv2d(x,w,padding=pad).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=pad).relu())
  def test_padded_conv2d_p22(self):
    """conv2d with symmetric tuple padding (2,2)."""
    batch, in_ch = 4, 3
    kh, kw, pad = 3, 3, (2,2)
    helper_test_op([(batch,in_ch,11,28), (4,in_ch,kh,kw)],
      lambda x,w: torch.nn.functional.conv2d(x,w,padding=pad).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=pad).relu())
  def test_padded_conv2d_1x1(self):
    """1x1-kernel conv2d with integer padding 2 (output larger than input)."""
    batch, in_ch = 4, 3
    kh, kw, pad = 1, 1, 2
    helper_test_op([(batch,in_ch,11,28), (4,in_ch,kh,kw)],
      lambda x,w: torch.nn.functional.conv2d(x,w,padding=pad).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=pad).relu())
  def test_padded_conv2d_bs1(self):
    """Padded conv2d with batch size 1."""
    batch, in_ch = 1, 3
    kh, kw, pad = 3, 3, 1
    helper_test_op([(batch,in_ch,11,28), (4,in_ch,kh,kw)],
      lambda x,w: torch.nn.functional.conv2d(x,w,padding=pad).relu(),
      lambda x,w: Tensor.conv2d(x,w,padding=pad).relu())
  def test_padding_add(self):
    """Adding a 2-padded 60x60 tensor to a 64x64 tensor: torch F.pad vs tinygrad pad2d."""
    pad = (2,2,2,2)
    helper_test_op([(64,64), (60,60)],
      lambda x,w: x+torch.nn.functional.pad(w, pad),
      lambda x,w: x+w.pad2d(pad))
  def test_dilated_conv2d(self):
    """conv2d with an integer and a per-axis tuple dilation, compared against torch."""
    bs = 4
    cin = 3
    H,W = 3,3
    for d in [2, (2,1)]:
      # walrus both labels the subTest and binds `dilation` for the lambdas below
      with self.subTest(dilation := d):
        helper_test_op([(bs,cin,11,28), (4,cin,H,W)],
          lambda x,w: torch.nn.functional.conv2d(x,w,dilation=dilation).relu(),
          lambda x,w: Tensor.conv2d(x,w,dilation=dilation).relu())
  def test_maxpool2d_simple(self):
    """Minimal 2x2 max pool on a tiny 1x1x2x3 input."""
    kernel = (2,2)
    helper_test_op([(1,1,2,3)],
      lambda x: torch.nn.functional.max_pool2d(x, kernel_size=kernel),
      lambda x: Tensor.max_pool2d(x, kernel_size=kernel))
  def test_maxpool2d(self):
    """max_pool2d over a spread of int and tuple kernel sizes, one subTest per size."""
    for ksz in [(2,2), (3,3), 2, 3, (3,2), (5,5), (5,1)]:
      with self.subTest(kernel_size=ksz):
        helper_test_op([(32,2,110,28)],
          lambda x: torch.nn.functional.max_pool2d(x, kernel_size=ksz),
          lambda x: Tensor.max_pool2d(x, kernel_size=ksz))
  def test_maxpool2d_padding(self):
    """max_pool2d with padding=1 across several kernel sizes."""
    for ksz in [(2,2), (3,3), 2, 3, (3,2)]:
      with self.subTest(kernel_size=ksz):
        helper_test_op([(32,2,110,28)],
          lambda x: torch.nn.functional.max_pool2d(x, kernel_size=ksz, padding=1),
          lambda x: Tensor.max_pool2d(x, kernel_size=ksz, padding=1))
  def test_maxpool2d_bigger_stride(self):
    """2x2 max pool where the stride exceeds the kernel size (windows skip input)."""
    for stride in [(2,3), (3,2), 2, 3]:
      with self.subTest(stride=stride):
        helper_test_op([(32,2,110,28)],
          lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(2,2), stride=stride),
          lambda x: Tensor.max_pool2d(x, kernel_size=(2,2), stride=stride))
  def test_maxpool2d_bigger_stride_dilation(self):
    """2x2 max pool with paired large strides and dilations (zip pairs each stride with a dilation)."""
    for stride, dilation in zip([(2,3), (3,2), 2, 3, 4], [(3,2), (2,3), 2, 3, 6]):
      with self.subTest(stride=stride):
        helper_test_op([(32,2,110,28)],
          lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(2,2), stride=stride, dilation=dilation),
          lambda x: Tensor.max_pool2d(x, kernel_size=(2,2), stride=stride, dilation=dilation))
  @unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"}, "CUDA fails on this")
  def test_maxpool2d_unit_stride(self):
    """5x5 max pool with stride 1 (fully overlapping windows); skipped on CUDA/NV backends."""
    helper_test_op([(8, 2, 17, 14)],
      lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(5,5), stride=1),
      lambda x: Tensor.max_pool2d(x, kernel_size=(5,5), stride=1))
  def test_maxpool2d_smaller_stride(self):
    """5x5 max pool where the stride is smaller than the kernel (overlapping windows)."""
    for stride in [(2,3), (3,2), 2, 3]:
      with self.subTest(stride=stride):
        helper_test_op([(8, 2, 17, 14)],
          lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(5,5), stride=stride),
          lambda x: Tensor.max_pool2d(x, kernel_size=(5,5), stride=stride))
  def test_maxpool2d_dilation(self):
    """5x5 max pool with int and per-axis tuple dilation."""
    for dil in [(2, 3), (3, 2), 2, 3]:
      helper_test_op([(8, 2, 17, 14)],
        lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(5,5), dilation=dil),
        lambda x: Tensor.max_pool2d(x, kernel_size=(5,5), dilation=dil))
  def test_avgpool2d(self):
    """avg_pool2d over several tuple kernel sizes (odd 111 height exercises partial coverage)."""
    shape = (32,2,111,28)
    for ksz in [(2,2), (3,3), (3,2), (5,5), (5,1)]:
      with self.subTest(kernel_size=ksz):
        helper_test_op([shape],
          lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=ksz),
          lambda x: Tensor.avg_pool2d(x, kernel_size=ksz), rtol=1e-5)
  def test_avgpool2d_padding(self):
    """avg_pool2d with padding=1 (default count_include_pad: padded zeros count in the mean)."""
    shape = (32,2,111,28)
    for ksz in [(2,2), (3,3), 2, 3, (3,2)]:
      with self.subTest(kernel_size=ksz):
        helper_test_op([shape],
          lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=ksz, padding=1),
          lambda x: Tensor.avg_pool2d(x, kernel_size=ksz, padding=1), rtol=1e-5)
  def test_avgpool2d_padding_not_counted(self):
    """avg_pool2d with padding=1 and count_include_pad=False (padding excluded from the divisor)."""
    shape = (32,2,111,28)
    for ksz in [(2,2), (3,3), 2, 3, (3,2)]:
      with self.subTest(kernel_size=ksz):
        helper_test_op([shape],
          lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=ksz, padding=1, count_include_pad=False),
          lambda x: Tensor.avg_pool2d(x, kernel_size=ksz, padding=1, count_include_pad=False), rtol=1e-5)
  def test_global_avgpool2d(self):
    """Average pool whose kernel covers the entire spatial extent (global pooling)."""
    spatial = (111, 28)
    helper_test_op([(32,2)+spatial],
      lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=spatial),
      lambda x: Tensor.avg_pool2d(x, kernel_size=spatial), rtol=1e-5)
  def test_interpolate_linear(self):
    """1-D linear interpolation, both downsampling and upsampling."""
    for src, dst in [((52,),(29,)), ((29,),(52,))]:
      helper_test_op([(2,3)+src],
        lambda x: torch.nn.functional.interpolate(x, size=dst, mode="linear"),
        lambda x: Tensor.interpolate(x, size=dst, mode="linear"))
  def test_interpolate_linear_corners_aligned(self):
    """1-D linear interpolation with align_corners=True, both directions."""
    for src, dst in [((52,),(29,)), ((29,),(52,))]:
      helper_test_op([(2,3)+src],
        lambda x: torch.nn.functional.interpolate(x, size=dst, mode="linear", align_corners=True),
        lambda x: Tensor.interpolate(x, size=dst, mode="linear", align_corners=True))
  def test_interpolate_bilinear(self):
    """2-D interpolation: torch uses mode="bilinear" while the tinygrad side passes
    mode="linear" — presumably Tensor.interpolate infers dimensionality from `size`;
    verify against the Tensor.interpolate implementation."""
    for in_sz, out_sz in [((52,40),(29,31)), ((52,29),(31,40)), ((29,31),(40,52))]:
      helper_test_op([(2,3)+in_sz],
        lambda x: torch.nn.functional.interpolate(x, size=out_sz, mode="bilinear"),
        lambda x: Tensor.interpolate(x, size=out_sz, mode="linear"), atol=1e-4)
  def test_interpolate_bilinear_corners_aligned(self):
    """2-D interpolation with align_corners=True (tinygrad side uses mode="linear";
    see test_interpolate_bilinear)."""
    for in_sz, out_sz in [((52,40),(29,31)), ((52,29),(31,40)), ((29,31),(40,52))]:
      helper_test_op([(2,3)+in_sz],
        lambda x: torch.nn.functional.interpolate(x, size=out_sz, mode="bilinear", align_corners=True),
        lambda x: Tensor.interpolate(x, size=out_sz, mode="linear", align_corners=True), atol=1e-4)
  def test_interpolate_trilinear(self):
    """3-D interpolation: torch mode="trilinear" vs tinygrad mode="linear" on a 3-D size."""
    for in_sz, out_sz in [((5,2,8),(3,6,4))]:
      helper_test_op([(2,3)+in_sz],
        lambda x: torch.nn.functional.interpolate(x, size=out_sz, mode="trilinear"),
        lambda x: Tensor.interpolate(x, size=out_sz, mode="linear"), atol=1e-4)
  def test_interpolate_trilinear_corners_aligned(self):
    """3-D interpolation with align_corners=True (torch "trilinear" vs tinygrad "linear")."""
    for in_sz, out_sz in [((5,2,8),(3,6,4))]:
      helper_test_op([(2,3)+in_sz],
        lambda x: torch.nn.functional.interpolate(x, size=out_sz, mode="trilinear", align_corners=True),
        lambda x: Tensor.interpolate(x, size=out_sz, mode="linear", align_corners=True), atol=1e-4)
  def test_cat(self):
    """cat along every axis (incl. negative), zero-size inputs, and the 0-d error case."""
    for dim in range(-2, 3):
      helper_test_op([(45,65,9), (45,65,9), (45,65,9)], lambda x,y,z: torch.cat((x,y,z), dim), lambda x,y,z: x.cat(y, z, dim=dim))
    # zero in non-cat axis
    helper_test_op([(45,0,9), (45,0,9), (45,0,9)], lambda x,y,z: torch.cat((x,y,z), 0), lambda x,y,z: x.cat(y, z, dim=0))
    # zero in cat axis
    helper_test_op([(45,0,9), (45,1,9), (45,2,9)], lambda x,y,z: torch.cat((x,y,z), 1), lambda x,y,z: x.cat(y, z, dim=1))
    helper_test_op([(45,0,9), (45,0,9), (45,0,9)], lambda x,y,z: torch.cat((x,y,z), 1), lambda x,y,z: x.cat(y, z, dim=1))
    # concatenating 0-d tensors is rejected
    with self.assertRaises(IndexError):
      a = Tensor(3.14)
      a.cat(a)
  def test_multicat(self):
    """cat of three 2-D tensors along every valid axis, including the negative one."""
    for axis in range(-1, 2):
      helper_test_op([(45,65), (45,65), (45,65)], lambda x,y,z: torch.cat((x,y,z), axis), lambda x,y,z: x.cat(y, z, dim=axis))
  def test_stack(self):
    """stack along each axis (incl. negative and the new trailing axis), the out-of-range
    error, and stacking 0-d tensors into a 1-d tensor."""
    for dim in range(-1, 3):
      helper_test_op([(45,65,3), (45,65,3), (45,65,3)], lambda x, y, z: torch.stack((x, y, z), dim), lambda x, y, z: Tensor.stack(x, y, z, dim=dim))
    # dim far beyond the result rank is rejected
    with self.assertRaises(IndexError):
      Tensor.stack(Tensor.randn(45, 65, 3), dim=77)
    # 0-d tensors stack into a 1-d tensor
    a = Tensor(3.14)
    np.testing.assert_allclose(Tensor.stack(a, a).numpy(), Tensor([3.14, 3.14]).numpy())
  def test_repeat(self):
    """repeat with extra leading repeat dims, the rank-mismatch error, and a zero repeat."""
    x = Tensor.randn(4, 6, 3)
    base_repeats = [2, 4, 3]
    for reps in [[], [4], [2, 1], [3, 2, 2]]:
      repeats = base_repeats + reps
      # torch takes *repeats varargs, tinygrad takes a sequence
      helper_test_op([(4, 6, 3)], lambda x: x.repeat(*repeats), lambda x: x.repeat(repeats))
      # repeating a 0-d tensor broadcasts it to the repeat rank
      helper_test_op([()], lambda x: x.repeat(*repeats), lambda x: x.repeat(repeats))
    # fewer repeats than tensor dims is an error
    with self.assertRaises(ValueError):
      x.repeat((2, 4))
    # a zero repeat yields an empty tensor of the corresponding shape
    np.testing.assert_allclose(x.repeat((2, 0, 4)).numpy(), Tensor.zeros(8, 0, 12).numpy())
  def test_repeat_interleave(self):
    """repeat_interleave flattened (no dim) and along dims 1 and 0."""
    helper_test_op([(3, 3)], lambda x: x.repeat_interleave(6))
    for dim in (1, 0):
      helper_test_op([(3, 3)], lambda x: x.repeat_interleave(2, dim))
  def test_simple_repeat(self):
    """repeat with more repeat dims than tensor dims (2-D input, rank-3 repeats)."""
    reps = [3, 3, 4]
    helper_test_op([(3, 3)], lambda x: x.repeat(*reps), lambda x: x.repeat(reps))
  def test_clip(self):
    """clip over assorted (low, high) ranges, open-ended bounds, and low > high."""
    bounds = [
      (-2.3, 1.2), (0, 0), (10, 100), (0, 0.1), (-0.3, -0.2),
      (3, 0),        # min > max
      (None, 0), (0, None),  # one-sided clips
    ]
    for lo, hi in bounds:
      helper_test_op([(45,65)], lambda x: x.clip(lo, hi))
    # clipping with neither bound set is an error
    self.helper_test_exception([(45,65)], lambda x: x.clip(None, None), lambda x: x.clip(None, None), RuntimeError)
  def test_matvecmat(self):
    """Row-vector @ matrix, relu, then @ another matrix."""
    helper_test_op([(1,128), (128,128), (128,128)], lambda v,m1,m2: (v@m1).relu()@m2)
  def test_matvec(self):
    """Row-vector @ matrix followed by relu."""
    helper_test_op([(1,128), (128,128)], lambda v,m: (v@m).relu())
  @unittest.skip("this test is broken #862")
  def test_max_inf(self):
    """max over a tensor containing NaN should propagate the NaN (currently broken, issue #862)."""
    n = Tensor([1, float("nan")]).max().numpy()
    assert math.isnan(n.item()), f"{n.item()} is not nan"
  def test_inf_where(self):
    """where() on an all-inf tensor: inf < 0 is all-False, so every element takes the fallback 1."""
    infs = Tensor.full((3, 3), float("inf"))
    out = (infs < 0).where(infs, 1).numpy()
    assert np.all(out == 1.)
  def _get_index_randoms(self):
    """Build paired torch/tinygrad integer index tensors with mutually broadcastable
    shapes for the fancy-indexing tests; returns (a,b,c,d,e) torch and (i,j,k,o,p) tinygrad."""
    # indices cannot have gradient
    a = torch.randint(low=-1, high=1, size=(2,1,1,1,1,1), dtype=torch.int64, requires_grad=False)
    b = torch.randint(high=1, size=(1,3,1,1,1,1), dtype=torch.int64, requires_grad=False)
    c = torch.randint(low=-5, high=5, size=(1,1,4,1,1,1), dtype=torch.int64, requires_grad=False)
    d = torch.randint(high=4, size=(2,1,1,5,1,1), dtype=torch.int64, requires_grad=False)
    e = torch.randint(high=1, size=(1,1,1,1,6,1), dtype=torch.int64, requires_grad=False)
    # i..p mirror a..e as tinygrad int32 tensors (also no gradient)
    i, j, k, o, p = [Tensor(tor.detach().numpy().astype(np.int32), requires_grad=False) for tor in [a,b,c,d,e]]
    return a,b,c,d,e,i,j,k,o,p
  def test_slice_fancy_indexing_no_dim_collapse(self):
    """Fancy indexing with only tensor indices, slices, and ellipsis — rank is preserved."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
    # no dim collapse from int or dim injection from None
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,d,e], lambda x: x[i,j,k,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[:,b,c,d,:], lambda x: x[:,j,k,o,:])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,...], lambda x: x[i,j,...])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,...,e], lambda x: x[i,...,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,c,:,e], lambda x: x[...,k,:,p])
  def test_slice_fancy_indexing_dim_collapse_int(self):
    """Fancy indexing mixed with plain int indices — each int collapses one dimension."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
    # dim collapse from int
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,b,c,d,e], lambda x: x[1,j,k,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,3,d,e], lambda x: x[i,j,3,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,b,2,d,2], lambda x: x[1,j,2,o,2])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,2,2,2,e], lambda x: x[i,2,2,2,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,:,3:11:2,d,0:2], lambda x: x[1,:,3:11:2,o,0:2])
  def test_slice_fancy_indexing_dim_inject_none(self):
    """Fancy indexing combined with None — each None injects a new size-1 dimension."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
    # dim injection from None
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,b,c,d,e], lambda x: x[None,j,k,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,d,None], lambda x: x[i,j,k,o,None])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,None,d,e], lambda x: x[i,j,None,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,b,c,d,None], lambda x: x[None,j,k,o,None])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,:,None,d,e], lambda x: x[i,:,None,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,None,None,None,None], lambda x: x[None,None,None,None,None])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,None,b,c,d,e], lambda x: x[None,None,j,k,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,None,b,c,None,None], lambda x: x[None,None,j,k,None,None])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,None,None,c,d,e], lambda x: x[i,None,None,k,o,p])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,None,None,c,None,None], lambda x: x[i,None,None,k,None,None])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,None,b,None,d,e], lambda x: x[None,None,j,None,o,p])
  def test_slice_fancy_indexing_dim_inject_and_collapse(self):
    """Fancy indexing mixing int collapse and None injection in the same subscript."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms() # noqa
    # dim injection and collapse
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,b,None,d,1], lambda x: x[1,j,None,o,1])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,b,2,d,None], lambda x: x[None,j,2,o,None])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,1,d,None], lambda x: x[...,1,o,None])
  def test_slice_fancy_indexing_with_tensors(self):
    """Indexing with tensor indices of differing ranks (broadcast together), incl. negative values."""
    # indexing using idx with different dim
    helper_test_op([(2,3)], lambda x: x[torch.tensor([[0,0,0],[0,0,0]]), torch.tensor(1)],
                            lambda x: x[Tensor([[0,0,0],[0,0,0]]), Tensor(1)])
    helper_test_op([(2,3)], lambda x: x[torch.tensor([1]), torch.tensor([[0,0,0],[0,0,0]])],
                            lambda x: x[Tensor([1]), Tensor([[0,0,0],[0,0,0]])])
    helper_test_op([(2,3)], lambda x: x[torch.tensor([[0,0,0],[0,0,0]]), torch.tensor([2,1,1])],
                            lambda x: x[Tensor([[0,0,0],[0,0,0]]), Tensor([2,1,1])])
    helper_test_op([(2,3)], lambda x: x[torch.tensor([[0,1,-1],[-1,-2,0]]), torch.tensor([2,1,-1])],
                            lambda x: x[Tensor([[0,1,-1],[-1,-2,0]]), Tensor([2,1,-1])])
  def test_slice_fancy_indexing_list_indices(self):
    """Python lists (incl. nested lists) used as fancy indices, mixed with tensor indices."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[[0]]], lambda x: x[[[0]]])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[0],b,c,d,:], lambda x: x[[0],j,k,o,:])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[[[0]]],b,c,d,[[1]]], lambda x: x[[[[0]]],j,k,o,[[1]]])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[1,0],b,c,d,:], lambda x: x[[1,0],j,k,o,:])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,[1,2,3],...], lambda x: x[i,j,k,[1,2,3],...])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,[[1],[2],[3]],...], lambda x: x[i,j,k,[[1],[2],[3]],...])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,[2,1,0],c,[2,1,0],e], lambda x: x[i,[2,1,0],k,[2,1,0],p])
  def test_slice_fancy_indexing_tuple_indices(self):
    """Tuples (incl. nested tuples) used as fancy indices, mixed with tensor indices and None."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[(((0,),),)], lambda x: x[(((0,),),)])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[(0,),b,c,d,:], lambda x: x[(0,),j,k,o,:])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[(1,0),b,c,d,:], lambda x: x[(1,0),j,k,o,:])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,(1,2,3),...], lambda x: x[i,j,k,(1,2,3),...])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,((2,),(1,),(0,)),c,(2,1,0)], lambda x: x[i,((2,),(1,),(0,)),k,(2,1,0)])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,(2,1,0),None,c,(2,1,0),e], lambda x: x[1,(2,1,0),None,k,(2,1,0),p])
  def test_slice_fancy_indexing_list_with_tensors(self):
    """Lists that contain tensor indices (alone and mixed with ints/lists/tuples)."""
    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a]], lambda x: x[[i]])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a,1]], lambda x: x[[i,1]])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a,[1,1]]], lambda x: x[[i,[1,1]]])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a,(1,1)]], lambda x: x[[i,(1,1)]])
    helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a,b,c,d,e]], lambda x: x[[i,j,k,o,p]])
  def test_slice_fancy_indexing_errors(self):
    """Invalid fancy-indexing: non-int index tensors and non-broadcastable index shapes."""
    a = Tensor.ones(10,11,12)
    # tensors used as indices must be int tensors
    with self.assertRaises(IndexError): a[Tensor(1.1)]
    with self.assertRaises(IndexError): a[Tensor([True, True])]
    # shape mismatch, cannot broadcast. either exception is okay
    with self.assertRaises((IndexError, ValueError)): a[Tensor.randint(3,1,1,1), Tensor.randint(1,4,1,1), Tensor.randint(2,4,4,1)]
    with self.assertRaises((IndexError, ValueError)): a[Tensor.randint(3,1,1,1), Tensor.randint(1,4,1,1,1)]
  def test_gather(self):
    """gather along every dim (incl. negative), dtype/shape error cases, and a 1-D vals case."""
    # indices cannot have gradient
    # indices cannot be negative (torch gather)
    b = torch.randint(3, size=[3,4,5], dtype=torch.int64, requires_grad=False)
    a = Tensor(b.detach().numpy().astype(np.int32), dtype=dtypes.int32, requires_grad=False)
    helper_test_op([(4,5,6)], lambda x: x.gather(dim=0, index=b), lambda x: x.gather(dim=0, index=a))
    helper_test_op([(4,5,6)], lambda x: x.gather(dim=1, index=b), lambda x: x.gather(dim=1, index=a))
    helper_test_op([(4,5,6)], lambda x: x.gather(dim=2, index=b), lambda x: x.gather(dim=2, index=a))
    # index shape equal to the input shape is also valid
    helper_test_op([(3,4,5)], lambda x: x.gather(dim=0, index=b), lambda x: x.gather(dim=0, index=a))
    helper_test_op([(4,5,6)], lambda x: x.gather(dim=-1, index=b), lambda x: x.gather(dim=-1, index=a))
    helper_test_op([(4,5,6)], lambda x: x.gather(dim=-2, index=b), lambda x: x.gather(dim=-2, index=a))
    helper_test_op([(4,5,6)], lambda x: x.gather(dim=-3, index=b), lambda x: x.gather(dim=-3, index=a))
    # 1-D index on a 3-D input is rejected
    self.helper_test_exception([(4,5,6)], lambda x: x.gather(dim=0, index=torch.tensor([1], dtype=torch.int64)),
                                          lambda x: x.gather(dim=0, index=Tensor([1], dtype=dtypes.int32)), expected=(RuntimeError, AssertionError))
    # index larger than the input along a gathered dim is rejected
    self.helper_test_exception([(2,1,1)], lambda x: x.gather(dim=0, index=b),
                                          lambda x: x.gather(dim=0, index=a), expected=(RuntimeError, AssertionError))
    helper_test_op(None, lambda x: x.gather(dim=0, index=torch.tensor([2, 1, 0, 1, 2], requires_grad=False)),
                         lambda x: x.gather(dim=0, index=Tensor([2, 1, 0, 1, 2])),
                         vals=[[1., 2., 3.]])
  def test_scaled_product_attention(self):
    """scaled_dot_product_attention without and with an explicit attention mask."""
    qkv_shapes = [(32,8,16,64)]*3
    helper_test_op(qkv_shapes, torch.nn.functional.scaled_dot_product_attention, Tensor.scaled_dot_product_attention)
    helper_test_op(qkv_shapes + [(32,8,16,16)],
      lambda x,y,z,m: torch.nn.functional.scaled_dot_product_attention(x,y,z,attn_mask=m),
      lambda x,y,z,m: Tensor.scaled_dot_product_attention(x,y,z,attn_mask=m))
  def test_scaled_product_attention_mismatch_ls(self):
    """Attention where the query sequence length (4) differs from key/value length (16)."""
    helper_test_op([(32,8,4,64), (32,8,16,64), (32,8,16,64)],
      torch.nn.functional.scaled_dot_product_attention, Tensor.scaled_dot_product_attention)
  def test_scaled_product_attention_causal(self):
    """Causally-masked attention (is_causal=True)."""
    qkv_shapes = [(32,8,16,64)]*3
    helper_test_op(qkv_shapes,
      lambda x,y,z: torch.nn.functional.scaled_dot_product_attention(x,y,z,is_causal=True),
      lambda x,y,z: Tensor.scaled_dot_product_attention(x,y,z,is_causal=True))
  def test_binary_crossentropy(self):
    """BCE and BCE-with-logits vs torch, cross-checked both ways: sigmoid+bce should equal
    the logits form on both sides (targets clipped into [0,1])."""
    helper_test_op([(32,10), (32,10)], lambda x,y: torch.nn.functional.binary_cross_entropy(x.sigmoid(),torch.clip(y,0,1)),
                                       lambda x,y: x.sigmoid().binary_crossentropy(y.clip(0,1)))
    helper_test_op([(32,10), (32,10)], lambda x,y: torch.nn.functional.binary_cross_entropy_with_logits(x,torch.clip(y,0,1)),
                                       lambda x,y: x.binary_crossentropy_logits(y.clip(0,1)))
    # cross checks: logits form vs explicit sigmoid form must agree across frameworks
    helper_test_op([(32,10), (32,10)], lambda x,y: torch.nn.functional.binary_cross_entropy_with_logits(x,torch.clip(y,0,1)),
                                       lambda x,y: x.sigmoid().binary_crossentropy(y.clip(0,1)))
    helper_test_op([(32,10), (32,10)], lambda x,y: torch.nn.functional.binary_cross_entropy(x.sigmoid(),torch.clip(y,0,1)),
                                       lambda x,y: x.binary_crossentropy_logits(y.clip(0,1)))
  def test_one_hot(self):
    """one_hot on 1-D and nested 3-D int data; forward only (integer output carries no grad)."""
    cases = [
      ([1, 2, 4], 6),
      ([[[1, 2, 3], [0, 3, 5]], [[1, 2, 3], [0, 3, 5]]], 8),
    ]
    for data, num_classes in cases:
      helper_test_op([], lambda: torch.nn.functional.one_hot(torch.tensor(data), num_classes).type(torch.int32),
        lambda: Tensor(data).one_hot(num_classes), forward_only=True)
  def test_masked_fill(self):
    """masked_fill with -inf under complementary masks; masks are detached so no grad flows through the condition."""
    for make_mask in (lambda x: x > 0.1, lambda x: x < 0.1):
      helper_test_op([(32,10)], lambda x: x.masked_fill(make_mask(x).detach(), -math.inf))
  def test_cast(self):
    """Casts: float (with grad), float from int/bool values, and int/bool casts (forward only)."""
    helper_test_op([(3, 3)], lambda x: x.float())
    for vals in ([0, 1, 2, 3], [True, False]):
      helper_test_op(None, lambda x: x.float(), vals=[vals], forward_only=True)
    for method in ("int", "bool"):
      helper_test_op([(3, 3)], lambda x: getattr(x, method)(), forward_only=True)
if __name__ == '__main__':
  # seed numpy so the random inputs generated by helper_test_op are reproducible
  np.random.seed(1337)
  unittest.main(verbosity=2)