# external_test_hcq.py -- hardware command queue (HCQ) tests for the NV/AMD backends
  1. import unittest, ctypes, struct, time, array
  2. from tinygrad import Device, Tensor, dtypes
  3. from tinygrad.helpers import to_mv, CI
  4. from tinygrad.device import Buffer, BufferOptions
  5. from tinygrad.engine.schedule import create_schedule
  6. from tinygrad.engine.realize import get_runner
  7. def _time_queue(q, d):
  8. st = time.perf_counter()
  9. q.signal(d.timeline_signal, d.timeline_value)
  10. q.submit(d)
  11. d._wait_signal(d.timeline_signal, d.timeline_value)
  12. d.timeline_value += 1
  13. return time.perf_counter() - st
  14. @unittest.skipUnless(Device.DEFAULT in ["NV", "AMD"], "Runs only on NV or AMD")
  15. class TestHCQ(unittest.TestCase):
  16. @classmethod
  17. def setUpClass(self):
  18. TestHCQ.d0 = Device[Device.DEFAULT]
  19. #TestHCQ.d1: AMDDevice = Device["AMD:1"]
  20. TestHCQ.a = Tensor([0.,1.], device=Device.DEFAULT).realize()
  21. TestHCQ.b = self.a + 1
  22. si = create_schedule([self.b.lazydata])[-1]
  23. TestHCQ.runner = get_runner(TestHCQ.d0.dname, si.ast)
  24. TestHCQ.b.lazydata.buffer.allocate()
  25. # wow that's a lot of abstraction layers
  26. TestHCQ.addr = struct.pack("QQ", TestHCQ.b.lazydata.buffer._buf.va_addr, TestHCQ.a.lazydata.buffer._buf.va_addr)
  27. TestHCQ.addr2 = struct.pack("QQ", TestHCQ.a.lazydata.buffer._buf.va_addr, TestHCQ.b.lazydata.buffer._buf.va_addr)
  28. TestHCQ.kernargs_off = TestHCQ.runner.clprg.kernargs_offset
  29. TestHCQ.kernargs_size = TestHCQ.runner.clprg.kernargs_alloc_size
  30. ctypes.memmove(TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_off, TestHCQ.addr, len(TestHCQ.addr))
  31. ctypes.memmove(TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size+TestHCQ.kernargs_off, TestHCQ.addr2, len(TestHCQ.addr2))
  32. if Device.DEFAULT == "AMD":
  33. from tinygrad.runtime.ops_amd import HWCopyQueue, HWPM4Queue
  34. TestHCQ.compute_queue = HWPM4Queue
  35. TestHCQ.copy_queue = HWCopyQueue
  36. elif Device.DEFAULT == "NV":
  37. from tinygrad.runtime.ops_nv import HWCopyQueue, HWComputeQueue
  38. # nv need to copy constbuffer there as well
  39. to_mv(TestHCQ.d0.kernargs_ptr, 0x160).cast('I')[:] = array.array('I', TestHCQ.runner.clprg.constbuffer_0)
  40. to_mv(TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size, 0x160).cast('I')[:] = array.array('I', TestHCQ.runner.clprg.constbuffer_0)
  41. TestHCQ.compute_queue = HWComputeQueue
  42. TestHCQ.copy_queue = HWCopyQueue
  43. def setUp(self):
  44. TestHCQ.d0.synchronize()
  45. TestHCQ.a.lazydata.buffer.copyin(memoryview(bytearray(struct.pack("ff", 0, 1))))
  46. TestHCQ.b.lazydata.buffer.copyin(memoryview(bytearray(struct.pack("ff", 0, 0))))
  47. TestHCQ.d0.synchronize() # wait for copyins to complete
  48. def test_run_1000_times_one_submit(self):
  49. temp_signal, temp_value = TestHCQ.d0._alloc_signal(value=0), 0
  50. q = TestHCQ.compute_queue()
  51. for _ in range(1000):
  52. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  53. q.signal(temp_signal, temp_value + 1).wait(temp_signal, temp_value + 1)
  54. temp_value += 1
  55. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  56. q.signal(temp_signal, temp_value + 1).wait(temp_signal, temp_value + 1)
  57. temp_value += 1
  58. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  59. q.submit(TestHCQ.d0)
  60. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  61. TestHCQ.d0.timeline_value += 1
  62. assert (val:=TestHCQ.a.lazydata.buffer.as_buffer().cast("f")[0]) == 2000.0, f"got val {val}"
  63. def test_run_1000_times(self):
  64. temp_signal = TestHCQ.d0._alloc_signal(value=0)
  65. q = TestHCQ.compute_queue()
  66. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  67. q.signal(temp_signal, 2).wait(temp_signal, 2)
  68. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size, TestHCQ.runner.p.global_size,
  69. TestHCQ.runner.p.local_size)
  70. for _ in range(1000):
  71. TestHCQ.d0._set_signal(temp_signal, 1)
  72. q.submit(TestHCQ.d0)
  73. TestHCQ.compute_queue().signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  74. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  75. TestHCQ.d0.timeline_value += 1
  76. assert (val:=TestHCQ.a.lazydata.buffer.as_buffer().cast("f")[0]) == 2000.0, f"got val {val}"
  77. def test_run_to_3(self):
  78. temp_signal = TestHCQ.d0._alloc_signal(value=0)
  79. q = TestHCQ.compute_queue()
  80. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  81. q.signal(temp_signal, 1).wait(temp_signal, 1)
  82. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  83. q.signal(temp_signal, 2).wait(temp_signal, 2)
  84. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  85. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  86. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  87. TestHCQ.d0.timeline_value += 1
  88. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[0]) == 3.0, f"got val {val}"
  89. def test_update_exec(self):
  90. q = TestHCQ.compute_queue()
  91. exec_cmd_idx = len(q)
  92. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  93. q.update_exec(exec_cmd_idx, (1,1,1), (1,1,1))
  94. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  95. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  96. TestHCQ.d0.timeline_value += 1
  97. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[0]) == 1.0, f"got val {val}"
  98. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[1]) == 0.0, f"got val {val}, should not be updated"
  99. @unittest.skipUnless(Device.DEFAULT == "NV", "Only NV supports bind")
  100. def test_bind_run(self):
  101. temp_signal = TestHCQ.d0._alloc_signal(value=0)
  102. q = TestHCQ.compute_queue()
  103. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  104. q.signal(temp_signal, 2).wait(temp_signal, 2)
  105. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size, TestHCQ.runner.p.global_size,
  106. TestHCQ.runner.p.local_size)
  107. q.bind(TestHCQ.d0)
  108. for _ in range(1000):
  109. TestHCQ.d0._set_signal(temp_signal, 1)
  110. q.submit(TestHCQ.d0)
  111. TestHCQ.compute_queue().signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  112. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  113. TestHCQ.d0.timeline_value += 1
  114. assert (val:=TestHCQ.a.lazydata.buffer.as_buffer().cast("f")[0]) == 2000.0, f"got val {val}"
  115. @unittest.skipUnless(Device.DEFAULT == "NV", "Only NV supports bind")
  116. def test_update_exec_binded(self):
  117. q = TestHCQ.compute_queue()
  118. exec_ptr = q.ptr()
  119. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  120. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  121. q.bind(TestHCQ.d0)
  122. q.update_exec(exec_ptr, (1,1,1), (1,1,1))
  123. q.submit(TestHCQ.d0)
  124. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  125. TestHCQ.d0.timeline_value += 1
  126. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[0]) == 1.0, f"got val {val}"
  127. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[1]) == 0.0, f"got val {val}, should not be updated"
  128. @unittest.skipIf(CI, "Can't handle async update on CPU")
  129. def test_wait_signal(self):
  130. temp_signal = TestHCQ.d0._alloc_signal(value=0)
  131. TestHCQ.compute_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  132. with self.assertRaises(RuntimeError):
  133. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=50)
  134. # clean up
  135. TestHCQ.d0._set_signal(temp_signal, 1)
  136. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=100)
  137. TestHCQ.d0.timeline_value += 1
  138. @unittest.skipIf(CI, "Can't handle async update on CPU")
  139. def test_wait_copy_signal(self):
  140. temp_signal = TestHCQ.d0._alloc_signal(value=0)
  141. TestHCQ.copy_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  142. with self.assertRaises(RuntimeError):
  143. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=50)
  144. # clean up
  145. TestHCQ.d0._set_signal(temp_signal, 1)
  146. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=100)
  147. TestHCQ.d0.timeline_value += 1
  148. def test_run_normal(self):
  149. q = TestHCQ.compute_queue()
  150. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  151. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  152. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  153. TestHCQ.d0.timeline_value += 1
  154. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[0]) == 1.0, f"got val {val}"
  155. def test_submit_empty_queues(self):
  156. TestHCQ.compute_queue().submit(TestHCQ.d0)
  157. TestHCQ.copy_queue().submit(TestHCQ.d0)
  158. def test_signal_timeout(self):
  159. with self.assertRaises(RuntimeError):
  160. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=50)
  161. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value + 122, timeout=50)
  162. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value - 1, timeout=50)
  163. def test_signal(self):
  164. new_timeline_value = TestHCQ.d0.timeline_value + 0xff
  165. TestHCQ.compute_queue().signal(TestHCQ.d0.timeline_signal, new_timeline_value).submit(TestHCQ.d0)
  166. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, new_timeline_value)
  167. TestHCQ.d0.timeline_value = new_timeline_value + 1 # update to not break runtime
  168. def test_copy_signal(self):
  169. new_timeline_value = TestHCQ.d0.timeline_value + 0xff
  170. TestHCQ.copy_queue().signal(TestHCQ.d0.timeline_signal, new_timeline_value).submit(TestHCQ.d0)
  171. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, new_timeline_value)
  172. TestHCQ.d0.timeline_value = new_timeline_value + 1 # update to not break runtime
  173. def test_run_signal(self):
  174. q = TestHCQ.compute_queue()
  175. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  176. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  177. q.submit(TestHCQ.d0)
  178. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  179. TestHCQ.d0.timeline_value += 1
  180. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[0]) == 1.0, f"got val {val}"
  181. def test_copy_1000_times(self):
  182. q = TestHCQ.copy_queue()
  183. q.copy(TestHCQ.a.lazydata.buffer._buf.va_addr, TestHCQ.b.lazydata.buffer._buf.va_addr, 8)
  184. q.copy(TestHCQ.b.lazydata.buffer._buf.va_addr, TestHCQ.a.lazydata.buffer._buf.va_addr, 8)
  185. for _ in range(1000):
  186. q.submit(TestHCQ.d0)
  187. TestHCQ.copy_queue().signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  188. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  189. TestHCQ.d0.timeline_value += 1
  190. # confirm the signal didn't exceed the put value
  191. with self.assertRaises(RuntimeError):
  192. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value + 1, timeout=50)
  193. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[1]) == 0.0, f"got val {val}"
  194. def test_copy(self):
  195. q = TestHCQ.copy_queue()
  196. q.copy(TestHCQ.b.lazydata.buffer._buf.va_addr, TestHCQ.a.lazydata.buffer._buf.va_addr, 8)
  197. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  198. q.submit(TestHCQ.d0)
  199. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  200. TestHCQ.d0.timeline_value += 1
  201. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[1]) == 1.0, f"got val {val}"
  202. @unittest.skipUnless(Device.DEFAULT == "NV", "Only NV supports bind")
  203. def test_bind_copy(self):
  204. q = TestHCQ.copy_queue()
  205. q.copy(TestHCQ.a.lazydata.buffer._buf.va_addr, TestHCQ.b.lazydata.buffer._buf.va_addr, 8)
  206. q.copy(TestHCQ.b.lazydata.buffer._buf.va_addr, TestHCQ.a.lazydata.buffer._buf.va_addr, 8)
  207. q.bind(TestHCQ.d0)
  208. for _ in range(1000):
  209. q.submit(TestHCQ.d0)
  210. TestHCQ.copy_queue().signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  211. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  212. TestHCQ.d0.timeline_value += 1
  213. # confirm the signal didn't exceed the put value
  214. with self.assertRaises(RuntimeError):
  215. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value + 1, timeout=50)
  216. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[1]) == 0.0, f"got val {val}"
  217. def test_copy_bandwidth(self):
  218. # THEORY: the bandwidth is low here because it's only using one SDMA queue. I suspect it's more stable like this at least.
  219. SZ = 2_000_000_000
  220. a = Buffer(Device.DEFAULT, SZ, dtypes.uint8, options=BufferOptions(nolru=True)).allocate()
  221. b = Buffer(Device.DEFAULT, SZ, dtypes.uint8, options=BufferOptions(nolru=True)).allocate()
  222. q = TestHCQ.copy_queue()
  223. q.copy(a._buf.va_addr, b._buf.va_addr, SZ)
  224. et = _time_queue(q, TestHCQ.d0)
  225. gb_s = (SZ/1e9)/et
  226. print(f"same device copy: {et*1e3:.2f} ms, {gb_s:.2f} GB/s")
  227. assert (0.3 if CI else 10) <= gb_s <= 1000
  228. def test_cross_device_copy_bandwidth(self):
  229. SZ = 2_000_000_000
  230. b = Buffer(f"{Device.DEFAULT}:1", SZ, dtypes.uint8, options=BufferOptions(nolru=True)).allocate()
  231. a = Buffer(Device.DEFAULT, SZ, dtypes.uint8, options=BufferOptions(nolru=True)).allocate()
  232. TestHCQ.d0._gpu_map(b._buf)
  233. q = TestHCQ.copy_queue()
  234. q.copy(a._buf.va_addr, b._buf.va_addr, SZ)
  235. et = _time_queue(q, TestHCQ.d0)
  236. gb_s = (SZ/1e9)/et
  237. print(f"cross device copy: {et*1e3:.2f} ms, {gb_s:.2f} GB/s")
  238. assert (0.3 if CI else 2) <= gb_s <= 50
  239. def test_interleave_compute_and_copy(self):
  240. q = TestHCQ.compute_queue()
  241. qc = TestHCQ.copy_queue()
  242. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size) # b = [1, 2]
  243. q.signal(sig:=TestHCQ.d0._alloc_signal(value=0), value=1)
  244. qc.wait(sig, value=1)
  245. qc.copy(TestHCQ.a.lazydata.buffer._buf.va_addr, TestHCQ.b.lazydata.buffer._buf.va_addr, 8)
  246. qc.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  247. qc.submit(TestHCQ.d0)
  248. time.sleep(0.02) # give it time for the wait to fail
  249. q.submit(TestHCQ.d0)
  250. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  251. TestHCQ.d0.timeline_value += 1
  252. assert (val:=TestHCQ.a.lazydata.buffer.as_buffer().cast("f")[0]) == 1.0, f"got val {val}"
  253. def test_cross_device_signal(self):
  254. d1 = Device[f"{Device.DEFAULT}:1"]
  255. q1 = TestHCQ.compute_queue()
  256. q2 = TestHCQ.compute_queue()
  257. q1.signal(sig:=TestHCQ.d0._alloc_signal(value=0), value=0xfff)
  258. q2.wait(sig, value=0xfff)
  259. q2.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  260. q2.submit(TestHCQ.d0)
  261. q1.signal(d1.timeline_signal, d1.timeline_value)
  262. q1.submit(d1)
  263. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  264. TestHCQ.d0.timeline_value += 1
  265. d1._wait_signal(d1.timeline_signal, d1.timeline_value)
  266. d1.timeline_value += 1
  267. def test_timeline_signal_rollover(self):
  268. # NV 64bit, AMD 32bit
  269. TestHCQ.d0.timeline_value = (1 << 64) - 20 if Device.DEFAULT == "NV" else (1 << 32) - 20 # close value to reset
  270. TestHCQ.compute_queue().signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value - 1).submit(TestHCQ.d0)
  271. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value - 1)
  272. for _ in range(40):
  273. q = TestHCQ.compute_queue()
  274. q.wait(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value - 1)
  275. q.exec(TestHCQ.runner.clprg, TestHCQ.d0.kernargs_ptr, TestHCQ.runner.p.global_size, TestHCQ.runner.p.local_size)
  276. q.signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
  277. TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value)
  278. TestHCQ.d0.timeline_value += 1
  279. assert (val:=TestHCQ.b.lazydata.buffer.as_buffer().cast("f")[0]) == 1.0, f"got val {val}"
  280. if __name__ == "__main__":
  281. unittest.main()