# abstractions2.py (3.9 KB)
# tinygrad is a tensor library, and as a tensor library it has multiple parts
# 1. a "runtime". this allows buffer management, compilation, and running programs
# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all
# 3. a "LazyBuffer" that fuses the compute into kernels, using memory only when needed
# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"
print("******** first, the runtime ***********")
from tinygrad.runtime.ops_clang import ClangProgram, ClangCompiler, MallocAllocator
# allocate some buffers: 4 bytes each — one int32 output and two int32 inputs
out = MallocAllocator.alloc(4)
a = MallocAllocator.alloc(4)
b = MallocAllocator.alloc(4)
# load in some values (little endian): a = 2, b = 3
MallocAllocator.copyin(a, bytearray([2,0,0,0]))
MallocAllocator.copyin(b, bytearray([3,0,0,0]))
# compile a program to a binary (hand-written C source for a scalar add)
lib = ClangCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")
# create a runtime for the program (ctypes.CDLL)
fxn = ClangProgram("add", lib)
# run the program: out[0] = a[0] + b[0]
fxn(out, a, b)
# check the data out: view the output buffer as unsigned 32-bit ints and read element 0
print(val := MallocAllocator.as_buffer(out).cast("I").tolist()[0])
assert val == 5
print("******** second, the Device ***********")
DEVICE = "CLANG" # NOTE: you can change this!
import struct
from tinygrad.dtype import dtypes
from tinygrad.device import Buffer, Device
from tinygrad.ops import LazyOp, BufferOps, MemBuffer, BinaryOps, MetaOps
from tinygrad.shape.shapetracker import ShapeTracker
# allocate some buffers + load in values (a = 2, b = 3, packed as little-endian uint32)
out = Buffer(DEVICE, 1, dtypes.int32).allocate()
a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
# NOTE: a._buf is the same as the return from MallocAllocator.alloc
# describe the computation as an AST of LazyOps.
# NOTE(review): the MemBuffer indices (1, 2 for loads, 0 for the store) presumably
# line up positionally with fxn.exec([out, a, b]) below — confirm against tinygrad
ld_1 = LazyOp(BufferOps.LOAD, (), MemBuffer(1, dtypes.int32, ShapeTracker.from_shape((1,))))
ld_2 = LazyOp(BufferOps.LOAD, (), MemBuffer(2, dtypes.int32, ShapeTracker.from_shape((1,))))
alu = LazyOp(BinaryOps.ADD, (ld_1, ld_2))
st_0 = LazyOp(BufferOps.STORE, (alu,), MemBuffer(0, dtypes.int32, ShapeTracker.from_shape((1,))))
sink = LazyOp(MetaOps.KERNEL, (st_0,))
# convert the computation to a "linearized" format (print the format)
from tinygrad.engine.realize import get_kernel, CompiledRunner
lin = get_kernel(Device[DEVICE].renderer, sink).linearize()
for u in lin.uops: print(u)
# compile a program (and print the source)
fxn = CompiledRunner(lin.to_program())
print(fxn.p.src)
# NOTE: fxn.clprg is the ClangProgram
# run the program; buffers are passed in the order the MemBuffer indices expect
fxn.exec([out, a, b])
# check the data out: read back element 0 of the output buffer
assert out.as_buffer().cast('I')[0] == 5
print("******** third, the LazyBuffer ***********")
from tinygrad.lazy import LazyBuffer
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.schedule import create_schedule
# allocate some values + load in values (same a = 2, b = 3 as the sections above)
a = LazyBuffer.metaop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE)
b = LazyBuffer.metaop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE)
a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
# NOTE(review): deleting .srcs presumably detaches a/b from their creating op so the
# scheduler treats them as already-realized inputs — confirm against tinygrad internals
del a.srcs
del b.srcs
# describe the computation (elementwise add of the two lazy buffers)
out = a.e(BinaryOps.ADD, b)
# schedule the computation as a list of kernels
sched = create_schedule([out])
for si in sched: print(si.ast.op) # NOTE: the first two convert it to CLANG
# DEBUGGING: print the compute ast as a tree
from tinygrad.engine.graph import print_tree
print_tree(sched[-1].ast)
# NOTE: sched[-1].ast is the same as st_0 above
# run that schedule (compiles and executes each kernel)
run_schedule(sched)
# check the data out: out is now realized, so its Buffer can be read back
assert out.realized.as_buffer().cast('I')[0] == 5
  78. print("******** fourth, the Tensor ***********")
  79. from tinygrad import Tensor
  80. a = Tensor([2], dtype=dtypes.int32, device=DEVICE)
  81. b = Tensor([3], dtype=dtypes.int32, device=DEVICE)
  82. out = a + b
  83. # check the data out
  84. print(val:=out.item())
  85. assert val == 5