# to_movement_ops.py: rebuild a ShapeTracker as an equivalent series of MovementOps and check the result.
import itertools
from enum import Enum, auto
from collections import defaultdict
from typing import List, Tuple, DefaultDict

from extra.optimization.helpers import load_worlds, ast_str_to_ast
from tinygrad.ops import BufferOps, LazyOp
from tinygrad.helpers import prod, tqdm
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.symbolic import sym_infer, Node
  10. class MovementOps(Enum): RESHAPE = auto(); PERMUTE = auto(); EXPAND = auto(); PAD = auto(); SHRINK = auto(); STRIDE = auto(); AS_STRIDED = auto() # noqa: E702
  11. def apply_mop(st: ShapeTracker, mop_arg: Tuple[MovementOps, Tuple]) -> ShapeTracker:
  12. mop, arg = mop_arg
  13. if mop == MovementOps.RESHAPE:
  14. # shapetracker doesn't allow flattening with -1 but required for MovementOps.RESHAPE
  15. if arg == (-1,): return st.reshape((prod(st.views[-1].shape),))
  16. return st.reshape(arg)
  17. if mop == MovementOps.PERMUTE: return st.permute(arg)
  18. if mop == MovementOps.EXPAND:
  19. if len(arg) != len(st.shape): st = st.reshape((1,*st.shape))
  20. return st.expand(arg)
  21. if mop == MovementOps.PAD: return st.pad(arg)
  22. if mop == MovementOps.SHRINK: return st.shrink(arg)
  23. if mop == MovementOps.STRIDE: return st.stride(arg)
  24. raise ValueError("invalid mop")
  25. def make_scratch_st(st: ShapeTracker) -> ShapeTracker:
  26. return ShapeTracker.from_shape((get_buffer_size(st.views[0].shape, st.views[0].strides, st.views[0].offset, st.views[0].mask),))
# ShapeTracker to an equivalent series of MovementOps (https://github.com/tinygrad/tinygrad/pull/2216)
def to_movement_ops(st: ShapeTracker) -> List[Tuple[MovementOps, Tuple]]:
  """Translate every view of `st` into a list of (MovementOps, argument) pairs.

  The pairs are in the form consumed by apply_mop. The last loop of this function
  replays them on make_scratch_st(st) and uses the intermediate trackers to drop
  steps that lead back to an already-seen state.

  Args:
    st: the ShapeTracker whose views are converted, in order.

  Returns:
    The (MovementOps, argument) pairs that remain after that pruning pass.
  """
  to_apply:List[Tuple[MovementOps, Tuple]] = []
  for i, v in enumerate(st.views):
    # per-axis extent: width of the mask interval when the view is masked, the view shape otherwise
    real_shape = tuple(y-x for x,y in v.mask) if v.mask else v.shape
    # for every negative-stride axis, shift the offset by stride*(extent-1)
    offset = v.offset + sum(st*(s-1) for s,st in zip(real_shape, v.strides) if st<0)
    # with a mask, additionally shift by (mask start * stride) on every axis
    real_offset = offset + (sum(x*st for (x,_),st in zip(v.mask, v.strides)) if v.mask else 0)
    # only axes with a non-zero stride are kept; int strides are replaced by their absolute value
    real_real_shape = [s for s,st in zip(real_shape, v.strides) if st]
    strides: List[Node|int] = [abs(st) if isinstance(st,int) else st for st in v.strides if st]
    # 1 + sum((extent-1)*stride) over the kept axes
    buffer_size = sum((s-1)*st for s,st in zip(real_real_shape,strides)) + 1
    # for every view after the first, use what is left of the previous view's element count past real_offset
    if i: buffer_size = prod(st.views[i-1].shape) - real_offset
    # returns the (extent, stride) pairs and the axis indices, both ordered by descending stride
    # (equal strides: smaller extent first); the index sort reads real_real_shape from the enclosing scope
    def sort_by_strides(shape, strides): return sorted(zip(shape, strides), key=lambda k: (k[1],-k[0]), reverse=True), sorted(range(len(strides)), key=lambda k: (strides[k],-real_real_shape[k]), reverse=True)
    ordered_shape_strides, order = sort_by_strides(real_real_shape, strides)
    # flatten, then keep the window [real_offset, real_offset+buffer_size)
    to_apply.extend([(MovementOps.RESHAPE, (-1,)), (MovementOps.SHRINK, ((real_offset, real_offset+buffer_size),))])
    if strides:
      # pad the end when extent*stride of the largest-stride axis is bigger than the window
      if (ordered_shape_strides[0][0]*ordered_shape_strides[0][1])-buffer_size>0: to_apply.append((MovementOps.PAD, ((0, (ordered_shape_strides[0][0] * ordered_shape_strides[0][1]) - buffer_size),)))
      # NOTE(review): this loop rebinds `i`, the index of the enclosing view loop; after it, `i` is only
      # read inside comprehensions (which bind their own `i`) before the outer loop assigns it again
      for i, shape_stride in enumerate(ordered_shape_strides):
        # taken when this axis is not the last and its stride is smaller than extent*stride of the next axis
        if i<len(ordered_shape_strides)-1 and shape_stride[1] < ordered_shape_strides[i+1][0]*ordered_shape_strides[i+1][1]:
          # trailing length so far: the (possibly rewritten) stride of the previous axis, or the window for the first axis
          remaining_buffer = ordered_shape_strides[i-1][1] if i>0 else buffer_size
          # expand with a new leading axis of this extent, move that axis to position i, merge it with the trailing axis
          to_apply.append((MovementOps.EXPAND, (shape_stride[0], *(s[0] for s in ordered_shape_strides[:i]), remaining_buffer)))
          to_apply.append((MovementOps.PERMUTE, (*range(1,i+1), 0, i+1)))
          to_apply.append((MovementOps.RESHAPE, (*(s[0] for s in ordered_shape_strides[:i]), shape_stride[0]*remaining_buffer)))
          # pad the merged axis by extent*stride, then split it into (extent, remaining_buffer+stride)
          to_apply.append((MovementOps.PAD, (*((0,0) for _ in range(i)), (0, shape_stride[0]*shape_stride[1]))))
          to_apply.append((MovementOps.RESHAPE, (*(s[0] for s in ordered_shape_strides[:i+1]), remaining_buffer+shape_stride[1])))
          # record the new trailing length as this axis's stride so the next iteration reads it via [i-1][1]
          ordered_shape_strides[i] = (ordered_shape_strides[i][0], remaining_buffer+shape_stride[1])
        else:
          # otherwise: cut the trailing axis to extent*stride and split it into (extent, stride)
          to_apply.append((MovementOps.SHRINK, (*((0, s[0]) for s in ordered_shape_strides[:i]), (0, shape_stride[0]*shape_stride[1]))))
          to_apply.append((MovementOps.RESHAPE, (*[s[0] for s in ordered_shape_strides[:i+1]], shape_stride[1])))
      # keep a single element of the trailing axis, then drop that axis with a reshape
      to_apply.extend([(MovementOps.SHRINK, (*[(0, s[0]) for s in ordered_shape_strides], (0,1))), (MovementOps.RESHAPE, tuple(s[0] for s in ordered_shape_strides))])
      # undo the stride sort when it changed the axis order
      if order != list(range(len(order))): to_apply.append((MovementOps.PERMUTE, tuple(order.index(i) for i in range(len(strides)))))
    # reshape to real_shape, with size 1 on every zero-stride axis
    to_apply.append((MovementOps.RESHAPE, tuple(s if st else 1 for s,st in zip(real_shape, v.strides))))
    # emit a STRIDE of -1 on every axis whose original stride is negative
    if any(i<0 for i in v.strides): to_apply.append((MovementOps.STRIDE, tuple(-1 if st<0 else 1 for st in v.strides)))
    # then, we apply pre expand pads
    if v.mask is not None:
      # mask padding is split by stride: non-zero-stride axes are padded before the expand, zero-stride axes after
      pre_expand_pads = tuple((x,s-y) if st != 0 else (0,0) for (x,y),s,st in zip(v.mask, v.shape, v.strides))
      post_expand_pads = tuple((x,s-y) if st == 0 else (0,0) for (x,y),s,st in zip(v.mask, v.shape, v.strides))
      if any(x != (0,0) for x in pre_expand_pads):
        to_apply.append((MovementOps.PAD, pre_expand_pads))
        # real_shape grows by the padding just applied
        real_shape = tuple(x+s[0]+s[1] for x,s in zip(real_shape, pre_expand_pads))
    # then, we do any expands
    if any(s != 1 and st == 0 for s,st in zip(real_shape, v.strides)): to_apply.append((MovementOps.EXPAND, real_shape))
    # lastly, we apply post expand pads
    if v.mask is not None and any(x != (0,0) for x in post_expand_pads): to_apply.append((MovementOps.PAD, post_expand_pads))

  # replay the ops on a scratch tracker; when a state repeats, fall back to the op list stored for it
  scratch_st = make_scratch_st(st)
  ret = []
  seen = {} # {shapetracker: list of mops to generate that shapetracker}
  for mop_arg in to_apply:
    scratch_st = apply_mop(scratch_st, mop_arg)
    if scratch_st in seen:
      # copy so later appends do not modify the stored list
      ret = seen[scratch_st][:]
    else:
      ret.append(mop_arg)
      seen[scratch_st] = ret[:]
  return ret
  81. def get_real_view(shape, strides, offset, mask):
  82. real_shape = tuple(y-x for x,y in mask) if mask else shape
  83. offset = offset + sum(st * (s-1) for s,st in zip(real_shape, strides) if st<0)
  84. real_offset = offset + (sum(x*st for (x,_),st in zip(mask, strides)) if mask else 0)
  85. real_real_shape = [s for s,st in zip(real_shape, strides) if st]
  86. strides = [abs(st) if isinstance(st,int) else st for st in strides if st]
  87. return real_real_shape, strides, real_offset
  88. def get_buffer_size(shape, strides, offset, mask):
  89. real_real_shape, strides, real_offset = get_real_view(shape, strides, offset, mask)
  90. return real_offset + sum((s-1)*st for s, st in zip(real_real_shape,strides)) + 1
  91. def st_equivalent(st1: ShapeTracker, st2: ShapeTracker):
  92. if (idxs1:=st1.expr_idxs()) == (idxs2:=st2.expr_idxs()): return True
  93. idx1, valid1 = idxs1
  94. idx2, valid2 = idxs2
  95. # always invalid
  96. if valid1 == 0 and valid2 == 0: return True
  97. var1 = idx1.vars() | valid1.vars()
  98. var2 = idx2.vars() | valid2.vars()
  99. # Maybe there are cases that vars are different yet the sts are the same?
  100. if var1 != var2: return False
  101. # brute force over the vars range
  102. vs = list(var1)
  103. for i, ranges in enumerate(itertools.product(*[range(v.min, v.max+1) for v in vs])):
  104. if i > 1000:
  105. print("WARNING: did not search all possible combinations")
  106. break
  107. var_vals = {k:v for k,v in zip(vs, ranges)}
  108. r1 = sym_infer(idx1, var_vals) if sym_infer(valid1, var_vals) else 0
  109. r2 = sym_infer(idx2, var_vals) if sym_infer(valid2, var_vals) else 0
  110. if r1 != r2: return False
  111. return True
# histogram filled by test_rebuild: length of a rebuilt movement-op list -> number of times that length occurred
c: DefaultDict[int,int] = defaultdict(int)
  113. def test_rebuild(st: ShapeTracker):
  114. rebuilt_st = make_scratch_st(st)
  115. mops = to_movement_ops(st)
  116. c[len(mops)] += 1
  117. for mop_arg in mops: rebuilt_st = apply_mop(rebuilt_st, mop_arg)
  118. rebuilt_st = rebuilt_st.simplify()
  119. # why is the "all(x == 0 for x in rebuilt_st.views[-1].strides)" hack needed?
  120. assert st_equivalent(st, rebuilt_st) or all(x == 0 for x in rebuilt_st.views[-1].strides), f"mismatch {st} {rebuilt_st}"
  121. last_v1 = st.views[-1]
  122. last_v2 = rebuilt_st.views[-1]
  123. assert last_v1.shape == last_v2.shape, f"{last_v1.shape} != {last_v2.shape}"
  124. def test_rebuild_bufferop_st(ast:LazyOp):
  125. if ast.op in BufferOps:
  126. test_rebuild(ast.arg.st)
  127. for src in ast.src: test_rebuild_bufferop_st(src)
  128. if __name__ == "__main__":
  129. ast_strs = load_worlds(False, False, True)[:2000]
  130. for ast_str in tqdm(ast_strs):
  131. test_rebuild_bufferop_st(ast_str_to_ast(ast_str))
  132. print(f"avg length of mop = {sum(k*v for k,v in c.items()) / sum(c.values()):.2f}")