# speed_beam_v_hcopt.py (1.6 KB)
  1. from tinygrad import Device
  2. from tinygrad.helpers import getenv, DEBUG, BEAM
  3. from tinygrad.engine.search import beam_search, time_linearizer, bufs_from_lin
  4. from extra.optimization.helpers import load_worlds, ast_str_to_lin
  5. if __name__ == "__main__":
  6. filter_reduce = bool(getenv("FILTER_REDUCE"))
  7. ast_strs = load_worlds(filter_reduce=filter_reduce, filter_novariable=True)
  8. dev = Device[Device.DEFAULT]
  9. test_n = getenv("TEST_N", 10)
  10. single = getenv("NUM", -1)
  11. if single != -1: ast_strs = ast_strs[single:single+1]
  12. beam_won, tested = 0, 0
  13. for num, ast in enumerate(ast_strs[:test_n]):
  14. def new_lin(): return ast_str_to_lin(ast, opts=dev.renderer)
  15. k = new_lin()
  16. # k.required_optimizations()
  17. if not (used_tensor_cores:=k.apply_tensor_cores(getenv("TC", 1))): k.hand_coded_optimizations()
  18. assert BEAM > 0
  19. lins = [(("tc" if used_tensor_cores else "hc"), k)]
  20. if used_tensor_cores:
  21. lins.append(("hc", new_lin()))
  22. lins[-1][1].hand_coded_optimizations()
  23. kb = new_lin()
  24. # kb.required_optimizations()
  25. test_rawbuffers = bufs_from_lin(kb) # allocate scratch buffers for optimization
  26. lins.append((f"beam{BEAM.value}", beam_search(kb, test_rawbuffers, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))))
  27. timed = sorted([(nm, tk, time_linearizer(tk, test_rawbuffers, allow_test_size=False, clear_l2=True)) for nm, tk in lins], key=lambda x: x[2])
  28. if DEBUG >= 1: print(" < ".join(f"{nm:6s} : {lin.colored_shape(30, dense=True)} : {tm*1e6:8.2f} us" for nm, lin, tm in timed))
  29. tested += 1
  30. if timed[0][0].startswith("beam"):
  31. beam_won += 1
  32. print(f"{beam_won=} / {tested=} = {beam_won/tested:.3f}")