# test_net.py

  1. import numpy as np
  2. import math
  3. import random
  4. np.set_printoptions(suppress=True)
  5. from copy import deepcopy
  6. from tinygrad.helpers import getenv, colored
  7. from tinygrad.tensor import Tensor
  8. from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict
  9. from tinygrad.engine.search import bufs_from_lin, time_linearizer, actions, get_kernel_actions
  10. from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats
  11. from extra.optimization.extract_policynet import PolicyNet
  12. from extra.optimization.pretrain_valuenet import ValueNet
  13. VALUE = getenv("VALUE")
  14. if __name__ == "__main__":
  15. if VALUE:
  16. net = ValueNet()
  17. load_state_dict(net, safe_load("/tmp/valuenet.safetensors"))
  18. else:
  19. net = PolicyNet()
  20. load_state_dict(net, safe_load("/tmp/policynet.safetensors"))
  21. ast_strs = load_worlds()
  22. # real randomness
  23. random.seed()
  24. random.shuffle(ast_strs)
  25. wins = 0
  26. for ep_num,ast_str in enumerate(ast_strs):
  27. print("\nEPISODE", ep_num, f"win {wins*100/max(1,ep_num):.2f}%")
  28. lin = ast_str_to_lin(ast_str)
  29. rawbufs = bufs_from_lin(lin)
  30. linhc = deepcopy(lin)
  31. linhc.hand_coded_optimizations()
  32. tmhc = time_linearizer(linhc, rawbufs)
  33. print(f"{tmhc*1e6:10.2f} HC ", linhc.colored_shape())
  34. pred_time = float('nan')
  35. tm = float('inf')
  36. while 1:
  37. if VALUE:
  38. acts,feats = [], []
  39. for k,v in get_kernel_actions(lin).items():
  40. acts.append(k)
  41. feats.append(lin_to_feats(v))
  42. preds = net(Tensor(feats))
  43. pred_time = math.exp(preds.numpy().min())
  44. act = acts[preds.numpy().argmin()]
  45. else:
  46. probs = net(Tensor([lin_to_feats(lin)]))
  47. dist = probs.exp().numpy()
  48. act = dist.argmax()
  49. if act == 0: break
  50. try:
  51. lin.apply_opt(actions[act-1])
  52. except Exception:
  53. print("FAILED")
  54. break
  55. tm = time_linearizer(lin, rawbufs)
  56. print(f"{tm*1e6:10.2f} {pred_time*1e6:10.2f}", lin.colored_shape())
  57. print(f"{colored('BEAT', 'green') if tm < tmhc else colored('lost', 'red')} hand coded {tmhc/tm:5.2f}x")
  58. wins += int(tm < tmhc)