  1. #!/usr/bin/env python3
  2. import numpy as np
  3. import random
  4. from tinygrad.nn.state import get_parameters
  5. from tinygrad.nn.optim import Adam
  6. from extra.training import train, evaluate
  7. from extra.models.transformer import Transformer
  8. # dataset idea from https://github.com/karpathy/minGPT/blob/master/projects/adder/adder.py
  9. def make_dataset():
  10. ds = []
  11. for i in range(100):
  12. for j in range(100):
  13. s = i+j
  14. ds.append([i//10, i%10, j//10, j%10, s//100, (s//10)%10, s%10])
  15. random.shuffle(ds)
  16. ds = np.array(ds).astype(np.float32)
  17. ds_X = ds[:, 0:6]
  18. ds_Y = np.copy(ds[:, 1:])
  19. ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
  20. ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]
  21. return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test
  22. if __name__ == "__main__":
  23. model = Transformer(10, 6, 2, 128, 4, 32)
  24. X_train, Y_train, X_test, Y_test = make_dataset()
  25. lr = 0.003
  26. for i in range(10):
  27. optim = Adam(get_parameters(model), lr=lr)
  28. train(model, X_train, Y_train, optim, 50, BS=64, allow_jit=True)
  29. acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10, return_predict=True)
  30. lr /= 1.2
  31. print(f'reducing lr to {lr:.4f}')
  32. if acc > 0.998:
  33. wrong=0
  34. for k in range(len(Y_test_preds)):
  35. if (Y_test_preds[k] != Y_test[k]).any():
  36. wrong+=1
  37. a,b,c,x = X_test[k,:2].astype(np.int32), X_test[k,2:4].astype(np.int32), Y_test[k,-3:].astype(np.int32), Y_test_preds[k,-3:].astype(np.int32)
  38. print(f'{a[0]}{a[1]} + {b[0]}{b[1]} = {x[0]}{x[1]}{x[2]} (correct: {c[0]}{c[1]}{c[2]})')
  39. print(f'Wrong predictions: {wrong}, acc = {acc:.4f}')