# compile_tensorflow.py (3.4 KB)
# An example of compiling a small TensorFlow model to extremely portable C code
  2. import os, sys
  3. os.environ["CLANG"] = '1'
  4. os.environ["JIT"] = '2'
  5. import numpy as np
  6. import subprocess
  7. import tensorflow as tf
  8. import tf2onnx
  9. from extra.onnx import get_run_onnx
  10. from tinygrad.tensor import Tensor
  11. from extra.export_model import export_model_clang, compile_net, jit_model
  12. def get_uncompiled_model2(dataset_size=32, output_size=4):
  13. inputs = tf.keras.Input(shape=(dataset_size,), name="inputs")
  14. x = tf.keras.layers.Dense(16, activation="relu", name="dense_1")(inputs)
  15. x = tf.keras.layers.BatchNormalization()(x)
  16. x = tf.keras.layers.Dense(32, activation="relu", name="dense_2")(x)
  17. outputs = tf.keras.layers.Dense(output_size, activation="sigmoid", name="predictions")(x)
  18. model = tf.keras.Model(inputs=inputs, outputs=outputs)
  19. return model
  20. class TinyOnnx:
  21. def __init__(self, keras_model):
  22. input_signature = [tf.TensorSpec([1,32], tf.float32, name='x')]
  23. onnx_model, _ = tf2onnx.convert.from_keras(keras_model, input_signature, opset=13)
  24. self.run_onnx = get_run_onnx(onnx_model)
  25. def forward(self, x):
  26. return self.run_onnx({"x": x}, debug=False)['predictions']
  27. def compile_onnx_model(onnx_model):
  28. tinyonnx = TinyOnnx(onnx_model)
  29. the_input = Tensor.randn(1,32)
  30. run, special_names = jit_model(tinyonnx, the_input)
  31. functions, statements, bufs, bufs_to_save = compile_net(run, special_names)
  32. prg = export_model_clang(functions, statements, bufs, {}, ["input0"], ["output0"])
  33. the_output = run(the_input)
  34. cprog = ["#include <string.h>", "#include <stdio.h>", "#include <stdlib.h>"]
  35. cprog.append(prg)
  36. # weights
  37. cprog.append("void initialize(float *weights) {")
  38. weights = bytes()
  39. for name,cl in bufs_to_save.items():
  40. cprog.append(f"memcpy({name}, weights + {len(weights)//4}, {len(cl._buf)*4});")
  41. weights += bytes(cl._buf)
  42. cprog.append("}")
  43. # write the weights to disk
  44. with open("/tmp/tf_weights", "wb") as f:
  45. f.write(weights)
  46. # test program
  47. cprog.append(f"""int main(int argc, char *argv[]) {{
  48. // read in the weights from disk
  49. FILE *f = fopen("/tmp/tf_weights", "rb");
  50. float *weights = (float *)malloc({len(weights)});
  51. fread(weights, 1, {len(weights)}, f);
  52. fclose(f);
  53. // init the net
  54. initialize(weights);
  55. // test run
  56. float input[32];
  57. float outputs[4];
  58. for (int i = 0; i < 32; i++) scanf("%f", &input[i]);
  59. net(input, outputs);
  60. printf("%f %f %f %f\\n", outputs[0], outputs[1], outputs[2], outputs[3]);
  61. }}""")
  62. # ready the program
  63. prg = '\n'.join(cprog)
  64. print(prg)
  65. # add test weights
  66. subprocess.check_output(['clang', '-O2', '-lm', '-fPIC', '-x', 'c', '-', '-o', "/tmp/tf_test"], input=prg.encode('utf-8'))
  67. tinygrad_output = the_output[0].numpy()[0].tolist()
  68. print("tinygrad:", tinygrad_output, file=sys.stderr)
  69. c_input = ' '.join(["%f" % x for x in the_input[0].numpy()])+"\n"
  70. c_output = [float(x) for x in subprocess.check_output(["/tmp/tf_test"], input=c_input.encode('utf-8')).decode('utf-8').strip().split(" ")]
  71. print("compiled:", c_output, file=sys.stderr)
  72. np.testing.assert_allclose(tinygrad_output, c_output, atol=1e-5, rtol=1e-5)
  73. return the_input.numpy(), c_output
  74. if __name__ == "__main__":
  75. keras_model = get_uncompiled_model2()
  76. test_input, test_output = compile_onnx_model(keras_model)
  77. tf_output = keras_model(test_input).numpy()[0]
  78. print("keras: ", tf_output, file=sys.stderr)
  79. np.testing.assert_allclose(tf_output, test_output, atol=1e-5, rtol=1e-5)