# sz.py: per-file line/token statistics for the tinygrad source tree
  1. #!/usr/bin/env python3
  2. import os, sys
  3. import token
  4. import tokenize
  5. import itertools
  6. from tabulate import tabulate
  7. TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
  8. def is_docstring(t):
  9. return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
  10. def gen_stats(base_path="."):
  11. table = []
  12. for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
  13. for name in files:
  14. if not name.endswith(".py"): continue
  15. if 'tinygrad/runtime/autogen' in path: continue
  16. filepath = os.path.join(path, name)
  17. relfilepath = os.path.relpath(filepath, base_path)
  18. with tokenize.open(filepath) as file_:
  19. tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]
  20. token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
  21. if line_count > 0: table.append([relfilepath, line_count, token_count/line_count])
  22. return table
  23. def gen_diff(table_old, table_new):
  24. table = []
  25. files_new = set([x[0] for x in table_new])
  26. files_old = set([x[0] for x in table_old])
  27. added, deleted, unchanged = files_new - files_old, files_old - files_new, files_new & files_old
  28. if added:
  29. for file in added:
  30. file_stat = [stats for stats in table_new if file in stats]
  31. table.append([file_stat[0][0], file_stat[0][1], file_stat[0][1]-0, file_stat[0][2], file_stat[0][2]-0])
  32. if deleted:
  33. for file in deleted:
  34. file_stat = [stats for stats in table_old if file in stats]
  35. table.append([file_stat[0][0], 0, 0 - file_stat[0][1], 0, 0-file_stat[0][2]])
  36. if unchanged:
  37. for file in unchanged:
  38. file_stat_old = [stats for stats in table_old if file in stats]
  39. file_stat_new = [stats for stats in table_new if file in stats]
  40. if file_stat_new[0][1]-file_stat_old[0][1] != 0 or file_stat_new[0][2]-file_stat_old[0][2] != 0:
  41. table.append([file_stat_new[0][0], file_stat_new[0][1], file_stat_new[0][1]-file_stat_old[0][1], file_stat_new[0][2],
  42. file_stat_new[0][2]-file_stat_old[0][2]])
  43. return table
  44. def display_diff(diff): return "+"+str(diff) if diff > 0 else str(diff)
  45. if __name__ == "__main__":
  46. if len(sys.argv) == 3:
  47. headers = ["Name", "Lines", "Diff", "Tokens/Line", "Diff"]
  48. table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
  49. elif len(sys.argv) == 2:
  50. headers = ["Name", "Lines", "Tokens/Line"]
  51. table = gen_stats(sys.argv[1])
  52. else:
  53. headers = ["Name", "Lines", "Tokens/Line"]
  54. table = gen_stats(".")
  55. if table:
  56. if len(sys.argv) == 3:
  57. print("### Changes")
  58. print("```")
  59. print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", intfmt=(..., "d", "+d"),
  60. floatfmt=(..., ..., ..., ".1f", "+.1f"))+"\n")
  61. print(f"\ntotal lines changes: {display_diff(sum([x[2] for x in table]))}")
  62. print("```")
  63. else:
  64. print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
  65. for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1], x[2]) for x in table]), key=lambda x:x[0]):
  66. print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")
  67. total_lines = sum([x[1] for x in table])
  68. print(f"\ntotal line count: {total_lines}")
  69. max_line_count = int(os.getenv("MAX_LINE_COUNT", "-1"))
  70. assert max_line_count == -1 or total_lines < max_line_count, f"OVER {max_line_count} LINES"