# line_counter.py — per-file line/token statistics for the exo/ source tree.
#!/usr/bin/env python3
import json
import os
import sys
import token
import tokenize
from datetime import datetime, timezone
  8. TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
  9. def is_docstring(t):
  10. return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
  11. def gen_stats(base_path="."):
  12. table = []
  13. exo_path = os.path.join(base_path, "exo")
  14. if not os.path.exists(exo_path):
  15. print(f"Warning: {exo_path} directory not found")
  16. return table
  17. for path, _, files in os.walk(exo_path):
  18. for name in files:
  19. if not name.endswith(".py"):
  20. continue
  21. filepath = os.path.join(path, name)
  22. relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
  23. try:
  24. with tokenize.open(filepath) as file_:
  25. tokens = [t for t in tokenize.generate_tokens(file_.readline)
  26. if t.type in TOKEN_WHITELIST and not is_docstring(t)]
  27. token_count = len(tokens)
  28. line_count = len(set([x for t in tokens
  29. for x in range(t.start[0], t.end[0]+1)]))
  30. if line_count > 0:
  31. table.append([relfilepath, line_count, token_count/line_count])
  32. except Exception as e:
  33. print(f"Error processing {filepath}: {e}")
  34. continue
  35. return table
  36. def gen_diff(table_old, table_new):
  37. table = []
  38. files_new = set([x[0] for x in table_new])
  39. files_old = set([x[0] for x in table_old])
  40. added = files_new - files_old
  41. deleted = files_old - files_new
  42. unchanged = files_new & files_old
  43. for file in added:
  44. file_stat = [stats for stats in table_new if file in stats][0]
  45. table.append([file_stat[0], file_stat[1], file_stat[1], file_stat[2], file_stat[2]])
  46. for file in deleted:
  47. file_stat = [stats for stats in table_old if file in stats][0]
  48. table.append([file_stat[0], 0, -file_stat[1], 0, -file_stat[2]])
  49. for file in unchanged:
  50. file_stat_old = [stats for stats in table_old if file in stats][0]
  51. file_stat_new = [stats for stats in table_new if file in stats][0]
  52. if file_stat_new[1] != file_stat_old[1] or file_stat_new[2] != file_stat_old[2]:
  53. table.append([
  54. file_stat_new[0],
  55. file_stat_new[1],
  56. file_stat_new[1] - file_stat_old[1],
  57. file_stat_new[2],
  58. file_stat_new[2] - file_stat_old[2]
  59. ])
  60. return table
  61. def create_json_report(table, is_diff=False):
  62. timestamp = datetime.now(timezone.utc).isoformat()
  63. commit_sha = os.environ.get('GITHUB_SHA', 'unknown')
  64. branch = os.environ.get('GITHUB_REF_NAME', 'unknown')
  65. pr_number = os.environ.get('GITHUB_EVENT_NUMBER', '')
  66. if is_diff:
  67. files = [{
  68. 'name': row[0],
  69. 'current_lines': row[1],
  70. 'line_diff': row[2],
  71. 'current_tokens_per_line': row[3],
  72. 'tokens_per_line_diff': row[4]
  73. } for row in table]
  74. report = {
  75. 'type': 'diff',
  76. 'timestamp': timestamp,
  77. 'commit_sha': commit_sha,
  78. 'branch': branch,
  79. 'pr_number': pr_number,
  80. 'files': files,
  81. 'total_line_changes': sum(row[2] for row in table),
  82. 'total_files_changed': len(files)
  83. }
  84. else:
  85. files = [{
  86. 'name': row[0],
  87. 'lines': row[1],
  88. 'tokens_per_line': row[2]
  89. } for row in table]
  90. report = {
  91. 'type': 'snapshot',
  92. 'timestamp': timestamp,
  93. 'commit_sha': commit_sha,
  94. 'branch': branch,
  95. 'files': files,
  96. 'total_lines': sum(row[1] for row in table),
  97. 'total_files': len(files)
  98. }
  99. return report
  100. def display_diff(diff):
  101. return "+" + str(diff) if diff > 0 else str(diff)
  102. def format_table(rows, headers, floatfmt):
  103. if not rows:
  104. return ""
  105. # Add headers as first row
  106. all_rows = [headers] + rows
  107. # Calculate column widths
  108. col_widths = []
  109. for col in range(len(headers)):
  110. col_width = max(len(str(row[col])) for row in all_rows)
  111. col_widths.append(col_width)
  112. # Format rows
  113. output = []
  114. for row_idx, row in enumerate(all_rows):
  115. formatted_cols = []
  116. for col_idx, (value, width) in enumerate(zip(row, col_widths)):
  117. if isinstance(value, float):
  118. # Handle float formatting based on floatfmt
  119. fmt = floatfmt[col_idx]
  120. if fmt.startswith('+'):
  121. value = f"{value:+.1f}"
  122. else:
  123. value = f"{value:.1f}"
  124. elif isinstance(value, int) and col_idx > 0: # Skip filename column
  125. # Handle integer formatting based on floatfmt
  126. fmt = floatfmt[col_idx]
  127. if fmt.startswith('+'):
  128. value = f"{value:+d}"
  129. else:
  130. value = f"{value:d}"
  131. formatted_cols.append(str(value).ljust(width))
  132. output.append(" ".join(formatted_cols))
  133. # Add separator line after headers
  134. if row_idx == 0:
  135. separator = []
  136. for width in col_widths:
  137. separator.append("-" * width)
  138. output.append(" ".join(separator))
  139. return "\n".join(output)
  140. if __name__ == "__main__":
  141. if len(sys.argv) == 3:
  142. # Comparing two directories
  143. headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
  144. table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
  145. if table:
  146. # Print table output
  147. print("### Code Changes in 'exo' Directory")
  148. print("```")
  149. print(format_table(
  150. sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
  151. headers,
  152. (".1f", "d", "+d", ".1f", "+.1f")
  153. ))
  154. total_changes = sum(row[2] for row in table)
  155. print(f"\nTotal line changes: {display_diff(total_changes)}")
  156. print("```")
  157. # Generate JSON report
  158. report = create_json_report(table, is_diff=True)
  159. with open('line-count-diff.json', 'w') as f:
  160. json.dump(report, f, indent=2)
  161. else:
  162. # Single directory analysis
  163. headers = ["File", "Lines", "Tokens/Line"]
  164. table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
  165. if table:
  166. # Print table output
  167. print("### Code Statistics for 'exo' Directory")
  168. print("```")
  169. print(format_table(
  170. sorted(table, key=lambda x: x[1], reverse=True),
  171. headers,
  172. (".1f", "d", ".1f")
  173. ))
  174. total_lines = sum(row[1] for row in table)
  175. print(f"\nTotal lines: {total_lines}")
  176. print("```")
  177. # Generate JSON report
  178. report = create_json_report(table, is_diff=False)
  179. with open('line-count-snapshot.json', 'w') as f:
  180. json.dump(report, f, indent=2)