|
@@ -0,0 +1,210 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import json
|
|
|
+import token
|
|
|
+import tokenize
|
|
|
+from datetime import datetime, timezone
|
|
|
+
|
|
|
# Token types that count toward the statistics; everything else emitted by
# tokenize (comments, NEWLINE/NL, INDENT/DEDENT, ...) is ignored.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
|
|
|
+
|
|
|
def is_docstring(t):
    """Return True if token *t* looks like a docstring.

    Heuristic: a STRING token that opens with triple quotes and whose source
    line also starts with those quotes (i.e. the string is a standalone
    statement, not an operand inside an expression).

    Fix: the original only recognized \"\"\"-quoted docstrings; '''-quoted
    docstrings were counted as code. Both styles are now detected.
    """
    if t.type != token.STRING:
        return False
    line = t.line.strip()
    return any(t.string.startswith(q) and line.startswith(q)
               for q in ('"""', "'''"))
|
|
|
+
|
|
|
def gen_stats(base_path="."):
    """Collect per-file statistics for every ``.py`` file under <base_path>/exo.

    Returns a list of rows ``[relative_path, line_count, tokens_per_line]``,
    where only whitelisted tokens (minus docstrings) are counted.  Files that
    fail to open or tokenize are reported on stdout and skipped.
    """
    rows = []
    root = os.path.join(base_path, "exo")
    if not os.path.exists(root):
        print(f"Warning: {root} directory not found")
        return rows

    for dirpath, _, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith(".py"):
                continue

            full_path = os.path.join(dirpath, filename)
            rel_path = os.path.relpath(full_path, base_path).replace('\\', '/')

            try:
                # tokenize.open honors the file's PEP 263 encoding declaration.
                with tokenize.open(full_path) as handle:
                    kept = [
                        tok for tok in tokenize.generate_tokens(handle.readline)
                        if tok.type in TOKEN_WHITELIST and not is_docstring(tok)
                    ]
                # Count each distinct source line that carries at least one
                # kept token (multi-line tokens cover their whole span).
                covered = set()
                for tok in kept:
                    covered.update(range(tok.start[0], tok.end[0] + 1))
                if covered:
                    rows.append([rel_path, len(covered), len(kept) / len(covered)])
            except Exception as exc:
                print(f"Error processing {full_path}: {exc}")
                continue

    return rows
|
|
|
+
|
|
|
def gen_diff(table_old, table_new):
    """Compute per-file changes between two stat tables from gen_stats().

    Each input row is ``[path, line_count, tokens_per_line]``.  Output rows are
    ``[path, current_lines, line_diff, current_tokens_per_line,
    tokens_per_line_diff]``; deleted files appear with current values of 0 and
    negative diffs, and files whose stats did not change are omitted.

    Fix: rows are now indexed by path.  The original looked rows up with
    ``file in stats`` — list membership across *all* columns of the row, not
    just the path — and rescanned the whole table per file (O(n^2)).
    """
    old_by_file = {row[0]: row for row in table_old}
    new_by_file = {row[0]: row for row in table_new}

    table = []

    # Files present only in the new snapshot: everything is an addition.
    for file in new_by_file.keys() - old_by_file.keys():
        _, lines, tpl = new_by_file[file]
        table.append([file, lines, lines, tpl, tpl])

    # Files that disappeared: report zeros with fully negative diffs.
    for file in old_by_file.keys() - new_by_file.keys():
        _, lines, tpl = old_by_file[file]
        table.append([file, 0, -lines, 0, -tpl])

    # Files in both snapshots: include only when something changed.
    for file in new_by_file.keys() & old_by_file.keys():
        _, old_lines, old_tpl = old_by_file[file]
        _, new_lines, new_tpl = new_by_file[file]
        if new_lines != old_lines or new_tpl != old_tpl:
            table.append([file, new_lines, new_lines - old_lines,
                          new_tpl, new_tpl - old_tpl])

    return table
|
|
|
+
|
|
|
def create_json_report(table, is_diff=False):
    """Build a JSON-serializable report dict from a stats or diff table.

    ``table`` holds gen_diff() rows when ``is_diff`` is True, otherwise
    gen_stats() rows.  CI metadata is read from CircleCI environment
    variables, falling back to placeholder values when unset.
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    commit_sha = os.environ.get('CIRCLE_SHA1', 'unknown')
    branch = os.environ.get('CIRCLE_BRANCH', 'unknown')

    if is_diff:
        file_entries = [
            {
                'name': name,
                'current_lines': cur_lines,
                'line_diff': line_diff,
                'current_tokens_per_line': cur_tpl,
                'tokens_per_line_diff': tpl_diff,
            }
            for name, cur_lines, line_diff, cur_tpl, tpl_diff in table
        ]
        return {
            'type': 'diff',
            'timestamp': timestamp,
            'commit_sha': commit_sha,
            'branch': branch,
            # PR number only appears in diff reports; empty string when unset.
            'pr_number': os.environ.get('CIRCLE_PR_NUMBER', ''),
            'files': file_entries,
            'total_line_changes': sum(entry['line_diff'] for entry in file_entries),
            'total_files_changed': len(file_entries),
        }

    file_entries = [
        {'name': name, 'lines': lines, 'tokens_per_line': tpl}
        for name, lines, tpl in table
    ]
    return {
        'type': 'snapshot',
        'timestamp': timestamp,
        'commit_sha': commit_sha,
        'branch': branch,
        'files': file_entries,
        'total_lines': sum(entry['lines'] for entry in file_entries),
        'total_files': len(file_entries),
    }
|
|
|
+
|
|
|
def display_diff(diff):
    """Render a numeric delta with an explicit leading '+' on positive values.

    Zero and negative values fall through to plain str() (so zero is "0",
    not "+0").
    """
    sign = "+" if diff > 0 else ""
    return sign + str(diff)
|
|
|
+
|
|
|
def format_table(rows, headers, floatfmt):
    """Render ``rows`` as a plain-text table: header, dashed separator, data.

    ``floatfmt`` gives a per-column format hint; an entry beginning with '+'
    forces an explicit sign on numeric cells.  Strings (including the first
    filename column) are never reformatted.  Returns "" for an empty table.

    NOTE: column widths are measured on the raw str() of each cell *before*
    numeric formatting, so a signed/retouched cell may exceed its column
    width (ljust never truncates) — this matches the original behavior.
    """
    if not rows:
        return ""

    all_rows = [headers] + rows
    widths = [max(len(str(row[col])) for row in all_rows)
              for col in range(len(headers))]

    def render_cell(value, col):
        # Floats honor the column's precision/sign hint.
        if isinstance(value, float):
            return f"{value:+.1f}" if floatfmt[col].startswith('+') else f"{value:.1f}"
        # Ints honor the sign hint, except in column 0 (the file name slot).
        if isinstance(value, int) and col > 0:
            return f"{value:+d}" if floatfmt[col].startswith('+') else f"{value:d}"
        return str(value)

    lines = []
    for row_idx, row in enumerate(all_rows):
        cells = [render_cell(value, col).ljust(width)
                 for col, (value, width) in enumerate(zip(row, widths))]
        lines.append("  ".join(cells))
        if row_idx == 0:
            # Dashed separator directly under the header row.
            lines.append("  ".join("-" * width for width in widths))

    return "\n".join(lines)
|
|
|
+
|
|
|
if __name__ == "__main__":
    # CLI entry point.
    #   two args  -> diff mode: compare stats of argv[1] (old) vs argv[2] (new)
    #   0/1 args  -> snapshot mode: stats of one base dir (default ".")
    # Both modes print a Markdown-fenced table to stdout and write a JSON
    # report file for downstream CI consumption.
    if len(sys.argv) == 3:
        # Comparing two directories
        headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
        table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))

        if table:
            # Print table output
            print("### Code Changes in 'exo' Directory")
            print("```")
            # Largest absolute line change first; the len(x) > 2 guard keeps
            # a malformed short row from crashing the sort key.
            print(format_table(
                sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
                headers,
                (".1f", "d", "+d", ".1f", "+.1f")
            ))
            total_changes = sum(row[2] for row in table)
            print(f"\nTotal line changes: {display_diff(total_changes)}")
            print("```")

        # Generate JSON report
        # NOTE(review): written at this level, the report is emitted even when
        # no files changed (empty 'files' list) — confirm CI expects that.
        report = create_json_report(table, is_diff=True)
        with open('line-count-diff.json', 'w') as f:
            json.dump(report, f, indent=2)
    else:
        # Single directory analysis
        headers = ["File", "Lines", "Tokens/Line"]
        table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")

        if table:
            # Print table output
            print("### Code Statistics for 'exo' Directory")
            print("```")
            # Largest files (by counted lines) first.
            print(format_table(
                sorted(table, key=lambda x: x[1], reverse=True),
                headers,
                (".1f", "d", ".1f")
            ))
            total_lines = sum(row[1] for row in table)
            print(f"\nTotal lines: {total_lines}")
            print("```")

        # Generate JSON report
        report = create_json_report(table, is_diff=False)
        with open('line-count-snapshot.json', 'w') as f:
            json.dump(report, f, indent=2)
|