#!/usr/bin/env python3
"""Count code lines and tokens-per-line for Python files under ``exo``.

Usage:
    script.py [BASE]        print a snapshot table for BASE/exo and write
                            ``line-count-snapshot.json``
    script.py OLD NEW       print a diff table between OLD/exo and NEW/exo
                            and write ``line-count-diff.json``
"""
import os
import sys
import json
import token
import tokenize
from datetime import datetime, timezone

# Token types that count as "code"; everything else (comments, newlines,
# indentation markers, ...) is ignored.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]


def is_docstring(t):
    """Return True if token *t* is a triple-double-quoted string starting its line.

    Only strings opened with ``\"\"\"`` are recognised; ``'''`` strings and
    prefixed strings (``r\"\"\"``) are treated as ordinary code tokens.
    """
    return (
        t.type == token.STRING
        and t.string.startswith('"""')
        and t.line.strip().startswith('"""')
    )


def gen_stats(base_path="."):
    """Collect per-file statistics for every ``.py`` file under ``base_path/exo``.

    Args:
        base_path: Directory that contains the ``exo`` folder.

    Returns:
        A list of ``[relative_path, line_count, tokens_per_line]`` rows, where
        ``relative_path`` is relative to *base_path* and uses ``/`` separators.
        Files with no counted tokens are omitted. An empty list is returned
        (after printing a warning) when the ``exo`` directory does not exist.
    """
    table = []
    exo_path = os.path.join(base_path, "exo")
    if not os.path.exists(exo_path):
        print(f"Warning: {exo_path} directory not found")
        return table

    for path, dirs, files in os.walk(exo_path):
        # Sort in place so the traversal (and therefore the row order) does
        # not depend on filesystem enumeration order.
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(".py"):
                continue
            filepath = os.path.join(path, name)
            relfilepath = os.path.relpath(filepath, base_path).replace("\\", "/")
            try:
                with tokenize.open(filepath) as file_:
                    tokens = [
                        t
                        for t in tokenize.generate_tokens(file_.readline)
                        if t.type in TOKEN_WHITELIST and not is_docstring(t)
                    ]
            except Exception as e:
                # Deliberately best-effort: one unreadable or unparsable file
                # must not abort the whole report.
                print(f"Error processing {filepath}: {e}")
                continue

            token_count = len(tokens)
            # A token may span several physical lines; count each line once.
            line_count = len(
                {
                    lineno
                    for t in tokens
                    for lineno in range(t.start[0], t.end[0] + 1)
                }
            )
            if line_count > 0:
                table.append([relfilepath, line_count, token_count / line_count])
    return table


def gen_diff(table_old, table_new):
    """Compare two ``gen_stats`` tables.

    Args:
        table_old: Rows ``[path, lines, tokens_per_line]`` for the old tree.
        table_new: Rows of the same shape for the new tree.

    Returns:
        A list of ``[path, lines, line_diff, tokens_per_line,
        tokens_per_line_diff]`` rows: added files first, then deleted files
        (reported with ``0`` current values and negative diffs), then files
        present in both whose line count or tokens-per-line changed. Each
        group is ordered by path. Files with identical statistics are omitted.
    """
    # Index rows by path once instead of rescanning the tables per file.
    old_by_name = {row[0]: row for row in table_old}
    new_by_name = {row[0]: row for row in table_new}

    added = new_by_name.keys() - old_by_name.keys()
    deleted = old_by_name.keys() - new_by_name.keys()
    common = new_by_name.keys() & old_by_name.keys()

    table = []
    for name in sorted(added):
        _, lines, ratio = new_by_name[name][:3]
        table.append([name, lines, lines, ratio, ratio])

    for name in sorted(deleted):
        _, lines, ratio = old_by_name[name][:3]
        table.append([name, 0, -lines, 0, -ratio])

    for name in sorted(common):
        _, old_lines, old_ratio = old_by_name[name][:3]
        _, new_lines, new_ratio = new_by_name[name][:3]
        if new_lines != old_lines or new_ratio != old_ratio:
            table.append(
                [
                    name,
                    new_lines,
                    new_lines - old_lines,
                    new_ratio,
                    new_ratio - old_ratio,
                ]
            )
    return table


def create_json_report(table, is_diff=False):
    """Build a JSON-serialisable report dictionary from a statistics table.

    The commit SHA, branch and PR number are read from the ``GITHUB_SHA``,
    ``GITHUB_REF_NAME`` and ``GITHUB_EVENT_NUMBER`` environment variables,
    falling back to ``'unknown'``, ``'unknown'`` and ``''`` respectively.

    Args:
        table: Rows from ``gen_diff`` when *is_diff* is true, otherwise rows
            from ``gen_stats``.
        is_diff: Selects the ``'diff'`` report layout instead of ``'snapshot'``.

    Returns:
        A dict with report metadata, a ``files`` list and aggregate totals.
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    commit_sha = os.environ.get('GITHUB_SHA', 'unknown')
    branch = os.environ.get('GITHUB_REF_NAME', 'unknown')
    pr_number = os.environ.get('GITHUB_EVENT_NUMBER', '')

    if is_diff:
        files = [
            {
                'name': row[0],
                'current_lines': row[1],
                'line_diff': row[2],
                'current_tokens_per_line': row[3],
                'tokens_per_line_diff': row[4],
            }
            for row in table
        ]
        return {
            'type': 'diff',
            'timestamp': timestamp,
            'commit_sha': commit_sha,
            'branch': branch,
            'pr_number': pr_number,
            'files': files,
            'total_line_changes': sum(row[2] for row in table),
            'total_files_changed': len(files),
        }

    files = [
        {
            'name': row[0],
            'lines': row[1],
            'tokens_per_line': row[2],
        }
        for row in table
    ]
    return {
        'type': 'snapshot',
        'timestamp': timestamp,
        'commit_sha': commit_sha,
        'branch': branch,
        'files': files,
        'total_lines': sum(row[1] for row in table),
        'total_files': len(files),
    }


def display_diff(diff):
    """Return *diff* as a string, prefixed with ``+`` when it is positive."""
    return "+" + str(diff) if diff > 0 else str(diff)


def _format_cell(value, col_idx, floatfmt):
    """Render one table cell according to its column's entry in *floatfmt*.

    Floats get one decimal place; integers outside the first column are
    printed as integers. A format entry starting with ``+`` forces a sign.
    Anything else is passed through ``str``.
    """
    if isinstance(value, float):
        return f"{value:+.1f}" if floatfmt[col_idx].startswith('+') else f"{value:.1f}"
    if isinstance(value, int) and col_idx > 0:  # column 0 holds the filename
        return f"{value:+d}" if floatfmt[col_idx].startswith('+') else f"{value:d}"
    return str(value)


def format_table(rows, headers, floatfmt):
    """Format *rows* as a left-aligned plain-text table with a header rule.

    Args:
        rows: Sequence of row sequences; each needs at least ``len(headers)``
            entries (extra entries are ignored).
        headers: Column titles.
        floatfmt: Per-column format hints; only a leading ``+`` (force sign)
            is interpreted.

    Returns:
        The table as a single string, or ``""`` when *rows* is empty.
    """
    if not rows:
        return ""

    n_cols = len(headers)
    # Render every cell first so that column widths are measured on the text
    # that is actually printed, not on the raw repr of the value.
    rendered = [
        [_format_cell(value, col_idx, floatfmt) for col_idx, value in enumerate(row[:n_cols])]
        for row in [headers, *rows]
    ]
    col_widths = [max(len(line[col]) for line in rendered) for col in range(n_cols)]

    gap = " "
    output = []
    for row_idx, cells in enumerate(rendered):
        output.append(gap.join(cell.ljust(width) for cell, width in zip(cells, col_widths)))
        if row_idx == 0:
            # Rule under the header row.
            output.append(gap.join("-" * width for width in col_widths))
    return "\n".join(output)


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Exactly two arguments select diff mode; any
            other count selects snapshot mode on the first argument (or
            ``"."`` when none is given).
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if len(args) == 2:
        # Comparing two directories
        headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
        table = gen_diff(gen_stats(args[0]), gen_stats(args[1]))
        if table:
            print("### Code Changes in 'exo' Directory")
            print("```")
            print(format_table(
                sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
                headers,
                (".1f", "d", "+d", ".1f", "+.1f"),
            ))
            total_changes = sum(row[2] for row in table)
            print(f"\nTotal line changes: {display_diff(total_changes)}")
            print("```")

            report = create_json_report(table, is_diff=True)
            with open('line-count-diff.json', 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=2)
    else:
        # Single directory analysis
        headers = ["File", "Lines", "Tokens/Line"]
        table = gen_stats(args[0] if args else ".")
        if table:
            print("### Code Statistics for 'exo' Directory")
            print("```")
            print(format_table(
                sorted(table, key=lambda x: x[1], reverse=True),
                headers,
                (".1f", "d", ".1f"),
            ))
            total_lines = sum(row[1] for row in table)
            print(f"\nTotal lines: {total_lines}")
            print("```")

            report = create_json_report(table, is_diff=False)
            with open('line-count-snapshot.json', 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=2)


if __name__ == "__main__":
    main()