123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- #!/usr/bin/env python3
- import os
- import sys
- import json
- import token
- import tokenize
- from datetime import datetime, timezone
# Token categories that count as "code" for the statistics below; everything
# else (comments, NEWLINE/NL, INDENT/DEDENT, ...) is ignored.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
def is_docstring(t):
    """Return True if token *t* looks like a docstring.

    A token qualifies when it is a STRING literal that is triple-quoted and
    begins its (stripped) physical line, i.e. the string stands alone as a
    statement rather than appearing inside an expression.

    Fix: the original only recognized triple *double* quotes, so
    '''-quoted docstrings were counted as code tokens.
    NOTE(review): prefixed docstrings (r\"\"\"...\"\"\", etc.) are still not
    detected — confirm whether that matters for these stats.
    """
    triple_quotes = ('"""', "'''")
    return (
        t.type == token.STRING
        and t.string.startswith(triple_quotes)
        and t.line.strip().startswith(triple_quotes)
    )
def gen_stats(base_path="."):
    """Collect per-file statistics for every .py file under <base_path>/exo.

    Returns a list of [relative_path, line_count, tokens_per_line] rows,
    where line_count only counts lines carrying whitelisted tokens.  Files
    that cannot be tokenized are reported on stdout and skipped; a missing
    exo/ directory yields an empty table (with a warning).
    """
    table = []
    exo_path = os.path.join(base_path, "exo")
    if not os.path.exists(exo_path):
        print(f"Warning: {exo_path} directory not found")
        return table
    for dirpath, _, filenames in os.walk(exo_path):
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            filepath = os.path.join(dirpath, filename)
            # Normalize to forward slashes so reports are stable across OSes.
            relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
            try:
                with tokenize.open(filepath) as handle:
                    kept = [
                        tok
                        for tok in tokenize.generate_tokens(handle.readline)
                        if tok.type in TOKEN_WHITELIST and not is_docstring(tok)
                    ]
                # A multi-line token contributes every line it spans.
                covered = {
                    lineno
                    for tok in kept
                    for lineno in range(tok.start[0], tok.end[0] + 1)
                }
                if covered:
                    table.append([relfilepath, len(covered), len(kept) / len(covered)])
            except Exception as e:
                print(f"Error processing {filepath}: {e}")
                continue
    return table
def gen_diff(table_old, table_new):
    """Compute per-file changes between two stats tables.

    Each input row is [path, line_count, tokens_per_line] (as produced by
    gen_stats).  Returns rows of
    [path, new_lines, line_diff, new_tokens_per_line, tokens_per_line_diff]
    covering added, deleted, and changed files; files with identical stats
    in both tables are omitted.

    Fix: the original located rows with `[s for s in table if file in s][0]`,
    an O(n^2) scan that also matched `file` against *any* row element rather
    than just the path column; rows are now indexed by path once.
    """
    old = {path: (lines, tpl) for path, lines, tpl in table_old}
    new = {path: (lines, tpl) for path, lines, tpl in table_new}
    table = []
    # Added files: the whole file counts as the diff.
    for path in new.keys() - old.keys():
        lines, tpl = new[path]
        table.append([path, lines, lines, tpl, tpl])
    # Deleted files: everything is negative, current stats drop to zero.
    for path in old.keys() - new.keys():
        lines, tpl = old[path]
        table.append([path, 0, -lines, 0, -tpl])
    # Files present in both tables: report only if something changed.
    for path in new.keys() & old.keys():
        old_lines, old_tpl = old[path]
        new_lines, new_tpl = new[path]
        if new_lines != old_lines or new_tpl != old_tpl:
            table.append([
                path,
                new_lines,
                new_lines - old_lines,
                new_tpl,
                new_tpl - old_tpl,
            ])
    return table
def create_json_report(table, is_diff=False):
    """Build a JSON-serializable report dict from a stats or diff table.

    CI metadata comes from GitHub Actions environment variables, falling
    back to 'unknown' / '' when unset.  NOTE(review): GITHUB_EVENT_NUMBER
    is not a built-in Actions variable — presumably the workflow exports
    it; confirm against the workflow file.
    """
    timestamp = datetime.now(timezone.utc).isoformat()
    commit_sha = os.environ.get('GITHUB_SHA', 'unknown')
    branch = os.environ.get('GITHUB_REF_NAME', 'unknown')
    pr_number = os.environ.get('GITHUB_EVENT_NUMBER', '')
    if is_diff:
        diff_keys = (
            'name',
            'current_lines',
            'line_diff',
            'current_tokens_per_line',
            'tokens_per_line_diff',
        )
        files = [dict(zip(diff_keys, row)) for row in table]
        return {
            'type': 'diff',
            'timestamp': timestamp,
            'commit_sha': commit_sha,
            'branch': branch,
            'pr_number': pr_number,
            'files': files,
            'total_line_changes': sum(row[2] for row in table),
            'total_files_changed': len(files),
        }
    files = [
        {'name': name, 'lines': lines, 'tokens_per_line': tpl}
        for name, lines, tpl in table
    ]
    return {
        'type': 'snapshot',
        'timestamp': timestamp,
        'commit_sha': commit_sha,
        'branch': branch,
        'files': files,
        'total_lines': sum(row[1] for row in table),
        'total_files': len(files),
    }
def display_diff(diff):
    """Render a numeric delta, prefixing positive values with '+'."""
    return f"+{diff}" if diff > 0 else str(diff)
def format_table(rows, headers, floatfmt):
    """Render rows as an aligned plain-text table with a separator line.

    floatfmt is a per-column format hint; an entry starting with '+' forces
    an explicit sign.  Floats always print with one decimal place; ints are
    formatted in every column except the first (filename) column.  Returns
    "" for an empty rows list.

    Fix: column widths are now measured on the *formatted* cell text; the
    original measured str(value) of the raw values, so float columns were
    padded to the width of e.g. '2.3333333333' even though '2.3' was
    printed.
    """
    if not rows:
        return ""

    def fmt_cell(value, col_idx):
        # Format one cell according to the column's floatfmt hint.
        if isinstance(value, float):
            spec = "+.1f" if floatfmt[col_idx].startswith('+') else ".1f"
            return format(value, spec)
        if isinstance(value, int) and col_idx > 0:  # skip filename column
            spec = "+d" if floatfmt[col_idx].startswith('+') else "d"
            return format(value, spec)
        return str(value)

    # Headers first, then data rows, every cell pre-formatted.
    formatted = [
        [fmt_cell(value, idx) for idx, value in enumerate(row)]
        for row in [list(headers)] + [list(r) for r in rows]
    ]
    widths = [max(len(cell) for cell in column) for column in zip(*formatted)]
    lines = []
    for row_idx, row in enumerate(formatted):
        lines.append("  ".join(cell.ljust(w) for cell, w in zip(row, widths)))
        if row_idx == 0:
            # Separator between headers and data.
            lines.append("  ".join("-" * w for w in widths))
    return "\n".join(lines)
if __name__ == "__main__":
    if len(sys.argv) == 3:
        # Two directories given: report the diff between old and new trees.
        diff_headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
        diff_table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
        if diff_table:
            # Largest absolute line change first.
            ordered = sorted(
                diff_table,
                key=lambda row: abs(row[2]) if len(row) > 2 else 0,
                reverse=True,
            )
            print("### Code Changes in 'exo' Directory")
            print("```")
            print(format_table(ordered, diff_headers, (".1f", "d", "+d", ".1f", "+.1f")))
            total_changes = sum(row[2] for row in diff_table)
            print(f"\nTotal line changes: {display_diff(total_changes)}")
            print("```")
            # Machine-readable companion to the printed table.
            report = create_json_report(diff_table, is_diff=True)
            with open('line-count-diff.json', 'w') as f:
                json.dump(report, f, indent=2)
    else:
        # Zero or one argument: snapshot statistics for a single tree.
        snap_headers = ["File", "Lines", "Tokens/Line"]
        snap_table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
        if snap_table:
            # Longest files first.
            ordered = sorted(snap_table, key=lambda row: row[1], reverse=True)
            print("### Code Statistics for 'exo' Directory")
            print("```")
            print(format_table(ordered, snap_headers, (".1f", "d", ".1f")))
            total_lines = sum(row[1] for row in snap_table)
            print(f"\nTotal lines: {total_lines}")
            print("```")
            # Machine-readable companion to the printed table.
            report = create_json_report(snap_table, is_diff=False)
            with open('line-count-snapshot.json', 'w') as f:
                json.dump(report, f, indent=2)
|