
Merge pull request #493 from exo-explore/linecount

Continuously measure line count
Alex Cheema 5 months ago
commit 4f13aeee9b
3 changed files with 469 additions and 105 deletions
  1. .circleci/config.yml (+57, -0)
  2. extra/dashboard/dashboard.py (+202, -105)
  3. extra/line_counter.py (+210, -0)

.circleci/config.yml (+57, -0)

@@ -269,11 +269,68 @@ jobs:
           path: ./pipsize.json
           destination: pip-sizes.json
 
+  check_line_count:
+    docker:
+      - image: cimg/python:3.10
+    steps:
+      - checkout
+
+      - run:
+          name: Setup git for PR comparison
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
+              BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
+                "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
+                | jq -r '.target_branch')
+
+              git clone -b $BASE_BRANCH --single-branch \
+                https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
+                base_branch
+            fi
+
+      - run:
+          name: Install dependencies
+          command: |
+            python -m pip install --upgrade pip
+            pip install tabulate
+
+      - run:
+          name: Run line count check
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              python extra/line_counter.py base_branch .
+            else
+              python extra/line_counter.py .
+            fi
+
+      - store_artifacts:
+          path: line-count-snapshot.json
+          destination: line-count-snapshot.json
+
+      - store_artifacts:
+          path: line-count-diff.json
+          destination: line-count-diff.json
+
+      - run:
+          name: Create test results directory
+          command: |
+            mkdir -p test-results/line-count
+            cp line-count-*.json test-results/line-count/
+
+      - store_test_results:
+          path: test-results
 
 workflows:
   version: 2
   build_and_test:
     jobs:
+      - check_line_count:
+          filters:
+            branches:
+              only: /.*/
+            tags:
+              only: /.*/
       - unit_test
       - discovery_integration_test
       - chatgpt_api_integration_test_mlx

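For reference, the line-count-snapshot.json artifact stored by the new check_line_count job can be consumed directly. Below is a minimal sketch, assuming the artifact has been downloaded into the working directory; the field names follow create_json_report() in extra/line_counter.py (added further down), and the top-5 listing is purely illustrative, not part of the CI job.

```python
# Sketch: summarize a downloaded line-count-snapshot.json artifact.
# Field names follow create_json_report() in extra/line_counter.py.
import json

with open("line-count-snapshot.json") as f:
    snapshot = json.load(f)

print(f"{snapshot['branch']} @ {snapshot['commit_sha'][:7]}: "
      f"{snapshot['total_lines']:,} lines in {snapshot['total_files']} files")

# Largest files first (illustrative only).
for entry in sorted(snapshot["files"], key=lambda e: e["lines"], reverse=True)[:5]:
    print(f"  {entry['name']}: {entry['lines']} lines, "
          f"{entry['tokens_per_line']:.1f} tokens/line")
```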
extra/dashboard/dashboard.py (+202, -105)

@@ -7,6 +7,8 @@ import pandas as pd
 import plotly.express as px
 from typing import List, Dict, Optional
 from pathlib import Path
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
 
 class AsyncCircleCIClient:
     def __init__(self, token: str, project_slug: str):
@@ -24,7 +26,7 @@ class AsyncCircleCIClient:
             response.raise_for_status()
             return await response.json()
 
-    async def get_recent_pipelines(self, session: aiohttp.ClientSession, limit: int = 50) -> List[Dict]:
+    async def get_recent_pipelines(self, session: aiohttp.ClientSession, limit: int = 100) -> List[Dict]:
         self.logger.info(f"Fetching {limit} recent pipelines...")
         url = f"{self.base_url}/project/{self.project_slug}/pipeline"
         params = {"limit": limit * 2}
@@ -111,39 +113,69 @@ class PackageSizeTracker:
                 return None
 
             jobs = await self.client.get_workflow_jobs(session, pipeline["id"])
+
+            # Get package size data
             size_job = next(
                 (j for j in jobs if j["name"] == "measure_pip_sizes" and j["status"] == "success"),
                 None
             )
 
-            if not size_job:
-                self.logger.debug(f"No measure_pip_sizes job found for pipeline {pipeline['id']}")
-                return None
-
-            artifacts = await self.client.get_artifacts(session, size_job["job_number"])
-            size_report = next(
-                (a for a in artifacts if a["path"].endswith("pip-sizes.json")),
+            # Get line count data
+            linecount_job = next(
+                (j for j in jobs if j["name"] == "check_line_count" and j["status"] == "success"),
                 None
             )
 
-            if not size_report:
-                self.logger.debug(f"No pip-sizes.json artifact found for job {size_job['job_number']}")
+            # Return None if no relevant jobs found
+            if not size_job and not linecount_job:
+                self.logger.debug(f"No relevant jobs found for pipeline {pipeline['id']}")
                 return None
 
-            json_data = await self.client.get_json(session, size_report["url"])
             data_point = {
                 "commit_hash": commit_info['commit_hash'],
                 "commit_url": commit_info['web_url'],
                 "timestamp": pipeline.get("created_at", pipeline.get("updated_at")),
-                "total_size_mb": json_data["total_size_mb"],
-                "packages": json_data["packages"]
             }
 
-            self.logger.info(
-                f"Processed pipeline {pipeline['id']}: "
-                f"commit {commit_info['commit_hash'][:7]}, "
-                f"size {json_data['total_size_mb']:.2f}MB"
-            )
+            # Process size data if available
+            if size_job:
+                size_artifacts = await self.client.get_artifacts(session, size_job["job_number"])
+                size_report = next(
+                    (a for a in size_artifacts if a["path"].endswith("pip-sizes.json")),
+                    None
+                )
+                if size_report:
+                    size_data = await self.client.get_json(session, size_report["url"])
+                    data_point.update({
+                        "total_size_mb": size_data["total_size_mb"],
+                        "packages": size_data["packages"]
+                    })
+                    self.logger.info(
+                        f"Processed size data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"size {size_data['total_size_mb']:.2f}MB"
+                    )
+
+            # Process linecount data if available
+            if linecount_job:
+                linecount_artifacts = await self.client.get_artifacts(session, linecount_job["job_number"])
+                linecount_report = next(
+                    (a for a in linecount_artifacts if a["path"].endswith("line-count-snapshot.json")),
+                    None
+                )
+                if linecount_report:
+                    linecount_data = await self.client.get_json(session, linecount_report["url"])
+                    data_point.update({
+                        "total_lines": linecount_data["total_lines"],
+                        "total_files": linecount_data["total_files"],
+                        "files": linecount_data["files"]
+                    })
+                    self.logger.info(
+                        f"Processed line count data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"lines {linecount_data['total_lines']:,}"
+                    )
+
             return data_point
 
         except Exception as e:
@@ -154,7 +186,7 @@ class PackageSizeTracker:
         self.logger.info("Starting data collection...")
         async with aiohttp.ClientSession(headers=self.client.headers) as session:
             # Get pipelines
-            pipelines = await self.client.get_recent_pipelines(session, 50)
+            pipelines = await self.client.get_recent_pipelines(session, 100)
 
             # Process all pipelines in parallel
             tasks = [self.process_pipeline(session, pipeline) for pipeline in pipelines]
@@ -171,108 +203,173 @@ class PackageSizeTracker:
             self.logger.error("No data to generate report from!")
             return None
 
-        df = pd.DataFrame(data)
-        df['timestamp'] = pd.to_datetime(df['timestamp'])
-        df = df.sort_values('timestamp')
-        # commit_url is already in the data from process_pipeline
-
-        # Create trend plot with updated styling
-        fig = px.line(
-            df,
-            x='timestamp',
-            y='total_size_mb',
-            title='Package Size Trend',
-            markers=True,
-            hover_data={'commit_hash': True, 'timestamp': True, 'total_size_mb': ':.2f'},
-            custom_data=['commit_hash', 'commit_url']
-        )
-        fig.update_layout(
-            xaxis_title="Date",
-            yaxis_title="Total Size (MB)",
-            hovermode='x unified',
-            plot_bgcolor='white',
-            paper_bgcolor='white',
-            font=dict(size=12),
-            title_x=0.5,
-        )
-        fig.update_traces(
-            line=dict(width=2),
-            marker=dict(size=8),
-            hovertemplate="<br>".join([
-                "Commit: %{customdata[0]}",
-                "Size: %{y:.2f}MB",
-                "Date: %{x}",
-                "<extra>Click to view commit</extra>"
-            ])
-        )
-
-        # Add JavaScript for click handling
-        fig.update_layout(
-            clickmode='event',
-            annotations=[
-                dict(
-                    text="Click any point to view the commit on GitHub",
-                    xref="paper", yref="paper",
-                    x=0, y=1.05,
-                    showarrow=False
-                )
-            ]
-        )
+        # Create separate dataframes for each metric
+        df_size = pd.DataFrame([d for d in data if 'total_size_mb' in d])
+        df_lines = pd.DataFrame([d for d in data if 'total_lines' in d])
 
         # Ensure output directory exists
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)
 
-        # Save plot
-        plot_path = output_dir / "package_size_trend.html"
-        fig.write_html(
-            str(plot_path),
-            include_plotlyjs=True,
-            full_html=True,
-            post_script="""
-            const plot = document.getElementsByClassName('plotly-graph-div')[0];
-            plot.on('plotly_click', function(data) {
-                const point = data.points[0];
-                const commitUrl = point.customdata[1];
-                window.open(commitUrl, '_blank');
-            });
-            """
+        # Create a single figure with subplots
+        fig = make_subplots(
+            rows=2, cols=1,
+            subplot_titles=('Package Size Trend', 'Line Count Trend'),
+            vertical_spacing=0.2
         )
 
-        # Generate summary
-        latest = df.iloc[-1]
-        previous = df.iloc[-2] if len(df) > 1 else latest
-        size_change = latest['total_size_mb'] - previous['total_size_mb']
-
-        latest_data = {
-            'timestamp': latest['timestamp'].isoformat(),
-            'commit_hash': latest['commit_hash'],
-            'total_size_mb': latest['total_size_mb'],
-            'size_change_mb': size_change,
-            'packages': latest['packages']
-        }
+        # Add package size trace if we have data
+        if not df_size.empty:
+            df_size['timestamp'] = pd.to_datetime(df_size['timestamp'])
+            df_size = df_size.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_size['timestamp'],
+                    y=df_size['total_size_mb'],
+                    mode='lines+markers',
+                    name='Package Size',
+                    customdata=df_size[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Size: %{y:.2f}MB",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=1, col=1
+            )
+            fig.update_yaxes(title_text="Size (MB)", row=1, col=1)
+
+        # Add line count trace if we have data
+        if not df_lines.empty:
+            df_lines['timestamp'] = pd.to_datetime(df_lines['timestamp'])
+            df_lines = df_lines.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_lines['timestamp'],
+                    y=df_lines['total_lines'],
+                    mode='lines+markers',
+                    name='Line Count',
+                    customdata=df_lines[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Lines: %{y:,.0f}",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=2, col=1
+            )
+            fig.update_yaxes(title_text="Total Lines", row=2, col=1)
 
-        with open(output_dir / 'latest_data.json', 'w') as f:
-            json.dump(latest_data, f, indent=2)
+        # Update layout
+        fig.update_layout(
+            height=800,  # Taller to accommodate both plots
+            showlegend=False,
+            title_text="Package Metrics Dashboard",
+            title_x=0.5,
+            plot_bgcolor='white',
+            paper_bgcolor='white',
+            font=dict(size=12),
+            hovermode='x unified',
+            xaxis=dict(title_text="Date"),
+            xaxis2=dict(title_text="Date")
+        )
 
-        self._print_summary(latest_data)
-        self.logger.info(f"Report generated in {output_dir}")
-        return str(plot_path)
+        # Add click event handling
+        dashboard_html = f"""
+        <html>
+        <head>
+            <title>Package Metrics Dashboard</title>
+        </head>
+        <body>
+            <div id="dashboard">
+                {fig.to_html(include_plotlyjs=True, full_html=False)}
+            </div>
+            <script>
+                const plot = document.getElementById('dashboard').getElementsByClassName('plotly-graph-div')[0];
+                plot.on('plotly_click', function(data) {{
+                    const point = data.points[0];
+                    const commitUrl = point.customdata[1];
+                    window.open(commitUrl, '_blank');
+                }});
+            </script>
+        </body>
+        </html>
+        """
+
+        # Write the dashboard
+        dashboard_path = output_dir / "dashboard.html"
+        with open(dashboard_path, "w") as f:
+            f.write(dashboard_html)
+
+        # Generate summary with available metrics
+        latest_data = {}
+
+        if not df_size.empty:
+            latest = df_size.iloc[-1]
+            previous = df_size.iloc[-2] if len(df_size) > 1 else latest
+            size_change = latest['total_size_mb'] - previous['total_size_mb']
+            latest_data.update({
+                'timestamp': latest['timestamp'].isoformat(),
+                'commit_hash': latest['commit_hash'],
+                'commit_url': latest['commit_url'],
+                'total_size_mb': latest['total_size_mb'],
+                'size_change_mb': size_change,
+                'packages': latest.get('packages', [])
+            })
+
+        if not df_lines.empty:
+            latest = df_lines.iloc[-1]
+            previous = df_lines.iloc[-2] if len(df_lines) > 1 else latest
+            linecount_change = latest['total_lines'] - previous['total_lines']
+            if not latest_data:  # Only add timestamp and commit info if not already added
+                latest_data.update({
+                    'timestamp': latest['timestamp'].isoformat(),
+                    'commit_hash': latest['commit_hash'],
+                    'commit_url': latest['commit_url'],
+                })
+            latest_data.update({
+                'total_lines': latest['total_lines'],
+                'linecount_change': linecount_change
+            })
+
+        if latest_data:
+            with open(output_dir / 'latest_data.json', 'w') as f:
+                json.dump(latest_data, f, indent=2)
+
+            self._print_summary(latest_data)
+            self.logger.info(f"Report generated in {output_dir}")
+            return str(output_dir)
+
+        return None
 
     def _print_summary(self, latest_data: Dict):
         print("\n=== Package Size Summary ===")
         print(f"Timestamp: {latest_data['timestamp']}")
         print(f"Commit: {latest_data['commit_hash'][:7]}")
-        print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")
 
-        change = latest_data['size_change_mb']
-        change_symbol = "↓" if change <= 0 else "↑"
-        print(f"Change: {change_symbol} {abs(change):.2f}MB")
+        if 'total_size_mb' in latest_data:
+            print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")
+            change = latest_data['size_change_mb']
+            change_symbol = "↓" if change <= 0 else "↑"
+            print(f"Change: {change_symbol} {abs(change):.2f}MB")
+
+            if latest_data.get('packages'):
+                print("\nTop 5 Largest Packages:")
+                sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
+                for pkg in sorted_packages[:5]:
+                    print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")
+
+        if 'total_lines' in latest_data:
+            print("\nLine Count Stats:")
+            print(f"Total Lines: {latest_data['total_lines']:,}")
+            change = latest_data['linecount_change']
+            change_symbol = "↓" if change <= 0 else "↑"
+            print(f"Change: {change_symbol} {abs(change):,}")
 
-        print("\nTop 5 Largest Packages:")
-        sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
-        for pkg in sorted_packages[:5]:
-            print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")
         print("\n")
 
 async def main():

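The reworked generate_report() now writes a combined latest_data.json next to dashboard.html, with the size keys and the line-count keys each present only when the corresponding CI artifact was found. Below is a minimal sketch of reading it back, mirroring _print_summary() above; the reports/ path is a hypothetical output directory, not something fixed by the code.

```python
# Sketch: read the latest_data.json summary written by generate_report().
# Keys mirror _print_summary(); either metric block may be absent.
import json
from pathlib import Path

output_dir = Path("reports")  # hypothetical output directory
data = json.loads((output_dir / "latest_data.json").read_text())

print(f"Commit {data['commit_hash'][:7]} at {data['timestamp']}")
if "total_size_mb" in data:
    print(f"Package size: {data['total_size_mb']:.2f}MB "
          f"({data['size_change_mb']:+.2f}MB vs previous)")
if "total_lines" in data:
    print(f"Line count: {data['total_lines']:,} "
          f"({data['linecount_change']:+,} vs previous)")
```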
extra/line_counter.py (+210, -0)

@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import token
+import tokenize
+from datetime import datetime, timezone
+
+TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
+
+def is_docstring(t):
+    return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
+def gen_stats(base_path="."):
+    table = []
+    exo_path = os.path.join(base_path, "exo")
+    if not os.path.exists(exo_path):
+        print(f"Warning: {exo_path} directory not found")
+        return table
+
+    for path, _, files in os.walk(exo_path):
+        for name in files:
+            if not name.endswith(".py"):
+                continue
+
+            filepath = os.path.join(path, name)
+            relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
+
+            try:
+                with tokenize.open(filepath) as file_:
+                    tokens = [t for t in tokenize.generate_tokens(file_.readline)
+                            if t.type in TOKEN_WHITELIST and not is_docstring(t)]
+                    token_count = len(tokens)
+                    line_count = len(set([x for t in tokens
+                                        for x in range(t.start[0], t.end[0]+1)]))
+                    if line_count > 0:
+                        table.append([relfilepath, line_count, token_count/line_count])
+            except Exception as e:
+                print(f"Error processing {filepath}: {e}")
+                continue
+
+    return table
+
+def gen_diff(table_old, table_new):
+    table = []
+    files_new = set([x[0] for x in table_new])
+    files_old = set([x[0] for x in table_old])
+
+    added = files_new - files_old
+    deleted = files_old - files_new
+    unchanged = files_new & files_old
+
+    for file in added:
+        file_stat = [stats for stats in table_new if file in stats][0]
+        table.append([file_stat[0], file_stat[1], file_stat[1], file_stat[2], file_stat[2]])
+
+    for file in deleted:
+        file_stat = [stats for stats in table_old if file in stats][0]
+        table.append([file_stat[0], 0, -file_stat[1], 0, -file_stat[2]])
+
+    for file in unchanged:
+        file_stat_old = [stats for stats in table_old if file in stats][0]
+        file_stat_new = [stats for stats in table_new if file in stats][0]
+        if file_stat_new[1] != file_stat_old[1] or file_stat_new[2] != file_stat_old[2]:
+            table.append([
+                file_stat_new[0],
+                file_stat_new[1],
+                file_stat_new[1] - file_stat_old[1],
+                file_stat_new[2],
+                file_stat_new[2] - file_stat_old[2]
+            ])
+
+    return table
+
+def create_json_report(table, is_diff=False):
+    timestamp = datetime.now(timezone.utc).isoformat()
+    commit_sha = os.environ.get('CIRCLE_SHA1', 'unknown')
+    branch = os.environ.get('CIRCLE_BRANCH', 'unknown')
+    pr_number = os.environ.get('CIRCLE_PR_NUMBER', '')
+
+    if is_diff:
+        files = [{
+            'name': row[0],
+            'current_lines': row[1],
+            'line_diff': row[2],
+            'current_tokens_per_line': row[3],
+            'tokens_per_line_diff': row[4]
+        } for row in table]
+
+        report = {
+            'type': 'diff',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'pr_number': pr_number,
+            'files': files,
+            'total_line_changes': sum(row[2] for row in table),
+            'total_files_changed': len(files)
+        }
+    else:
+        files = [{
+            'name': row[0],
+            'lines': row[1],
+            'tokens_per_line': row[2]
+        } for row in table]
+
+        report = {
+            'type': 'snapshot',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'files': files,
+            'total_lines': sum(row[1] for row in table),
+            'total_files': len(files)
+        }
+
+    return report
+
+def display_diff(diff):
+    return "+" + str(diff) if diff > 0 else str(diff)
+
+def format_table(rows, headers, floatfmt):
+    if not rows:
+        return ""
+
+    # Add headers as first row
+    all_rows = [headers] + rows
+
+    # Calculate column widths
+    col_widths = []
+    for col in range(len(headers)):
+        col_width = max(len(str(row[col])) for row in all_rows)
+        col_widths.append(col_width)
+
+    # Format rows
+    output = []
+    for row_idx, row in enumerate(all_rows):
+        formatted_cols = []
+        for col_idx, (value, width) in enumerate(zip(row, col_widths)):
+            if isinstance(value, float):
+                # Handle float formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+.1f}"
+                else:
+                    value = f"{value:.1f}"
+            elif isinstance(value, int) and col_idx > 0:  # Skip filename column
+                # Handle integer formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+d}"
+                else:
+                    value = f"{value:d}"
+            formatted_cols.append(str(value).ljust(width))
+        output.append("  ".join(formatted_cols))
+
+        # Add separator line after headers
+        if row_idx == 0:
+            separator = []
+            for width in col_widths:
+                separator.append("-" * width)
+            output.append("  ".join(separator))
+
+    return "\n".join(output)
+
+if __name__ == "__main__":
+    if len(sys.argv) == 3:
+        # Comparing two directories
+        headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
+        table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
+
+        if table:
+            # Print table output
+            print("### Code Changes in 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
+                headers,
+                (".1f", "d", "+d", ".1f", "+.1f")
+            ))
+            total_changes = sum(row[2] for row in table)
+            print(f"\nTotal line changes: {display_diff(total_changes)}")
+            print("```")
+
+            # Generate JSON report
+            report = create_json_report(table, is_diff=True)
+            with open('line-count-diff.json', 'w') as f:
+                json.dump(report, f, indent=2)
+    else:
+        # Single directory analysis
+        headers = ["File", "Lines", "Tokens/Line"]
+        table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
+
+        if table:
+            # Print table output
+            print("### Code Statistics for 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: x[1], reverse=True),
+                headers,
+                (".1f", "d", ".1f")
+            ))
+            total_lines = sum(row[1] for row in table)
+            print(f"\nTotal lines: {total_lines}")
+            print("```")
+
+            # Generate JSON report
+            report = create_json_report(table, is_diff=False)
+            with open('line-count-snapshot.json', 'w') as f:
+                json.dump(report, f, indent=2)
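
line_counter.py is driven from the CLI in CI (one path for a snapshot, two paths for a base-vs-PR diff), but the same functions can be used programmatically. A minimal sketch, assuming the module is importable (e.g. run from the extra/ directory) and that base_branch is a checkout of the comparison branch, as in the CircleCI job above:

```python
# Sketch: use extra/line_counter.py programmatically instead of via the CLI.
# Assumes line_counter is importable (e.g. run from extra/) and that
# "base_branch" is a hypothetical checkout of the branch to compare against.
import json
from line_counter import gen_stats, gen_diff, create_json_report

old_stats = gen_stats("base_branch")   # [relpath, line_count, tokens_per_line] per file
new_stats = gen_stats(".")
diff = gen_diff(old_stats, new_stats)  # rows only for added, deleted, or changed files

report = create_json_report(diff, is_diff=True)
print(f"Files changed: {report['total_files_changed']}, "
      f"net line change: {report['total_line_changes']:+d}")

with open("line-count-diff.json", "w") as f:
    json.dump(report, f, indent=2)
```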