
Merge pull request #493 from exo-explore/linecount

Continuously measure line count
Alex Cheema 5 months ago
commit 4f13aeee9b
3 changed files with 469 additions and 105 deletions
  1. .circleci/config.yml (+57, -0)
  2. extra/dashboard/dashboard.py (+202, -105)
  3. extra/line_counter.py (+210, -0)

.circleci/config.yml (+57, -0)

@@ -269,11 +269,68 @@ jobs:
           path: ./pipsize.json
           destination: pip-sizes.json
 
+  check_line_count:
+    docker:
+      - image: cimg/python:3.10
+    steps:
+      - checkout
+
+      - run:
+          name: Setup git for PR comparison
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
+              BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
+                "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
+                | jq -r '.target_branch')
+
+              git clone -b $BASE_BRANCH --single-branch \
+                https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
+                base_branch
+            fi
+
+      - run:
+          name: Install dependencies
+          command: |
+            python -m pip install --upgrade pip
+            pip install tabulate
+
+      - run:
+          name: Run line count check
+          command: |
+            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
+              python extra/line_counter.py base_branch .
+            else
+              python extra/line_counter.py .
+            fi
+
+      - store_artifacts:
+          path: line-count-snapshot.json
+          destination: line-count-snapshot.json
+
+      - store_artifacts:
+          path: line-count-diff.json
+          destination: line-count-diff.json
+
+      - run:
+          name: Create test results directory
+          command: |
+            mkdir -p test-results/line-count
+            cp line-count-*.json test-results/line-count/
+
+      - store_test_results:
+          path: test-results
 
 workflows:
   version: 2
   build_and_test:
     jobs:
+      - check_line_count:
+          filters:
+            branches:
+              only: /.*/
+            tags:
+              only: /.*/
       - unit_test
       - discovery_integration_test
       - chatgpt_api_integration_test_mlx

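For reference, the line-count-snapshot.json artifact stored by the new check_line_count job can be consumed directly. Below is a minimal sketch, assuming the artifact has been downloaded into the working directory; the field names follow create_json_report() in extra/line_counter.py (added further down), and the top-5 listing is purely illustrative, not part of the CI job.

```python
# Sketch: summarize a downloaded line-count-snapshot.json artifact.
# Field names follow create_json_report() in extra/line_counter.py.
import json

with open("line-count-snapshot.json") as f:
    snapshot = json.load(f)

print(f"{snapshot['branch']} @ {snapshot['commit_sha'][:7]}: "
      f"{snapshot['total_lines']:,} lines in {snapshot['total_files']} files")

# Largest files first (illustrative only).
for entry in sorted(snapshot["files"], key=lambda e: e["lines"], reverse=True)[:5]:
    print(f"  {entry['name']}: {entry['lines']} lines, "
          f"{entry['tokens_per_line']:.1f} tokens/line")
```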
extra/dashboard/dashboard.py (+202, -105)

@@ -7,6 +7,8 @@ import pandas as pd
 import plotly.express as px
 from typing import List, Dict, Optional
 from pathlib import Path
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
 
 class AsyncCircleCIClient:
     def __init__(self, token: str, project_slug: str):
@@ -24,7 +26,7 @@ class AsyncCircleCIClient:
             response.raise_for_status()
             return await response.json()
 
-    async def get_recent_pipelines(self, session: aiohttp.ClientSession, limit: int = 50) -> List[Dict]:
+    async def get_recent_pipelines(self, session: aiohttp.ClientSession, limit: int = 100) -> List[Dict]:
         self.logger.info(f"Fetching {limit} recent pipelines...")
         url = f"{self.base_url}/project/{self.project_slug}/pipeline"
         params = {"limit": limit * 2}
@@ -111,39 +113,69 @@ class PackageSizeTracker:
                 return None
 
             jobs = await self.client.get_workflow_jobs(session, pipeline["id"])
+
+            # Get package size data
             size_job = next(
                 (j for j in jobs if j["name"] == "measure_pip_sizes" and j["status"] == "success"),
                 None
             )
 
-            if not size_job:
-                self.logger.debug(f"No measure_pip_sizes job found for pipeline {pipeline['id']}")
-                return None
-
-            artifacts = await self.client.get_artifacts(session, size_job["job_number"])
-            size_report = next(
-                (a for a in artifacts if a["path"].endswith("pip-sizes.json")),
+            # Get line count data
+            linecount_job = next(
+                (j for j in jobs if j["name"] == "check_line_count" and j["status"] == "success"),
                 None
             )
 
-            if not size_report:
-                self.logger.debug(f"No pip-sizes.json artifact found for job {size_job['job_number']}")
+            # Return None if no relevant jobs found
+            if not size_job and not linecount_job:
+                self.logger.debug(f"No relevant jobs found for pipeline {pipeline['id']}")
                 return None
 
-            json_data = await self.client.get_json(session, size_report["url"])
             data_point = {
                 "commit_hash": commit_info['commit_hash'],
                 "commit_url": commit_info['web_url'],
                 "timestamp": pipeline.get("created_at", pipeline.get("updated_at")),
-                "total_size_mb": json_data["total_size_mb"],
-                "packages": json_data["packages"]
             }
 
-            self.logger.info(
-                f"Processed pipeline {pipeline['id']}: "
-                f"commit {commit_info['commit_hash'][:7]}, "
-                f"size {json_data['total_size_mb']:.2f}MB"
-            )
+            # Process size data if available
+            if size_job:
+                size_artifacts = await self.client.get_artifacts(session, size_job["job_number"])
+                size_report = next(
+                    (a for a in size_artifacts if a["path"].endswith("pip-sizes.json")),
+                    None
+                )
+                if size_report:
+                    size_data = await self.client.get_json(session, size_report["url"])
+                    data_point.update({
+                        "total_size_mb": size_data["total_size_mb"],
+                        "packages": size_data["packages"]
+                    })
+                    self.logger.info(
+                        f"Processed size data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"size {size_data['total_size_mb']:.2f}MB"
+                    )
+
+            # Process linecount data if available
+            if linecount_job:
+                linecount_artifacts = await self.client.get_artifacts(session, linecount_job["job_number"])
+                linecount_report = next(
+                    (a for a in linecount_artifacts if a["path"].endswith("line-count-snapshot.json")),
+                    None
+                )
+                if linecount_report:
+                    linecount_data = await self.client.get_json(session, linecount_report["url"])
+                    data_point.update({
+                        "total_lines": linecount_data["total_lines"],
+                        "total_files": linecount_data["total_files"],
+                        "files": linecount_data["files"]
+                    })
+                    self.logger.info(
+                        f"Processed line count data for pipeline {pipeline['id']}: "
+                        f"commit {commit_info['commit_hash'][:7]}, "
+                        f"lines {linecount_data['total_lines']:,}"
+                    )
+
             return data_point
 
         except Exception as e:
@@ -154,7 +186,7 @@ class PackageSizeTracker:
         self.logger.info("Starting data collection...")
         async with aiohttp.ClientSession(headers=self.client.headers) as session:
             # Get pipelines
-            pipelines = await self.client.get_recent_pipelines(session, 50)
+            pipelines = await self.client.get_recent_pipelines(session, 100)
 
             # Process all pipelines in parallel
             tasks = [self.process_pipeline(session, pipeline) for pipeline in pipelines]
@@ -171,108 +203,173 @@ class PackageSizeTracker:
             self.logger.error("No data to generate report from!")
             return None
 
-        df = pd.DataFrame(data)
-        df['timestamp'] = pd.to_datetime(df['timestamp'])
-        df = df.sort_values('timestamp')
-        # commit_url is already in the data from process_pipeline
-
-        # Create trend plot with updated styling
-        fig = px.line(
-            df,
-            x='timestamp',
-            y='total_size_mb',
-            title='Package Size Trend',
-            markers=True,
-            hover_data={'commit_hash': True, 'timestamp': True, 'total_size_mb': ':.2f'},
-            custom_data=['commit_hash', 'commit_url']
-        )
-        fig.update_layout(
-            xaxis_title="Date",
-            yaxis_title="Total Size (MB)",
-            hovermode='x unified',
-            plot_bgcolor='white',
-            paper_bgcolor='white',
-            font=dict(size=12),
-            title_x=0.5,
-        )
-        fig.update_traces(
-            line=dict(width=2),
-            marker=dict(size=8),
-            hovertemplate="<br>".join([
-                "Commit: %{customdata[0]}",
-                "Size: %{y:.2f}MB",
-                "Date: %{x}",
-                "<extra>Click to view commit</extra>"
-            ])
-        )
-
-        # Add JavaScript for click handling
-        fig.update_layout(
-            clickmode='event',
-            annotations=[
-                dict(
-                    text="Click any point to view the commit on GitHub",
-                    xref="paper", yref="paper",
-                    x=0, y=1.05,
-                    showarrow=False
-                )
-            ]
-        )
+        # Create separate dataframes for each metric
+        df_size = pd.DataFrame([d for d in data if 'total_size_mb' in d])
+        df_lines = pd.DataFrame([d for d in data if 'total_lines' in d])
 
         # Ensure output directory exists
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)
 
-        # Save plot
-        plot_path = output_dir / "package_size_trend.html"
-        fig.write_html(
-            str(plot_path),
-            include_plotlyjs=True,
-            full_html=True,
-            post_script="""
-            const plot = document.getElementsByClassName('plotly-graph-div')[0];
-            plot.on('plotly_click', function(data) {
-                const point = data.points[0];
-                const commitUrl = point.customdata[1];
-                window.open(commitUrl, '_blank');
-            });
-            """
+        # Create a single figure with subplots
+        fig = make_subplots(
+            rows=2, cols=1,
+            subplot_titles=('Package Size Trend', 'Line Count Trend'),
+            vertical_spacing=0.2
         )
 
-        # Generate summary
-        latest = df.iloc[-1]
-        previous = df.iloc[-2] if len(df) > 1 else latest
-        size_change = latest['total_size_mb'] - previous['total_size_mb']
-
-        latest_data = {
-            'timestamp': latest['timestamp'].isoformat(),
-            'commit_hash': latest['commit_hash'],
-            'total_size_mb': latest['total_size_mb'],
-            'size_change_mb': size_change,
-            'packages': latest['packages']
-        }
+        # Add package size trace if we have data
+        if not df_size.empty:
+            df_size['timestamp'] = pd.to_datetime(df_size['timestamp'])
+            df_size = df_size.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_size['timestamp'],
+                    y=df_size['total_size_mb'],
+                    mode='lines+markers',
+                    name='Package Size',
+                    customdata=df_size[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Size: %{y:.2f}MB",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=1, col=1
+            )
+            fig.update_yaxes(title_text="Size (MB)", row=1, col=1)
+
+        # Add line count trace if we have data
+        if not df_lines.empty:
+            df_lines['timestamp'] = pd.to_datetime(df_lines['timestamp'])
+            df_lines = df_lines.sort_values('timestamp')
+
+            fig.add_trace(
+                go.Scatter(
+                    x=df_lines['timestamp'],
+                    y=df_lines['total_lines'],
+                    mode='lines+markers',
+                    name='Line Count',
+                    customdata=df_lines[['commit_hash', 'commit_url']].values,
+                    hovertemplate="<br>".join([
+                        "Lines: %{y:,.0f}",
+                        "Date: %{x}",
+                        "Commit: %{customdata[0]}",
+                        "<extra></extra>"
+                    ])
+                ),
+                row=2, col=1
+            )
+            fig.update_yaxes(title_text="Total Lines", row=2, col=1)
 
-        with open(output_dir / 'latest_data.json', 'w') as f:
-            json.dump(latest_data, f, indent=2)
+        # Update layout
+        fig.update_layout(
+            height=800,  # Taller to accommodate both plots
+            showlegend=False,
+            title_text="Package Metrics Dashboard",
+            title_x=0.5,
+            plot_bgcolor='white',
+            paper_bgcolor='white',
+            font=dict(size=12),
+            hovermode='x unified',
+            xaxis=dict(title_text="Date"),
+            xaxis2=dict(title_text="Date")
+        )
 
-        self._print_summary(latest_data)
-        self.logger.info(f"Report generated in {output_dir}")
-        return str(plot_path)
+        # Add click event handling
+        dashboard_html = f"""
+        <html>
+        <head>
+            <title>Package Metrics Dashboard</title>
+        </head>
+        <body>
+            <div id="dashboard">
+                {fig.to_html(include_plotlyjs=True, full_html=False)}
+            </div>
+            <script>
+                const plot = document.getElementById('dashboard').getElementsByClassName('plotly-graph-div')[0];
+                plot.on('plotly_click', function(data) {{
+                    const point = data.points[0];
+                    const commitUrl = point.customdata[1];
+                    window.open(commitUrl, '_blank');
+                }});
+            </script>
+        </body>
+        </html>
+        """
+
+        # Write the dashboard
+        dashboard_path = output_dir / "dashboard.html"
+        with open(dashboard_path, "w") as f:
+            f.write(dashboard_html)
+
+        # Generate summary with available metrics
+        latest_data = {}
+
+        if not df_size.empty:
+            latest = df_size.iloc[-1]
+            previous = df_size.iloc[-2] if len(df_size) > 1 else latest
+            size_change = latest['total_size_mb'] - previous['total_size_mb']
+            latest_data.update({
+                'timestamp': latest['timestamp'].isoformat(),
+                'commit_hash': latest['commit_hash'],
+                'commit_url': latest['commit_url'],
+                'total_size_mb': latest['total_size_mb'],
+                'size_change_mb': size_change,
+                'packages': latest.get('packages', [])
+            })
+
+        if not df_lines.empty:
+            latest = df_lines.iloc[-1]
+            previous = df_lines.iloc[-2] if len(df_lines) > 1 else latest
+            linecount_change = latest['total_lines'] - previous['total_lines']
+            if not latest_data:  # Only add timestamp and commit info if not already added
+                latest_data.update({
+                    'timestamp': latest['timestamp'].isoformat(),
+                    'commit_hash': latest['commit_hash'],
+                    'commit_url': latest['commit_url'],
+                })
+            latest_data.update({
+                'total_lines': latest['total_lines'],
+                'linecount_change': linecount_change
+            })
+
+        if latest_data:
+            with open(output_dir / 'latest_data.json', 'w') as f:
+                json.dump(latest_data, f, indent=2)
+
+            self._print_summary(latest_data)
+            self.logger.info(f"Report generated in {output_dir}")
+            return str(output_dir)
+
+        return None
 
     def _print_summary(self, latest_data: Dict):
         print("\n=== Package Size Summary ===")
         print(f"Timestamp: {latest_data['timestamp']}")
         print(f"Commit: {latest_data['commit_hash'][:7]}")
-        print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")
 
-        change = latest_data['size_change_mb']
-        change_symbol = "↓" if change <= 0 else "↑"
-        print(f"Change: {change_symbol} {abs(change):.2f}MB")
+        if 'total_size_mb' in latest_data:
+            print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")
+            change = latest_data['size_change_mb']
+            change_symbol = "↓" if change <= 0 else "↑"
+            print(f"Change: {change_symbol} {abs(change):.2f}MB")
+
+            if latest_data.get('packages'):
+                print("\nTop 5 Largest Packages:")
+                sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
+                for pkg in sorted_packages[:5]:
+                    print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")
+
+        if 'total_lines' in latest_data:
+            print("\nLine Count Stats:")
+            print(f"Total Lines: {latest_data['total_lines']:,}")
+            change = latest_data['linecount_change']
+            change_symbol = "↓" if change <= 0 else "↑"
+            print(f"Change: {change_symbol} {abs(change):,}")
 
-        print("\nTop 5 Largest Packages:")
-        sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
-        for pkg in sorted_packages[:5]:
-            print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")
         print("\n")
 
 async def main():

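The reworked generate_report() now writes a combined latest_data.json next to dashboard.html, with the size keys and the line-count keys each present only when the corresponding CI artifact was found. Below is a minimal sketch of reading it back, mirroring _print_summary() above; the reports/ path is a hypothetical output directory, not something fixed by the code.

```python
# Sketch: read the latest_data.json summary written by generate_report().
# Keys mirror _print_summary(); either metric block may be absent.
import json
from pathlib import Path

output_dir = Path("reports")  # hypothetical output directory
data = json.loads((output_dir / "latest_data.json").read_text())

print(f"Commit {data['commit_hash'][:7]} at {data['timestamp']}")
if "total_size_mb" in data:
    print(f"Package size: {data['total_size_mb']:.2f}MB "
          f"({data['size_change_mb']:+.2f}MB vs previous)")
if "total_lines" in data:
    print(f"Line count: {data['total_lines']:,} "
          f"({data['linecount_change']:+,} vs previous)")
```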
extra/line_counter.py (+210, -0)

@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import token
+import tokenize
+from datetime import datetime, timezone
+
+TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
+
+def is_docstring(t):
+    return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
+def gen_stats(base_path="."):
+    table = []
+    exo_path = os.path.join(base_path, "exo")
+    if not os.path.exists(exo_path):
+        print(f"Warning: {exo_path} directory not found")
+        return table
+
+    for path, _, files in os.walk(exo_path):
+        for name in files:
+            if not name.endswith(".py"):
+                continue
+
+            filepath = os.path.join(path, name)
+            relfilepath = os.path.relpath(filepath, base_path).replace('\\', '/')
+
+            try:
+                with tokenize.open(filepath) as file_:
+                    tokens = [t for t in tokenize.generate_tokens(file_.readline)
+                            if t.type in TOKEN_WHITELIST and not is_docstring(t)]
+                    token_count = len(tokens)
+                    line_count = len(set([x for t in tokens
+                                        for x in range(t.start[0], t.end[0]+1)]))
+                    if line_count > 0:
+                        table.append([relfilepath, line_count, token_count/line_count])
+            except Exception as e:
+                print(f"Error processing {filepath}: {e}")
+                continue
+
+    return table
+
+def gen_diff(table_old, table_new):
+    table = []
+    files_new = set([x[0] for x in table_new])
+    files_old = set([x[0] for x in table_old])
+
+    added = files_new - files_old
+    deleted = files_old - files_new
+    unchanged = files_new & files_old
+
+    for file in added:
+        file_stat = [stats for stats in table_new if file in stats][0]
+        table.append([file_stat[0], file_stat[1], file_stat[1], file_stat[2], file_stat[2]])
+
+    for file in deleted:
+        file_stat = [stats for stats in table_old if file in stats][0]
+        table.append([file_stat[0], 0, -file_stat[1], 0, -file_stat[2]])
+
+    for file in unchanged:
+        file_stat_old = [stats for stats in table_old if file in stats][0]
+        file_stat_new = [stats for stats in table_new if file in stats][0]
+        if file_stat_new[1] != file_stat_old[1] or file_stat_new[2] != file_stat_old[2]:
+            table.append([
+                file_stat_new[0],
+                file_stat_new[1],
+                file_stat_new[1] - file_stat_old[1],
+                file_stat_new[2],
+                file_stat_new[2] - file_stat_old[2]
+            ])
+
+    return table
+
+def create_json_report(table, is_diff=False):
+    timestamp = datetime.now(timezone.utc).isoformat()
+    commit_sha = os.environ.get('CIRCLE_SHA1', 'unknown')
+    branch = os.environ.get('CIRCLE_BRANCH', 'unknown')
+    pr_number = os.environ.get('CIRCLE_PR_NUMBER', '')
+
+    if is_diff:
+        files = [{
+            'name': row[0],
+            'current_lines': row[1],
+            'line_diff': row[2],
+            'current_tokens_per_line': row[3],
+            'tokens_per_line_diff': row[4]
+        } for row in table]
+
+        report = {
+            'type': 'diff',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'pr_number': pr_number,
+            'files': files,
+            'total_line_changes': sum(row[2] for row in table),
+            'total_files_changed': len(files)
+        }
+    else:
+        files = [{
+            'name': row[0],
+            'lines': row[1],
+            'tokens_per_line': row[2]
+        } for row in table]
+
+        report = {
+            'type': 'snapshot',
+            'timestamp': timestamp,
+            'commit_sha': commit_sha,
+            'branch': branch,
+            'files': files,
+            'total_lines': sum(row[1] for row in table),
+            'total_files': len(files)
+        }
+
+    return report
+
+def display_diff(diff):
+    return "+" + str(diff) if diff > 0 else str(diff)
+
+def format_table(rows, headers, floatfmt):
+    if not rows:
+        return ""
+
+    # Add headers as first row
+    all_rows = [headers] + rows
+
+    # Calculate column widths
+    col_widths = []
+    for col in range(len(headers)):
+        col_width = max(len(str(row[col])) for row in all_rows)
+        col_widths.append(col_width)
+
+    # Format rows
+    output = []
+    for row_idx, row in enumerate(all_rows):
+        formatted_cols = []
+        for col_idx, (value, width) in enumerate(zip(row, col_widths)):
+            if isinstance(value, float):
+                # Handle float formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+.1f}"
+                else:
+                    value = f"{value:.1f}"
+            elif isinstance(value, int) and col_idx > 0:  # Skip filename column
+                # Handle integer formatting based on floatfmt
+                fmt = floatfmt[col_idx]
+                if fmt.startswith('+'):
+                    value = f"{value:+d}"
+                else:
+                    value = f"{value:d}"
+            formatted_cols.append(str(value).ljust(width))
+        output.append("  ".join(formatted_cols))
+
+        # Add separator line after headers
+        if row_idx == 0:
+            separator = []
+            for width in col_widths:
+                separator.append("-" * width)
+            output.append("  ".join(separator))
+
+    return "\n".join(output)
+
+if __name__ == "__main__":
+    if len(sys.argv) == 3:
+        # Comparing two directories
+        headers = ["File", "Lines", "Diff", "Tokens/Line", "Diff"]
+        table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
+
+        if table:
+            # Print table output
+            print("### Code Changes in 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: abs(x[2]) if len(x) > 2 else 0, reverse=True),
+                headers,
+                (".1f", "d", "+d", ".1f", "+.1f")
+            ))
+            total_changes = sum(row[2] for row in table)
+            print(f"\nTotal line changes: {display_diff(total_changes)}")
+            print("```")
+
+            # Generate JSON report
+            report = create_json_report(table, is_diff=True)
+            with open('line-count-diff.json', 'w') as f:
+                json.dump(report, f, indent=2)
+    else:
+        # Single directory analysis
+        headers = ["File", "Lines", "Tokens/Line"]
+        table = gen_stats(sys.argv[1] if len(sys.argv) > 1 else ".")
+
+        if table:
+            # Print table output
+            print("### Code Statistics for 'exo' Directory")
+            print("```")
+            print(format_table(
+                sorted(table, key=lambda x: x[1], reverse=True),
+                headers,
+                (".1f", "d", ".1f")
+            ))
+            total_lines = sum(row[1] for row in table)
+            print(f"\nTotal lines: {total_lines}")
+            print("```")
+
+            # Generate JSON report
+            report = create_json_report(table, is_diff=False)
+            with open('line-count-snapshot.json', 'w') as f:
+                json.dump(report, f, indent=2)
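
line_counter.py is driven from the CLI in CI (one path for a snapshot, two paths for a base-vs-PR diff), but the same functions can be used programmatically. A minimal sketch, assuming the module is importable (e.g. run from the extra/ directory) and that base_branch is a checkout of the comparison branch, as in the CircleCI job above:

```python
# Sketch: use extra/line_counter.py programmatically instead of via the CLI.
# Assumes line_counter is importable (e.g. run from extra/) and that
# "base_branch" is a hypothetical checkout of the branch to compare against.
import json
from line_counter import gen_stats, gen_diff, create_json_report

old_stats = gen_stats("base_branch")   # [relpath, line_count, tokens_per_line] per file
new_stats = gen_stats(".")
diff = gen_diff(old_stats, new_stats)  # rows only for added, deleted, or changed files

report = create_json_report(diff, is_diff=True)
print(f"Files changed: {report['total_files_changed']}, "
      f"net line change: {report['total_line_changes']:+d}")

with open("line-count-diff.json", "w") as f:
    json.dump(report, f, indent=2)
```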