dashboard.py

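"""Track Python package size and repository line count across CircleCI pipelines.

Fetches recent main-branch pipelines, pulls the pip-sizes.json and
line-count-snapshot.json artifacts produced by the measure_pip_sizes and
check_line_count jobs, and writes an interactive Plotly dashboard plus a
latest_data.json summary to the reports/ directory.
"""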

import os
import json
import logging
import asyncio
from pathlib import Path
from typing import List, Dict, Optional

import aiohttp
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots


class AsyncCircleCIClient:
    """Thin async wrapper around the CircleCI v2 REST API."""

    def __init__(self, token: str, project_slug: str):
        self.token = token
        self.project_slug = project_slug
        self.base_url = "https://circleci.com/api/v2"
        self.headers = {
            "Circle-Token": token,
            "Accept": "application/json"
        }
        self.logger = logging.getLogger("CircleCI")

    async def get_json(self, session: aiohttp.ClientSession, url: str, params: Optional[Dict] = None) -> Dict:
        async with session.get(url, params=params) as response:
            response.raise_for_status()
            return await response.json()

    async def get_recent_pipelines(self, session: aiohttp.ClientSession, limit: int = 100) -> List[Dict]:
        self.logger.info(f"Fetching {limit} recent pipelines...")
        url = f"{self.base_url}/project/{self.project_slug}/pipeline"
        # Over-fetch so that enough pipelines survive the main-branch filter below
        params = {"limit": limit * 2}
        data = await self.get_json(session, url, params)
        pipelines = [
            p for p in data["items"]
            if p["state"] == "created"
            and p.get("trigger_parameters", {}).get("git", {}).get("branch") == "main"
        ][:limit]
        self.logger.info(f"Found {len(pipelines)} main branch pipelines")
        return pipelines

    async def get_workflow_jobs(self, session: aiohttp.ClientSession, pipeline_id: str) -> List[Dict]:
        self.logger.debug(f"Fetching workflows for pipeline {pipeline_id}")
        url = f"{self.base_url}/pipeline/{pipeline_id}/workflow"
        workflows_data = await self.get_json(session, url)
        workflows = workflows_data["items"]

        # Fetch the jobs of all workflows in parallel
        jobs_tasks = []
        for workflow in workflows:
            url = f"{self.base_url}/workflow/{workflow['id']}/job"
            jobs_tasks.append(self.get_json(session, url))
        jobs_responses = await asyncio.gather(*jobs_tasks, return_exceptions=True)

        all_jobs = []
        for jobs_data in jobs_responses:
            if isinstance(jobs_data, Exception):
                self.logger.debug(f"Skipping workflow whose job fetch failed: {jobs_data}")
                continue
            all_jobs.extend(jobs_data["items"])
        return all_jobs

    async def get_artifacts(self, session: aiohttp.ClientSession, job_number: str) -> List[Dict]:
        url = f"{self.base_url}/project/{self.project_slug}/{job_number}/artifacts"
        data = await self.get_json(session, url)
        return data["items"]
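
# A minimal sketch of using the client on its own; the slug below is a
# placeholder (CircleCI project slugs take the form "gh/<org>/<repo>"):
#
#   async def list_recent(limit: int = 10):
#       client = AsyncCircleCIClient(os.environ["CIRCLECI_TOKEN"], "gh/org/repo")
#       async with aiohttp.ClientSession(headers=client.headers) as session:
#           return await client.get_recent_pipelines(session, limit=limit)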


class PackageSizeTracker:
    """Collects per-commit package size and line count metrics and renders reports."""

    def __init__(self, token: str, project_slug: str, debug: bool = False):
        self.setup_logging(debug)
        self.client = AsyncCircleCIClient(token, project_slug)
        self.logger = logging.getLogger("PackageSizeTracker")

    def setup_logging(self, debug: bool):
        level = logging.DEBUG if debug else logging.INFO
        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%H:%M:%S'
        )

    def extract_commit_info(self, pipeline: Dict) -> Optional[Dict]:
        try:
            if 'trigger_parameters' in pipeline:
                # GitHub-App-triggered pipelines nest commit info under "github_app"
                github_app = pipeline['trigger_parameters'].get('github_app', {})
                if github_app:
                    return {
                        'commit_hash': github_app.get('checkout_sha'),
                        'web_url': f"{github_app.get('repo_url')}/commit/{github_app.get('checkout_sha')}"
                    }
                # Other triggers carry it under "git"
                git_params = pipeline['trigger_parameters'].get('git', {})
                if git_params:
                    return {
                        'commit_hash': git_params.get('checkout_sha'),
                        'web_url': f"{git_params.get('repo_url')}/commit/{git_params.get('checkout_sha')}"
                    }
            self.logger.warning(f"Could not find commit info in pipeline {pipeline['id']}")
            return None
        except Exception as e:
            self.logger.error(f"Error extracting commit info: {str(e)}")
            return None
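
    # The two trigger_parameters shapes handled above, as consumed by this code
    # (other fields are ignored; values are illustrative):
    #   {"trigger_parameters": {"github_app": {"checkout_sha": "<sha>", "repo_url": "<url>"}}}
    #   {"trigger_parameters": {"git":        {"checkout_sha": "<sha>", "repo_url": "<url>"}}}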

    async def process_pipeline(self, session: aiohttp.ClientSession, pipeline: Dict) -> Optional[Dict]:
        try:
            commit_info = self.extract_commit_info(pipeline)
            if not commit_info:
                return None

            jobs = await self.client.get_workflow_jobs(session, pipeline["id"])

            # Find the successful package-size and line-count jobs, if any
            size_job = next(
                (j for j in jobs if j["name"] == "measure_pip_sizes" and j["status"] == "success"),
                None
            )
            linecount_job = next(
                (j for j in jobs if j["name"] == "check_line_count" and j["status"] == "success"),
                None
            )
            if not size_job and not linecount_job:
                self.logger.debug(f"No relevant jobs found for pipeline {pipeline['id']}")
                return None

            data_point = {
                "commit_hash": commit_info['commit_hash'],
                "commit_url": commit_info['web_url'],
                "timestamp": pipeline.get("created_at", pipeline.get("updated_at")),
            }

            # Process size data if available
            if size_job:
                size_artifacts = await self.client.get_artifacts(session, size_job["job_number"])
                size_report = next(
                    (a for a in size_artifacts if a["path"].endswith("pip-sizes.json")),
                    None
                )
                if size_report:
                    size_data = await self.client.get_json(session, size_report["url"])
                    data_point.update({
                        "total_size_mb": size_data["total_size_mb"],
                        "packages": size_data["packages"]
                    })
                    self.logger.info(
                        f"Processed size data for pipeline {pipeline['id']}: "
                        f"commit {commit_info['commit_hash'][:7]}, "
                        f"size {size_data['total_size_mb']:.2f}MB"
                    )

            # Process line count data if available
            if linecount_job:
                linecount_artifacts = await self.client.get_artifacts(session, linecount_job["job_number"])
                linecount_report = next(
                    (a for a in linecount_artifacts if a["path"].endswith("line-count-snapshot.json")),
                    None
                )
                if linecount_report:
                    linecount_data = await self.client.get_json(session, linecount_report["url"])
                    data_point.update({
                        "total_lines": linecount_data["total_lines"],
                        "total_files": linecount_data["total_files"],
                        "files": linecount_data["files"]
                    })
                    self.logger.info(
                        f"Processed line count data for pipeline {pipeline['id']}: "
                        f"commit {commit_info['commit_hash'][:7]}, "
                        f"lines {linecount_data['total_lines']:,}"
                    )

            return data_point
        except Exception as e:
            self.logger.error(f"Error processing pipeline {pipeline['id']}: {str(e)}")
            return None
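
    # Expected artifact payloads, inferred from the keys accessed above (the
    # exact schemas are an assumption based on this file alone):
    #   pip-sizes.json:           {"total_size_mb": <float>,
    #                              "packages": [{"name": <str>, "size_mb": <float>}, ...]}
    #   line-count-snapshot.json: {"total_lines": <int>, "total_files": <int>, "files": [...]}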

    async def collect_data(self) -> List[Dict]:
        self.logger.info("Starting data collection...")
        async with aiohttp.ClientSession(headers=self.client.headers) as session:
            pipelines = await self.client.get_recent_pipelines(session, 100)

            # Process all pipelines in parallel; process_pipeline handles its own
            # errors and returns None on failure, so a plain gather is safe here
            tasks = [self.process_pipeline(session, pipeline) for pipeline in pipelines]
            results = await asyncio.gather(*tasks)

            # Filter out pipelines that yielded no data
            data_points = [r for r in results if r is not None]
            return data_points
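
    # A fully populated data point looks roughly like this (illustrative values):
    #   {"commit_hash": "abc1234...", "commit_url": "https://.../commit/abc1234...",
    #    "timestamp": "2024-01-01T00:00:00Z", "total_size_mb": 12.34, "packages": [...],
    #    "total_lines": 45678, "total_files": 321, "files": [...]}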

    def generate_report(self, data: List[Dict], output_dir: str = "reports") -> Optional[str]:
        self.logger.info("Generating report...")
        if not data:
            self.logger.error("No data to generate report from!")
            return None

        # Separate dataframes per metric, since not every pipeline has both
        df_size = pd.DataFrame([d for d in data if 'total_size_mb' in d])
        df_lines = pd.DataFrame([d for d in data if 'total_lines' in d])

        # Ensure the output directory exists
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Single figure with one subplot per metric
        fig = make_subplots(
            rows=2, cols=1,
            subplot_titles=('Package Size Trend', 'Line Count Trend'),
            vertical_spacing=0.2
        )

        # Add the package size trace if we have data
        if not df_size.empty:
            df_size['timestamp'] = pd.to_datetime(df_size['timestamp'])
            df_size = df_size.sort_values('timestamp')
            fig.add_trace(
                go.Scatter(
                    x=df_size['timestamp'],
                    y=df_size['total_size_mb'],
                    mode='lines+markers',
                    name='Package Size',
                    customdata=df_size[['commit_hash', 'commit_url']].values,
                    hovertemplate="<br>".join([
                        "Size: %{y:.2f}MB",
                        "Date: %{x}",
                        "Commit: %{customdata[0]}",
                        "<extra></extra>"
                    ])
                ),
                row=1, col=1
            )
            fig.update_yaxes(title_text="Size (MB)", row=1, col=1)

        # Add the line count trace if we have data
        if not df_lines.empty:
            df_lines['timestamp'] = pd.to_datetime(df_lines['timestamp'])
            df_lines = df_lines.sort_values('timestamp')
            fig.add_trace(
                go.Scatter(
                    x=df_lines['timestamp'],
                    y=df_lines['total_lines'],
                    mode='lines+markers',
                    name='Line Count',
                    customdata=df_lines[['commit_hash', 'commit_url']].values,
                    hovertemplate="<br>".join([
                        "Lines: %{y:,.0f}",
                        "Date: %{x}",
                        "Commit: %{customdata[0]}",
                        "<extra></extra>"
                    ])
                ),
                row=2, col=1
            )
            fig.update_yaxes(title_text="Total Lines", row=2, col=1)

        # Layout shared by both subplots
        fig.update_layout(
            height=800,  # Tall enough to accommodate both plots
            showlegend=False,
            title_text="Package Metrics Dashboard",
            title_x=0.5,
            plot_bgcolor='white',
            paper_bgcolor='white',
            font=dict(size=12),
            hovermode='x unified',
            xaxis=dict(title_text="Date"),
            xaxis2=dict(title_text="Date")
        )

        # Add click handling: clicking a point opens the commit in a new tab
        dashboard_html = f"""
        <html>
        <head>
            <title>Package Metrics Dashboard</title>
        </head>
        <body>
            <div id="dashboard">
                {fig.to_html(include_plotlyjs=True, full_html=False)}
            </div>
            <script>
                const plot = document.getElementById('dashboard').getElementsByClassName('plotly-graph-div')[0];
                plot.on('plotly_click', function(data) {{
                    const point = data.points[0];
                    const commitUrl = point.customdata[1];
                    window.open(commitUrl, '_blank');
                }});
            </script>
        </body>
        </html>
        """

        # Write the dashboard
        dashboard_path = output_dir / "dashboard.html"
        with open(dashboard_path, "w") as f:
            f.write(dashboard_html)

        # Generate a summary from whichever metrics are available
        latest_data = {}

        if not df_size.empty:
            latest = df_size.iloc[-1]
            previous = df_size.iloc[-2] if len(df_size) > 1 else latest
            size_change = float(latest['total_size_mb'] - previous['total_size_mb'])
            latest_data.update({
                'timestamp': latest['timestamp'].isoformat(),
                'commit_hash': latest['commit_hash'],
                'commit_url': latest['commit_url'],
                'total_size_mb': float(latest['total_size_mb']),
                'size_change_mb': size_change,
                'packages': latest.get('packages', [])
            })

        if not df_lines.empty:
            latest = df_lines.iloc[-1]
            previous = df_lines.iloc[-2] if len(df_lines) > 1 else latest
            linecount_change = int(latest['total_lines'] - previous['total_lines'])
            if not latest_data:  # Only add timestamp and commit info if not already added
                latest_data.update({
                    'timestamp': latest['timestamp'].isoformat(),
                    'commit_hash': latest['commit_hash'],
                    'commit_url': latest['commit_url'],
                })
            latest_data.update({
                'total_lines': int(latest['total_lines']),
                'linecount_change': linecount_change
            })

        if latest_data:
            with open(output_dir / 'latest_data.json', 'w') as f:
                json.dump(latest_data, f, indent=2)
            self._print_summary(latest_data)
            self.logger.info(f"Report generated in {output_dir}")
            return str(output_dir)

        return None
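
    # generate_report leaves two files in output_dir:
    #   dashboard.html    - interactive Plotly dashboard; clicking a point opens
    #                       the corresponding commit in a new browser tab
    #   latest_data.json  - summary of the newest data point, mirroring the
    #                       fields assembled above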

    def _print_summary(self, latest_data: Dict):
        print("\n=== Package Size Summary ===")
        print(f"Timestamp: {latest_data['timestamp']}")
        print(f"Commit: {latest_data['commit_hash'][:7]}")

        if 'total_size_mb' in latest_data:
            print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")
            change = latest_data['size_change_mb']
            change_symbol = "↓" if change <= 0 else "↑"
            print(f"Change: {change_symbol} {abs(change):.2f}MB")

            if latest_data.get('packages'):
                print("\nTop 5 Largest Packages:")
                sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
                for pkg in sorted_packages[:5]:
                    print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")

        if 'total_lines' in latest_data:
            print("\nLine Count Stats:")
            print(f"Total Lines: {latest_data['total_lines']:,}")
            change = latest_data['linecount_change']
            change_symbol = "↓" if change <= 0 else "↑"
            print(f"Change: {change_symbol} {abs(change):,}")

        print("\n")


async def main():
    token = os.getenv("CIRCLECI_TOKEN")
    project_slug = os.getenv("CIRCLECI_PROJECT_SLUG")
    debug = os.getenv("DEBUG", "").lower() in ("true", "1", "yes")

    if not token or not project_slug:
        print("Error: Please set CIRCLECI_TOKEN and CIRCLECI_PROJECT_SLUG environment variables")
        return

    tracker = PackageSizeTracker(token, project_slug, debug)
    try:
        data = await tracker.collect_data()
        if not data:
            print("No data found!")
            return
        report_path = tracker.generate_report(data)
        if report_path:
            print(f"\nDetailed report available at: {report_path}")
    except Exception as e:
        logging.error(f"Error: {str(e)}")
        if debug:
            raise


if __name__ == "__main__":
    asyncio.run(main())
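
# Typical invocation (values are placeholders; CircleCI project slugs take the
# form "gh/<org>/<repo>"):
#   CIRCLECI_TOKEN=<token> CIRCLECI_PROJECT_SLUG=gh/org/repo python dashboard.py
# Set DEBUG=1 for verbose logging; errors are then re-raised with a traceback.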