# dashboard.py
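"""Track Python package sizes across CircleCI builds.

Fetches recent main-branch pipelines from the CircleCI v2 API, downloads the
pip-sizes.json artifact produced by each pipeline's measure_pip_sizes job, and
renders an interactive Plotly trend report plus a latest_data.json summary.
"""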

import os
import json
import logging
import asyncio
import aiohttp
import pandas as pd
import plotly.express as px
from typing import List, Dict, Optional
from pathlib import Path


class AsyncCircleCIClient:
    def __init__(self, token: str, project_slug: str):
        self.token = token
        self.project_slug = project_slug
        self.base_url = "https://circleci.com/api/v2"
        self.headers = {
            "Circle-Token": token,
            "Accept": "application/json"
        }
        self.logger = logging.getLogger("CircleCI")

    async def get_json(self, session: aiohttp.ClientSession, url: str, params: Optional[Dict] = None) -> Dict:
        async with session.get(url, params=params) as response:
            response.raise_for_status()
            return await response.json()

    async def get_recent_pipelines(self, session: aiohttp.ClientSession, limit: int = 50) -> List[Dict]:
        self.logger.info(f"Fetching {limit} recent pipelines...")
        url = f"{self.base_url}/project/{self.project_slug}/pipeline"
        # Over-fetch so the branch filter below can still fill the requested limit
        params = {"limit": limit * 2}
        data = await self.get_json(session, url, params)
        pipelines = [
            p for p in data["items"]
            if p["state"] == "created"
            and p.get("trigger_parameters", {}).get("git", {}).get("branch") == "main"
        ][:limit]
        self.logger.info(f"Found {len(pipelines)} main branch pipelines")
        return pipelines

    async def get_workflow_jobs(self, session: aiohttp.ClientSession, pipeline_id: str) -> List[Dict]:
        self.logger.debug(f"Fetching workflows for pipeline {pipeline_id}")
        url = f"{self.base_url}/pipeline/{pipeline_id}/workflow"
        workflows_data = await self.get_json(session, url)
        workflows = workflows_data["items"]

        # Fetch all jobs for all workflows in parallel
        jobs_tasks = []
        for workflow in workflows:
            url = f"{self.base_url}/workflow/{workflow['id']}/job"
            jobs_tasks.append(self.get_json(session, url))
        jobs_responses = await asyncio.gather(*jobs_tasks, return_exceptions=True)

        # Skip workflows whose job fetch failed; keep the rest
        all_jobs = []
        for jobs_data in jobs_responses:
            if isinstance(jobs_data, Exception):
                continue
            all_jobs.extend(jobs_data["items"])
        return all_jobs

    async def get_artifacts(self, session: aiohttp.ClientSession, job_number: int) -> List[Dict]:
        url = f"{self.base_url}/project/{self.project_slug}/{job_number}/artifacts"
        data = await self.get_json(session, url)
        return data["items"]
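

# Expected shape of the pip-sizes.json artifact, inferred from how
# process_pipeline and _print_summary read it (the real report is produced by
# the measure_pip_sizes CI job and may carry additional fields):
#
#   {
#     "total_size_mb": 142.31,
#     "packages": [
#       {"name": "<package>", "size_mb": 98.70},
#       ...
#     ]
#   }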


class PackageSizeTracker:
    def __init__(self, token: str, project_slug: str, debug: bool = False):
        self.setup_logging(debug)
        self.client = AsyncCircleCIClient(token, project_slug)
        self.logger = logging.getLogger("PackageSizeTracker")

    def setup_logging(self, debug: bool):
        level = logging.DEBUG if debug else logging.INFO
        logging.basicConfig(
            level=level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%H:%M:%S'
        )
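
    # Commit metadata can appear in two places in the v2 pipeline payload
    # (an assumption based on the lookups below): GitHub-App-triggered
    # pipelines carry trigger_parameters.github_app, plain git triggers carry
    # trigger_parameters.git; both provide checkout_sha and repo_url.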

    def extract_commit_info(self, pipeline: Dict) -> Optional[Dict]:
        try:
            if 'trigger_parameters' in pipeline:
                # GitHub-App-triggered pipelines
                github_app = pipeline['trigger_parameters'].get('github_app', {})
                if github_app:
                    return {
                        'commit_hash': github_app.get('checkout_sha'),
                        'web_url': f"{github_app.get('repo_url')}/commit/{github_app.get('checkout_sha')}"
                    }
                # Plain git triggers
                git_params = pipeline['trigger_parameters'].get('git', {})
                if git_params:
                    return {
                        'commit_hash': git_params.get('checkout_sha'),
                        'web_url': f"{git_params.get('repo_url')}/commit/{git_params.get('checkout_sha')}"
                    }
            self.logger.warning(f"Could not find commit info in pipeline {pipeline['id']}")
            return None
        except Exception as e:
            self.logger.error(f"Error extracting commit info: {str(e)}")
            return None

    async def process_pipeline(self, session: aiohttp.ClientSession, pipeline: Dict) -> Optional[Dict]:
        try:
            commit_info = self.extract_commit_info(pipeline)
            if not commit_info:
                return None

            jobs = await self.client.get_workflow_jobs(session, pipeline["id"])
            size_job = next(
                (j for j in jobs if j["name"] == "measure_pip_sizes" and j["status"] == "success"),
                None
            )
            if not size_job:
                self.logger.debug(f"No measure_pip_sizes job found for pipeline {pipeline['id']}")
                return None

            artifacts = await self.client.get_artifacts(session, size_job["job_number"])
            size_report = next(
                (a for a in artifacts if a["path"].endswith("pip-sizes.json")),
                None
            )
            if not size_report:
                self.logger.debug(f"No pip-sizes.json artifact found for job {size_job['job_number']}")
                return None

            json_data = await self.client.get_json(session, size_report["url"])
            data_point = {
                "commit_hash": commit_info['commit_hash'],
                "commit_url": commit_info['web_url'],
                "timestamp": pipeline.get("created_at", pipeline.get("updated_at")),
                "total_size_mb": json_data["total_size_mb"],
                "packages": json_data["packages"]
            }
            self.logger.info(
                f"Processed pipeline {pipeline['id']}: "
                f"commit {commit_info['commit_hash'][:7]}, "
                f"size {json_data['total_size_mb']:.2f}MB"
            )
            return data_point
        except Exception as e:
            self.logger.error(f"Error processing pipeline {pipeline['id']}: {str(e)}")
            return None

    async def collect_data(self) -> List[Dict]:
        self.logger.info("Starting data collection...")
        async with aiohttp.ClientSession(headers=self.client.headers) as session:
            # Get pipelines
            pipelines = await self.client.get_recent_pipelines(session, 50)

            # Process all pipelines in parallel
            tasks = [self.process_pipeline(session, pipeline) for pipeline in pipelines]
            results = await asyncio.gather(*tasks)

            # Filter out None results
            data_points = [r for r in results if r is not None]
            return data_points

    def generate_report(self, data: List[Dict], output_dir: str = "reports") -> Optional[str]:
        self.logger.info("Generating report...")
        if not data:
            self.logger.error("No data to generate report from!")
            return None

        df = pd.DataFrame(data)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp')

        # commit_url is already in the data from process_pipeline
        # Create trend plot with updated styling
        fig = px.line(
            df,
            x='timestamp',
            y='total_size_mb',
            title='Package Size Trend',
            markers=True,
            hover_data={'commit_hash': True, 'timestamp': True, 'total_size_mb': ':.2f'},
            custom_data=['commit_hash', 'commit_url']
        )
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Total Size (MB)",
            hovermode='x unified',
            plot_bgcolor='white',
            paper_bgcolor='white',
            font=dict(size=12),
            title_x=0.5,
        )
        fig.update_traces(
            line=dict(width=2),
            marker=dict(size=8),
            hovertemplate="<br>".join([
                "Commit: %{customdata[0]}",
                "Size: %{y:.2f}MB",
                "Date: %{x}",
                "<extra>Click to view commit</extra>"
            ])
        )

        # Add JavaScript for click handling
        fig.update_layout(
            clickmode='event',
            annotations=[
                dict(
                    text="Click any point to view the commit on GitHub",
                    xref="paper", yref="paper",
                    x=0, y=1.05,
                    showarrow=False
                )
            ]
        )

        # Ensure output directory exists
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save plot
        plot_path = output_dir / "package_size_trend.html"
        fig.write_html(
            str(plot_path),
            include_plotlyjs=True,
            full_html=True,
            post_script="""
                const plot = document.getElementsByClassName('plotly-graph-div')[0];
                plot.on('plotly_click', function(data) {
                    const point = data.points[0];
                    const commitUrl = point.customdata[1];
                    window.open(commitUrl, '_blank');
                });
            """
        )

        # Generate summary
        latest = df.iloc[-1]
        previous = df.iloc[-2] if len(df) > 1 else latest
        size_change = latest['total_size_mb'] - previous['total_size_mb']

        latest_data = {
            'timestamp': latest['timestamp'].isoformat(),
            'commit_hash': latest['commit_hash'],
            'total_size_mb': latest['total_size_mb'],
            'size_change_mb': size_change,
            'packages': latest['packages']
        }
        with open(output_dir / 'latest_data.json', 'w') as f:
            json.dump(latest_data, f, indent=2)

        self._print_summary(latest_data)
        self.logger.info(f"Report generated in {output_dir}")
        return str(plot_path)

    def _print_summary(self, latest_data: Dict):
        print("\n=== Package Size Summary ===")
        print(f"Timestamp: {latest_data['timestamp']}")
        print(f"Commit: {latest_data['commit_hash'][:7]}")
        print(f"Total Size: {latest_data['total_size_mb']:.2f}MB")

        change = latest_data['size_change_mb']
        change_symbol = "↓" if change <= 0 else "↑"
        print(f"Change: {change_symbol} {abs(change):.2f}MB")

        print("\nTop 5 Largest Packages:")
        sorted_packages = sorted(latest_data['packages'], key=lambda x: x['size_mb'], reverse=True)
        for pkg in sorted_packages[:5]:
            print(f"- {pkg['name']}: {pkg['size_mb']:.2f}MB")
        print("\n")


async def main():
    token = os.getenv("CIRCLECI_TOKEN")
    project_slug = os.getenv("CIRCLECI_PROJECT_SLUG")
    debug = os.getenv("DEBUG", "").lower() in ("true", "1", "yes")

    if not token or not project_slug:
        print("Error: Please set CIRCLECI_TOKEN and CIRCLECI_PROJECT_SLUG environment variables")
        return

    tracker = PackageSizeTracker(token, project_slug, debug)
    try:
        data = await tracker.collect_data()
        if not data:
            print("No data found!")
            return
        report_path = tracker.generate_report(data)
        if report_path:
            print(f"\nDetailed report available at: {report_path}")
    except Exception as e:
        logging.error(f"Error: {str(e)}")
        if debug:
            raise


if __name__ == "__main__":
    asyncio.run(main())
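
# Usage sketch (the project slug follows CircleCI's <vcs>/<org>/<repo>
# convention, e.g. gh/<org>/<repo>):
#
#   export CIRCLECI_TOKEN=<your-api-token>
#   export CIRCLECI_PROJECT_SLUG=gh/<org>/<repo>
#   DEBUG=1 python dashboard.py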