from pathlib import Path
from typing import Generator, Dict, Tuple
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
from datetime import datetime

from ..utils import style_plot, LIME


def _generate_projection_data(
    metr_results: Dict,
    df: pd.DataFrame,
    percentile_key: str,
) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
    """
    Generates projection data for a given percentile key (e.g., 'p50_horizon_length').

    Args:
        metr_results: Mapping read as
            ``results -> <model> -> {"release_date", "agents"}`` where each
            entry of ``agents`` maps ``percentile_key`` to a dict holding an
            ``"estimate"`` value (used here as a horizon in minutes).
        df: Task estimates. The columns ``estimate_midpoint``,
            ``lb_estimate_in_minutes`` and ``ub_estimate_in_minutes`` are read.
        percentile_key: Key looked up in each agent's data.

    Returns:
        A tuple of (metr_df_with_pct, projection_df), or None if data is
        insufficient: no usable models, fewer than two positive horizons,
        all release dates identical (no trend can be fitted), or ``df`` empty.
    """
    # 1. Process METR data to get all model performance over time for the given percentile
    all_model_data = []
    for model_name, data in metr_results.get("results", {}).items():
        # The release date belongs to the model, so look it up once per model.
        release_date_str = data.get("release_date")
        for agent_name, agent_data in data.get("agents", {}).items():
            horizon = agent_data.get(percentile_key, {}).get("estimate")
            if release_date_str and horizon is not None:
                all_model_data.append({
                    "model": f"{model_name}-{agent_name}",
                    "release_date": release_date_str,
                    "horizon_minutes": horizon,
                })

    if not all_model_data:
        print(f"Warning: No models with {percentile_key} found in METR data. Skipping.")
        return None

    metr_df = pd.DataFrame(all_model_data)
    # Parse the dates *before* sorting so the order is chronological rather
    # than lexicographic on the raw strings.
    metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])
    metr_df = metr_df.sort_values("release_date").reset_index(drop=True)

    # 2. Perform log-linear regression on coherence over time
    # Only strictly positive horizons have a defined logarithm.
    metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
    if len(metr_df) < 2:
        print(f"Warning: Not enough data points for regression for {percentile_key}. Skipping.")
        return None

    start_date = metr_df['release_date'].min()
    metr_df['days_since_start'] = (metr_df['release_date'] - start_date).dt.days

    # linregress cannot fit a line when every x value is the same, which
    # happens e.g. when a single model contributes several agents.
    if metr_df['days_since_start'].nunique() < 2:
        print(
            f"Warning: All release dates are identical for {percentile_key}; "
            "cannot fit a trend. Skipping."
        )
        return None

    log_y = np.log(metr_df['horizon_minutes'])
    x = metr_df['days_since_start']
    slope, intercept, r_value, _, _ = linregress(x, log_y)
    # A flat trend never doubles; report infinity without dividing by zero.
    doubling_time_days = np.log(2) / slope if slope != 0 else float("inf")
    print(f"METR all models {percentile_key} trend: R^2 = {r_value**2:.2f}, Doubling time = {doubling_time_days:.1f} days")

    # 3. Project coherence into the future
    future_dates = pd.date_range(start=start_date, end="2035-01-01", freq="ME")
    future_days = (future_dates - start_date).days.to_numpy()
    projected_horizon_minutes = np.exp(intercept + slope * future_days)

    projection_df = pd.DataFrame({
        "date": future_dates,
        "projected_coherence_minutes": projected_horizon_minutes,
    })

    # 4. Calculate the percentage of tasks automated over time based on our estimates
    total_tasks = len(df)
    if total_tasks == 0:
        return None

    def _pct_at_or_below(column: str, horizon: float) -> float:
        """Percentage of rows in ``df`` whose ``column`` value is <= ``horizon``."""
        return (df[column] <= horizon).sum() / total_tasks * 100

    bound_columns = {
        "lb": "lb_estimate_in_minutes",
        "mid": "estimate_midpoint",
        "ub": "ub_estimate_in_minutes",
    }
    for bound, col_name in bound_columns.items():
        # Bind the column as a default argument so the lambda does not depend
        # on the loop variable after this iteration.
        projection_df[f"pct_automatable_{bound}"] = projection_df["projected_coherence_minutes"].apply(
            lambda h, col=col_name: _pct_at_or_below(col, h)
        )

    metr_df["pct_automatable_mid"] = metr_df["horizon_minutes"].apply(
        lambda h: _pct_at_or_below("estimate_midpoint", h)
    )

    return metr_df, projection_df


def _plot_projection(ax, projection_df, metr_df, label, color, line_style='-'):
    """Helper function to draw a single projection on a given axis.

    Draws the mid-point projection line, a shaded band between the lower and
    upper bound projections, and a scatter of the observed METR data points.
    ``label`` is expected to look like ``"P50"``: its first character is
    dropped to form the success-rate text in the scatter legend entry.
    """
    # Plot the projected automation percentage
    ax.plot(
        projection_df["date"],
        projection_df["pct_automatable_mid"],
        label="Mid-point",
        color=color,
        linewidth=2.5,
        linestyle=line_style,
        zorder=3,
    )
    ax.fill_between(
        projection_df["date"],
        projection_df["pct_automatable_lb"],
        projection_df["pct_automatable_ub"],
        color=color,
        alpha=0.15,
        label="Lower/upper bound range",
        zorder=2,
    )

    # Plot the actual METR data points
    ax.scatter(
        metr_df['release_date'],
        metr_df['pct_automatable_mid'],
        color=color,
        edgecolor='black',
        s=60,
        zorder=4,
        label=f"Model with {label[1:]}% success rate",
    )


def _save_projection_figure(
    output_dir: Path,
    projection_data: Tuple[pd.DataFrame, pd.DataFrame],
    label: str,
    color,
    title: str,
    ylabel: str,
    filename: str,
) -> Path:
    """Render one projection figure, save it under ``output_dir`` and return its path."""
    metr_df, proj_df = projection_data
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        _plot_projection(ax, proj_df, metr_df, label, color)

        ax.set_title(title, fontsize=16, pad=20)
        ax.set_xlabel("Year")
        ax.set_ylabel(ylabel)
        # Slight headroom above 100 so a line sitting at 100% is not clipped.
        ax.set_ylim(0, 100.5)
        ax.set_xlim(datetime(2022, 1, 1), proj_df["date"].max())
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend(loc="upper left")

        fig.tight_layout()
        output_path = output_dir / filename
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    return output_path


def generate_projected_task_automation_plot(
    output_dir: Path,
    metr_results: Dict,
    df: pd.DataFrame,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates plots projecting task automation based on METR's p50 and p80 coherence data.

    Args:
        output_dir: Directory the PNG files are written into.
        metr_results: METR results mapping passed to ``_generate_projection_data``.
        df: Task estimate data passed to ``_generate_projection_data``.
        **kwargs: Accepted and ignored.

    Yields:
        The path of each figure written: the P50 plot first, then the P80
        plot. A plot is skipped when its projection data is unavailable.
    """
    style_plot()

    p50_data = _generate_projection_data(metr_results, df, 'p50_horizon_length')
    p80_data = _generate_projection_data(metr_results, df, 'p80_horizon_length')

    # Plot P50 alone
    if p50_data:
        yield _save_projection_figure(
            output_dir,
            p50_data,
            "P50",
            LIME['600'],
            "How long before sequential coherence stops being a bottleneck?",
            "% of task automatable (50% success rate)",
            "projected_task_automation_p50.png",
        )

    # Plot P80 alone
    if p80_data:
        yield _save_projection_figure(
            output_dir,
            p80_data,
            "P80",
            'tab:cyan',
            "Projected Task Automation (P80 AI Coherence)",
            "% of Estimable Economic Tasks Automatable",
            "projected_task_automation_p80.png",
        )