168 lines
6.3 KiB
Python
168 lines
6.3 KiB
Python
from pathlib import Path
|
|
from typing import Generator, Dict, Tuple
|
|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from scipy.stats import linregress
|
|
from datetime import datetime
|
|
from ..utils import style_plot, LIME
|
|
|
|
def _generate_projection_data(
|
|
metr_results: Dict,
|
|
df: pd.DataFrame,
|
|
percentile_key: str,
|
|
) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
|
|
"""
|
|
Generates projection data for a given percentile key (e.g., 'p50_horizon_length').
|
|
Returns a tuple of (metr_df_with_pct, projection_df), or None if data is insufficient.
|
|
"""
|
|
# 1. Process METR data to get all model performance over time for the given percentile
|
|
all_model_data = []
|
|
for model_name, data in metr_results.get("results", {}).items():
|
|
for agent_name, agent_data in data.get("agents", {}).items():
|
|
release_date_str = data.get("release_date")
|
|
horizon = agent_data.get(percentile_key, {}).get("estimate")
|
|
|
|
if release_date_str and horizon is not None:
|
|
unique_model_name = f"{model_name}-{agent_name}"
|
|
all_model_data.append({
|
|
"model": unique_model_name,
|
|
"release_date": release_date_str,
|
|
"horizon_minutes": horizon,
|
|
})
|
|
|
|
if not all_model_data:
|
|
print(f"Warning: No models with {percentile_key} found in METR data. Skipping.")
|
|
return None
|
|
|
|
metr_df = pd.DataFrame(all_model_data).sort_values("release_date").reset_index(drop=True)
|
|
metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])
|
|
|
|
# 2. Perform log-linear regression on coherence over time
|
|
metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
|
|
if len(metr_df) < 2:
|
|
print(f"Warning: Not enough data points for regression for {percentile_key}. Skipping.")
|
|
return None
|
|
|
|
metr_df['days_since_start'] = (metr_df['release_date'] - metr_df['release_date'].min()).dt.days
|
|
log_y = np.log(metr_df['horizon_minutes'])
|
|
x = metr_df['days_since_start']
|
|
|
|
slope, intercept, r_value, _, _ = linregress(x, log_y)
|
|
doubling_time_days = np.log(2) / slope
|
|
print(f"METR all models {percentile_key} trend: R^2 = {r_value**2:.2f}, Doubling time = {doubling_time_days:.1f} days")
|
|
|
|
# 3. Project coherence into the future
|
|
start_date = metr_df['release_date'].min()
|
|
future_dates = pd.to_datetime(pd.date_range(start=start_date, end="2035-01-01", freq="ME"))
|
|
future_days = (future_dates - start_date).days.to_numpy()
|
|
|
|
projected_log_horizon = intercept + slope * future_days
|
|
projected_horizon_minutes = np.exp(projected_log_horizon)
|
|
|
|
projection_df = pd.DataFrame({
|
|
"date": future_dates,
|
|
"projected_coherence_minutes": projected_horizon_minutes,
|
|
})
|
|
|
|
# 4. Calculate the percentage of tasks automated over time based on our estimates
|
|
total_tasks = len(df)
|
|
if total_tasks == 0:
|
|
return None
|
|
|
|
for bound in ["lb", "mid", "ub"]:
|
|
col_name = 'estimate_midpoint' if bound == 'mid' else f'{bound}_estimate_in_minutes'
|
|
projection_df[f"pct_automatable_{bound}"] = projection_df["projected_coherence_minutes"].apply(
|
|
lambda h: (df[col_name] <= h).sum() / total_tasks * 100
|
|
)
|
|
|
|
metr_df["pct_automatable_mid"] = metr_df["horizon_minutes"].apply(
|
|
lambda h: (df['estimate_midpoint'] <= h).sum() / total_tasks * 100
|
|
)
|
|
|
|
return metr_df, projection_df
|
|
|
|
|
|
def _plot_projection(ax, projection_df, metr_df, label, color, line_style='-'):
    """Draw one projection on ``ax``: trend line, bound band and observed points.

    Args:
        ax: Axes-like object providing ``plot``, ``fill_between`` and ``scatter``.
        projection_df: Frame with "date" and the three "pct_automatable_*" columns.
        metr_df: Frame with "release_date" and "pct_automatable_mid" columns.
        label: Percentile tag such as "P50"; its first character is dropped to
            build the scatter legend entry.
        color: Colour shared by the line, the band and the markers.
        line_style: Matplotlib line style for the mid-point curve.
    """
    timeline = projection_df["date"]

    # Mid-point trajectory of the projected automation percentage.
    line_options = {
        "label": "Mid-point",
        "color": color,
        "linewidth": 2.5,
        "linestyle": line_style,
        "zorder": 3,
    }
    ax.plot(timeline, projection_df["pct_automatable_mid"], **line_options)

    # Shaded envelope between the lower- and upper-bound estimates.
    band_options = {
        "color": color,
        "alpha": 0.15,
        "label": "Lower/upper bound range",
        "zorder": 2,
    }
    ax.fill_between(
        timeline,
        projection_df["pct_automatable_lb"],
        projection_df["pct_automatable_ub"],
        **band_options,
    )

    # Observed METR models, drawn above both the line and the band.
    success_rate = label[1:]
    marker_options = {
        "color": color,
        "edgecolor": "black",
        "s": 60,
        "zorder": 4,
        "label": f"Model with {success_rate}% success rate",
    }
    ax.scatter(
        metr_df["release_date"],
        metr_df["pct_automatable_mid"],
        **marker_options,
    )
|
|
|
|
|
|
def _render_projection_figure(projection_data, label, color, title, y_label, output_path):
    """Draw a single projection figure, save it to ``output_path`` and close it.

    Args:
        projection_data: ``(metr_df, projection_df)`` tuple as returned by
            ``_generate_projection_data``.
        label: Percentile tag forwarded to ``_plot_projection`` (e.g. "P50").
        color: Colour forwarded to ``_plot_projection``.
        title: Figure title.
        y_label: Y-axis label.
        output_path: Destination file for the rendered figure.

    Returns:
        ``output_path``, once the file has been written.
    """
    metr_df, proj_df = projection_data
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        _plot_projection(ax, proj_df, metr_df, label, color)
        ax.set_title(title, fontsize=16, pad=20)
        ax.set_xlabel("Year")
        ax.set_ylabel(y_label)
        ax.set_ylim(0, 100.5)
        ax.set_xlim(datetime(2022, 1, 1), proj_df["date"].max())
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend(loc="upper left")
        # Operate on this figure explicitly rather than pyplot's "current figure".
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    return output_path


def generate_projected_task_automation_plot(
    output_dir: Path,
    metr_results: Dict,
    df: pd.DataFrame,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates plots projecting task automation based on METR's p50 and p80
    coherence data.

    Args:
        output_dir: Directory the PNG files are written into.
        metr_results: METR results mapping passed to ``_generate_projection_data``.
        df: Task table passed to ``_generate_projection_data``.
        **kwargs: Accepted for interface compatibility; not used here.

    Yields:
        The path of each figure written: "projected_task_automation_p50.png"
        then "projected_task_automation_p80.png". A percentile whose
        projection data is unavailable is skipped.
    """
    style_plot()

    # (percentile key, label, colour, title, y-axis label, file name)
    plot_specs = (
        (
            "p50_horizon_length",
            "P50",
            LIME['600'],
            "How long before sequential coherence stops being a bottleneck?",
            "% of task automatable (50% success rate)",
            "projected_task_automation_p50.png",
        ),
        (
            "p80_horizon_length",
            "P80",
            'tab:cyan',
            "Projected Task Automation (P80 AI Coherence)",
            "% of Estimable Economic Tasks Automatable",
            "projected_task_automation_p80.png",
        ),
    )

    # Compute every projection up front so all trend diagnostics are printed
    # before the first figure is rendered.
    projections = [
        (_generate_projection_data(metr_results, df, spec[0]), spec)
        for spec in plot_specs
    ]

    for projection_data, (_, label, color, title, y_label, filename) in projections:
        if projection_data is None:
            continue
        yield _render_projection_figure(
            projection_data, label, color, title, y_label, output_dir / filename
        )
|