sprint-econtai/pipeline/generators/projected_task_automation.py
Félix Dorn 65dc648797 wip
2025-07-15 00:34:54 +02:00

168 lines
6.3 KiB
Python

from pathlib import Path
from typing import Generator, Dict, Tuple
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
from datetime import datetime
from ..utils import style_plot, LIME
def _generate_projection_data(
metr_results: Dict,
df: pd.DataFrame,
percentile_key: str,
) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
"""
Generates projection data for a given percentile key (e.g., 'p50_horizon_length').
Returns a tuple of (metr_df_with_pct, projection_df), or None if data is insufficient.
"""
# 1. Process METR data to get all model performance over time for the given percentile
all_model_data = []
for model_name, data in metr_results.get("results", {}).items():
for agent_name, agent_data in data.get("agents", {}).items():
release_date_str = data.get("release_date")
horizon = agent_data.get(percentile_key, {}).get("estimate")
if release_date_str and horizon is not None:
unique_model_name = f"{model_name}-{agent_name}"
all_model_data.append({
"model": unique_model_name,
"release_date": release_date_str,
"horizon_minutes": horizon,
})
if not all_model_data:
print(f"Warning: No models with {percentile_key} found in METR data. Skipping.")
return None
metr_df = pd.DataFrame(all_model_data).sort_values("release_date").reset_index(drop=True)
metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])
# 2. Perform log-linear regression on coherence over time
metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
if len(metr_df) < 2:
print(f"Warning: Not enough data points for regression for {percentile_key}. Skipping.")
return None
metr_df['days_since_start'] = (metr_df['release_date'] - metr_df['release_date'].min()).dt.days
log_y = np.log(metr_df['horizon_minutes'])
x = metr_df['days_since_start']
slope, intercept, r_value, _, _ = linregress(x, log_y)
doubling_time_days = np.log(2) / slope
print(f"METR all models {percentile_key} trend: R^2 = {r_value**2:.2f}, Doubling time = {doubling_time_days:.1f} days")
# 3. Project coherence into the future
start_date = metr_df['release_date'].min()
future_dates = pd.to_datetime(pd.date_range(start=start_date, end="2035-01-01", freq="ME"))
future_days = (future_dates - start_date).days.to_numpy()
projected_log_horizon = intercept + slope * future_days
projected_horizon_minutes = np.exp(projected_log_horizon)
projection_df = pd.DataFrame({
"date": future_dates,
"projected_coherence_minutes": projected_horizon_minutes,
})
# 4. Calculate the percentage of tasks automated over time based on our estimates
total_tasks = len(df)
if total_tasks == 0:
return None
for bound in ["lb", "mid", "ub"]:
col_name = 'estimate_midpoint' if bound == 'mid' else f'{bound}_estimate_in_minutes'
projection_df[f"pct_automatable_{bound}"] = projection_df["projected_coherence_minutes"].apply(
lambda h: (df[col_name] <= h).sum() / total_tasks * 100
)
metr_df["pct_automatable_mid"] = metr_df["horizon_minutes"].apply(
lambda h: (df['estimate_midpoint'] <= h).sum() / total_tasks * 100
)
return metr_df, projection_df
def _plot_projection(ax, projection_df, metr_df, label, color, line_style='-'):
    """Draw one projection on ``ax``: trend line, bound band and observed models.

    Args:
        ax: Object exposing ``plot``, ``fill_between`` and ``scatter``.
        projection_df: Provides ``date``, ``pct_automatable_mid``,
            ``pct_automatable_lb`` and ``pct_automatable_ub``.
        metr_df: Provides ``release_date`` and ``pct_automatable_mid``.
        label: Percentile tag; everything after its first character is
            embedded in the scatter legend entry.
        color: Colour shared by the line, the band and the markers.
        line_style: Line style of the mid-point curve.
    """
    # Mid-point trajectory of the projected automation percentage.
    midline_style = {
        "label": "Mid-point",
        "color": color,
        "linewidth": 2.5,
        "linestyle": line_style,
        "zorder": 3,
    }
    ax.plot(projection_df["date"], projection_df["pct_automatable_mid"], **midline_style)

    # Shaded region spanned by the lower- and upper-bound estimates.
    band_style = {
        "color": color,
        "alpha": 0.15,
        "label": "Lower/upper bound range",
        "zorder": 2,
    }
    ax.fill_between(
        projection_df["date"],
        projection_df["pct_automatable_lb"],
        projection_df["pct_automatable_ub"],
        **band_style,
    )

    # Observed METR models, drawn above both the line and the band.
    release_dates = metr_df['release_date']
    observed_pct = metr_df['pct_automatable_mid']
    marker_style = {
        "color": color,
        "edgecolor": 'black',
        "s": 60,
        "zorder": 4,
        "label": f"Model with {label[1:]}% success rate",
    }
    ax.scatter(release_dates, observed_pct, **marker_style)
def _save_projection_figure(output_path, projection_df, metr_df, label, color, title, ylabel):
    """Render a single projection chart and write it to ``output_path``.

    The figure is closed even when drawing or saving raises, so a failed
    chart does not stay registered with pyplot.

    Returns:
        ``output_path``, unchanged, for convenient yielding by the caller.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        _plot_projection(ax, projection_df, metr_df, label, color)
        ax.set_title(title, fontsize=16, pad=20)
        ax.set_xlabel("Year")
        ax.set_ylabel(ylabel)
        ax.set_ylim(0, 100.5)
        ax.set_xlim(datetime(2022, 1, 1), projection_df["date"].max())
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend(loc="upper left")
        fig.tight_layout()
        # Save through the figure object rather than pyplot's implicit
        # "current figure".
        fig.savefig(output_path)
    finally:
        plt.close(fig)
    return output_path


def generate_projected_task_automation_plot(
    output_dir: Path,
    metr_results: Dict,
    df: pd.DataFrame,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates plots projecting task automation based on METR's p50 and p80
    coherence data.

    Args:
        output_dir: Directory the PNG files are written into.
        metr_results: METR results mapping, forwarded to
            ``_generate_projection_data``.
        df: Task-estimate frame, forwarded to ``_generate_projection_data``.
        **kwargs: Accepted and ignored by this function.

    Yields:
        The path of each chart written: ``projected_task_automation_p50.png``
        and then ``projected_task_automation_p80.png``. A chart is skipped
        when its projection data is unavailable.
    """
    style_plot()

    chart_specs = [
        {
            "percentile_key": "p50_horizon_length",
            "label": "P50",
            "color": LIME['600'],
            "title": "How long before sequential coherence stops being a bottleneck?",
            "ylabel": "% of task automatable (50% success rate)",
            "filename": "projected_task_automation_p50.png",
        },
        {
            "percentile_key": "p80_horizon_length",
            "label": "P80",
            "color": 'tab:cyan',
            "title": "Projected Task Automation (P80 AI Coherence)",
            "ylabel": "% of Estimable Economic Tasks Automatable",
            "filename": "projected_task_automation_p80.png",
        },
    ]

    # Compute every projection before the first yield so the console output of
    # both fits appears up front, ahead of any plotting.
    projections = [
        (spec, _generate_projection_data(metr_results, df, spec["percentile_key"]))
        for spec in chart_specs
    ]

    for spec, data in projections:
        if data is None:
            continue
        metr_df, projection_df = data
        yield _save_projection_figure(
            output_dir / spec["filename"],
            projection_df,
            metr_df,
            spec["label"],
            spec["color"],
            spec["title"],
            spec["ylabel"],
        )