wip
This commit is contained in:
parent
62296e1b69
commit
65dc648797
37 changed files with 1413 additions and 2433 deletions
168
pipeline/generators/projected_task_automation.py
Normal file
168
pipeline/generators/projected_task_automation.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
from pathlib import Path
|
||||
from typing import Generator, Dict, Tuple
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import linregress
|
||||
from datetime import datetime
|
||||
from ..utils import style_plot, LIME
|
||||
|
||||
def _generate_projection_data(
|
||||
metr_results: Dict,
|
||||
df: pd.DataFrame,
|
||||
percentile_key: str,
|
||||
) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
|
||||
"""
|
||||
Generates projection data for a given percentile key (e.g., 'p50_horizon_length').
|
||||
Returns a tuple of (metr_df_with_pct, projection_df), or None if data is insufficient.
|
||||
"""
|
||||
# 1. Process METR data to get all model performance over time for the given percentile
|
||||
all_model_data = []
|
||||
for model_name, data in metr_results.get("results", {}).items():
|
||||
for agent_name, agent_data in data.get("agents", {}).items():
|
||||
release_date_str = data.get("release_date")
|
||||
horizon = agent_data.get(percentile_key, {}).get("estimate")
|
||||
|
||||
if release_date_str and horizon is not None:
|
||||
unique_model_name = f"{model_name}-{agent_name}"
|
||||
all_model_data.append({
|
||||
"model": unique_model_name,
|
||||
"release_date": release_date_str,
|
||||
"horizon_minutes": horizon,
|
||||
})
|
||||
|
||||
if not all_model_data:
|
||||
print(f"Warning: No models with {percentile_key} found in METR data. Skipping.")
|
||||
return None
|
||||
|
||||
metr_df = pd.DataFrame(all_model_data).sort_values("release_date").reset_index(drop=True)
|
||||
metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])
|
||||
|
||||
# 2. Perform log-linear regression on coherence over time
|
||||
metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
|
||||
if len(metr_df) < 2:
|
||||
print(f"Warning: Not enough data points for regression for {percentile_key}. Skipping.")
|
||||
return None
|
||||
|
||||
metr_df['days_since_start'] = (metr_df['release_date'] - metr_df['release_date'].min()).dt.days
|
||||
log_y = np.log(metr_df['horizon_minutes'])
|
||||
x = metr_df['days_since_start']
|
||||
|
||||
slope, intercept, r_value, _, _ = linregress(x, log_y)
|
||||
doubling_time_days = np.log(2) / slope
|
||||
print(f"METR all models {percentile_key} trend: R^2 = {r_value**2:.2f}, Doubling time = {doubling_time_days:.1f} days")
|
||||
|
||||
# 3. Project coherence into the future
|
||||
start_date = metr_df['release_date'].min()
|
||||
future_dates = pd.to_datetime(pd.date_range(start=start_date, end="2035-01-01", freq="ME"))
|
||||
future_days = (future_dates - start_date).days.to_numpy()
|
||||
|
||||
projected_log_horizon = intercept + slope * future_days
|
||||
projected_horizon_minutes = np.exp(projected_log_horizon)
|
||||
|
||||
projection_df = pd.DataFrame({
|
||||
"date": future_dates,
|
||||
"projected_coherence_minutes": projected_horizon_minutes,
|
||||
})
|
||||
|
||||
# 4. Calculate the percentage of tasks automated over time based on our estimates
|
||||
total_tasks = len(df)
|
||||
if total_tasks == 0:
|
||||
return None
|
||||
|
||||
for bound in ["lb", "mid", "ub"]:
|
||||
col_name = 'estimate_midpoint' if bound == 'mid' else f'{bound}_estimate_in_minutes'
|
||||
projection_df[f"pct_automatable_{bound}"] = projection_df["projected_coherence_minutes"].apply(
|
||||
lambda h: (df[col_name] <= h).sum() / total_tasks * 100
|
||||
)
|
||||
|
||||
metr_df["pct_automatable_mid"] = metr_df["horizon_minutes"].apply(
|
||||
lambda h: (df['estimate_midpoint'] <= h).sum() / total_tasks * 100
|
||||
)
|
||||
|
||||
return metr_df, projection_df
|
||||
|
||||
|
||||
def _plot_projection(ax, projection_df, metr_df, label, color, line_style='-'):
    """Draw one projection (trend line, uncertainty band, observed points) on ``ax``.

    Args:
        ax: Axis-like object providing ``plot``, ``fill_between`` and ``scatter``.
        projection_df: Frame with ``date`` and ``pct_automatable_{lb,mid,ub}`` columns.
        metr_df: Frame with ``release_date`` and ``pct_automatable_mid`` columns.
        label: Percentile tag; everything after its first character is shown in
            the scatter legend entry (e.g. "P50" -> "50").
        color: Colour shared by the line, the band and the points.
        line_style: Line style of the mid-point curve.
    """
    projection_dates = projection_df["date"]

    # Mid-point trajectory of the projected automation percentage.
    midpoint_kwargs = {
        "label": "Mid-point",
        "color": color,
        "linewidth": 2.5,
        "linestyle": line_style,
        "zorder": 3,
    }
    ax.plot(projection_dates, projection_df["pct_automatable_mid"], **midpoint_kwargs)

    # Shaded band between the lower- and upper-bound estimates.
    band_kwargs = {
        "color": color,
        "alpha": 0.15,
        "label": "Lower/upper bound range",
        "zorder": 2,
    }
    ax.fill_between(
        projection_dates,
        projection_df["pct_automatable_lb"],
        projection_df["pct_automatable_ub"],
        **band_kwargs,
    )

    # Observed METR data points, drawn above the line and the band.
    success_rate = label[1:]
    points_kwargs = {
        "color": color,
        "edgecolor": 'black',
        "s": 60,
        "zorder": 4,
        "label": f"Model with {success_rate}% success rate",
    }
    ax.scatter(metr_df['release_date'], metr_df['pct_automatable_mid'], **points_kwargs)
|
||||
|
||||
|
||||
def _save_projection_figure(output_path, metr_df, proj_df, label, color, title, ylabel):
    """Render one projection chart and write it to ``output_path``.

    The figure is always closed, even if plotting or saving raises, so a
    failed render does not leave an open matplotlib figure behind.

    Returns:
        ``output_path``, unchanged.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        _plot_projection(ax, proj_df, metr_df, label, color)
        ax.set_title(title, fontsize=16, pad=20)
        ax.set_xlabel("Year")
        ax.set_ylabel(ylabel)
        # Slight headroom above 100 so a curve sitting at 100% is not clipped.
        ax.set_ylim(0, 100.5)
        ax.set_xlim(datetime(2022, 1, 1), proj_df["date"].max())
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend(loc="upper left")
        # Act on this figure explicitly instead of pyplot's implicit
        # "current figure".
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)
    return output_path


def generate_projected_task_automation_plot(
    output_dir: Path,
    metr_results: Dict,
    df: pd.DataFrame,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates plots projecting task automation based on METR's p50 and p80
    coherence data.

    Args:
        output_dir: Directory the PNG files are written into.
        metr_results: METR results mapping, passed to
            ``_generate_projection_data``.
        df: Task table, passed to ``_generate_projection_data``.
        **kwargs: Accepted and ignored.

    Yields:
        The path of each chart written: ``projected_task_automation_p50.png``
        and then ``projected_task_automation_p80.png``. A chart is skipped
        when its projection data could not be computed.
    """
    style_plot()

    # Both projections are computed before the first chart is yielded.
    p50_data = _generate_projection_data(metr_results, df, 'p50_horizon_length')
    p80_data = _generate_projection_data(metr_results, df, 'p80_horizon_length')

    # Plot P50 alone
    if p50_data:
        p50_metr_df, p50_proj_df = p50_data
        yield _save_projection_figure(
            output_dir / "projected_task_automation_p50.png",
            p50_metr_df,
            p50_proj_df,
            "P50",
            LIME['600'],
            "How long before sequential coherence stops being a bottleneck?",
            "% of task automatable (50% success rate)",
        )

    # Plot P80 alone
    if p80_data:
        p80_metr_df, p80_proj_df = p80_data
        yield _save_projection_figure(
            output_dir / "projected_task_automation_p80.png",
            p80_metr_df,
            p80_proj_df,
            "P80",
            'tab:cyan',
            "Projected Task Automation (P80 AI Coherence)",
            "% of Estimable Economic Tasks Automatable",
        )
|
Loading…
Add table
Add a link
Reference in a new issue