# sprint-econtai/pipeline/generators/projected_task_automation.py

from pathlib import Path
from typing import Generator, Dict, Tuple
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
from datetime import datetime
from ..utils import style_plot, LIME

def _generate_projection_data(
    metr_results: Dict,
    df: pd.DataFrame,
    percentile_key: str,
) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
    """
    Generates projection data for a given percentile key (e.g., 'p50_horizon_length').
    Returns a tuple of (metr_df_with_pct, projection_df), or None if data is insufficient.
    """
    # 1. Process METR data to get all model performance over time for the given percentile
    all_model_data = []
    for model_name, data in metr_results.get("results", {}).items():
        for agent_name, agent_data in data.get("agents", {}).items():
            release_date_str = data.get("release_date")
            horizon = agent_data.get(percentile_key, {}).get("estimate")

            if release_date_str and horizon is not None:
                unique_model_name = f"{model_name}-{agent_name}"
                all_model_data.append({
                    "model": unique_model_name,
                    "release_date": release_date_str,
                    "horizon_minutes": horizon,
                })

    if not all_model_data:
        print(f"Warning: No models with {percentile_key} found in METR data. Skipping.")
        return None

    metr_df = pd.DataFrame(all_model_data).sort_values("release_date").reset_index(drop=True)
    metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])

    # 2. Perform log-linear regression on coherence over time
    metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
    if len(metr_df) < 2:
        print(f"Warning: Not enough data points for regression for {percentile_key}. Skipping.")
        return None

    metr_df['days_since_start'] = (metr_df['release_date'] - metr_df['release_date'].min()).dt.days
    log_y = np.log(metr_df['horizon_minutes'])
    x = metr_df['days_since_start']

    slope, intercept, r_value, _, _ = linregress(x, log_y)
    doubling_time_days = np.log(2) / slope
    print(f"METR all models {percentile_key} trend: R^2 = {r_value**2:.2f}, Doubling time = {doubling_time_days:.1f} days")

    # 3. Project coherence into the future
    start_date = metr_df['release_date'].min()
    future_dates = pd.to_datetime(pd.date_range(start=start_date, end="2035-01-01", freq="ME"))
    future_days = (future_dates - start_date).days.to_numpy()

    projected_log_horizon = intercept + slope * future_days
    projected_horizon_minutes = np.exp(projected_log_horizon)

    projection_df = pd.DataFrame({
        "date": future_dates,
        "projected_coherence_minutes": projected_horizon_minutes,
    })

    # 4. Calculate the percentage of tasks automated over time based on our estimates
    total_tasks = len(df)
    if total_tasks == 0:
        return None

    for bound in ["lb", "mid", "ub"]:
        col_name = 'estimate_midpoint' if bound == 'mid' else f'{bound}_estimate_in_minutes'
        projection_df[f"pct_automatable_{bound}"] = projection_df["projected_coherence_minutes"].apply(
            lambda h: (df[col_name] <= h).sum() / total_tasks * 100
        )

    metr_df["pct_automatable_mid"] = metr_df["horizon_minutes"].apply(
         lambda h: (df['estimate_midpoint'] <= h).sum() / total_tasks * 100
    )

    return metr_df, projection_df


def _plot_projection(ax, projection_df, metr_df, label, color, line_style='-'):
    """
    Draw one projection on ``ax``: trend line, bound band, and observed models.

    ``projection_df`` supplies the 'date', 'pct_automatable_mid',
    'pct_automatable_lb' and 'pct_automatable_ub' columns; ``metr_df`` supplies
    'release_date' and 'pct_automatable_mid'. ``label`` is used without its
    first character in the scatter legend entry (e.g. "P50" -> "50").
    """
    timeline = projection_df["date"]

    # Projected automation percentage (mid-point estimate).
    midpoint_style = {
        "label": "Mid-point",
        "color": color,
        "linewidth": 2.5,
        "linestyle": line_style,
        "zorder": 3,
    }
    ax.plot(timeline, projection_df["pct_automatable_mid"], **midpoint_style)

    # Shaded band spanning the lower-bound and upper-bound series.
    band_style = {
        "color": color,
        "alpha": 0.15,
        "label": "Lower/upper bound range",
        "zorder": 2,
    }
    ax.fill_between(
        timeline,
        projection_df["pct_automatable_lb"],
        projection_df["pct_automatable_ub"],
        **band_style,
    )

    # Observed METR data points, drawn above the line and the band.
    success_rate = label[1:]
    marker_style = {
        "color": color,
        "edgecolor": 'black',
        "s": 60,
        "zorder": 4,
        "label": f"Model with {success_rate}% success rate",
    }
    ax.scatter(metr_df['release_date'], metr_df['pct_automatable_mid'], **marker_style)


def _save_projection_figure(
    output_path: Path,
    metr_df: pd.DataFrame,
    projection_df: pd.DataFrame,
    *,
    label: str,
    color,
    title: str,
    ylabel: str,
) -> Path:
    """Render one projection chart, write it to ``output_path``, and return that path."""
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        _plot_projection(ax, projection_df, metr_df, label, color)
        ax.set_title(title, fontsize=16, pad=20)
        ax.set_xlabel("Year")
        ax.set_ylabel(ylabel)
        ax.set_ylim(0, 100.5)
        ax.set_xlim(datetime(2022, 1, 1), projection_df["date"].max())
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend(loc="upper left")
        fig.tight_layout()
        # Save through the figure handle rather than pyplot's implicit current figure.
        fig.savefig(output_path)
    finally:
        # Close even when plotting or saving fails so the figure is not left open.
        plt.close(fig)
    return output_path


def generate_projected_task_automation_plot(
    output_dir: Path,
    metr_results: Dict,
    df: pd.DataFrame,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates plots projecting task automation based on METR's p50 and p80
    coherence data.

    Args:
        output_dir: Directory the PNG files are written into.
        metr_results: METR results mapping, passed to ``_generate_projection_data``.
        df: Task estimate table, passed to ``_generate_projection_data``.
        **kwargs: Accepted and ignored.

    Yields:
        The path of each figure written: the P50 chart first, then the P80
        chart. A chart is skipped when its projection data is unavailable.
    """
    style_plot()

    # (percentile key, legend label, colour, title, y-axis label, file name)
    figure_specs = (
        (
            'p50_horizon_length',
            "P50",
            LIME['600'],
            "How long before sequential coherence stops being a bottleneck?",
            "% of task automatable (50% success rate)",
            "projected_task_automation_p50.png",
        ),
        (
            'p80_horizon_length',
            "P80",
            'tab:cyan',
            "Projected Task Automation (P80 AI Coherence)",
            "% of Estimable Economic Tasks Automatable",
            "projected_task_automation_p80.png",
        ),
    )

    # Compute every projection before drawing so all trend diagnostics are
    # printed before the first figure is yielded.
    projections = [
        _generate_projection_data(metr_results, df, spec[0]) for spec in figure_specs
    ]

    for spec, projection in zip(figure_specs, projections):
        if projection is None:
            continue
        _, label, color, title, ylabel, filename = spec
        metr_df, projection_df = projection
        yield _save_projection_figure(
            output_dir / filename,
            metr_df,
            projection_df,
            label=label,
            color=color,
            title=title,
            ylabel=ylabel,
        )