wip
This commit is contained in:
parent
62296e1b69
commit
65dc648797
37 changed files with 1413 additions and 2433 deletions
|
@ -1,5 +1,15 @@
|
|||
from .estimate_histplot import generate_estimate_histplot
|
||||
from .estimates_spread_per_occupation import generate_estimate_spread_per_occupation
|
||||
from .estimates_lower_vs_upper_scatter import generate_estimates_lower_vs_upper_scatter
|
||||
from .sequential_coherence_cdf import plot_sequential_coherence_cdf
|
||||
from .projected_automatable_wage_bill import generate_projected_automatable_wage_bill
|
||||
from .projected_task_automation import generate_projected_task_automation_plot
|
||||
|
||||
# Plot generators exposed by this package, in the order they are listed.
# Every entry is one of the callables imported from the sibling modules above.
# `plot_sequential_coherence_cdf` is imported but intentionally left out of
# the list for now (kept here, commented, so it is easy to re-enable).
GENERATORS = [
    generate_estimate_histplot,
    generate_estimate_spread_per_occupation,
    generate_estimates_lower_vs_upper_scatter,
    # plot_sequential_coherence_cdf,
    generate_projected_automatable_wage_bill,
    generate_projected_task_automation_plot,
]
|
||||
|
|
|
@ -1,6 +1,32 @@
|
|||
from ..run import Run
|
||||
from pathlib import Path
|
||||
from typing import Generator
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import pandas as pd
|
||||
from ..utils import style_plot
|
||||
|
||||
def generate_estimate_histplot(output_dir: Path, df: pd.DataFrame, **kwargs) -> Generator[Path, None, None]:
    """
    Generates a styled histogram of the distribution of midpoint time estimates.

    Args:
        output_dir: Directory the PNG is written into.
        df: Task data; the ``estimate_midpoint`` column is plotted on a
            logarithmic x-axis.
        **kwargs: Accepted and ignored, so the function can be called with a
            wider keyword set than it needs.

    Yields:
        The path of the saved image (``estimate_distribution_histplot.png``).
    """
    style_plot()
    output_path = output_dir / "estimate_distribution_histplot.png"

    fig, ax = plt.subplots()
    try:
        sns.histplot(
            data=df,
            x='estimate_midpoint',
            log_scale=True,
            ax=ax,
        )

        ax.set_xlabel("Task Time (minutes, log scale)")
        ax.set_ylabel("Number of Tasks")
        ax.set_title("Distribution of Time Estimates for Atomic Tasks")

        # Operate on `fig` explicitly rather than on pyplot's "current figure".
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    yield output_path
|
||||
|
|
56
pipeline/generators/estimates_lower_vs_upper_scatter.py
Normal file
56
pipeline/generators/estimates_lower_vs_upper_scatter.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
from pathlib import Path
|
||||
from typing import Generator
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import pandas as pd
|
||||
from ..utils import OCCUPATION_MAJOR_CODES, style_plot
|
||||
|
||||
|
||||
def generate_estimates_lower_vs_upper_scatter(output_dir: Path, df: pd.DataFrame, **kwargs) -> Generator[Path, None, None]:
    """
    Generates a styled scatter plot of lower-bound vs upper-bound time estimates for tasks.

    Args:
        output_dir: Directory the PNG is written into.
        df: Task data; reads the ``lb_estimate_in_minutes``,
            ``ub_estimate_in_minutes`` and ``onetsoc_major`` columns.
        **kwargs: Accepted and ignored.

    Yields:
        The path of the saved image (``estimates_lower_vs_upper_scatter.png``).
    """
    style_plot()
    output_path = output_dir / "estimates_lower_vs_upper_scatter.png"

    plot_df = df.copy()
    # Replace onetsoc_major codes with their labels for the plot legend.
    # Codes without a known label are kept as-is instead of turning into NaN
    # (which would silently drop them from the hue legend).
    plot_df['onetsoc_major'] = plot_df['onetsoc_major'].map(
        lambda code: OCCUPATION_MAJOR_CODES.get(code, code)
    )

    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        sns.scatterplot(
            data=plot_df,
            x='lb_estimate_in_minutes',
            y='ub_estimate_in_minutes',
            alpha=0.3,
            edgecolor=None,
            hue="onetsoc_major",
            ax=ax,
        )

        # 45° reference line (y=x), padded by 10% beyond the data extent.
        lowest = min(df['lb_estimate_in_minutes'].min(), df['ub_estimate_in_minutes'].min())
        highest = max(df['lb_estimate_in_minutes'].max(), df['ub_estimate_in_minutes'].max())
        lims = (lowest * 0.9, highest * 1.1)
        ax.plot(lims, lims, color='black', linestyle='--', linewidth=1, zorder=0)

        # Helper lines where the upper bound is k times the lower bound.
        for k in [2, 10, 100]:
            ax.plot(lims, [k * value for value in lims],
                    linestyle=':', color='grey', linewidth=1, zorder=0)

        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('Lower-bound (min, log scale)')
        ax.set_ylabel('Upper-bound (min, log scale)')
        ax.set_title('Lower vs Upper Estimates for All Tasks')

        # Legend is anchored outside the axes, to the right of the plot.
        ax.legend(title="Occupation Major Group", bbox_to_anchor=(1.02, 1), loc='upper left')

        fig.tight_layout()
        # bbox_inches='tight' keeps the outside legend inside the saved image.
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    yield output_path
|
39
pipeline/generators/estimates_spread_per_occupation.py
Normal file
39
pipeline/generators/estimates_spread_per_occupation.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
from pathlib import Path
|
||||
from typing import Generator
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import pandas as pd
|
||||
from ..utils import OCCUPATION_MAJOR_CODES, style_plot
|
||||
|
||||
|
||||
def generate_estimate_spread_per_occupation(output_dir: Path, df: pd.DataFrame, **kwargs) -> Generator[Path, None, None]:
    """
    Generates a styled boxplot of the estimate range spread per major occupation group.

    Args:
        output_dir: Directory the PNG is written into.
        df: Task data; reads the ``onetsoc_major`` and ``estimate_range``
            columns.
        **kwargs: Accepted and ignored.

    Yields:
        The path of the saved image (``estimates_spread_per_occupation.png``).
    """
    style_plot()
    output_path = output_dir / "estimates_spread_per_occupation.png"

    fig, ax = plt.subplots(figsize=(10, 12))
    try:
        sns.boxplot(
            data=df,
            x='onetsoc_major',
            y='estimate_range',
            showfliers=False,
            ax=ax,
        )

        ax.set_yscale('log')
        ax.set_xlabel('Occupation')
        ax.set_ylabel('Range (upper-lower, minutes)')
        ax.set_title('Spread of time-range estimates per occupation')

        # Swap the occupation codes on the x-axis for their labels; a code
        # with no known label is shown unchanged.
        labels = [
            OCCUPATION_MAJOR_CODES.get(tick.get_text(), tick.get_text())
            for tick in ax.get_xticklabels()
        ]
        ax.set_xticklabels(labels, rotation=60, ha='right')

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    yield output_path
|
|
@ -1,6 +0,0 @@
|
|||
from typing import List

import pandas as pd
|
||||
|
||||
def must_have_columns(df: pd.DataFrame, columns: List[str]) -> None:
    """Raise if ``df`` lacks any of the required ``columns``.

    Args:
        df: DataFrame to validate.
        columns: Column names that must all be present in ``df``.

    Raises:
        ValueError: If at least one name is absent; the message lists only
            the names that are actually missing.
    """
    missing = [col for col in columns if col not in df.columns]
    if missing:
        raise ValueError(f"DataFrame is missing required columns: {missing}")
|
229
pipeline/generators/projected_automatable_wage_bill.py
Normal file
229
pipeline/generators/projected_automatable_wage_bill.py
Normal file
|
@ -0,0 +1,229 @@
|
|||
from pathlib import Path
|
||||
from typing import Generator, Dict, Tuple, Optional
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.ticker as mticker
|
||||
from scipy.stats import linregress
|
||||
from datetime import datetime
|
||||
from ..utils import style_plot, LIME
|
||||
|
||||
def _generate_wage_projection_data(
|
||||
metr_results: Dict,
|
||||
df_with_wages: pd.DataFrame,
|
||||
percentile_key: str,
|
||||
doubling_time_modifier: float,
|
||||
) -> Optional[Tuple[pd.DataFrame, pd.DataFrame, float]]:
|
||||
"""
|
||||
Generates wage projection data for different AI progress scenarios.
|
||||
|
||||
Args:
|
||||
metr_results: The METR benchmark data.
|
||||
df_with_wages: DataFrame containing tasks with their estimated wage value.
|
||||
percentile_key: The percentile to use from METR data (e.g., 'p50_horizon_length').
|
||||
doubling_time_modifier: Multiplier for the doubling time (e.g., 1.0 for baseline,
|
||||
0.5 for optimistic, 2.0 for pessimistic).
|
||||
|
||||
Returns:
|
||||
A tuple of (metr_df, projection_df, doubling_time_days), or None if data is insufficient.
|
||||
"""
|
||||
all_model_data = []
|
||||
for model_name, data in metr_results.get("results", {}).items():
|
||||
for agent_name, agent_data in data.get("agents", {}).items():
|
||||
release_date_str = data.get("release_date")
|
||||
horizon = agent_data.get(percentile_key, {}).get("estimate")
|
||||
if release_date_str and horizon is not None:
|
||||
all_model_data.append({
|
||||
"release_date": release_date_str,
|
||||
"horizon_minutes": horizon,
|
||||
})
|
||||
|
||||
if not all_model_data:
|
||||
return None
|
||||
|
||||
metr_df = pd.DataFrame(all_model_data).sort_values("release_date").reset_index(drop=True)
|
||||
metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])
|
||||
metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
|
||||
|
||||
if len(metr_df) < 2:
|
||||
return None
|
||||
|
||||
metr_df['days_since_start'] = (metr_df['release_date'] - metr_df['release_date'].min()).dt.days
|
||||
log_y = np.log(metr_df['horizon_minutes'])
|
||||
slope, intercept, r_value, _, _ = linregress(metr_df['days_since_start'], log_y)
|
||||
|
||||
# Apply the scenario modifier to the doubling time
|
||||
base_doubling_time_days = np.log(2) / slope
|
||||
modified_doubling_time_days = base_doubling_time_days * doubling_time_modifier
|
||||
modified_slope = np.log(2) / modified_doubling_time_days
|
||||
|
||||
start_date = metr_df['release_date'].min()
|
||||
future_dates = pd.to_datetime(pd.date_range(start=start_date, end="2035-01-01", freq="ME"))
|
||||
future_days = (future_dates - start_date).days.to_numpy()
|
||||
|
||||
projected_log_horizon = intercept + modified_slope * future_days
|
||||
projected_horizon_minutes = np.exp(projected_log_horizon)
|
||||
|
||||
projection_df = pd.DataFrame({
|
||||
"date": future_dates,
|
||||
"projected_coherence_minutes": projected_horizon_minutes,
|
||||
})
|
||||
|
||||
# Calculate the total wage bill of tasks automated over time
|
||||
for bound in ["lb", "mid", "ub"]:
|
||||
col_name = 'estimate_midpoint' if bound == 'mid' else f'{bound}_estimate_in_minutes'
|
||||
projection_df[f"automatable_wage_bill_{bound}"] = projection_df["projected_coherence_minutes"].apply(
|
||||
lambda h: df_with_wages.loc[df_with_wages[col_name] <= h, 'wage_per_task'].sum()
|
||||
)
|
||||
|
||||
# Also calculate for the actual METR data points for plotting
|
||||
metr_df["automatable_wage_bill_mid"] = metr_df["horizon_minutes"].apply(
|
||||
lambda h: df_with_wages.loc[df_with_wages['estimate_midpoint'] <= h, 'wage_per_task'].sum()
|
||||
)
|
||||
|
||||
return metr_df, projection_df, modified_doubling_time_days
|
||||
|
||||
|
||||
def _plot_scenario(ax, projection_df, metr_df, label, color, line_style='-'):
|
||||
"""Helper function to draw a single projection scenario on a given axis."""
|
||||
# Plot the projected wage bill
|
||||
ax.plot(
|
||||
projection_df["date"],
|
||||
projection_df["automatable_wage_bill_mid"],
|
||||
label=label,
|
||||
color=color,
|
||||
linewidth=2.5,
|
||||
linestyle=line_style,
|
||||
zorder=3
|
||||
)
|
||||
# Plot the shaded range for lower/upper bounds
|
||||
ax.fill_between(
|
||||
projection_df["date"],
|
||||
projection_df["automatable_wage_bill_lb"],
|
||||
projection_df["automatable_wage_bill_ub"],
|
||||
color=color,
|
||||
alpha=0.15,
|
||||
zorder=2
|
||||
)
|
||||
# Plot the actual METR data points against the wage bill
|
||||
ax.scatter(
|
||||
metr_df['release_date'],
|
||||
metr_df['automatable_wage_bill_mid'],
|
||||
color=color,
|
||||
edgecolor='black',
|
||||
s=60,
|
||||
zorder=4,
|
||||
label=f"Model Capabilities (P50)"
|
||||
)
|
||||
|
||||
|
||||
def generate_projected_automatable_wage_bill(
    output_dir: Path,
    df: pd.DataFrame,
    task_summary_by_occupation_df: pd.DataFrame,
    metr_results: Dict,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates a plot projecting the automatable wage bill under different
    AI progress scenarios (optimistic, baseline, pessimistic).

    Args:
        output_dir: Directory the PNG is written into.
        df: Task data; merged with the per-occupation wage data on
            ``onetsoc_code``.
        task_summary_by_occupation_df: Per-occupation summary; reads the
            ``onetsoc_code``, ``wage_bill`` and ``total_tasks`` columns.
        metr_results: METR benchmark data passed on to
            ``_generate_wage_projection_data``.
        **kwargs: Accepted and ignored.

    Yields:
        The path of the saved image
        (``projected_automatable_wage_bill_sensitivity.png``). Nothing is
        yielded when no scenario produces projection data.
    """
    style_plot()
    output_path = output_dir / "projected_automatable_wage_bill_sensitivity.png"

    # 1. Calculate wage_per_task for each occupation
    wage_bill_info = task_summary_by_occupation_df[['onetsoc_code', 'wage_bill', 'total_tasks']].copy()
    wage_bill_info['wage_per_task'] = wage_bill_info['wage_bill'] / wage_bill_info['total_tasks']
    # Division by zero tasks yields +/-inf; treat those occupations as 0.
    wage_bill_info = wage_bill_info.replace([np.inf, -np.inf], 0)
    wage_bill_info = wage_bill_info.drop(columns=['wage_bill', 'total_tasks'])

    # 2. Merge wage_per_task into the main task dataframe
    df_with_wages = pd.merge(df, wage_bill_info, on='onetsoc_code', how='left')
    # Assign the result back: an in-place fillna on a column selection is a
    # chained assignment and does not update the frame under Copy-on-Write.
    df_with_wages['wage_per_task'] = df_with_wages['wage_per_task'].fillna(0)

    # 3. Generate data for all three scenarios
    scenarios = {
        "Optimistic": {"modifier": 0.5, "color": "tab:green", "style": "--"},
        "Baseline": {"modifier": 1.0, "color": LIME['600'], "style": "-"},
        "Pessimistic": {"modifier": 2.0, "color": "tab:red", "style": ":"},
    }

    projection_results = {}
    for name, config in scenarios.items():
        result = _generate_wage_projection_data(
            metr_results, df_with_wages, 'p50_horizon_length', config['modifier']
        )
        if result is not None:
            projection_results[name] = result

    if not projection_results:
        print("Warning: Could not generate any projection data. Skipping wage bill plot.")
        return

    # 4. Create the plot
    fig, ax = plt.subplots(figsize=(14, 9))
    try:
        # We only need to plot the scatter points once, let's use the baseline ones.
        if "Baseline" in projection_results:
            metr_df, _, _ = projection_results["Baseline"]
            ax.scatter(
                metr_df['release_date'],
                metr_df['automatable_wage_bill_mid'],
                color='black',
                s=80,
                zorder=5,
                label="Model Capabilities (P50)",
            )

        legend_lines = []
        for name, (_, proj_df, doubling_time) in projection_results.items():
            config = scenarios[name]
            ax.plot(
                proj_df["date"],
                proj_df["automatable_wage_bill_mid"],
                color=config['color'],
                linestyle=config['style'],
                linewidth=2.5,
                zorder=3,
            )
            ax.fill_between(
                proj_df["date"],
                proj_df["automatable_wage_bill_lb"],
                proj_df["automatable_wage_bill_ub"],
                color=config['color'],
                alpha=0.15,
                zorder=2,
            )
            # Create a custom line for the legend (the plotted line itself
            # carries no label, so the doubling time can be shown here).
            line = plt.Line2D([0], [0], color=config['color'], linestyle=config['style'], lw=2.5,
                              label=f'{name} (Doubling Time: {doubling_time:.0f} days)')
            legend_lines.append(line)

        # 5. Styling and annotations
        ax.set_title("Projected Automatable Wage Bill (P50 Coherence)", fontsize=18, pad=20)
        ax.set_xlabel("Year", fontsize=12)
        ax.set_ylabel("Automatable Annual Wage Bill (Trillions of USD)", fontsize=12)

        # Format Y-axis to show trillions
        def trillions_formatter(x, pos):
            return f'${x / 1e12:.1f}T'
        ax.yaxis.set_major_formatter(mticker.FuncFormatter(trillions_formatter))

        # Upper y-limit: the sum of wage_per_task over all tasks, plus 5% headroom.
        total_wage_bill = df_with_wages['wage_per_task'].sum()
        ax.set_ylim(0, total_wage_bill * 1.05)

        if "Baseline" in projection_results:
            _, proj_df, _ = projection_results["Baseline"]
            ax.set_xlim(datetime(2022, 1, 1), proj_df["date"].max())

        # Create the legend from the custom lines and the scatter plot
        scatter_handles, _ = ax.get_legend_handles_labels()
        ax.legend(handles=legend_lines + scatter_handles, loc="upper left", fontsize=11)

        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    print(f"Generated sensitivity analysis plot: {output_path}")
    yield output_path
|
168
pipeline/generators/projected_task_automation.py
Normal file
168
pipeline/generators/projected_task_automation.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
from pathlib import Path
|
||||
from typing import Generator, Dict, Tuple
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.stats import linregress
|
||||
from datetime import datetime
|
||||
from ..utils import style_plot, LIME
|
||||
|
||||
def _generate_projection_data(
|
||||
metr_results: Dict,
|
||||
df: pd.DataFrame,
|
||||
percentile_key: str,
|
||||
) -> Tuple[pd.DataFrame, pd.DataFrame] | None:
|
||||
"""
|
||||
Generates projection data for a given percentile key (e.g., 'p50_horizon_length').
|
||||
Returns a tuple of (metr_df_with_pct, projection_df), or None if data is insufficient.
|
||||
"""
|
||||
# 1. Process METR data to get all model performance over time for the given percentile
|
||||
all_model_data = []
|
||||
for model_name, data in metr_results.get("results", {}).items():
|
||||
for agent_name, agent_data in data.get("agents", {}).items():
|
||||
release_date_str = data.get("release_date")
|
||||
horizon = agent_data.get(percentile_key, {}).get("estimate")
|
||||
|
||||
if release_date_str and horizon is not None:
|
||||
unique_model_name = f"{model_name}-{agent_name}"
|
||||
all_model_data.append({
|
||||
"model": unique_model_name,
|
||||
"release_date": release_date_str,
|
||||
"horizon_minutes": horizon,
|
||||
})
|
||||
|
||||
if not all_model_data:
|
||||
print(f"Warning: No models with {percentile_key} found in METR data. Skipping.")
|
||||
return None
|
||||
|
||||
metr_df = pd.DataFrame(all_model_data).sort_values("release_date").reset_index(drop=True)
|
||||
metr_df['release_date'] = pd.to_datetime(metr_df['release_date'])
|
||||
|
||||
# 2. Perform log-linear regression on coherence over time
|
||||
metr_df = metr_df[metr_df['horizon_minutes'] > 0].copy()
|
||||
if len(metr_df) < 2:
|
||||
print(f"Warning: Not enough data points for regression for {percentile_key}. Skipping.")
|
||||
return None
|
||||
|
||||
metr_df['days_since_start'] = (metr_df['release_date'] - metr_df['release_date'].min()).dt.days
|
||||
log_y = np.log(metr_df['horizon_minutes'])
|
||||
x = metr_df['days_since_start']
|
||||
|
||||
slope, intercept, r_value, _, _ = linregress(x, log_y)
|
||||
doubling_time_days = np.log(2) / slope
|
||||
print(f"METR all models {percentile_key} trend: R^2 = {r_value**2:.2f}, Doubling time = {doubling_time_days:.1f} days")
|
||||
|
||||
# 3. Project coherence into the future
|
||||
start_date = metr_df['release_date'].min()
|
||||
future_dates = pd.to_datetime(pd.date_range(start=start_date, end="2035-01-01", freq="ME"))
|
||||
future_days = (future_dates - start_date).days.to_numpy()
|
||||
|
||||
projected_log_horizon = intercept + slope * future_days
|
||||
projected_horizon_minutes = np.exp(projected_log_horizon)
|
||||
|
||||
projection_df = pd.DataFrame({
|
||||
"date": future_dates,
|
||||
"projected_coherence_minutes": projected_horizon_minutes,
|
||||
})
|
||||
|
||||
# 4. Calculate the percentage of tasks automated over time based on our estimates
|
||||
total_tasks = len(df)
|
||||
if total_tasks == 0:
|
||||
return None
|
||||
|
||||
for bound in ["lb", "mid", "ub"]:
|
||||
col_name = 'estimate_midpoint' if bound == 'mid' else f'{bound}_estimate_in_minutes'
|
||||
projection_df[f"pct_automatable_{bound}"] = projection_df["projected_coherence_minutes"].apply(
|
||||
lambda h: (df[col_name] <= h).sum() / total_tasks * 100
|
||||
)
|
||||
|
||||
metr_df["pct_automatable_mid"] = metr_df["horizon_minutes"].apply(
|
||||
lambda h: (df['estimate_midpoint'] <= h).sum() / total_tasks * 100
|
||||
)
|
||||
|
||||
return metr_df, projection_df
|
||||
|
||||
|
||||
def _plot_projection(ax, projection_df, metr_df, label, color, line_style='-'):
|
||||
"""Helper function to draw a single projection on a given axis."""
|
||||
# Plot the projected automation percentage
|
||||
ax.plot(
|
||||
projection_df["date"],
|
||||
projection_df["pct_automatable_mid"],
|
||||
label=f"Mid-point",
|
||||
color=color,
|
||||
linewidth=2.5,
|
||||
linestyle=line_style,
|
||||
zorder=3
|
||||
)
|
||||
ax.fill_between(
|
||||
projection_df["date"],
|
||||
projection_df["pct_automatable_lb"],
|
||||
projection_df["pct_automatable_ub"],
|
||||
color=color,
|
||||
alpha=0.15,
|
||||
label=f"Lower/upper bound range",
|
||||
zorder=2
|
||||
)
|
||||
# Plot the actual METR data points
|
||||
ax.scatter(
|
||||
metr_df['release_date'],
|
||||
metr_df['pct_automatable_mid'],
|
||||
color=color,
|
||||
edgecolor='black',
|
||||
s=60,
|
||||
zorder=4,
|
||||
label=f"Model with {label[1:]}% success rate"
|
||||
)
|
||||
|
||||
|
||||
def _save_projection_figure(output_path, proj_df, metr_df, label, color, title, ylabel):
    """Render one projection onto a fresh 12x8 figure, save it, and return ``output_path``."""
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        _plot_projection(ax, proj_df, metr_df, label, color)
        ax.set_title(title, fontsize=16, pad=20)
        ax.set_xlabel("Year")
        ax.set_ylabel(ylabel)
        # Slight headroom above 100 so a line sitting at 100% is not clipped.
        ax.set_ylim(0, 100.5)
        ax.set_xlim(datetime(2022, 1, 1), proj_df["date"].max())
        ax.grid(True, which="both", linestyle="--", linewidth=0.5)
        ax.legend(loc="upper left")
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return output_path


def generate_projected_task_automation_plot(
    output_dir: Path,
    metr_results: Dict,
    df: pd.DataFrame,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Generates plots projecting task automation based on METR's p50 and p80
    coherence data.

    Args:
        output_dir: Directory the PNGs are written into.
        metr_results: METR benchmark data passed on to
            ``_generate_projection_data``.
        df: Task data passed on to ``_generate_projection_data``.
        **kwargs: Accepted and ignored.

    Yields:
        The path of ``projected_task_automation_p50.png`` and then of
        ``projected_task_automation_p80.png``; a plot is skipped when its
        projection data could not be generated.
    """
    style_plot()

    # Both projections are computed up front, before any figure is drawn.
    p50_data = _generate_projection_data(metr_results, df, 'p50_horizon_length')
    p80_data = _generate_projection_data(metr_results, df, 'p80_horizon_length')

    # Plot P50 alone
    if p50_data is not None:
        p50_metr_df, p50_proj_df = p50_data
        yield _save_projection_figure(
            output_dir / "projected_task_automation_p50.png",
            p50_proj_df,
            p50_metr_df,
            "P50",
            LIME['600'],
            "How long before sequential coherence stops being a bottleneck?",
            "% of task automatable (50% success rate)",
        )

    # Plot P80 alone
    if p80_data is not None:
        p80_metr_df, p80_proj_df = p80_data
        yield _save_projection_figure(
            output_dir / "projected_task_automation_p80.png",
            p80_proj_df,
            p80_metr_df,
            "P80",
            'tab:cyan',
            "Projected Task Automation (P80 AI Coherence)",
            "% of Estimable Economic Tasks Automatable",
        )
|
54
pipeline/generators/sequential_coherence_cdf.py
Normal file
54
pipeline/generators/sequential_coherence_cdf.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.ticker as mtick
|
||||
from ..utils import LIME, style_plot
|
||||
|
||||
def plot_sequential_coherence_cdf(output_dir: Path, df: pd.DataFrame, **kwargs):
    """
    Plot cumulative distributions of the lower-bound, upper-bound and
    mid-point task estimates as step curves on a log x-axis.

    Reads the ``lb_estimate_in_minutes``, ``ub_estimate_in_minutes`` and
    ``estimate_midpoint`` columns of ``df``, writes
    ``sequential_coherence_cdf.png`` into ``output_dir`` and yields its path.
    Extra keyword arguments are accepted and ignored.
    """
    style_plot()
    output_path = output_dir / "sequential_coherence_cdf.png"

    def cdf(series):
        """Return (sorted values, cumulative percentage at each value)."""
        ordered = series.sort_values().reset_index(drop=True)
        cumulative_pct = ((ordered.index + 1) / len(ordered)) * 100
        return ordered.values, cumulative_pct

    # All three CDFs are computed before any figure is created.
    lower_curve = cdf(df['lb_estimate_in_minutes'])
    upper_curve = cdf(df['ub_estimate_in_minutes'])
    mid_curve = cdf(df['estimate_midpoint'])

    fig, ax = plt.subplots(figsize=(12, 7))

    # Step curves; the mid-point is drawn last, thicker, and on top.
    lower_style = dict(color=LIME['300'], linewidth=1.8, linestyle='--', zorder=2, label='Lower bound estimate')
    upper_style = dict(color=LIME['900'], linewidth=1.8, linestyle=':', zorder=3, label='Upper bound estimate')
    mid_style = dict(color=LIME['600'], linewidth=2.2, zorder=4, label='Mid-point')
    for (xs, ys), style in (
        (lower_curve, lower_style),
        (upper_curve, upper_style),
        (mid_curve, mid_style),
    ):
        ax.step(xs, ys, where='post', **style)

    # --- Axes configuration ---
    ax.set_xscale('log')
    ax.set_ylim(0, 100)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(decimals=0))

    ax.set_title("% of Tasks With Sequential Coherence ≤ X")
    ax.set_xlabel("Sequential Coherence (X)")
    ax.set_ylabel("Cumulative Percentage of Tasks")

    # Human-readable durations at fixed minute counts along the x-axis.
    tick_table = [
        (1, '1 min'),
        (5, '5 min'),
        (10, '10 min'),
        (30, '30 min'),
        (60, '1 hr'),
        (120, '2 hr'),
        (240, '4 hr'),
        (480, '8 hr'),
        (1440, '1 day'),
        (2880, '2 days'),
        (10080, '1 wk'),
        (43200, '30 days'),
        (129600, '90 days'),
        (259200, '180 days'),
        (525600, '1 yr'),
    ]
    ax.set_xticks([minutes for minutes, _ in tick_table])
    ax.set_xticklabels([text for _, text in tick_table], rotation=45, ha='right')

    ax.legend(loc='lower right')

    # --- Save and close ---
    plt.tight_layout()
    plt.savefig(output_path, bbox_inches='tight')
    plt.close(fig)

    yield output_path
|
Loading…
Add table
Add a link
Reference in a new issue