from pathlib import Path
from typing import Generator

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from ..utils import OCCUPATION_MAJOR_CODES, style_plot


def generate_estimates_lower_vs_upper_scatter(
    output_dir: Path, df: pd.DataFrame, **kwargs
) -> Generator[Path, None, None]:
    """Render a log-log scatter plot of lower- vs upper-bound time estimates.

    Each row of ``df`` becomes one point, coloured by its occupation major
    group. A dashed ``y = x`` line and dotted ``y = k * x`` guide lines
    (``k`` in 2, 10, 100) are drawn behind the points for reference.

    Args:
        output_dir: Directory the PNG is written into. It is not created
            here, so it must already exist.
        df: Data to plot. Must contain the columns ``onetsoc_major``,
            ``lb_estimate_in_minutes`` and ``ub_estimate_in_minutes``.
            The frame is copied and never modified.
        **kwargs: Accepted and ignored.

    Yields:
        The path of the saved image,
        ``output_dir / "estimates_lower_vs_upper_scatter.png"``.
    """
    style_plot()

    output_path = output_dir / "estimates_lower_vs_upper_scatter.png"

    plot_df = df.copy()
    # Replace onetsoc_major codes with their corresponding labels for the
    # plot legend.
    # NOTE(review): codes missing from OCCUPATION_MAJOR_CODES become NaN after
    # the map; confirm the mapping covers every code present in the data.
    plot_df['onetsoc_major'] = plot_df['onetsoc_major'].map(OCCUPATION_MAJOR_CODES)

    fig, ax = plt.subplots(figsize=(12, 10))
    # Close the figure even if drawing or saving fails, so that a failed run
    # does not leave an open figure registered with pyplot.
    try:
        sns.scatterplot(
            data=plot_df,
            x='lb_estimate_in_minutes',
            y='ub_estimate_in_minutes',
            alpha=0.3,
            edgecolor=None,
            hue="onetsoc_major",
            ax=ax,
        )

        # 45° reference line (y=x), spanning the combined range of both
        # estimate columns with a 10% margin on either side.
        lower = min(
            df['lb_estimate_in_minutes'].min(),
            df['ub_estimate_in_minutes'].min(),
        )
        upper = max(
            df['lb_estimate_in_minutes'].max(),
            df['ub_estimate_in_minutes'].max(),
        )
        lims = (lower * 0.9, upper * 1.1)
        ax.plot(lims, lims, color='black', linestyle='--', linewidth=1, zorder=0)

        # Optional helper lines for ratios: y = k * x.
        for k in [2, 10, 100]:
            ax.plot(
                lims,
                [k * bound for bound in lims],
                linestyle=':',
                color='grey',
                linewidth=1,
                zorder=0,
            )

        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('Lower-bound (min, log scale)')
        ax.set_ylabel('Upper-bound (min, log scale)')
        ax.set_title('Lower vs Upper Estimates for All Tasks')
        # Anchor the legend outside the axes, to the right of the plot area.
        ax.legend(
            title="Occupation Major Group",
            bbox_to_anchor=(1.02, 1),
            loc='upper left',
        )

        # Operate on this figure explicitly instead of pyplot's implicit
        # "current figure".
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        plt.close(fig)

    yield output_path