56 lines
1.9 KiB
Python
56 lines
1.9 KiB
Python
from pathlib import Path
|
|
from typing import Generator
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import pandas as pd
|
|
from ..utils import OCCUPATION_MAJOR_CODES, style_plot
|
|
|
|
|
|
def generate_estimates_lower_vs_upper_scatter(output_dir: Path, df: pd.DataFrame, **kwargs) -> Generator[Path, None, None]:
    """
    Generate a styled scatter plot of lower-bound vs upper-bound time estimates for tasks.

    Each row of ``df`` becomes one point (x = ``lb_estimate_in_minutes``,
    y = ``ub_estimate_in_minutes``) coloured by its ``onetsoc_major`` group.
    Both axes are logarithmic. A dashed ``y = x`` line and dotted
    ``y = 2x``, ``y = 10x`` and ``y = 100x`` guide lines are drawn behind the
    points so the upper/lower ratio can be read off visually.

    This is a generator function: nothing is plotted or written until the
    returned generator is iterated.

    Args:
        output_dir: Directory the PNG is written into. It is not created here.
        df: Data with the columns ``lb_estimate_in_minutes``,
            ``ub_estimate_in_minutes`` and ``onetsoc_major``. The frame is
            copied before modification, so the caller's data is left untouched.
        **kwargs: Accepted for call-site compatibility; not used by this plot.

    Yields:
        The path of the written file,
        ``output_dir / "estimates_lower_vs_upper_scatter.png"``.
    """
    style_plot()
    output_path = output_dir / "estimates_lower_vs_upper_scatter.png"

    plot_df = df.copy()
    # Replace onetsoc_major codes with their corresponding labels for the plot legend.
    # NOTE(review): assuming OCCUPATION_MAJOR_CODES is a plain dict, codes missing
    # from it become NaN and therefore get no legend entry - confirm this is intended.
    plot_df['onetsoc_major'] = plot_df['onetsoc_major'].map(OCCUPATION_MAJOR_CODES)

    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        sns.scatterplot(
            data=plot_df,
            x='lb_estimate_in_minutes',
            y='ub_estimate_in_minutes',
            alpha=0.3,
            edgecolor=None,
            hue="onetsoc_major",
            ax=ax,
        )

        # Shared extent for all reference lines: the overall min/max across both
        # estimate columns, padded by 10% on each side.
        lowest = min(df['lb_estimate_in_minutes'].min(), df['ub_estimate_in_minutes'].min())
        highest = max(df['lb_estimate_in_minutes'].max(), df['ub_estimate_in_minutes'].max())
        lims = (lowest * 0.9, highest * 1.1)

        # 45° reference line (y=x); zorder=0 keeps it behind the scatter points.
        ax.plot(lims, lims, color='black', linestyle='--', linewidth=1, zorder=0)

        # Helper lines for constant upper/lower ratios (y = k*x).
        for ratio in (2, 10, 100):
            ax.plot(lims, [ratio * bound for bound in lims],
                    linestyle=':', color='grey', linewidth=1, zorder=0)

        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('Lower-bound (min, log scale)')
        ax.set_ylabel('Upper-bound (min, log scale)')
        ax.set_title('Lower vs Upper Estimates for All Tasks')

        # Anchor the legend just outside the right edge so it does not cover points.
        ax.legend(title="Occupation Major Group", bbox_to_anchor=(1.02, 1), loc='upper left')

        # Operate on the explicit figure rather than pyplot's implicit "current figure".
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    yield output_path
|