from pathlib import Path
from typing import Generator

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from ..utils import OCCUPATION_MAJOR_CODES, style_plot


def generate_estimate_spread_per_occupation(
    output_dir: Path, df: pd.DataFrame, **kwargs
) -> Generator[Path, None, None]:
    """
    Generate a styled boxplot of the estimate-range spread per major occupation group.

    One box is drawn per distinct value of ``df['onetsoc_major']`` using the
    values in ``df['estimate_range']``. Outliers are hidden and the y-axis is
    logarithmic. X tick labels are translated through
    ``OCCUPATION_MAJOR_CODES``; a code missing from that mapping keeps its raw
    tick text.

    Args:
        output_dir: Directory the PNG is written into. It is not created here.
        df: Data frame providing the ``onetsoc_major`` and ``estimate_range``
            columns.
        **kwargs: Accepted and ignored by this function.

    Yields:
        The path of the saved figure,
        ``output_dir / "estimates_spread_per_occupation.png"``.
    """
    style_plot()

    output_path = output_dir / "estimates_spread_per_occupation.png"

    fig, ax = plt.subplots(figsize=(10, 12))
    try:
        sns.boxplot(
            data=df,
            x='onetsoc_major',
            y='estimate_range',
            showfliers=False,
            ax=ax,
        )

        ax.set_yscale('log')
        ax.set_xlabel('Occupation')
        ax.set_ylabel('Range (upper-lower, minutes)')
        ax.set_title('Spread of time-range estimates per occupation')

        # Get occupation labels from codes for x-axis ticks
        labels = [
            OCCUPATION_MAJOR_CODES.get(tick.get_text(), tick.get_text())
            for tick in ax.get_xticklabels()
        ]
        # Pin the tick positions before relabeling: set_xticklabels installs a
        # fixed formatter, which is only reliable with fixed tick locations.
        ax.set_xticks(ax.get_xticks())
        ax.set_xticklabels(labels, rotation=60, ha='right')

        # Operate on this figure explicitly rather than on pyplot's "current"
        # figure.
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    yield output_path