This commit is contained in:
Félix Dorn 2025-07-15 00:34:54 +02:00
parent 62296e1b69
commit 65dc648797
37 changed files with 1413 additions and 2433 deletions

View file

@ -0,0 +1,39 @@
from pathlib import Path
from typing import Generator
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from ..utils import OCCUPATION_MAJOR_CODES, style_plot
def generate_estimate_spread_per_occupation(
    output_dir: Path, df: pd.DataFrame, **kwargs
) -> Generator[Path, None, None]:
    """Render a boxplot of the estimate range spread per major occupation group.

    The plot is written to ``estimates_spread_per_occupation.png`` inside
    *output_dir*, and the path of that file is yielded once.

    Args:
        output_dir: Directory the PNG is written into.
        df: Data to plot. The ``onetsoc_major`` column is used for the x-axis
            categories and the ``estimate_range`` column for the y-axis values.
        **kwargs: Accepted and ignored by this function.

    Yields:
        The path of the saved image.
    """
    style_plot()

    output_path = output_dir / "estimates_spread_per_occupation.png"

    fig, ax = plt.subplots(figsize=(10, 12))
    # try/finally guarantees the figure is released from pyplot's global
    # registry even when plotting or saving fails.
    try:
        sns.boxplot(
            data=df,
            x='onetsoc_major',
            y='estimate_range',
            showfliers=False,  # outliers are not drawn
            ax=ax,
        )

        ax.set_yscale('log')
        ax.set_xlabel('Occupation')
        ax.set_ylabel('Range (upper-lower, minutes)')
        ax.set_title('Spread of time-range estimates per occupation')

        # Translate the occupation codes shown on the x-axis into labels,
        # keeping the raw code text when it has no entry in the mapping.
        codes = [tick.get_text() for tick in ax.get_xticklabels()]
        labels = [OCCUPATION_MAJOR_CODES.get(code, code) for code in codes]

        # Pin the tick positions before replacing their labels so the fixed
        # labels stay attached to a fixed set of ticks.
        ax.set_xticks(ax.get_xticks())
        ax.set_xticklabels(labels, rotation=60, ha='right')

        # Operate on the figure explicitly instead of pyplot's implicit
        # "current figure".
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)

    yield output_path