39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
from pathlib import Path
|
|
from typing import Generator
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import pandas as pd
|
|
from ..utils import OCCUPATION_MAJOR_CODES, style_plot
|
|
|
|
|
|
def generate_estimate_spread_per_occupation(output_dir: Path, df: pd.DataFrame, **kwargs) -> Generator[Path, None, None]:
    """
    Generate a styled boxplot of the estimate range spread per major occupation group.

    The plot draws one box per distinct value of the ``onetsoc_major`` column,
    showing the distribution of the ``estimate_range`` column on a logarithmic
    y-axis. Outlier points are hidden (``showfliers=False``).

    Args:
        output_dir: Directory the PNG is written into. It is not created here.
        df: Data to plot; must contain the ``onetsoc_major`` and
            ``estimate_range`` columns.
        **kwargs: Accepted for interface compatibility; not used by this function.

    Yields:
        The path of the written image,
        ``output_dir / "estimates_spread_per_occupation.png"``, exactly once.
    """
    style_plot()
    output_path = output_dir / "estimates_spread_per_occupation.png"

    fig, ax = plt.subplots(figsize=(10, 12))
    try:
        sns.boxplot(
            data=df,
            x='onetsoc_major',
            y='estimate_range',
            showfliers=False,
            ax=ax,
        )

        ax.set_yscale('log')
        ax.set_xlabel('Occupation')
        ax.set_ylabel('Range (upper-lower, minutes)')
        ax.set_title('Spread of time-range estimates per occupation')

        # Get occupation labels from codes for x-axis ticks; a code missing
        # from OCCUPATION_MAJOR_CODES keeps its raw tick text.
        codes = [tick.get_text() for tick in ax.get_xticklabels()]
        labels = [OCCUPATION_MAJOR_CODES.get(code, code) for code in codes]

        # Pin the tick positions before relabelling: matplotlib expects
        # set_xticklabels to be used only with a fixed set of ticks.
        ax.set_xticks(ax.get_xticks())
        ax.set_xticklabels(labels, rotation=60, ha='right')

        # Operate on this figure explicitly instead of pyplot's implicit
        # "current figure".
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    yield output_path
|