old
This commit is contained in:
parent
720f21a85b
commit
43076bcbb1
42 changed files with 237415 additions and 7831 deletions
161
analysis/generators/task_breakdown_by_occupation.py
Normal file
161
analysis/generators/task_breakdown_by_occupation.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.ticker as mtick
|
||||
import matplotlib.colors as mcolors
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import logging
|
||||
|
||||
# Maps each O*NET 2-digit major group code (stored as a string) to the
# human-readable label shown on the plot's y-axis. Insertion order is the
# order in which generate() summarizes the occupations.
OCCUPATION_MAJOR_CODES = {
    '11': 'Management',
    '13': 'Business & Financial',
    '15': 'Computer & Mathematical',
    '17': 'Architecture & Engineering',
    '19': 'Life, Physical, & Social Science',
    '21': 'Community & Social Service',
    '23': 'Legal',
    '25': 'Education, Training, & Library',
    '27': 'Arts, Design, & Media',
    '29': 'Healthcare Practitioners',
    '31': 'Healthcare Support',
    '33': 'Protective Service',
    '35': 'Food Preparation & Serving',
    '37': 'Building & Grounds Maintenance',
    '39': 'Personal Care & Service',
    '41': 'Sales & Related',
    '43': 'Office & Admin Support',
    '45': 'Farming, Fishing, & Forestry',
    '47': 'Construction & Extraction',
    '49': 'Installation, Maintenance, & Repair',
    '51': 'Production',
    '53': 'Transportation & Material Moving',
    '55': 'Military Specific',
}
|
||||
|
||||
# Bar segment colors, chosen to match the original notebook's palette
# (standard hex codes for gray and lime shades). Position matters: the
# i-th color is applied to the i-th plotted column in generate(), i.e.
# 'Not Remote', 'Remote & Estimable', 'Remote & Not Estimable'.
BAR_COLORS = [
    '#D1D5DB',  # gray-300
    '#84CC16',  # lime-500
    '#D9F99D',  # lime-200
]
|
||||
|
||||
|
||||
def _get_contrasting_text_color(bg_color_hex):
    """
    Choose 'black' or 'white' text for legibility on a background color.

    Args:
        bg_color_hex: Background color, parsed with
            ``matplotlib.colors.to_rgba`` (this module passes hex strings).

    Returns:
        str: 'black' if the weighted brightness of the background is above
        0.55, otherwise 'white'. If the color cannot be parsed (``to_rgba``
        raises ``ValueError``), 'black' is returned as a safe default.
    """
    try:
        red, green, blue, _alpha = mcolors.to_rgba(bg_color_hex)
    except ValueError:
        return 'black'  # Default to black if color is invalid

    # Luminance weights applied directly to the gamma-encoded channel values.
    # This is an approximation (no sRGB linearization), which is sufficient
    # for a simple light/dark decision against the 0.55 threshold.
    luminance = 0.2126 * red + 0.7152 * green + 0.0722 * blue
    return 'black' if luminance > 0.55 else 'white'
|
||||
|
||||
|
||||
def generate(processed_df: pd.DataFrame):
    """
    Generates a stacked bar chart breaking down tasks by remote status and estimability.

    This corresponds to 'cell10' from the original analysis notebook. It shows,
    for each occupation, the percentage of tasks that are not remote, remote and
    estimable, or remote and not estimable.

    Rows are bucketed as follows:
      * 'Not Remote': ``remote_status`` is anything other than 'remote'.
      * 'Remote & Estimable': ``remote_status == 'remote'`` and
        ``estimateable == 'ATOMIC'``.
      * 'Remote & Not Estimable': ``remote_status == 'remote'`` and
        ``estimateable == 'ONGOING-CONSTRAINT'``.
    Remote rows with any other ``estimateable`` value still count toward the
    occupation's total, so a bar may sum to less than 100%.

    Args:
        processed_df (pd.DataFrame): The preprocessed data. Expected columns:
            'onetsoc_major', 'remote_status', 'estimateable'. Values of
            'onetsoc_major' are compared against the string keys of
            OCCUPATION_MAJOR_CODES. The frame is only read, never modified.

    Returns:
        Path: The path to the generated temporary image file, or None on failure
        (missing columns, no rows for any known occupation, or a plotting error).
    """
    logging.info("Generating task breakdown by occupation plot...")

    # --- Data Validation ---
    required_cols = ['onetsoc_major', 'remote_status', 'estimateable']
    if not all(col in processed_df.columns for col in required_cols):
        logging.error(f"Missing one or more required columns: {required_cols}. Cannot generate plot.")
        return None

    # --- Data Summarization ---
    # The input is only filtered, never mutated, so no defensive copy is needed.
    summary_data = []
    for code, label in OCCUPATION_MAJOR_CODES.items():
        occ_df = processed_df[processed_df['onetsoc_major'] == code]
        total_tasks = len(occ_df)
        if total_tasks == 0:
            # Occupations without any tasks are left out of the chart entirely.
            continue

        remote_df = occ_df[occ_df['remote_status'] == 'remote']
        summary_data.append({
            'occupation_label': label,
            'count_not_remote': len(occ_df[occ_df['remote_status'] != 'remote']),
            'count_remote_atomic': len(remote_df[remote_df['estimateable'] == 'ATOMIC']),
            'count_remote_ongoing': len(remote_df[remote_df['estimateable'] == 'ONGOING-CONSTRAINT']),
            'total_tasks': total_tasks,
        })

    if not summary_data:
        logging.warning("No data available to generate the task breakdown plot.")
        return None

    summary_df = pd.DataFrame(summary_data)

    # --- Percentage Calculation ---
    # total_tasks is always > 0 here because empty occupations were skipped.
    for category in ('not_remote', 'remote_atomic', 'remote_ongoing'):
        summary_df[f'pct_{category}'] = (
            summary_df[f'count_{category}'] / summary_df['total_tasks']
        ) * 100

    plot_df = summary_df.set_index('occupation_label')[
        ['pct_not_remote', 'pct_remote_atomic', 'pct_remote_ongoing']
    ]
    plot_df.columns = ['Not Remote', 'Remote & Estimable', 'Remote & Not Estimable']
    plot_df = plot_df.sort_values(by='Not Remote', ascending=False)

    # --- Plotting ---
    # Keep an explicit handle on the figure so that saving and cleanup act on
    # the figure created here rather than on pyplot's implicit current figure.
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(14, 10))
        plot_df.plot(kind='barh', stacked=True, ax=ax, color=BAR_COLORS, width=0.8)

        ax.set_xlabel("Percentage of Tasks", fontsize=12)
        ax.set_ylabel("Occupation Major Group", fontsize=12)
        ax.set_title("Task Breakdown by Occupation, Remote Status, and Estimability", fontsize=16, pad=20)
        ax.xaxis.set_major_formatter(mtick.PercentFormatter())
        ax.set_xlim(0, 100)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

        # Add percentage labels inside each bar segment. There is one bar
        # container per plotted column, in the same order as BAR_COLORS.
        for container, bar_color in zip(ax.containers, BAR_COLORS):
            text_color = _get_contrasting_text_color(bar_color)
            for patch in container.patches:
                width = patch.get_width()
                if width > 3:  # Only label segments wider than 3%
                    x = patch.get_x() + width / 2
                    y = patch.get_y() + patch.get_height() / 2
                    ax.text(x, y, f"{width:.1f}%", ha='center', va='center',
                            fontsize=8, color=text_color, fontweight='medium')

        # Place the legend outside the axes so it never covers the bars.
        ax.legend(title="Task Category", bbox_to_anchor=(1.02, 1), loc='upper left', frameon=False)

        # --- File Saving ---
        temp_dir = tempfile.gettempdir()
        temp_path = Path(temp_dir) / "task_breakdown_by_occupation.png"
        fig.savefig(temp_path, dpi=300, bbox_inches='tight')
        logging.info(f"Successfully saved plot to temporary file: {temp_path}")

        return temp_path

    except Exception as e:
        # Top-level boundary for the generator: log with traceback and signal
        # failure via None rather than propagating plotting/IO errors.
        logging.error(f"An error occurred while generating the plot: {e}", exc_info=True)
        return None
    finally:
        # Release the figure's memory; skip if figure creation itself failed.
        if fig is not None:
            plt.close(fig)
|
Loading…
Add table
Add a link
Reference in a new issue