sprint-econtai/pipeline/runner.py
Félix Dorn 62296e1b69 Feat: Implement task enrichment steps
Implement task estimateability and task estimate enrichment steps. Add a
`create_df_tasks` postprocessor.
2025-07-08 15:27:04 +02:00

74 lines
2.3 KiB
Python

from dotenv import load_dotenv
from .fetchers import fetch_oesm_data, fetch_epoch_remote_data, fetch_onet_database
from .enrichments import enrich_with_task_estimateability, enrich_with_task_estimates
from .postprocessors import check_for_insanity, create_df_tasks
from .generators import GENERATORS
from .run import Run
from .constants import GRAY
import argparse
import platformdirs
import seaborn as sns
import matplotlib as mpl
from pathlib import Path
from typing import Optional
# Per-user cache location for the "econtai" application, as reported by
# platformdirs. run() wraps it in a resolved Path and hands it to Run as
# `cache_dir`.
CACHE_DIR = platformdirs.user_cache_dir("econtai")
def run(output_dir: Path | Optional[str] = None):
    """Execute the pipeline once, from data fetching to output generation.

    The stages run strictly in order: fetchers, ``create_df_tasks``,
    enrichments, the ``check_for_insanity`` postprocessor and finally every
    callable listed in ``GENERATORS``. All stages share a single ``Run``
    object, which also receives the results of the earlier stages.

    Args:
        output_dir: Destination directory. ``None`` selects ``dist/`` (left
            relative); a string is converted to a resolved ``Path``; a
            ``Path`` is used as given. The directory is created if missing.
    """
    load_dotenv()
    _setup_graph_rendering()

    # Normalise the destination into a Path before touching the filesystem.
    if isinstance(output_dir, str):
        output_dir = Path(output_dir).resolve()
    elif output_dir is None:
        output_dir = Path("dist/")
    output_dir.mkdir(parents=True, exist_ok=True)

    cache_root = Path(CACHE_DIR).resolve()
    pipeline_run = Run(output_dir=output_dir, cache_dir=cache_root)
    pipeline_run.cache_dir.mkdir(parents=True, exist_ok=True)

    # Fetchers (fetchers.py): each returns a (data, version) pair.
    onet_conn, onet_version = fetch_onet_database(pipeline_run)
    pipeline_run.onet_conn = onet_conn
    pipeline_run.onet_version = onet_version

    oesm_df, oesm_version = fetch_oesm_data(pipeline_run)
    pipeline_run.oesm_df = oesm_df
    pipeline_run.oesm_version = oesm_version

    epoch_df, epoch_version = fetch_epoch_remote_data(pipeline_run)
    pipeline_run.epoch_df = epoch_df
    pipeline_run.epoch_version = epoch_version

    # create_df_tasks hands back the run object to continue with.
    pipeline_run = create_df_tasks(pipeline_run)

    # Enrichments (enrichments.py)
    estimateability_df = enrich_with_task_estimateability(pipeline_run)
    pipeline_run.task_estimateability_df = estimateability_df
    estimates_df = enrich_with_task_estimates(pipeline_run)
    pipeline_run.task_estimates_df = estimates_df

    # Postprocessors (postprocessors.py)
    check_for_insanity(pipeline_run)

    # Generators (generators/): invoked one after another for their effects.
    for generate in GENERATORS:
        generate(pipeline_run)
def _setup_graph_rendering():
    """Configure the global seaborn/matplotlib look shared by all graphs.

    The seaborn "white" style is applied first and the project overrides
    second. ``sns.set_style`` itself writes into ``mpl.rcParams`` (figure and
    axes face colours, edge, label and tick colours, font family, ...), so
    calling it after the overrides would replace most of them with seaborn's
    defaults.
    """
    # Base theme first: it resets several of the keys customised below.
    sns.set_style("white")

    # Project palette and typography layered on top of the base theme.
    mpl.rcParams.update({
        'figure.facecolor': GRAY['50'],
        'axes.facecolor': GRAY['50'],
        'axes.edgecolor': GRAY['100'],
        'axes.labelcolor': GRAY['700'],
        'xtick.color': GRAY['700'],
        'ytick.color': GRAY['700'],
        'font.family': 'Inter',
        'font.size': 11,
    })
def main():
    """Command-line entry point: parse the arguments and start the pipeline.

    The only option is ``--output-dir``; when it is omitted, ``None`` is
    forwarded to ``run``.
    """
    cli = argparse.ArgumentParser(description="Run the econtai pipeline.")
    cli.add_argument(
        "--output-dir",
        type=str,
        help="The directory to write output files to.",
    )
    options = cli.parse_args()
    run(output_dir=options.output_dir)


# Only start the pipeline when executed as a script, not on import.
if __name__ == "__main__":
    main()