sprint-econtai/pipeline/enrichments.py
2025-07-03 19:40:35 +02:00

28 lines
1 KiB
Python

"""
The functions in this module enrich data. Enrichments take time to run and are usually
expensive (API calls...), so each one should manage its own state and only be run if the
data's version is different from the version it last saved.
"""
from typing import Any

import pandas as pd

from .run import Run
def enrich_with_task_estimateability(run: Run) -> pd.DataFrame:
    """Return the task-estimateability enrichment for ``run`` (not implemented yet).

    TODO: planned behavior:
      1. Check ``run.cache_dir / computed_task_estimateability.parquet``;
         if it exists, load it and return it without recomputing.
      2. Otherwise call ``enrich`` with the right parameters, save the
         output to the cache dir, and return it.

    Raises:
        NotImplementedError: always, until the steps above are implemented.
    """
    raise NotImplementedError
def enrich_with_task_estimates(run: Run) -> pd.DataFrame:
    """Return the task-estimates enrichment for ``run`` (not implemented yet).

    TODO: planned behavior:
      1. Check ``run.cache_dir / computed_task_estimates.parquet``;
         if it exists, load it and return it without recomputing.
      2. Otherwise call ``enrich`` with the right parameters, save the
         output to the cache dir, and return it.

    Raises:
        NotImplementedError: always, until the steps above are implemented.
    """
    raise NotImplementedError
def enrich(
    model: str,
    system_prompt: str,
    schema: Any,
    rpm: int,
    chunk_size: int = 100,
    *,
    messages: Any,
):
    """Shared enrichment entry point (not implemented yet).

    ``messages`` is keyword-only: a required parameter may not follow the
    defaulted ``chunk_size`` positionally (that is a SyntaxError), and making
    it keyword-only keeps it required without reordering the parameters.

    Args:
        model: Model identifier (presumably the LLM to query; confirm once
            implemented).
        system_prompt: System prompt text.
        schema: Output schema; expected type not established yet.
        rpm: Presumably a requests-per-minute rate limit; TODO confirm.
        chunk_size: Presumably the number of items processed per batch;
            defaults to 100. TODO confirm.
        messages: Input messages; expected type not established yet.

    Raises:
        NotImplementedError: always, until the function is implemented.
    """
    raise NotImplementedError