# sprint-econtai/pipeline/enrichments.py

"""
This module enriches data, they take time to run, and are usually expensive (API calls...),
they should manage their own state, and only be run if the data's version is different than
their save.
"""
from typing import Any

import pandas as pd

from .run import Run

def enrich_with_task_estimateability(run: Run) -> pd.DataFrame:
    """Placeholder for the task-estimateability enrichment (not implemented yet).

    TODO: implement the following plan:
      1. If ``run.cache_dir / computed_task_estimateability.parquet`` exists,
         load it and return it without recomputing anything.
      2. Otherwise call ``enrich`` with the right parameters, save the output
         to the cache dir, and return it.

    Raises:
        NotImplementedError: always, until the plan above is implemented.
    """
    raise NotImplementedError()

def enrich_with_task_estimates(run: Run) -> pd.DataFrame:
    """Placeholder for the task-estimates enrichment (not implemented yet).

    TODO: implement the following plan:
      1. If ``run.cache_dir / computed_task_estimates.parquet`` exists,
         load it and return it without recomputing anything.
      2. Otherwise call ``enrich`` with the right parameters, save the output
         to the cache dir, and return it.

    Raises:
        NotImplementedError: always, until the plan above is implemented.
    """
    raise NotImplementedError()

def enrich(
    model: str,
    system_prompt: str,
    schema: Any,
    rpm: int,
    chunk_size: int = 100,
    *,
    messages: Any,
):
    """Shared enrichment entry point (not implemented yet).

    NOTE(review): the parameter descriptions below are inferred from the
    parameter names only; confirm them when the body is written.

    Args:
        model: presumably the identifier of the model to query.
        system_prompt: presumably the system prompt sent with every request.
        schema: presumably the expected structure of each response.
        rpm: presumably a requests-per-minute rate limit.
        chunk_size: presumably how many items are processed per batch;
            defaults to 100.
        messages: required, keyword-only; presumably the inputs to enrich.

    Raises:
        NotImplementedError: always, until this function is implemented.
    """
    # `messages` has no default, so it cannot legally follow the defaulted
    # `chunk_size` as a positional parameter (that is a SyntaxError). Making
    # it keyword-only keeps the original names, order and required-ness.
    raise NotImplementedError