28 lines
1 KiB
Python
28 lines
1 KiB
Python
"""
This module contains enrichers: functions that enrich data. They take time to run and are
usually expensive (API calls, ...), so they should manage their own state and only be run
when the data's version differs from the version they last saved.
"""
|
|
from typing import Any

import pandas as pd

from .run import Run
|
|
|
|
def enrich_with_task_estimateability(run: Run) -> pd.DataFrame:
    """
    Placeholder for the task-estimateability enrichment; currently always raises.

    TODO: planned behaviour
      1. Check run.cache_dir / computed_task_estimateability.parquet; if it
         exists, load it and return it without recomputing anything.
      2. Otherwise call enrich with the right parameters, save the output to
         the cache dir, and return it.

    Raises:
        NotImplementedError: always, until the steps above are implemented.
    """
    # Nothing is read from `run` yet; the stub fails unconditionally.
    raise NotImplementedError()
|
|
|
|
def enrich_with_task_estimates(run: Run) -> pd.DataFrame:
    """
    Placeholder for the task-estimates enrichment; currently always raises.

    TODO: planned behaviour
      1. Check run.cache_dir / computed_task_estimates.parquet; if it exists,
         load it and return it without recomputing anything.
      2. Otherwise call enrich with the right parameters, save the output to
         the cache dir, and return it.

    Raises:
        NotImplementedError: always, until the steps above are implemented.
    """
    # Nothing is read from `run` yet; the stub fails unconditionally.
    raise NotImplementedError()
|
|
|
|
def enrich(
    model: str,
    system_prompt: str,
    schema: Any,
    rpm: int,
    chunk_size: int = 100,
    *,
    messages: Any,
):
    """
    Shared enrichment entry point; not implemented yet, so it always raises.

    `messages` is keyword-only: it is required but was declared after the
    defaulted `chunk_size`, which Python rejects as a syntax error for
    positional parameters. The bare `*` keeps every parameter name, the
    declaration order and the `chunk_size` default while making the
    signature valid.

    Args:
        model: presumably the identifier of the model to query -- TODO confirm.
        system_prompt: presumably the system prompt sent with each request --
            TODO confirm.
        schema: presumably a description of the expected output structure --
            TODO confirm the concrete type.
        rpm: presumably a requests-per-minute rate limit -- TODO confirm.
        chunk_size: defaults to 100; presumably the number of messages handled
            per batch -- TODO confirm.
        messages: required, keyword-only; presumably the inputs to enrich --
            TODO confirm the concrete type.

    Raises:
        NotImplementedError: always, until the enrichment logic is written.
    """
    raise NotImplementedError
|