sprint-econtai/analysis/generate.py
Félix Dorn 43076bcbb1 old
2025-07-15 00:41:05 +02:00

76 lines
3.2 KiB
Python

import importlib
import logging
import pkgutil
import shutil
from pathlib import Path
# Every generated artifact ends up in this directory (relative to the CWD).
DIST_DIR = Path("dist")

# Root-logger setup: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def create_all_outputs(processed_df):
    """Discover, import, and run every output generator, collecting results in DIST_DIR.

    Iterates over all modules found in the sibling ``generators`` package.
    Each module is expected to expose a ``generate(data)`` callable, which is
    invoked with ``processed_df``. The callable should write its output to a
    temporary location and return that location; the output is then moved to
    ``DIST_DIR`` and renamed to ``<module name><original suffix>``.

    A failure in one generator (import error, exception inside ``generate``,
    failed move) is logged with its traceback and does not stop the remaining
    generators from running.

    Args:
        processed_df: The preprocessed data, passed unchanged to every
            generator (documented by the original author as a pandas
            DataFrame; this function itself never inspects it).

    Returns:
        None. Results are reported through logging only.
    """
    import os  # local import: only needed to normalise path-like return values

    logging.info("Starting output generation...")
    DIST_DIR.mkdir(exist_ok=True)
    logging.info("Output directory is '%s'", DIST_DIR.resolve())

    # Imported lazily so that importing this module does not require the
    # generators package to be importable.
    from . import generators as generators_package

    generators_path = generators_package.__path__
    generators_prefix = generators_package.__name__ + "."

    generated_files_count = 0

    # Discover and run all modules in the generators package.
    for _, module_name, _ in pkgutil.iter_modules(generators_path, prefix=generators_prefix):
        try:
            logging.info("--- Running generator: %s ---", module_name)
            generator_module = importlib.import_module(module_name)

            if not hasattr(generator_module, 'generate'):
                logging.warning(
                    "Generator module %s does not have a 'generate' function. Skipping.",
                    module_name,
                )
                continue

            result = generator_module.generate(processed_df)

            # Type-check BEFORE any truthiness test: objects such as DataFrames
            # or arrays raise on bool(), which would be misreported as a
            # generator failure instead of "no valid path returned".
            output_path = None
            if isinstance(result, (str, bytes, os.PathLike)):
                raw_path = os.fsdecode(result)
                # An empty string would become Path("."), i.e. the current
                # directory, so treat it as "nothing returned".
                if raw_path:
                    output_path = Path(raw_path)

            if output_path is None or not output_path.exists():
                logging.warning(
                    "Generator %s did not return a valid output file path. Nothing was saved.",
                    module_name,
                )
                continue

            # The last dotted component of the module name becomes the file
            # stem; the extension chosen by the generator is preserved.
            final_filename = module_name.rsplit('.', 1)[-1] + output_path.suffix
            final_output_path = DIST_DIR / final_filename

            # str() keeps shutil.move happy on interpreters older than 3.9,
            # where path-like arguments were not fully supported.
            shutil.move(str(output_path), str(final_output_path))
            logging.info("Successfully generated '%s'", final_output_path.name)
            generated_files_count += 1
        except Exception as e:
            # Deliberate best-effort boundary: log with traceback and carry on
            # with the next generator.
            logging.exception("Failed to run generator %s. Error: %s", module_name, e)

    logging.info(
        "--- Output generation complete. Total files generated: %s ---",
        generated_files_count,
    )