76 lines
3.2 KiB
Python
76 lines
3.2 KiB
Python
import importlib
|
|
import logging
|
|
import pkgutil
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
# Every generated artifact ends up in this directory.
DIST_DIR = Path("dist")

# Root logger configuration: INFO and above, each record prefixed with a
# timestamp and its severity level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
|
|
|
def create_all_outputs(processed_df):
    """
    Discover, import, and run every output generator, collecting results in 'dist/'.

    Each module found in the sibling ``generators`` package is imported and,
    if it exposes a ``generate`` attribute, called as ``generate(processed_df)``.
    A generator is expected to save its output to a temporary file and return
    the path to that file, either as a ``pathlib.Path`` or as a ``str``. The
    file is then moved into ``DIST_DIR`` and named after the generator module,
    keeping the extension of the temporary file.

    A failure inside one generator is logged together with its traceback and
    does not prevent the remaining generators from running.

    Args:
        processed_df (pd.DataFrame): The fully preprocessed data to be used
            by the generator functions.

    Returns:
        None. Progress, warnings, and errors are reported through ``logging``.
    """
    logging.info("Starting output generation...")
    DIST_DIR.mkdir(parents=True, exist_ok=True)
    logging.info(f"Output directory is '{DIST_DIR.resolve()}'")

    # Function-scope import: the generators package is resolved at call time.
    from . import generators as generators_package

    generators_prefix = generators_package.__name__ + "."
    generated_files_count = 0

    # Discover and run all modules in the generators package.
    for _, module_name, _ in pkgutil.iter_modules(
        generators_package.__path__, prefix=generators_prefix
    ):
        try:
            logging.info(f"--- Running generator: {module_name} ---")
            generator_module = importlib.import_module(module_name)

            generator_func = getattr(generator_module, 'generate', None)
            if generator_func is None:
                logging.warning(f"Generator module {module_name} does not have a 'generate' function. Skipping.")
                continue

            # Call the generator, passing in the preprocessed data.
            temp_output_path = _existing_output_path(generator_func(processed_df))
            if temp_output_path is None:
                logging.warning(f"Generator {module_name} did not return a valid output file path. Nothing was saved.")
                continue

            # Name the output after the last component of the dotted module
            # name, keeping the extension of the temporary file.
            base_filename = module_name.rpartition('.')[2]
            final_output_path = DIST_DIR / (base_filename + temp_output_path.suffix)

            # Plain strings are passed because shutil.move is only documented
            # to accept path-like objects from Python 3.9 onwards.
            shutil.move(str(temp_output_path), str(final_output_path))
            logging.info(f"Successfully generated '{final_output_path.name}'")
            generated_files_count += 1
        except Exception as e:
            # Boundary handler: record the traceback, then continue with the
            # next generator instead of aborting the whole run.
            logging.exception(f"Failed to run generator {module_name}. Error: {e}")

    logging.info(f"--- Output generation complete. Total files generated: {generated_files_count} ---")


def _existing_output_path(candidate):
    """Return *candidate* as a ``Path`` if it names an existing path, else ``None``."""
    # The type check comes first so that return values which raise when used
    # in a boolean context are rejected without being evaluated for truth.
    # The truthiness check then filters out the empty string, which would
    # otherwise resolve to the current directory.
    if not isinstance(candidate, (str, Path)) or not candidate:
        return None
    path = Path(candidate)
    return path if path.exists() else None
|