import importlib
import logging
import os
import pkgutil
import shutil
from pathlib import Path

# The final destination for all generated outputs
DIST_DIR = Path("dist")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def _publish_output(module_name, produced):
    """Move one generator's output file into ``DIST_DIR``.

    Args:
        module_name: Fully qualified name of the generator module; its last
            dotted component becomes the base name of the published file.
        produced: Whatever the generator's ``generate`` function returned.
            A non-empty ``str`` or ``os.PathLike`` pointing at an existing
            filesystem entry is accepted; anything else is rejected.

    Returns:
        The destination ``Path`` inside ``DIST_DIR`` when the output was
        moved, or ``None`` when ``produced`` was not a usable existing path.
    """
    # Reject None / "" first: Path("") would resolve to "." and always exist.
    if not produced or not isinstance(produced, (str, os.PathLike)):
        return None

    source = Path(produced)
    if not source.exists():
        return None

    # Name the published file after the module, keeping the temp file's
    # original extension.
    base_filename = module_name.rpartition(".")[2]
    destination = DIST_DIR / (base_filename + source.suffix)

    # Plain strings are passed because shutil.move only documents support
    # for path-like arguments from Python 3.9 onwards.
    shutil.move(str(source), str(destination))
    return destination


def create_all_outputs(processed_df) -> None:
    """
    Dynamically discovers, imports, and runs all output generators.

    Every module found in the sibling ``generators`` package is imported.
    If the module exposes a callable ``generate``, it is called with
    ``processed_df``. The generator is expected to write its output to a
    temporary file and return that file's path (a ``pathlib.Path``, ``str``
    or other ``os.PathLike``). The file is then moved into ``DIST_DIR`` and
    renamed to ``<module name><original extension>``.

    A failure inside one generator is logged with its traceback and does not
    prevent the remaining generators from running.

    Args:
        processed_df (pd.DataFrame): The fully preprocessed data to be used
            by the generator functions.

    Raises:
        ImportError: If the ``generators`` package itself cannot be imported
            (this import happens outside the per-generator error handling).
    """
    logging.info("Starting output generation...")
    DIST_DIR.mkdir(parents=True, exist_ok=True)
    logging.info("Output directory is '%s'", DIST_DIR.resolve())

    # Imported lazily so that merely importing this module does not pull in
    # every generator's dependencies.
    from . import generators as generators_package

    generators_path = generators_package.__path__
    generators_prefix = generators_package.__name__ + "."

    generated_files_count = 0

    # Discover and run all modules in the generators package
    for _, module_name, _ in pkgutil.iter_modules(generators_path, prefix=generators_prefix):
        try:
            logging.info("--- Running generator: %s ---", module_name)
            generator_module = importlib.import_module(module_name)

            # A missing or non-callable 'generate' means this module is not
            # a generator; skip it rather than fail.
            generator_func = getattr(generator_module, 'generate', None)
            if not callable(generator_func):
                logging.warning(
                    "Generator module %s does not have a 'generate' function. Skipping.",
                    module_name,
                )
                continue

            final_output_path = _publish_output(module_name, generator_func(processed_df))
            if final_output_path is None:
                logging.warning(
                    "Generator %s did not return a valid output file path. Nothing was saved.",
                    module_name,
                )
                continue

            logging.info("Successfully generated '%s'", final_output_path.name)
            generated_files_count += 1
        except Exception as e:
            # Per-generator boundary: log with traceback and carry on with
            # the next generator.
            logging.exception("Failed to run generator %s. Error: %s", module_name, e)

    logging.info(
        "--- Output generation complete. Total files generated: %s ---",
        generated_files_count,
    )