old
This commit is contained in:
parent
720f21a85b
commit
43076bcbb1
42 changed files with 237415 additions and 7831 deletions
76
analysis/generate.py
Normal file
76
analysis/generate.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
import importlib
|
||||
import logging
|
||||
import pkgutil
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
# Directory that receives every generated output file (relative path, so it
# is resolved against the current working directory at run time).
DIST_DIR = Path("dist")

# Logging setup: INFO level, each record rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
||||
|
||||
def _resolve_output_path(candidate):
    """Return ``candidate`` as a ``Path`` if it names something that exists.

    Accepts ``pathlib.Path``, ``str`` and any other ``os.PathLike`` value.
    Anything else (``None``, numbers, DataFrames, ...) yields ``None``, as
    does a path that does not exist on disk.
    """
    # An empty string would silently become Path("."), i.e. the current
    # working directory, which always exists -- reject it explicitly.
    if isinstance(candidate, str) and not candidate:
        return None
    try:
        path = Path(candidate)
    except TypeError:
        # Not a path-like value at all.
        return None
    return path if path.exists() else None


def create_all_outputs(processed_df):
    """Discover, import, and run every output generator.

    Iterates over all modules in the ``generators`` sub-package. A module
    that exposes a ``generate`` attribute has it called with
    ``processed_df``; a module without one is skipped with a warning.

    ``generate`` is expected to write its output to a temporary file and
    return the path to it (``pathlib.Path``, ``str`` or any ``os.PathLike``).
    That file is then moved into ``DIST_DIR`` and renamed to
    ``<module name><original suffix>``. If the returned value is not an
    existing path, a warning is logged and nothing is saved.

    Any exception raised while importing or running a single generator is
    logged with its traceback and does not stop the remaining generators.

    Args:
        processed_df (pd.DataFrame): The fully preprocessed data passed to
            each generator function.

    Returns:
        None
    """
    logging.info("Starting output generation...")
    DIST_DIR.mkdir(exist_ok=True)
    logging.info(f"Output directory is '{DIST_DIR.resolve()}'")

    # The generators package is resolved at call time, relative to this
    # module's own package.
    from . import generators as generators_package
    generators_path = generators_package.__path__
    generators_prefix = generators_package.__name__ + "."

    generated_files_count = 0

    # Discover and run all modules in the generators package.
    for _, module_name, _ in pkgutil.iter_modules(generators_path, prefix=generators_prefix):
        try:
            logging.info(f"--- Running generator: {module_name} ---")

            generator_module = importlib.import_module(module_name)

            # Modules without a 'generate' entry point are not generators.
            if not hasattr(generator_module, 'generate'):
                logging.warning(f"Generator module {module_name} does not have a 'generate' function. Skipping.")
                continue

            # Call the generator function, passing in the preprocessed data.
            temp_output_path = _resolve_output_path(
                generator_module.generate(processed_df)
            )
            if temp_output_path is None:
                logging.warning(f"Generator {module_name} did not return a valid output file path. Nothing was saved.")
                continue

            # Name the file after the module (last dotted component) and
            # keep the extension the generator chose for its temp file.
            base_filename = module_name.split('.')[-1]
            final_output_path = DIST_DIR / (base_filename + temp_output_path.suffix)

            # Plain strings keep shutil.move working on interpreters older
            # than 3.9, where path-like arguments were not yet supported.
            shutil.move(str(temp_output_path), str(final_output_path))
            logging.info(f"Successfully generated '{final_output_path.name}'")
            generated_files_count += 1

        except Exception as e:
            # Per-generator boundary: record the failure (with traceback)
            # and carry on with the next generator.
            logging.exception(f"Failed to run generator {module_name}. Error: {e}")

    logging.info(f"--- Output generation complete. Total files generated: {generated_files_count} ---")
|
Loading…
Add table
Add a link
Reference in a new issue