sprint-econtai/analysis/main.py
Félix Dorn 43076bcbb1 old
2025-07-15 00:41:05 +02:00

64 lines
2.6 KiB
Python

import logging
import sys
# Since this file is inside the 'analysis' package, we use relative imports
# to access the other modules within the same package.
from . import data
from . import preprocess
from . import generate
# Root logging setup, performed once when this module is imported.
# Loggers obtained in the data, preprocess, and generate modules hand their
# records up to the root logger, so they share this level, format, and stream.
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def main() -> None:
    """Run the full analysis pipeline, from data setup to output generation.

    The stages run strictly in order:

    1. Data setup: ``data.setup_data_and_database()`` prepares the raw data
       and the database.
    2. Preprocessing: ``preprocess.run_preprocessing()`` returns the
       analysis-ready DataFrame.
    3. Output generation: ``generate.create_all_outputs()`` receives that
       DataFrame and produces the figures, tables, and other outputs
       (saved to the 'dist/' directory).

    Raises:
        SystemExit: With exit status 1 when any stage raises an
            ``Exception``. The traceback is logged at CRITICAL level before
            exiting.
    """
    logger = logging.getLogger(__name__)

    logger.info("=================================================")
    logger.info(" STARTING ECONTAI ANALYSIS PIPELINE ")
    logger.info("=================================================")

    try:
        # Stage 1: set up the data and database.
        logger.info("--- STAGE 1: DATA SETUP ---")
        data.setup_data_and_database()
        logger.info("--- DATA SETUP COMPLETE ---")

        # Stage 2: run the preprocessing pipeline.
        logger.info("--- STAGE 2: PREPROCESSING ---")
        processed_dataframe = preprocess.run_preprocessing()
        logger.info("--- PREPROCESSING COMPLETE ---")

        # Stage 3: generate all outputs from the preprocessed frame.
        logger.info("--- STAGE 3: OUTPUT GENERATION ---")
        generate.create_all_outputs(processed_dataframe)
        logger.info("--- OUTPUT GENERATION COMPLETE ---")

        logger.info("=================================================")
        logger.info(" ANALYSIS PIPELINE COMPLETED SUCCESSFULLY ")
        logger.info("=================================================")
    except Exception:
        # Top-level boundary: exc_info=True attaches the active traceback, so
        # the exception object itself does not need to be bound to a name.
        logger.critical(
            "An unrecoverable error occurred during the pipeline execution.",
            exc_info=True,
        )
        # A non-zero exit status lets shells, CI jobs, and schedulers detect
        # the failure.
        sys.exit(1)
# Script entry point: run with `python -m analysis.main` from the directory
# that contains the `analysis` package. The `-m` flag puts the current working
# directory on sys.path and executes this module as part of its package, which
# is what lets the relative imports (e.g. `from . import data`) resolve.
if __name__ == "__main__":
    main()