# sprint-econtai/analysis/main.py

import logging
import sys

# Since this file is inside the 'analysis' package, we use relative imports
# to access the other modules within the same package.
from . import data
from . import preprocess
from . import generate

# Root-logger configuration, executed once when this module is imported.
# basicConfig() attaches a stdout handler to the root logger, so records from
# any logger that propagates to the root (the default) are emitted at INFO
# level and above in the format below.
# NOTE(review): basicConfig() is a no-op if the root logger already has
# handlers -- confirm none of the modules imported above configure logging.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(stream=sys.stdout, format=_LOG_FORMAT, level=logging.INFO)

def _log_banner(log, title):
    """Log ``title`` at INFO level, framed above and below by a '=' rule."""
    rule = "================================================="
    for line in (rule, title, rule):
        log.info(line)


def main():
    """
    Run the analysis pipeline end to end and exit non-zero on failure.

    Three stages are executed in order, each bracketed by INFO log messages:

    1. ``data.setup_data_and_database()`` -- data/database setup.
    2. ``preprocess.run_preprocessing()`` -- its return value is kept and
       handed to the next stage.
    3. ``generate.create_all_outputs(...)`` -- called with the result of
       stage 2.

    Any ``Exception`` raised along the way is logged at CRITICAL level
    together with its traceback, after which the process is terminated via
    ``sys.exit(1)``.
    """
    log = logging.getLogger(__name__)
    _log_banner(log, "  STARTING ECONTAI ANALYSIS PIPELINE  ")

    try:
        # Stage 1 -- data and database setup.
        log.info("--- STAGE 1: DATA SETUP ---")
        data.setup_data_and_database()
        log.info("--- DATA SETUP COMPLETE ---")

        # Stage 2 -- preprocessing; the result feeds stage 3.
        log.info("--- STAGE 2: PREPROCESSING ---")
        analysis_frame = preprocess.run_preprocessing()
        log.info("--- PREPROCESSING COMPLETE ---")

        # Stage 3 -- output generation from the preprocessed result.
        log.info("--- STAGE 3: OUTPUT GENERATION ---")
        generate.create_all_outputs(analysis_frame)
        log.info("--- OUTPUT GENERATION COMPLETE ---")

        _log_banner(log, "  ANALYSIS PIPELINE COMPLETED SUCCESSFULLY  ")
    except Exception:
        # Top-level boundary: record the traceback, then signal failure to
        # the caller (shell, CI job, scheduler) through the exit status.
        log.critical(
            "An unrecoverable error occurred during the pipeline execution.",
            exc_info=True,
        )
        sys.exit(1)


# Entry point when this module is executed as a script, e.g. with
# `python -m analysis.main` from the directory that contains the `analysis`
# package. Running it through `-m` (rather than by file path) puts the current
# directory on the import path and makes Python treat the module as part of
# its package, which the relative imports (e.g., `from . import data`) require.
if __name__ == "__main__":
    main()