"""Command-line entry point for the analysis pipeline.

Run with ``python -m analysis.main`` from the directory that contains the
``analysis`` package.
"""

import logging
import sys

# This file lives inside the 'analysis' package, so sibling modules are
# reached through explicit relative imports.
from . import data
from . import preprocess
from . import generate

# Configure the root logger once, at import time. Loggers obtained in the
# data, preprocess, and generate modules propagate to the root logger and
# therefore share this format and stream. Note that basicConfig() does
# nothing if the root logger already has handlers configured.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout,
)


def main() -> None:
    """Run the full analysis pipeline from start to finish.

    The pipeline has three stages, executed in order:

    1. Data Setup: ``data.setup_data_and_database()`` prepares the raw data
       and database.
    2. Preprocessing: ``preprocess.run_preprocessing()`` returns the
       analysis-ready data.
    3. Output Generation: ``generate.create_all_outputs(...)`` receives the
       preprocessed data and produces the outputs.

    Raises:
        SystemExit: With exit status 1 if any stage raises an ``Exception``.
            The traceback is logged at CRITICAL level before exiting.
    """
    logger = logging.getLogger(__name__)

    logger.info("=================================================")
    logger.info(" STARTING ECONTAI ANALYSIS PIPELINE ")
    logger.info("=================================================")

    try:
        # Stage 1: set up the data and database.
        logger.info("--- STAGE 1: DATA SETUP ---")
        data.setup_data_and_database()
        logger.info("--- DATA SETUP COMPLETE ---")

        # Stage 2: run the preprocessing pipeline.
        logger.info("--- STAGE 2: PREPROCESSING ---")
        processed_dataframe = preprocess.run_preprocessing()
        logger.info("--- PREPROCESSING COMPLETE ---")

        # Stage 3: generate all outputs from the preprocessed data.
        logger.info("--- STAGE 3: OUTPUT GENERATION ---")
        generate.create_all_outputs(processed_dataframe)
        logger.info("--- OUTPUT GENERATION COMPLETE ---")

        logger.info("=================================================")
        logger.info(" ANALYSIS PIPELINE COMPLETED SUCCESSFULLY ")
        logger.info("=================================================")
    except Exception:
        # Top-level boundary: log the full traceback, then exit with a
        # non-zero status so that automation can detect the failure.
        logger.critical(
            "An unrecoverable error occurred during the pipeline execution.",
            exc_info=True,
        )
        sys.exit(1)


# Run with `python -m analysis.main`. The `-m` flag imports this file as a
# module of the `analysis` package, which is what allows the relative imports
# (e.g. `from . import data`) to resolve; running the file directly by path
# would fail on them.
if __name__ == '__main__':
    main()