64 lines
2.6 KiB
Python
64 lines
2.6 KiB
Python
import logging
|
|
import sys
|
|
|
|
# Since this file is inside the 'analysis' package, we use relative imports
|
|
# to access the other modules within the same package.
|
|
from . import data
|
|
from . import preprocess
|
|
from . import generate
|
|
|
|
# Application-wide logging setup. basicConfig configures the root logger, so
# loggers obtained in the data, preprocess, and generate modules use it too
# (as long as they propagate to the root logger, which is the default).
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(stream=sys.stdout, format=_LOG_FORMAT, level=logging.INFO)
|
|
|
|
def main():
    """
    Run the entire analysis pipeline; this is the main entry point.

    This function orchestrates the three main stages of the analysis, in order:

    1. Data Setup: Downloads and prepares the necessary raw data and database
       (``data.setup_data_and_database()``).
    2. Preprocessing: Cleans, enriches, and transforms the raw data into an
       analysis-ready DataFrame (``preprocess.run_preprocessing()``).
    3. Output Generation: Runs all registered generators to produce figures,
       tables, and other outputs, saving them to the 'dist/' directory
       (``generate.create_all_outputs()``, fed with the result of stage 2).

    Raises:
        SystemExit: With exit status 1 if any stage raises an ``Exception``.
            The failure is logged at CRITICAL level, with its traceback,
            before exiting.
    """
    logger = logging.getLogger(__name__)
    logger.info("=================================================")
    logger.info(" STARTING ECONTAI ANALYSIS PIPELINE ")
    logger.info("=================================================")

    try:
        # Stage 1: Set up the data and database
        logger.info("--- STAGE 1: DATA SETUP ---")
        data.setup_data_and_database()
        logger.info("--- DATA SETUP COMPLETE ---")

        # Stage 2: Run the preprocessing pipeline
        logger.info("--- STAGE 2: PREPROCESSING ---")
        processed_dataframe = preprocess.run_preprocessing()
        logger.info("--- PREPROCESSING COMPLETE ---")

        # Stage 3: Generate all outputs
        logger.info("--- STAGE 3: OUTPUT GENERATION ---")
        generate.create_all_outputs(processed_dataframe)
        logger.info("--- OUTPUT GENERATION COMPLETE ---")

        logger.info("=================================================")
        logger.info(" ANALYSIS PIPELINE COMPLETED SUCCESSFULLY ")
        logger.info("=================================================")

    except Exception:
        # Top-level boundary: exc_info=True attaches the active traceback to
        # the log record, so the exception does not need to be bound to a name.
        logger.critical("An unrecoverable error occurred during the pipeline execution.", exc_info=True)
        # Exit with a non-zero status code to indicate failure, which is useful for automation.
        sys.exit(1)
|
|
|
|
|
|
# Run the pipeline only when this module is executed as a script, e.g. with
# `python -m analysis.main` from the directory that contains the `analysis`
# package. The `-m` flag is important because it runs the module as part of
# its package, which is what allows the relative imports (e.g.,
# `from . import data`) to resolve. Running this file directly by path would
# make those relative imports fail.
if __name__ == '__main__':
    main()
|