old
This commit is contained in:
parent
720f21a85b
commit
43076bcbb1
42 changed files with 237415 additions and 7831 deletions
64
analysis/main.py
Normal file
64
analysis/main.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
# Since this file is inside the 'analysis' package, we use relative imports
|
||||
# to access the other modules within the same package.
|
||||
from . import data
|
||||
from . import preprocess
|
||||
from . import generate
|
||||
|
||||
# Application-wide logging setup, performed once when this module is imported.
# basicConfig configures the root logger, so the loggers used by the data,
# preprocess, and generate modules share this level, format and stream.
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
||||
|
||||
def main() -> None:
    """
    Run the entire analysis pipeline from start to finish.

    The three stages are executed strictly in order:

    1. Data Setup: ``data.setup_data_and_database()`` downloads and prepares
       the necessary raw data and database.
    2. Preprocessing: ``preprocess.run_preprocessing()`` cleans, enriches, and
       transforms the raw data into an analysis-ready DataFrame, which is
       passed on to the next stage.
    3. Output Generation: ``generate.create_all_outputs(...)`` runs all
       registered generators to produce figures, tables, and other outputs,
       saving them to the 'dist/' directory.

    Raises:
        SystemExit: with exit status 1 if any stage raises an ``Exception``.
            The failure is logged at CRITICAL level, traceback included,
            before exiting.
    """
    logger = logging.getLogger(__name__)
    banner = "================================================="

    logger.info(banner)
    logger.info(" STARTING ECONTAI ANALYSIS PIPELINE ")
    logger.info(banner)

    try:
        # Stage 1: Set up the data and database
        logger.info("--- STAGE 1: DATA SETUP ---")
        data.setup_data_and_database()
        logger.info("--- DATA SETUP COMPLETE ---")

        # Stage 2: Run the preprocessing pipeline
        logger.info("--- STAGE 2: PREPROCESSING ---")
        processed_dataframe = preprocess.run_preprocessing()
        logger.info("--- PREPROCESSING COMPLETE ---")

        # Stage 3: Generate all outputs
        logger.info("--- STAGE 3: OUTPUT GENERATION ---")
        generate.create_all_outputs(processed_dataframe)
        logger.info("--- OUTPUT GENERATION COMPLETE ---")

        logger.info(banner)
        logger.info(" ANALYSIS PIPELINE COMPLETED SUCCESSFULLY ")
        logger.info(banner)

    except Exception:
        # Top-level boundary: the exception object is not needed by name
        # because exc_info=True attaches the active traceback to the record.
        logger.critical("An unrecoverable error occurred during the pipeline execution.", exc_info=True)
        # Exit with a non-zero status code to indicate failure, which is useful for automation.
        sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point: run the pipeline with `python -m analysis.main`.
# The `-m` flag matters because it executes this file as a module inside the
# `analysis` package, which is what lets the relative imports
# (e.g., `from . import data`) resolve correctly.
if __name__ == '__main__':
    main()
|
Loading…
Add table
Add a link
Reference in a new issue