old

2025-07-15 00:41:05 +02:00 · 2025-07-15 00:41:05 +02:00 · 43076bcbb1
commit 43076bcbb1
parent 720f21a85b
42 changed files with 237415 additions and 7831 deletions
--- a/analysis/main.py
+++ b/analysis/main.py
@@ -0,0 +1,64 @@
+import logging
+import sys
+
+# Since this file is inside the 'analysis' package, we use relative imports
+# to access the other modules within the same package.
+from . import data
+from . import preprocess
+from . import generate
+
+# Set up logging once, at import time, for the whole application.
+# Loggers in the data, preprocess, and generate modules pick up this setup too.
+logging.basicConfig(
+    stream=sys.stdout,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    level=logging.INFO,
+)
+
+def main():
+    """
+    Run the three stages of the analysis pipeline in order.
+
+    1. Data Setup: Downloads and prepares the necessary raw data and database.
+    2. Preprocessing: Cleans, enriches, and transforms the raw data into an
+       analysis-ready DataFrame.
+    3. Output Generation: Runs all registered generators to produce figures,
+       tables, and other outputs, saving them to the 'dist/' directory.
+
+    On any `Exception`, logs the traceback at CRITICAL and exits with status 1.
+    """
+    logger = logging.getLogger(__name__)
+    logger.info("=================================================")
+    logger.info("  STARTING ECONTAI ANALYSIS PIPELINE  ")
+    logger.info("=================================================")
+
+    try:
+        # Stage 1: Set up the data and database
+        logger.info("--- STAGE 1: DATA SETUP ---")
+        data.setup_data_and_database()
+        logger.info("--- DATA SETUP COMPLETE ---")
+
+        # Stage 2: Run the preprocessing pipeline; its result feeds stage 3
+        logger.info("--- STAGE 2: PREPROCESSING ---")
+        processed_dataframe = preprocess.run_preprocessing()
+        logger.info("--- PREPROCESSING COMPLETE ---")
+
+        # Stage 3: Generate all outputs
+        logger.info("--- STAGE 3: OUTPUT GENERATION ---")
+        generate.create_all_outputs(processed_dataframe)
+        logger.info("--- OUTPUT GENERATION COMPLETE ---")
+
+        logger.info("=================================================")
+        logger.info("  ANALYSIS PIPELINE COMPLETED SUCCESSFULLY  ")
+        logger.info("=================================================")
+    except Exception:
+        # No `as e` binding: exc_info=True already attaches the active traceback.
+        logger.critical("An unrecoverable error occurred during the pipeline execution.", exc_info=True)
+        # Exit with a non-zero status code to indicate failure, which is useful for automation.
+        sys.exit(1)
+
+
+# This allows the script to be run from the command line using `python -m analysis.main`.
+# The `-m` flag is important because it runs this file as a module of the 'analysis' package
+# (located via the current directory), so relative imports (e.g., `from . import data`) work.
+if __name__ == '__main__':
+    main()