""" This module defines the Metadata model for the pipeline. """ from datetime import datetime from pydantic import BaseModel, Field from typing import Dict, Any class Metadata(BaseModel): """ A Pydantic model for storing pipeline metadata. This class is intended to be instantiated once and passed through the pipeline. Each step in the pipeline can then add its own metadata. This provides a centralized and structured way to track data provenance, versions, and other important information. """ fetchers: Dict[str, Dict[str, Any]] = Field(default_factory=dict) ts: str = Field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S")) commit: str = Field(default_factory=lambda: _get_current_commit()) def _get_current_commit() -> str: """ Returns the current git commit hash, "unknown", or "errored" depending on why the commit could not be retrieved. """ raise NotImplementedError