28 lines
960 B
Python
28 lines
960 B
Python
"""
This module defines the Metadata model for the pipeline.
"""
|
|
|
|
from datetime import datetime
|
|
from pydantic import BaseModel, Field
|
|
from typing import Dict, Any
|
|
|
|
class Metadata(BaseModel):
    """
    Pydantic model that carries metadata for a pipeline run.

    The intended usage is to create a single instance and hand it from one
    pipeline step to the next, with each step recording its own metadata on
    it. This keeps provenance, version, and similar bookkeeping information
    in one structured place.

    Every field has a default factory, so the defaults are computed when an
    instance is created rather than when the class is defined.
    """

    # Nested mapping of string keys to free-form dictionaries; starts empty.
    # Presumably the outer key is a fetcher name -- confirm against callers.
    fetchers: Dict[str, Dict[str, Any]] = Field(
        default_factory=dict,
    )

    # Creation time from ``datetime.now()`` (naive, local clock), rendered
    # as "YYYY-MM-DD HH:MM:SS".
    ts: str = Field(
        default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    )

    # The lambda defers the lookup of ``_get_current_commit`` until an
    # instance is built, because that helper is defined after this class.
    commit: str = Field(
        default_factory=lambda: _get_current_commit(),
    )
|
|
|
|
|
|
def _get_current_commit() -> str:
    """
    Return the current git commit hash, or a sentinel string on failure.

    The hash is obtained by running ``git rev-parse HEAD`` in the process's
    current working directory. This function never raises; failures are
    reported through the return value so that building pipeline metadata
    cannot abort a run.

    Returns:
        The commit hash printed by git when the lookup succeeds.
        ``"unknown"`` when no commit can be determined: the ``git``
        executable is not installed, the working directory is not inside a
        repository, or the repository has no commits yet.
        ``"errored"`` when invoking git itself failed unexpectedly, for
        example on a permission error or when the command timed out.
    """
    # Imported here so the helper is self-contained and does not depend on
    # the module-level import block.
    import subprocess

    try:
        completed = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            # git's diagnostics are not needed; discarding them also avoids
            # decoding arbitrary locale-dependent error text.
            stderr=subprocess.DEVNULL,
            text=True,
            timeout=5,
            check=False,
        )
    except FileNotFoundError:
        # No git executable (or the working directory no longer exists).
        return "unknown"
    except (OSError, subprocess.SubprocessError):
        # Covers permission problems and subprocess.TimeoutExpired.
        return "errored"

    if completed.returncode != 0:
        # git ran but could not resolve HEAD (not a repository, no commits).
        return "unknown"

    commit_hash = completed.stdout.strip()
    return commit_hash or "unknown"
|