This commit is contained in:
Félix Dorn 2025-07-03 16:26:30 +02:00
commit 2da206d368
17 changed files with 955 additions and 0 deletions

28
pipeline/metadata.py Normal file
View file

@@ -0,0 +1,28 @@
"""
This module defines the Metadata model for the pipeline.
"""
from datetime import datetime
from pydantic import BaseModel, Field
from typing import Dict, Any
class Metadata(BaseModel):
    """
    Structured record of provenance information for one pipeline run.

    The intended usage is to create a single instance at the start of a run
    and hand it from step to step, letting every step attach its own
    details. Keeping this in one model gives a central, typed place for
    data provenance, versions and similar bookkeeping.

    Fields:
        fetchers: Free-form metadata dictionaries keyed by a string
            (starts out empty).
        ts: Creation time rendered as ``YYYY-MM-DD HH:MM:SS``. It comes
            from ``datetime.now()`` without a timezone argument, so it is a
            naive local-time stamp.
        commit: Whatever ``_get_current_commit()`` returns at instantiation
            time.
    """

    # Each instance gets its own fresh dict instead of sharing one default.
    fetchers: Dict[str, Dict[str, Any]] = Field(default_factory=dict)

    # Evaluated per instance, i.e. when the model object is built.
    ts: str = Field(
        default_factory=lambda: format(datetime.now(), "%Y-%m-%d %H:%M:%S"),
    )

    # The lambda defers the name lookup: `_get_current_commit` is defined
    # further down in this module and does not exist yet while the class
    # body is being executed.
    commit: str = Field(
        default_factory=lambda: _get_current_commit(),
    )
def _get_current_commit() -> str:
"""
Returns the current git commit hash, "unknown", or "errored" depending on why the commit could not be retrieved.
"""
raise NotImplementedError