41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
"""
This module defines the Metadata model for the pipeline.
"""
|
|
|
|
from datetime import datetime
|
|
from pydantic import BaseModel, Field
|
|
from typing import Dict, Any
|
|
|
|
_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _creation_timestamp() -> str:
    """Return the current local (naive) time rendered with ``_TIMESTAMP_FORMAT``."""
    return datetime.now().strftime(_TIMESTAMP_FORMAT)


class Metadata(BaseModel):
    """
    Pydantic model that accumulates metadata about a pipeline run.

    The intended usage is to create a single instance at the start of the
    pipeline and hand it from step to step, letting each step record its own
    entries. This keeps provenance, version information and similar details
    in one structured place.

    Fields:
        fetchers: Mapping of name -> free-form metadata dict; starts empty.
        enrichments: Mapping of name -> free-form metadata dict; starts empty.
        ts: Timestamp string ("YYYY-MM-DD HH:MM:SS") taken from
            ``datetime.now()`` when the instance is created.
        commit: Value returned by ``_get_current_commit()`` when the
            instance is created.
    """

    fetchers: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    enrichments: Dict[str, Dict[str, Any]] = Field(default_factory=dict)

    ts: str = Field(default_factory=_creation_timestamp)
    # The lambda postpones the lookup of _get_current_commit until an instance
    # is built; that function is defined after this class in the module.
    commit: str = Field(default_factory=lambda: _get_current_commit())
|
|
|
|
|
|
def _get_current_commit() -> str:
    """
    Return the current git commit hash, or a sentinel string on failure.

    Runs ``git rev-parse HEAD`` in the process's current working directory.

    Returns:
        The commit hash with surrounding whitespace stripped on success.
        ``"errored"`` if git started but exited with a non-zero status
        (e.g., the working directory is not a git repository).
        ``"unknown"`` if git could not be started at all (e.g., it is not
        installed, or the executable found on PATH cannot be run).
    """
    import subprocess

    try:
        # Keep the try body to the one call that can raise. git's stderr is
        # discarded because the failure reason is not reported to callers.
        raw_output = subprocess.check_output(
            ["git", "rev-parse", "HEAD"],
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except subprocess.CalledProcessError:
        # git ran but reported failure (e.g., not a git repository).
        return "errored"
    except OSError:
        # git could not be launched. FileNotFoundError (not installed) is a
        # subclass of OSError; catching the parent also covers cases such as
        # PermissionError so that building metadata never crashes on this.
        return "unknown"

    return raw_output.strip()
|