Source code for coco_pipe.report.provenance

"""
Provenance Capture
==================

Utilities for capturing reproducibility metadata (Git hash, environment, versions).
"""

import datetime
import importlib.metadata
import platform
import subprocess
import sys
from typing import Any, Dict


def get_git_revision_hash() -> str:
    """
    Return the current git hash if available.

    The lookup is best-effort: any failure to launch or complete the
    ``git`` command (executable missing, not executable, timeout, ...)
    yields ``"Unknown"`` instead of raising, so provenance capture never
    aborts the caller.

    Returns
    -------
    str
        Short git hash (e.g. "a1b2c3d") or "Unknown".
    """
    try:
        # No ``cwd`` is passed, so git resolves the repository from the
        # current working directory of the process.
        result = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            capture_output=True,
            text=True,
            timeout=1,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers FileNotFoundError (git not installed) as well as
        # PermissionError and similar launch failures; SubprocessError
        # covers TimeoutExpired.
        return "Unknown"

    if result.returncode != 0:
        # Typically "not a git repository".
        return "Unknown"

    # Guard against a successful exit with empty output so the documented
    # contract (hash or "Unknown") always holds.
    return result.stdout.strip() or "Unknown"
def get_package_version(package_name: str) -> str:
    """
    Look up the installed version of a distribution without raising.

    Parameters
    ----------
    package_name : str
        Distribution name as known to the packaging metadata
        (the name used with pip).

    Returns
    -------
    str
        The version string reported by ``importlib.metadata``, or
        "Unknown" when no such distribution is installed.
    """
    try:
        found_version = importlib.metadata.version(package_name)
    except importlib.metadata.PackageNotFoundError:
        # Distribution is not installed in this environment.
        found_version = "Unknown"
    return found_version
def get_environment_info() -> Dict[str, Any]:
    """
    Collect a snapshot of the runtime environment for reproducibility.

    Returns
    -------
    Dict[str, Any]
        Dictionary with the keys ``timestamp_utc``, ``os_platform``,
        ``python_version``, ``command``, ``git_hash``,
        ``coco_pipe_version`` and ``versions`` (a nested dictionary
        mapping numpy, pandas, scipy and plotly to their installed
        version strings, or "Unknown").

    Examples
    --------
    >>> info = get_environment_info()
    >>> print(info['git_hash'])
    a1b2c3d
    """
    # Timezone-aware "now", rendered as a fixed human-readable UTC stamp.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    stamp = now_utc.strftime("%Y-%m-%d %H:%M:%S UTC")

    os_description = platform.platform()
    interpreter_version = platform.python_version()

    # The command line exactly as received, joined with single spaces.
    invoked_as = " ".join(sys.argv)

    revision = get_git_revision_hash()
    own_version = get_package_version("coco-pipe")

    # Key scientific dependencies, recorded in a fixed order.
    tracked = ("numpy", "pandas", "scipy", "plotly")
    dependency_versions = {name: get_package_version(name) for name in tracked}

    return {
        "timestamp_utc": stamp,
        "os_platform": os_description,
        "python_version": interpreter_version,
        "command": invoked_as,
        "git_hash": revision,
        "coco_pipe_version": own_version,
        "versions": dependency_versions,
    }