Source code for coco_pipe.dim_reduction.reducers.base

"""
Base interfaces for dimensionality reduction backends.

This module defines the reducer contract shared by built-in reducers and
user-defined reducers. A reducer is any object derived from `BaseReducer`
implementing `fit` and `transform`, optionally exposing diagnostics and scalar
quality metadata through helper methods.

The surrounding dim-reduction stack uses these interfaces to provide:

- input validation through the reducer `capabilities` mapping
- standardized persistence with `save` and `load`
- reducer-aware reporting and visualization hooks
- optional dependency loading through `coco_pipe.utils.import_optional_dependency`

Notes
-----
`BaseReducer` is the intended extension point for custom reducers. Third-party
reducers can participate in `DimReduction` workflows without extra wrappers as
long as they respect the method contract documented here.
"""

import os
from abc import ABC, abstractmethod
from typing import (
    Any,
    Dict,
    Iterable,
    Optional,
    Union,
)

import joblib
import numpy as np

# Type alias for array-like objects
ArrayLike = Union[np.ndarray, list]

__all__ = ["ArrayLike", "BaseReducer"]



[docs]
class BaseReducer(ABC):
    """
    Abstract base class for all dimensionality reduction implementations.

    This class defines the standard interface that all reducers must implement
    and is safe to subclass for custom reducers. It provides built-in support
    for model persistence (save/load) using joblib.

    For custom reducers operating on nonstandard data layouts, override
    `capabilities` so the manager layer can route validation, scoring,
    plotting, and reporting correctly.

    Parameters
    ----------
    n_components : int, default=2
        Target dimensionality of the reduced representation.
    **kwargs : dict
        Additional keyword arguments stored on `params` and typically forwarded
        to the wrapped estimator or backend implementation.

    Attributes
    ----------
    n_components : int
        Target dimensionality of the reduced representation.
    params : dict
        Additional reducer parameters captured at initialization time.
    model : Any
        Underlying fitted model object, such as a scikit-learn estimator or a
        scientific computing backend. This attribute should be populated by
        `fit`.

    Notes
    -----
    The `capabilities` property returns a plain dictionary consumed by the
    manager and evaluation layers. Custom reducers should declare supported
    diagnostics and scalar metadata explicitly through this mapping. Common
    keys include:

    - `input_ndim` : expected dimensionality of the input container
    - `input_layout` : semantic layout name such as `"standard"`
    - `has_transform` : whether `transform` is supported
    - `has_inverse_transform` : whether inverse transforms are available
    - `has_components` : whether PCA-like components are exposed
    - `supported_diagnostics` : names returned by `get_diagnostics`
    - `has_native_plot` : whether the reducer exposes its own plotting path
    - `is_linear` : whether the reducer is linear
    - `is_stochastic` : whether repeated runs can vary without a fixed seed

    Examples
    --------
    >>> from sklearn.decomposition import PCA
    >>> from coco_pipe.dim_reduction import BaseReducer
    >>>
    >>> class CustomPCAReducer(BaseReducer):
    ...     @property
    ...     def capabilities(self):
    ...         return self._merge_capabilities(
    ...             super().capabilities,
    ...             is_linear=True,
    ...             has_components=True,
    ...             supported_diagnostics=("explained_variance_ratio_",),
    ...         )
    ...
    ...     def fit(self, X, y=None):
    ...         self.model = PCA(n_components=self.n_components, **self.params)
    ...         self.model.fit(X)
    ...         return self
    ...
    ...     def transform(self, X):
    ...         return self.model.transform(X)
    """

    def __init__(self, n_components: int = 2, **kwargs):
        """
        Initialize the reducer.

        Parameters
        ----------
        n_components : int, default=2
            The target number of dimensions.
        **kwargs : dict
            Additional keyword arguments for the underlying model.
        """
        self.n_components = n_components
        self.params = kwargs
        self.model = None
        self.context_: Dict[str, Any] = {}

    @property
    def name(self) -> str:
        """Return a stable public display name for the reducer."""
        return type(self).__name__


[docs]
    def _filter_params(self, fn_or_class: Any, params: dict) -> dict:
        """
        Filter parameters to match the signature of a function or class.

        Parameters
        ----------
        fn_or_class : Any
            The function or class to inspect.
        params : dict
            The parameters to filter.

        Returns
        -------
        filtered_params : dict
            Parameters present in the signature. If the target accepts
            ``**kwargs`` or its signature cannot be inspected, the original
            parameter dictionary is returned unchanged.

        Notes
        -----
        This is a convenience helper for reducer implementations that wrap
        third-party estimators with partially overlapping constructor
        signatures.
        """
        import inspect

        try:
            if inspect.isclass(fn_or_class):
                target = fn_or_class.__init__
            else:
                target = fn_or_class

            sig = inspect.signature(target)
            allowed_params = sig.parameters.keys()

            # If the target accepts **kwargs, don't filter
            if any(
                p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
            ):
                return params

            return {k: v for k, v in params.items() if k in allowed_params}
        except (ValueError, TypeError):
            # Fallback if signature extraction fails (e.g. C extensions)
            return params



[docs]
    def _build_estimator(
        self,
        estimator_cls: Any,
        params: Optional[dict] = None,
        component_param: Optional[str] = "n_components",
        **fixed_kwargs: Any,
    ) -> Any:
        """
        Instantiate an estimator with filtered reducer parameters.

        Parameters
        ----------
        estimator_cls : Any
            Estimator class to instantiate.
        params : dict, optional
            Explicit parameter dictionary to filter instead of `self.params`.
        component_param : str or None, default=\"n_components\"
            Name of the constructor argument receiving `self.n_components`.
            Set to ``None`` to skip injecting the component count.
        **fixed_kwargs : dict
            Keyword arguments always forwarded to the estimator constructor.

        Returns
        -------
        Any
            Instantiated estimator.

        Notes
        -----
        This helper assumes the wrapped backend is constructor-driven and can
        be configured from keyword arguments.
        """
        raw_params = self.params if params is None else params
        filtered_params = self._filter_params(estimator_cls, raw_params)
        constructor_kwargs = dict(fixed_kwargs)
        if component_param is not None:
            constructor_kwargs[component_param] = self.n_components
        return estimator_cls(**constructor_kwargs, **filtered_params)



[docs]
    def _require_fitted(self, method_name: str = "transform", model: Any = None) -> Any:
        """
        Validate that a reducer backend has been fitted before access.

        Parameters
        ----------
        method_name : str, default=\"transform\"
            Operation requiring a fitted model.
        model : Any, optional
            Backend model to check. Defaults to `self.model`.

        Returns
        -------
        Any
            The validated model instance.

        Raises
        ------
        RuntimeError
            If no fitted model is available.
        """
        resolved_model = self.model if model is None else model
        if resolved_model is None:
            raise RuntimeError(
                f"{type(self).__name__} must be fitted before calling {method_name}()."
            )
        return resolved_model



[docs]
    def _merge_capabilities(
        self, base_caps: Dict[str, Any], **overrides: Any
    ) -> Dict[str, Any]:
        """
        Return a capability mapping updated with reducer-specific overrides.

        Parameters
        ----------
        base_caps : dict
            Base capability mapping, typically `super().capabilities`.
        **overrides : dict
            Reducer-specific capability values to apply.

        Returns
        -------
        dict
            Capability mapping with overrides applied.
        """
        caps = dict(base_caps)
        caps.update(overrides)
        for key in ("supported_diagnostics", "supported_metadata"):
            if key in caps:
                caps[key] = list(caps[key])
        return caps



[docs]
    @abstractmethod
    def fit(self, X: ArrayLike, y: Optional[ArrayLike] = None) -> "BaseReducer":
        """
        Fit the model to the data.

        Parameters
        ----------
        X : ArrayLike
            Training data. Most reducers expect `(n_samples, n_features)`, but
            reducers with custom `capabilities["input_layout"]` may accept other
            layouts such as snapshot matrices or grouped trajectory tensors.
        y : ArrayLike, optional
            Optional supervision aligned with the sample axis used by the
            reducer's declared input layout.

        Returns
        -------
        self : BaseReducer
            The fitted reducer instance.

        Notes
        -----
        Most reducers expect `X` to have shape `(n_samples, n_features)`. Some
        reducers operate on alternative layouts and should document those
        layouts through `capabilities`.
        """
        pass



[docs]
    @abstractmethod
    def transform(self, X: ArrayLike) -> np.ndarray:
        """
        Apply dimensionality reduction to X.

        Parameters
        ----------
        X : ArrayLike
            New data to transform. Its layout should match the reducer's
            declared `capabilities`.

        Returns
        -------
        X_new : np.ndarray
            Reduced representation. The exact output shape depends on the
            reducer, but the last dimension usually matches `n_components`.

        Raises
        ------
        RuntimeError
            Raised by concrete implementations when `transform` is called
            before fitting or when the reducer does not support out-of-sample
            transforms.
        """
        pass



[docs]
    def fit_transform(self, X: ArrayLike, y: Optional[ArrayLike] = None) -> np.ndarray:
        """
        Fit the model to data and return the transformed data.

        This method usually calls `fit` and then `transform`, but reducers may
        override it for efficiency if the underlying algorithm supports a
        native combined path.

        Parameters
        ----------
        X : ArrayLike
            Training data following the reducer's declared layout.
        y : ArrayLike, optional
            Optional supervision aligned with the reducer's input layout.

        Returns
        -------
        X_new : np.ndarray
            Reduced representation returned by `transform`.
        """
        self.fit(X, y=y)
        return self.transform(X)



[docs]
    def save(self, filepath: Union[str, os.PathLike]) -> None:
        """
        Persist the reducer to a file.

        The default implementation serializes the reducer instance with joblib.
        Custom reducers should either remain joblib-serializable or override
        this method and `load()` with a custom persistence strategy.

        Parameters
        ----------
        filepath : str or Path
            Path to the output file.

        Notes
        -----
        The default implementation serializes the reducer instance with
        `joblib.dump`. Custom reducers should either remain joblib-serializable
        or override this method and `load` with a custom persistence strategy.
        """
        filepath = str(filepath)
        out_dir = os.path.dirname(filepath)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        joblib.dump(self, filepath)


    @property
    def capabilities(self) -> Dict[str, Any]:
        """
        Return reducer capability flags consumed by the manager layer.

        Custom reducers with nonstandard inputs should override at least
        `input_ndim` and `input_layout`. Reducers exposing diagnostics or
        scalar quality metadata should declare them explicitly through
        `supported_diagnostics` and `supported_metadata`.

        Returns
        -------
        dict
            Mapping of reducer capability flags.

        Notes
        -----
        The default capabilities describe a typical estimator consuming
        `(samples, features)` input and exposing `transform`.
        """
        return {
            "input_ndim": 2,
            "input_layout": "standard",
            "has_transform": True,
            "has_inverse_transform": hasattr(self.model, "inverse_transform")
            if self.model
            else False,
            "has_components": hasattr(self.model, "components_")
            if self.model
            else False,
            "supported_diagnostics": [],
            "supported_metadata": [],
            "has_native_plot": False,
            "is_linear": False,
            "is_stochastic": False,
        }


[docs]
    def _attribute_dict(self, obj: Any, attrs: Iterable[str]) -> Dict[str, Any]:
        """
        Extract requested attributes from a target object into a dictionary.

        This helper filters missing attributes and swallows common access
        errors (such as deferred scikit-learn properties) to return only what
        is currently available on the target.

        Parameters
        ----------
        obj : Any
            Target object to inspect.
        attrs : iterable of str
            Attribute names to attempt to extract.

        Returns
        -------
        dict
            Mapping of available attribute names to their values.
        """
        if obj is None:
            return {}

        out = {}
        for attr in attrs:
            try:
                out[attr] = getattr(obj, attr)
            except (AttributeError, RuntimeError, ValueError):
                continue
        return out



[docs]
    def get_diagnostics(self) -> Dict[str, Any]:
        """
        Return diagnostic arrays or structured artifacts.

        Diagnostics are intended for non-scalar outputs such as explained
        variance curves, eigenvalues, modes, graphs, or training histories.
        Only names declared in `capabilities["supported_diagnostics"]` are
        queried.

        Returns
        -------
        diagnostics : dict
            Dictionary of diagnostic attributes declared in
            `capabilities["supported_diagnostics"]`.

        Raises
        ------
        RuntimeError
            If the reducer has not been fitted.
        """
        self._require_fitted()
        attrs = self.capabilities.get("supported_diagnostics", [])
        diag = self._attribute_dict(self.model, attrs)
        diag.update(self._attribute_dict(self, attrs))
        return diag



[docs]
    def get_quality_metadata(self) -> Dict[str, Any]:
        """
        Return scalar metadata about the reduction process or quality.

        Typical examples include iteration counts, optimization stress, final
        loss values, or backend-specific convergence flags. Only names
        declared in `capabilities["supported_metadata"]` are queried.

        Returns
        -------
        metadata : dict
            Dictionary containing only scalar values corresponding to keys
            declared in `capabilities["supported_metadata"]`.

        Raises
        ------
        RuntimeError
            If the reducer has not been fitted.
        """
        self._require_fitted()
        attrs = self.capabilities.get("supported_metadata", [])
        meta = self._attribute_dict(self.model, attrs)
        meta.update(self._attribute_dict(self, attrs))
        return meta



[docs]
    def get_components(self) -> np.ndarray:
        """
        Return reducer-defined component-like outputs.

        Returns
        -------
        np.ndarray
            Reducer-defined component array.

        Raises
        ------
        ValueError
            If the reducer does not expose public components.
        """
        raise ValueError(
            f"{type(self).__name__} does not expose public get_components()."
        )



[docs]
    @classmethod
    def load(cls, filepath: Union[str, os.PathLike]) -> "BaseReducer":
        """
        Load a reducer from a file.

        Parameters
        ----------
        filepath : str or Path
            Path to the file to load.

        Returns
        -------
        reducer : BaseReducer
            The loaded reducer instance.

        Notes
        -----
        This method assumes the reducer was serialized with `save` or a
        compatible `joblib.dump` call.
        """
        return joblib.load(str(filepath))