Source code for coco_pipe.io.config
"""
Configuration Schemas for IO
============================
Pydantic models for verifying dataset configurations.
Classes
-------
TabularConfig
Configuration for tabular data (CSV, Excel).
BIDSConfig
Configuration for BIDS-compliant datasets.
EmbeddingConfig
Configuration for pre-computed embeddings.
DatasetConfig
Union container for any dataset configuration.
Author: Antigravity
Date: 2026-01-16
"""
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel, Field
[docs]
class BaseDatasetConfig(BaseModel):
path: Path = Field(..., description="Path to the data source (file or directory).")
subjects: Optional[Union[int, List[Union[str, int]]]] = Field(
None, description="Specific subjects to load (int for top N, list for IDs)."
)
[docs]
class TabularConfig(BaseDatasetConfig):
"""Configuration for TabularDataset."""
mode: Literal["tabular"] = "tabular"
target_col: Optional[str] = Field(None, description="Column to use as target `y`.")
index_col: Optional[Union[str, int]] = Field(
None, description="Column to use as index."
)
sep: str = Field("\t", description="Separator for text files.")
header: Optional[Union[int, List[int]]] = 0
sheet_name: Union[str, int] = 0
columns_to_dims: Optional[List[str]] = Field(
None, description="Reshape columns into dimensions."
)
col_sep: str = "_"
meta_columns: Optional[List[str]] = Field(
None, description="Columns to extract as metadata."
)
clean: bool = False
clean_kwargs: Dict[str, Any] = Field(default_factory=dict)
select_kwargs: Dict[str, Any] = Field(
default_factory=dict, description="Arguments for feature selection."
)
[docs]
class BIDSConfig(BaseDatasetConfig):
"""Configuration for BIDSDataset."""
mode: Literal["bids"] = "bids"
task: Optional[str] = None
session: Optional[Union[str, List[str]]] = None
datatype: str = "eeg"
suffix: Optional[str] = None
loading_mode: str = Field(
"epochs",
description="Loading strategy: 'epochs', 'continuous', 'load_existing'.",
)
window_length: Optional[float] = None
stride: Optional[float] = None
[docs]
class EmbeddingConfig(BaseDatasetConfig):
"""Configuration for EmbeddingDataset."""
mode: Literal["embedding"] = "embedding"
pattern: str = "*.pkl"
dims: Tuple[str, ...] = ("obs", "feature")
coords: Optional[Dict[str, Union[List, Any]]] = None
task: Optional[str] = None
run: Optional[str] = None
processing: Optional[str] = None
[docs]
class DatasetConfig(BaseModel):
"""Master configuration container for IO."""
dataset: Union[TabularConfig, BIDSConfig, EmbeddingConfig] = Field(
..., discriminator="mode"
)