.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/demo_pipeline.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_demo_pipeline.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_demo_pipeline.py:


End-to-End Pipeline Demo
========================

Demonstrates the full CoCo pipeline:

1. Load (Tabular)
2. Preprocess (StandardScaler)
3. Reduce (PCA & UMAP)
4. Report (Comparison)

Usage:
    python examples/demo_pipeline.py

.. GENERATED FROM PYTHON SOURCE LINES 15-132

.. code-block:: Python


    import logging
    from pathlib import Path

    import numpy as np
    import pandas as pd

    from coco_pipe.dim_reduction import DimReduction
    from coco_pipe.io.dataset import TabularDataset
    from coco_pipe.report import from_reductions

    # Configure logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)


    def _sample_metadata(container) -> dict[str, np.ndarray]:
        """Extract observation-aligned metadata columns from a DataContainer.

        Every entry of ``container.coords`` except ``"obs"`` is converted to a
        NumPy array and kept when its first axis has as many elements as
        ``container.X`` has rows.  A missing or empty ``coords`` attribute
        yields an empty dict.

        Parameters
        ----------
        container
            Object exposing ``X`` and, optionally, a ``coords`` mapping.

        Returns
        -------
        dict[str, np.ndarray]
            Coordinate name -> per-sample values.
        """
        n_samples = np.asarray(container.X).shape[0]
        coords = getattr(container, "coords", None) or {}
        metadata = {}
        for key, value in coords.items():
            # "obs" is skipped explicitly (presumably the observation index
            # itself rather than a labelling variable -- confirm in coco_pipe).
            if key == "obs":
                continue
            value = np.asarray(value)
            if value.ndim >= 1 and value.shape[0] == n_samples:
                metadata[key] = value
        return metadata


    def _numeric_only(X):
        """Return ``X`` restricted to its numeric columns.

        DataFrames are filtered with ``select_dtypes``; object-dtype arrays are
        converted to a DataFrame, filtered, and returned as an ndarray; anything
        else is returned unchanged.
        """
        if isinstance(X, pd.DataFrame):
            return X.select_dtypes(include=[np.number])
        if getattr(X, "dtype", None) == object:
            # A frame built from an object array keeps ``object`` columns, so
            # without ``infer_objects()`` the numeric filter would drop every
            # column and leave an (n_samples, 0) matrix.
            frame = pd.DataFrame(X).infer_objects()
            return frame.select_dtypes(include=[np.number]).to_numpy()
        return X


    def _standardize(X):
        """Z-score the columns of ``X`` the way ``StandardScaler`` does.

        The population standard deviation (``ddof=0``) is used for both
        DataFrames and ndarrays; pandas would otherwise default to ``ddof=1``
        and give a different scaling than NumPy.  Columns with zero spread are
        divided by 1 so they become 0 instead of NaN/inf.
        """
        mean = X.mean(axis=0)
        std = np.asarray(X.std(axis=0, ddof=0))
        safe_std = np.where(std == 0, 1.0, std)
        return (X - mean) / safe_std


    def create_synthetic_csv(path: Path, seed: int = 0):
        """Create a dummy CSV file for the demo.

        Writes 200 rows: ten Gaussian features ``feat_0`` .. ``feat_9`` forming
        three clusters (labels "A", "B", "C"), plus random ``center`` and
        ``batch`` columns used as extra metadata.

        Parameters
        ----------
        path : Path
            Destination CSV file; its parent directory must already exist.
        seed : int, default 0
            Seed for the random generator so the demo data (and therefore the
            generated report) is reproducible between runs.
        """
        logger.info(f"Creating synthetic dataset at {path}...")
        rng = np.random.default_rng(seed)

        # 3 Clusters
        c1 = rng.standard_normal((50, 10)) + 5
        c2 = rng.standard_normal((50, 10)) - 5
        c3 = rng.standard_normal((100, 10))

        X = np.vstack([c1, c2, c3])
        labels = ["A"] * 50 + ["B"] * 50 + ["C"] * 100

        df = pd.DataFrame(X, columns=[f"feat_{i}" for i in range(10)])
        df["label"] = labels

        # Add extra metadata for interactive labeling demo
        df["center"] = rng.choice(["Site A", "Site B"], size=len(df))
        df["batch"] = rng.choice(["Batch 1", "Batch 2"], size=len(df))

        df.to_csv(path, index=False)


    def main():
        """Run the demo: write data, load, scale, reduce, and save the report."""
        # Setup
        data_path = Path("examples/outputs/dummy_data.csv")
        data_path.parent.mkdir(parents=True, exist_ok=True)
        create_synthetic_csv(data_path)

        # 1. Load Data
        logger.info("1. Loading Data...")
        ds = TabularDataset(
            data_path, target_col="label", sep=",", meta_columns=["center", "batch"]
        )
        container = ds.load()

        # 2. Preprocessing (simulate via manual scaling)
        logger.info("2. Preprocessing...")
        # Ensure X is numeric, then z-score it column-wise.
        container.X = _standardize(_numeric_only(container.X))

        # 3. Dimensionality Reduction
        logger.info("3. Running Reductions...")

        # PCA
        pca = DimReduction(method="PCA", n_components=2)
        pca_emb = pca.fit_transform(container.X)

        # UMAP
        umap = DimReduction(method="UMAP", n_components=2, n_neighbors=15)
        umap_emb = umap.fit_transform(container.X)

        # 4. Generate Comparative Report
        logger.info("4. Generating Report...")

        # Dummy metrics for demonstration only; they are hard-coded, not
        # computed from the embeddings above.
        metrics_data = {
            "Trustworthiness": [0.95, 0.88],
            "Continuity": [0.90, 0.92],
            "Shepard Goodness": [0.85, 0.91],
        }
        metrics_df = pd.DataFrame(metrics_data, index=["PCA", "UMAP"])

        report = from_reductions(
            reductions=[pca, umap],
            container=container,
            embeddings=[pca_emb, umap_emb],
            labels=container.y,
            metadata=_sample_metadata(container),
            title="Pipeline Demo: PCA vs UMAP",
            config={"pipeline": "Full Demo", "scaling": "StandardScaler"},
        )

        # Add comparison section
        report.add_comparison(metrics_df, name="Method Comparison")

        output_path = Path("examples/outputs/demo_pipeline.html")
        report.save(output_path)
        logger.info(f"Report saved to {output_path}")


    if __name__ == "__main__":
        main()


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 0.416 seconds)


..
.. _sphx_glr_download_auto_examples_demo_pipeline.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: demo_pipeline.ipynb <demo_pipeline.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: demo_pipeline.py <demo_pipeline.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: demo_pipeline.zip <demo_pipeline.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_