diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f500c00..ead7736 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,6 +11,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 + with: + python-version: "3.10.20" - name: Install scalr requirements run: | pip install -r requirements.txt diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 044cfe2..2a6a8ab 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.9" + python-version: "3.10.20" - name: Install pypa/build run: >- python3 -m diff --git a/.github/workflows/run_isort.yml b/.github/workflows/run_isort.yml index 0d7c7b6..ee4332f 100644 --- a/.github/workflows/run_isort.yml +++ b/.github/workflows/run_isort.yml @@ -10,10 +10,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.10.20 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: "3.10.20" - name: Install isort run: pip install isort - name: Run isort diff --git a/.github/workflows/run_pytest.yml b/.github/workflows/run_pytest.yml index 7474e62..be5af6f 100644 --- a/.github/workflows/run_pytest.yml +++ b/.github/workflows/run_pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10.20"] steps: - uses: actions/checkout@v3 diff --git a/README.md b/README.md index d4e5983..45d992a 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,10 @@ ## Pre-requisites and installation scaLR -- ScaLR can be installed using git or pip. It is tested in Python 3.10 and it is recommended to use that environment. +- ScaLR can be installed using git or pip. It is tested in Python 3.10.20 and it is recommended to use that environment. ``` -conda create -n scaLR_env python=3.10 +conda create -n scaLR_env python=3.10.20 conda activate scaLR_env ``` @@ -374,5 +374,4 @@ Performs evaluation of best model trained on user-defined metrics on the test se ## Citation -Jogani Saiyam, Anand Santosh Pol, Mayur Prajapati, Amit Samal, Kriti Bhatia, Jayendra Parmar, Urvik Patel, Falak Shah, Nisarg Vyas, and Saurabh Gupta. "scaLR: a low-resource deep neural network-based platform for single cell analysis and biomarker discovery." bioRxiv (2024): 2024-09. - +Jogani, S., Pol, A. S., Prajapati, M., Samal, A., Bhatia, K., Parmar, J., ... & Gupta, S. (2025). scaLR: a low-resource deep neural network-based platform for single cell analysis and biomarker discovery. Briefings in Bioinformatics, 26(3), bbaf243. diff --git a/config/config.yaml b/config/config.yaml index aebd971..f6d8f71 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,7 +1,7 @@ # Config file for pipeline run. # DEVICE SETUP. -device: 'cuda' +device: 'cpu' # EXPERIMENT. experiment: @@ -16,8 +16,7 @@ data: num_workers: 1 train_val_test: - full_datapath: '/path/to/anndata.h5ad' - + full_datapath: 'path/to/adata.h5ad' splitter_config: name: GroupSplitter params: @@ -35,7 +34,7 @@ data: # params: # **args - target: Cell_Type + target: cell_type # FEATURE SELECTION. diff --git a/pyproject.toml b/pyproject.toml index ed979c0..b3dd290 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,14 +6,14 @@ build-backend = "hatchling.build" name = "pyscaLR" version = "1.1.0" -requires-python = ">=3.10" +requires-python = ">=3.10.20" authors = [ { name="Infocusp", email="saurabh@infocusp.com" }, ] description = "scaLR: Single cell analysis using low resource." readme = "README.md" classifiers = [ - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Operating System :: OS Independent", "Intended Audience :: Science/Research" ] diff --git a/requirements.txt b/requirements.txt index e0620e8..c3a2e56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -anndata==0.10.9 +anndata>=0.11.2,<0.12 isort==5.13.2 loky==3.4.1 memory-profiler==0.61.0 @@ -15,4 +15,4 @@ tensorboard==2.17.0 toml==0.10.2 torch==2.4.1 --index-url https://download.pytorch.org/whl/cu118 tqdm==4.66.5 -yapf==0.40.2 +yapf==0.40.2 \ No newline at end of file diff --git a/scalr/analysis/dge_lmem.py b/scalr/analysis/dge_lmem.py index 5462435..9abfe2a 100644 --- a/scalr/analysis/dge_lmem.py +++ b/scalr/analysis/dge_lmem.py @@ -11,7 +11,6 @@ from anndata import AnnData from anndata import ImplicitModificationWarning -import anndata as ad from anndata.experimental import AnnCollection from joblib import delayed from joblib import Parallel diff --git a/scalr/analysis/dge_pseudobulk.py b/scalr/analysis/dge_pseudobulk.py index 1ceaf70..d919727 100644 --- a/scalr/analysis/dge_pseudobulk.py +++ b/scalr/analysis/dge_pseudobulk.py @@ -4,8 +4,7 @@ from os import path from typing import Optional, Tuple, Union -from anndata import AnnData -import anndata as ad +from anndata import AnnData, concat from anndata.experimental import AnnCollection import matplotlib.pyplot as plt import numpy as np @@ -94,7 +93,7 @@ def _make_design_matrix(self, adata: AnnData, cell_type: str): for sum_sample in condition_subset.obs[self.sum_column].unique(): sum_subset = condition_subset[condition_subset.obs[ self.sum_column] == sum_sample] - subdata = ad.AnnData( + subdata = AnnData( X=sum_subset[:].X.sum(axis=0).reshape( 1, len(sum_subset.var_names)), var=DataFrame(index=sum_subset.var_names), @@ -102,7 +101,7 @@ def _make_design_matrix(self, adata: AnnData, cell_type: str): subdata.obs[self.design_factor_no_undrscr] = [condition] design_matrix_list.append(subdata) - design_matrix = ad.concat(design_matrix_list) + design_matrix = concat(design_matrix_list) return design_matrix def get_differential_expression_results(self, design_matrix: AnnData, diff --git a/scalr/nn/dataloader/simple_metadataloader.py b/scalr/nn/dataloader/simple_metadataloader.py index 4a3899b..04cd0e5 100644 --- a/scalr/nn/dataloader/simple_metadataloader.py +++ b/scalr/nn/dataloader/simple_metadataloader.py @@ -71,7 +71,7 @@ def collate_fn( x = torch.cat( (x, torch.as_tensor(self.metadata_onehotencoder[col].transform( - adata_batch.obs[col].values.reshape(-1, 1)).A, + adata_batch.obs[col].values.reshape(-1, 1)).toarray(), dtype=torch.float32)), dim=1) return x, y diff --git a/scalr/nn/dataloader/test_simple_metadataloader.py b/scalr/nn/dataloader/test_simple_metadataloader.py index d4c9023..0e2647f 100644 --- a/scalr/nn/dataloader/test_simple_metadataloader.py +++ b/scalr/nn/dataloader/test_simple_metadataloader.py @@ -1,6 +1,5 @@ '''This is a test file for simplemetadataloader.''' -import anndata import numpy as np import pandas as pd diff --git a/scalr/utils/data_utils.py b/scalr/utils/data_utils.py index b01c5ed..411daaa 100644 --- a/scalr/utils/data_utils.py +++ b/scalr/utils/data_utils.py @@ -46,7 +46,7 @@ def get_random_samples( random_background_data = data[random_indices].X if not isinstance(random_background_data, np.ndarray): - random_background_data = random_background_data.A + random_background_data = random_background_data.toarray() random_background_data = torch.as_tensor(random_background_data, dtype=torch.float32) diff --git a/scalr/utils/file_utils.py b/scalr/utils/file_utils.py index b518ad8..b7aeab9 100644 --- a/scalr/utils/file_utils.py +++ b/scalr/utils/file_utils.py @@ -7,8 +7,8 @@ from typing import Union from anndata import AnnData -import anndata as ad from anndata.experimental import AnnCollection +from anndata.io import read_h5ad from joblib import delayed from joblib import Parallel import numpy as np @@ -142,7 +142,7 @@ def transform_and_write_data(data: AnnData, chunk_number: int): if transform: data = AnnData(data.X, obs=data.obs, var=data.var) if not isinstance(data.X, np.ndarray): - data.X = data.X.A + data.X = data.X.toarray() data.X = transform(data.X) write_data(data, path.join(dirpath, f'{chunk_number}.h5ad')) @@ -262,7 +262,7 @@ def read_csv(filepath: str, index_col: int = 0) -> pd.DataFrame: def read_anndata(filepath: str, backed: str = 'r') -> AnnData: """This file returns the Anndata object from filepath.""" - data = ad.read_h5ad(filepath, backed=backed) + data = read_h5ad(filepath, backed=backed) return data diff --git a/tutorials/analysis/differential_gene_expression/dge_lmem_main.py b/tutorials/analysis/differential_gene_expression/dge_lmem_main.py index 769a19c..391ac13 100644 --- a/tutorials/analysis/differential_gene_expression/dge_lmem_main.py +++ b/tutorials/analysis/differential_gene_expression/dge_lmem_main.py @@ -13,14 +13,13 @@ from anndata import AnnData from anndata import ImplicitModificationWarning -import anndata as ad from anndata.experimental import AnnCollection +from anndata.io import read_h5ad from joblib import Parallel, delayed import matplotlib.pyplot as plt import numpy as np import pandas as pd from pandas import DataFrame -import scanpy as sc from scipy.optimize import OptimizeWarning import statsmodels.api as sm import statsmodels.formula.api as smf @@ -31,7 +30,7 @@ def main(config): - test_data = sc.read_h5ad(config['full_datapath'], backed='r') + test_data = read_h5ad(config['full_datapath'], backed='r') dirpath = config['dirpath'] dge_type = config['dge_type'] assert (dge_type == 'DgeLMEM') and ('lmem_params' in config), ( diff --git a/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py b/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py index 30cdae9..a300f8f 100644 --- a/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py +++ b/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py @@ -5,22 +5,21 @@ from typing import Optional, Union, Tuple import yaml -import anndata as ad from anndata import AnnData from anndata.experimental import AnnCollection +from anndata.io import read_h5ad import matplotlib.pyplot as plt import numpy as np import pandas as pd from pandas import DataFrame from pydeseq2.dds import DeseqDataSet from pydeseq2.ds import DeseqStats -import scanpy as sc from scalr.analysis import DgePseudoBulk def main(config): - test_data = sc.read_h5ad(config['full_datapath'], backed='r') + test_data = read_h5ad(config['full_datapath'], backed='r') dirpath = config['dirpath'] dge_type = config['dge_type'] assert (dge_type == 'DgePseudoBulk') and ('psedobulk_params' in config), ( diff --git a/tutorials/pipeline/config_celltype.yaml b/tutorials/pipeline/config_celltype.yaml index a2ef0af..bfe1dc1 100644 --- a/tutorials/pipeline/config_celltype.yaml +++ b/tutorials/pipeline/config_celltype.yaml @@ -1,7 +1,7 @@ # Config file for pipeline run for cell type classification. # DEVICE SETUP. -device: 'cuda' +device: 'cpu' # EXPERIMENT. experiment: @@ -15,8 +15,8 @@ data: sample_chunksize: 20000 train_val_test: - full_datapath: 'data/modified_adata.h5ad' - num_workers: 2 + full_datapath: 'path/to/adata.h5ad' + num_workers: 4 splitter_config: name: GroupSplitter diff --git a/tutorials/pipeline/config_clinical.yaml b/tutorials/pipeline/config_clinical.yaml index f4ac2e8..e3417ce 100644 --- a/tutorials/pipeline/config_clinical.yaml +++ b/tutorials/pipeline/config_clinical.yaml @@ -1,7 +1,7 @@ # Config file for pipeline run for clinical condition specific biomarker identification. # DEVICE SETUP. -device: 'cuda' +device: 'cpu' # EXPERIMENT. experiment: @@ -15,7 +15,7 @@ data: sample_chunksize: 20000 train_val_test: - full_datapath: 'data/modified_adata.h5ad' + full_datapath: 'path/to/adata.h5ad' num_workers: 2 splitter_config: diff --git a/tutorials/pipeline/scalr_pipeline.ipynb b/tutorials/pipeline/scalr_pipeline.ipynb index 23ed459..845e131 100644 --- a/tutorials/pipeline/scalr_pipeline.ipynb +++ b/tutorials/pipeline/scalr_pipeline.ipynb @@ -358,7 +358,7 @@ "outputs": [], "source": [ "#Gene expression values of first 5 cells and 10 genes.\n", - "adata.X[:5,:10].A" + "adata.X[:5,:10]\n" ] }, { @@ -385,7 +385,7 @@ "source": [ "# Verifying normalized values in X\n", "# Getting the sum of gene expression values for the first 10 cells (should be floating-point values).\n", - "adata.X[:10,:].A.sum(axis=1)" + "adata.X[:10,:].sum(axis=1)" ] }, { @@ -411,8 +411,8 @@ "outputs": [], "source": [ "# Getting the maximum and minimum gene expression values for the first 1000 cells.\n", - "max_val = np.max(adata.X[:1000, :].A)\n", - "min_val = np.min(adata.X[:1000, :].A)\n", + "max_val = np.max(adata.X[:1000, :])\n", + "min_val = np.min(adata.X[:1000, :])\n", "print(f'Max value : {max_val} | Min value : {min_val}')\n", "# Raising a warning if the values are outside the 0-10 range\n", "if max_val > 10 or min_val < 0:\n", diff --git a/tutorials/pipeline/scalr_pipeline_local_run.ipynb b/tutorials/pipeline/scalr_pipeline_local_run.ipynb index c9fe5ec..b98bef8 100644 --- a/tutorials/pipeline/scalr_pipeline_local_run.ipynb +++ b/tutorials/pipeline/scalr_pipeline_local_run.ipynb @@ -44,21 +44,7 @@ "metadata": { "id": "CdutIWiy8xJb" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'scaLR'...\n", - "remote: Enumerating objects: 3452, done.\u001b[K\n", - "remote: Counting objects: 100% (372/372), done.\u001b[K\n", - "remote: Compressing objects: 100% (181/181), done.\u001b[K\n", - "remote: Total 3452 (delta 243), reused 261 (delta 189), pack-reused 3080 (from 1)\u001b[K\n", - "Receiving objects: 100% (3452/3452), 170.03 MiB | 2.80 MiB/s, done.\n", - "Resolving deltas: 100% (2073/2073), done.\n" - ] - } - ], + "outputs": [], "source": [ "!git clone https://github.com/infocusp/scaLR.git" ] @@ -78,89 +64,7 @@ "metadata": { "id": "9dQLPmLwPL0C" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: anndata==0.10.9 in /home/amit.samal/.local/lib/python3.10/site-packages (0.10.9)\n", - "Requirement already satisfied: isort==5.13.2 in /home/amit.samal/.local/lib/python3.10/site-packages (5.13.2)\n", - "Collecting loky==3.4.1\n", - " Downloading loky-3.4.1-py3-none-any.whl.metadata (6.4 kB)\n", - "Requirement already satisfied: pillow==10.4.0 in /home/amit.samal/.local/lib/python3.10/site-packages (10.4.0)\n", - "Requirement already satisfied: pydeseq2==0.4.11 in /home/amit.samal/.local/lib/python3.10/site-packages (0.4.11)\n", - "Requirement already satisfied: pyparsing==3.2.0 in /home/amit.samal/.local/lib/python3.10/site-packages (3.2.0)\n", - "Requirement already satisfied: pytest==8.3.3 in /home/amit.samal/.local/lib/python3.10/site-packages (8.3.3)\n", - "Requirement already satisfied: PyYAML==6.0.2 in /home/amit.samal/.local/lib/python3.10/site-packages (6.0.2)\n", - "Requirement already satisfied: scanpy==1.10.3 in /home/amit.samal/.local/lib/python3.10/site-packages (1.10.3)\n", - "Requirement already satisfied: scikit-learn==1.5.2 in /home/amit.samal/.local/lib/python3.10/site-packages (1.5.2)\n", - "Requirement already satisfied: shap==0.46.0 in /home/amit.samal/.local/lib/python3.10/site-packages (0.46.0)\n", - "Requirement already satisfied: tensorboard==2.17.0 in /home/amit.samal/.local/lib/python3.10/site-packages (2.17.0)\n", - "Requirement already satisfied: toml==0.10.2 in /home/amit.samal/.local/lib/python3.10/site-packages (0.10.2)\n", - "Requirement already satisfied: tqdm==4.66.5 in /home/amit.samal/.local/lib/python3.10/site-packages (4.66.5)\n", - "Requirement already satisfied: yapf==0.40.2 in /home/amit.samal/.local/lib/python3.10/site-packages (0.40.2)\n", - "Requirement already satisfied: array-api-compat!=1.5,>1.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.5.1)\n", - "Requirement already satisfied: exceptiongroup in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.2.0)\n", - "Requirement already satisfied: h5py>=3.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (3.10.0)\n", - "Requirement already satisfied: natsort in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (8.4.0)\n", - "Requirement already satisfied: numpy>=1.23 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.26.3)\n", - "Requirement already satisfied: packaging>=20.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (24.0)\n", - "Requirement already satisfied: pandas!=2.1.0rc0,!=2.1.2,>=1.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.5.3)\n", - "Requirement already satisfied: scipy>1.8 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.12.0)\n", - "Requirement already satisfied: cloudpickle in /home/amit.samal/.local/lib/python3.10/site-packages (from loky==3.4.1) (3.0.0)\n", - "Requirement already satisfied: matplotlib>=3.6.2 in /home/amit.samal/.local/lib/python3.10/site-packages (from pydeseq2==0.4.11) (3.8.3)\n", - "Requirement already satisfied: iniconfig in /home/amit.samal/.local/lib/python3.10/site-packages (from pytest==8.3.3) (2.0.0)\n", - "Requirement already satisfied: pluggy<2,>=1.5 in /home/amit.samal/.local/lib/python3.10/site-packages (from pytest==8.3.3) (1.5.0)\n", - "Requirement already satisfied: tomli>=1 in /home/amit.samal/.local/lib/python3.10/site-packages (from pytest==8.3.3) (2.1.0)\n", - "Requirement already satisfied: joblib in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (1.3.2)\n", - "Requirement already satisfied: legacy-api-wrap>=1.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (1.4)\n", - "Requirement already satisfied: networkx>=2.7 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (3.2.1)\n", - "Requirement already satisfied: numba>=0.56 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.59.1)\n", - "Requirement already satisfied: patsy in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.5.6)\n", - "Requirement already satisfied: pynndescent>=0.5 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.5.11)\n", - "Requirement already satisfied: seaborn>=0.13 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.13.2)\n", - "Requirement already satisfied: session-info in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (1.0.0)\n", - "Requirement already satisfied: statsmodels>=0.13 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.14.1)\n", - "Requirement already satisfied: umap-learn!=0.5.0,>=0.5 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.5.5)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from scikit-learn==1.5.2) (3.4.0)\n", - "Requirement already satisfied: slicer==0.0.8 in /home/amit.samal/.local/lib/python3.10/site-packages (from shap==0.46.0) (0.0.8)\n", - "Requirement already satisfied: absl-py>=0.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (2.1.0)\n", - "Requirement already satisfied: grpcio>=1.48.2 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (1.70.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (3.7)\n", - "Requirement already satisfied: protobuf!=4.24.0,<5.0.0,>=3.19.6 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (4.25.6)\n", - "Requirement already satisfied: setuptools>=41.0.0 in /usr/lib/python3/dist-packages (from tensorboard==2.17.0) (59.6.0)\n", - "Requirement already satisfied: six>1.9 in /usr/lib/python3/dist-packages (from tensorboard==2.17.0) (1.16.0)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (3.1.3)\n", - "Requirement already satisfied: importlib-metadata>=6.6.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from yapf==0.40.2) (8.6.1)\n", - "Requirement already satisfied: platformdirs>=3.5.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from yapf==0.40.2) (4.2.0)\n", - "Requirement already satisfied: zipp>=3.20 in /home/amit.samal/.local/lib/python3.10/site-packages (from importlib-metadata>=6.6.0->yapf==0.40.2) (3.21.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (1.2.0)\n", - "Requirement already satisfied: cycler>=0.10 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (4.50.0)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (1.4.5)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (2.9.0.post0)\n", - "Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in /home/amit.samal/.local/lib/python3.10/site-packages (from numba>=0.56->scanpy==1.10.3) (0.42.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas!=2.1.0rc0,!=2.1.2,>=1.4->anndata==0.10.9) (2022.1)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard==2.17.0) (3.0.2)\n", - "Requirement already satisfied: stdlib-list in /home/amit.samal/.local/lib/python3.10/site-packages (from session-info->scanpy==1.10.3) (0.10.0)\n", - "Downloading loky-3.4.1-py3-none-any.whl (54 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hInstalling collected packages: loky\n", - "Successfully installed loky-3.4.1\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: memory-profiler==0.61.0 in /home/amit.samal/.local/lib/python3.10/site-packages (0.61.0)\n", - "Requirement already satisfied: psutil in /home/amit.samal/.local/lib/python3.10/site-packages (from memory-profiler==0.61.0) (5.9.8)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "import sys\n", "imported_packages = {pkg.split('.')[0] for pkg in sys.modules.keys()}\n", @@ -199,29 +103,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "loCfvnwt9ei1" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-02-27 18:52:02-- https://datasets.cellxgene.cziscience.com/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad\n", - "Resolving datasets.cellxgene.cziscience.com (datasets.cellxgene.cziscience.com)... 18.239.111.15, 18.239.111.109, 18.239.111.30, ...\n", - "Connecting to datasets.cellxgene.cziscience.com (datasets.cellxgene.cziscience.com)|18.239.111.15|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 980103606 (935M) [binary/octet-stream]\n", - "Saving to: ‘data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad’\n", - "\n", - "21ef2ea2-cbed-4b6c- 100%[===================>] 934.70M 3.21MB/s in 4m 48s \n", - "\n", - "2025-02-27 18:56:51 (3.25 MB/s) - ‘data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad’ saved [980103606/980103606]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# This shell will take approximately 00:00:53 (hh:mm:ss) to run.\n", "!wget -P data https://datasets.cellxgene.cziscience.com/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad" @@ -238,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "id": "23C87j3PR9ox" }, @@ -254,277 +140,33 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "eDH3GxXr-er6" }, "outputs": [], "source": [ - "adata = ad.read_h5ad(\"data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad\",backed='r')" + "adata = ad.read_h5ad(\"/home/jupyter/haritable_git/scaLR/data/af8c4fce-4c63-4671-b339-91a383cf36f6.h5ad\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "id": "SS4oTWW6Xn8c" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "The anndata has '125117' cells and '30695' genes\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"\\nThe anndata has '{adata.n_obs}' cells and '{adata.n_vars}' genes\")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "id": "z1u-kctbSStJ" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dsm_severity_score_groupdisease_ontology_term_idseveritytissue_ontology_term_idtimepointoutcomedsm_severity_scoredays_since_hospitalizedagedonor_id...tissue_typecell_typeassaydiseaseorganismsextissueself_reported_ethnicitydevelopment_stageobservation_joinid
AAACCTGAGAAACCTA-1_1DSM_lowMONDO:0100096ModerateUBERON:0000178T0alive-1.9508581.055.0HGR0000083...tissuenon-classical monocyte10x 5' v1COVID-19Homo sapiensmalebloodEuropean55-year-old stage!9L}G4hgnw
AAACCTGAGGGTTTCT-1_1DSM_highMONDO:0100096CriticalUBERON:0000178T0alive-0.09237513.040.0HGR0000078...tissueclassical monocyte10x 5' v1COVID-19Homo sapiensfemalebloodEuropean40-year-old stageYRcUzlVyg0
AAACCTGCACCTGGTG-1_1DSM_highMONDO:0100096CriticalUBERON:0000178T0alive2.9543501.060.0HGR0000098...tissueCD16-positive, CD56-dim natural killer cell, h...10x 5' v1COVID-19Homo sapiensmalebloodEuropean60-year-old stage)*azge@M0l
AAACCTGGTCCGAGTC-1_1DSM_highMONDO:0100096CriticalUBERON:0000178T0deceased3.2762336.076.0HGR0000141...tissueclassical monocyte10x 5' v1COVID-19Homo sapiensmalebloodEuropean76-year-old stageE<FU`+QN&T
AAACCTGGTGCCTTGG-1_1DSM_lowMONDO:0100096CriticalUBERON:0000178T0alive-0.3488881.070.0HGR0000093...tissueclassical monocyte10x 5' v1COVID-19Homo sapiensmalebloodEuropean70-year-old stage2MZ#6SX}{g
\n", - "

5 rows × 32 columns

\n", - "
" - ], - "text/plain": [ - " dsm_severity_score_group disease_ontology_term_id \\\n", - "AAACCTGAGAAACCTA-1_1 DSM_low MONDO:0100096 \n", - "AAACCTGAGGGTTTCT-1_1 DSM_high MONDO:0100096 \n", - "AAACCTGCACCTGGTG-1_1 DSM_high MONDO:0100096 \n", - "AAACCTGGTCCGAGTC-1_1 DSM_high MONDO:0100096 \n", - "AAACCTGGTGCCTTGG-1_1 DSM_low MONDO:0100096 \n", - "\n", - " severity tissue_ontology_term_id timepoint outcome \\\n", - "AAACCTGAGAAACCTA-1_1 Moderate UBERON:0000178 T0 alive \n", - "AAACCTGAGGGTTTCT-1_1 Critical UBERON:0000178 T0 alive \n", - "AAACCTGCACCTGGTG-1_1 Critical UBERON:0000178 T0 alive \n", - "AAACCTGGTCCGAGTC-1_1 Critical UBERON:0000178 T0 deceased \n", - "AAACCTGGTGCCTTGG-1_1 Critical UBERON:0000178 T0 alive \n", - "\n", - " dsm_severity_score days_since_hospitalized age \\\n", - "AAACCTGAGAAACCTA-1_1 -1.950858 1.0 55.0 \n", - "AAACCTGAGGGTTTCT-1_1 -0.092375 13.0 40.0 \n", - "AAACCTGCACCTGGTG-1_1 2.954350 1.0 60.0 \n", - "AAACCTGGTCCGAGTC-1_1 3.276233 6.0 76.0 \n", - "AAACCTGGTGCCTTGG-1_1 -0.348888 1.0 70.0 \n", - "\n", - " donor_id ... tissue_type \\\n", - "AAACCTGAGAAACCTA-1_1 HGR0000083 ... tissue \n", - "AAACCTGAGGGTTTCT-1_1 HGR0000078 ... tissue \n", - "AAACCTGCACCTGGTG-1_1 HGR0000098 ... tissue \n", - "AAACCTGGTCCGAGTC-1_1 HGR0000141 ... tissue \n", - "AAACCTGGTGCCTTGG-1_1 HGR0000093 ... tissue \n", - "\n", - " cell_type \\\n", - "AAACCTGAGAAACCTA-1_1 non-classical monocyte \n", - "AAACCTGAGGGTTTCT-1_1 classical monocyte \n", - "AAACCTGCACCTGGTG-1_1 CD16-positive, CD56-dim natural killer cell, h... \n", - "AAACCTGGTCCGAGTC-1_1 classical monocyte \n", - "AAACCTGGTGCCTTGG-1_1 classical monocyte \n", - "\n", - " assay disease organism sex tissue \\\n", - "AAACCTGAGAAACCTA-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n", - "AAACCTGAGGGTTTCT-1_1 10x 5' v1 COVID-19 Homo sapiens female blood \n", - "AAACCTGCACCTGGTG-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n", - "AAACCTGGTCCGAGTC-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n", - "AAACCTGGTGCCTTGG-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n", - "\n", - " self_reported_ethnicity development_stage \\\n", - "AAACCTGAGAAACCTA-1_1 European 55-year-old stage \n", - "AAACCTGAGGGTTTCT-1_1 European 40-year-old stage \n", - "AAACCTGCACCTGGTG-1_1 European 60-year-old stage \n", - "AAACCTGGTCCGAGTC-1_1 European 76-year-old stage \n", - "AAACCTGGTGCCTTGG-1_1 European 70-year-old stage \n", - "\n", - " observation_joinid \n", - "AAACCTGAGAAACCTA-1_1 !9L}G4hgnw \n", - "AAACCTGAGGGTTTCT-1_1 YRcUzlVyg0 \n", - "AAACCTGCACCTGGTG-1_1 )*azge@M0l \n", - "AAACCTGGTCCGAGTC-1_1 E 10 or min_val < 0:\n", @@ -721,149 +265,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "id": "bd2fTv0gdluU" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mvp.meanmvp.dispersionmvp.dispersion.scaledmvp.variablefeature_is_filteredfeature_namefeature_referencefeature_biotypefeature_lengthfeature_type
ENSG000001684540.0003801.1688760.181734FalseFalseTXNDC2NCBITaxon:9606gene1703protein_coding
ENSG000001978520.0359951.6341790.886458FalseFalseINKA2NCBITaxon:9606gene1217protein_coding
ENSG000001968780.0088621.6177290.861545FalseFalseLAMB3NCBITaxon:9606gene3931protein_coding
ENSG000002565400.0000221.6609930.927070FalseFalseIQSEC3-AS1NCBITaxon:9606gene1065lncRNA
ENSG000001391800.0901001.1847200.205731FalseFalseNDUFA9NCBITaxon:9606gene782protein_coding
\n", - "
" - ], - "text/plain": [ - " mvp.mean mvp.dispersion mvp.dispersion.scaled \\\n", - "ENSG00000168454 0.000380 1.168876 0.181734 \n", - "ENSG00000197852 0.035995 1.634179 0.886458 \n", - "ENSG00000196878 0.008862 1.617729 0.861545 \n", - "ENSG00000256540 0.000022 1.660993 0.927070 \n", - "ENSG00000139180 0.090100 1.184720 0.205731 \n", - "\n", - " mvp.variable feature_is_filtered feature_name \\\n", - "ENSG00000168454 False False TXNDC2 \n", - "ENSG00000197852 False False INKA2 \n", - "ENSG00000196878 False False LAMB3 \n", - "ENSG00000256540 False False IQSEC3-AS1 \n", - "ENSG00000139180 False False NDUFA9 \n", - "\n", - " feature_reference feature_biotype feature_length \\\n", - "ENSG00000168454 NCBITaxon:9606 gene 1703 \n", - "ENSG00000197852 NCBITaxon:9606 gene 1217 \n", - "ENSG00000196878 NCBITaxon:9606 gene 3931 \n", - "ENSG00000256540 NCBITaxon:9606 gene 1065 \n", - "ENSG00000139180 NCBITaxon:9606 gene 782 \n", - "\n", - " feature_type \n", - "ENSG00000168454 protein_coding \n", - "ENSG00000197852 protein_coding \n", - "ENSG00000196878 protein_coding \n", - "ENSG00000256540 lncRNA \n", - "ENSG00000139180 protein_coding " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#Gene metadata\n", "adata.var.head()" @@ -890,162 +296,23 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "id": "qoSHdJtwgPaA" }, "outputs": [], "source": [ - "adata.var.set_index('feature_name',inplace=True)" + "adata.var.set_index('feature_name',inplace=True)\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "id": "p3LvDmZmhJ_c" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mvp.meanmvp.dispersionmvp.dispersion.scaledmvp.variablefeature_is_filteredfeature_referencefeature_biotypefeature_lengthfeature_type
feature_name
TXNDC20.0003801.1688760.181734FalseFalseNCBITaxon:9606gene1703protein_coding
INKA20.0359951.6341790.886458FalseFalseNCBITaxon:9606gene1217protein_coding
LAMB30.0088621.6177290.861545FalseFalseNCBITaxon:9606gene3931protein_coding
IQSEC3-AS10.0000221.6609930.927070FalseFalseNCBITaxon:9606gene1065lncRNA
NDUFA90.0901001.1847200.205731FalseFalseNCBITaxon:9606gene782protein_coding
\n", - "
" - ], - "text/plain": [ - " mvp.mean mvp.dispersion mvp.dispersion.scaled mvp.variable \\\n", - "feature_name \n", - "TXNDC2 0.000380 1.168876 0.181734 False \n", - "INKA2 0.035995 1.634179 0.886458 False \n", - "LAMB3 0.008862 1.617729 0.861545 False \n", - "IQSEC3-AS1 0.000022 1.660993 0.927070 False \n", - "NDUFA9 0.090100 1.184720 0.205731 False \n", - "\n", - " feature_is_filtered feature_reference feature_biotype \\\n", - "feature_name \n", - "TXNDC2 False NCBITaxon:9606 gene \n", - "INKA2 False NCBITaxon:9606 gene \n", - "LAMB3 False NCBITaxon:9606 gene \n", - "IQSEC3-AS1 False NCBITaxon:9606 gene \n", - "NDUFA9 False NCBITaxon:9606 gene \n", - "\n", - " feature_length feature_type \n", - "feature_name \n", - "TXNDC2 1703 protein_coding \n", - "INKA2 1217 protein_coding \n", - "LAMB3 3931 protein_coding \n", - "IQSEC3-AS1 1065 lncRNA \n", - "NDUFA9 782 protein_coding " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Now the index values are the gene symbols.\n", "adata.var.head()" @@ -1053,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "id": "6yCi6UQ-kh0Q" }, @@ -1063,7 +330,7 @@ "# This shell will take approximately 00:00:47 (hh:mm:ss) to run.\n", "adata.obs.index = adata.obs.index.astype(str)\n", "adata.var.index = adata.var.index.astype(str)\n", - "AnnData(X=adata.X,obs=adata.obs,var=adata.var).write('data/modified_adata.h5ad',compression='gzip')" + "AnnData(X=adata.X,obs=adata.obs,var=adata.var).write('/home/jupyter/haritable_git/scaLR/data/modified_adataT.h5ad',compression='gzip')" ] }, { @@ -1220,19 +487,11 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "id": "uLgN7MDv7hV-" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/bin/bash: line 1: python: command not found\n" - ] - } - ], + "outputs": [], "source": [ "# Possible flags using 'scaLR/pipeline.py'\n", "!python scaLR/pipeline.py --help" @@ -1249,66 +508,11 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "id": "xqvT9AiQFVGq" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2025-02-27 19:02:51,535 - ROOT - INFO : Experiment directory: `scalr_experiments/exp_name_0`\n", - "2025-02-27 19:02:51,544 - ROOT - INFO : Data Ingestion pipeline running\n", - "2025-02-27 19:02:51,544 - DataIngestion - INFO : Generating Train, Validation and Test sets\n", - "2025-02-27 19:03:35,769 - DataIngestion - INFO : Generate label mappings for all columns in metadata\n", - "2025-02-27 19:03:36,946 - ROOT - INFO : Feature Extraction pipeline running\n", - "2025-02-27 19:03:36,946 - File Utils - INFO : Data Loaded from Final datapaths\n", - "2025-02-27 19:03:37,467 - FeatureExtraction - INFO : Feature subset models training\n", - "2025-02-27 19:05:09,181 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:09,253 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:09,295 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:09,393 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:09,750 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:09,751 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:09,770 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:09,881 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:16,105 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:16,106 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:16,153 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:16,154 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:16,168 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:16,174 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:20,327 - FeatureExtraction - INFO : Feature scoring\n", - "2025-02-27 19:05:20,712 - FeatureExtraction - INFO : Top features extraction\n", - "2025-02-27 19:05:20,719 - FeatureExtraction - INFO : Writing feature-subset data onto disk\n", - "2025-02-27 19:05:51,902 - ROOT - INFO : Final Model Training pipeline running\n", - "2025-02-27 19:05:51,905 - File Utils - INFO : Data Loaded from Feature subset datapaths\n", - "2025-02-27 19:05:52,382 - ModelTraining - INFO : Building model training artifacts\n", - "2025-02-27 19:05:52,841 - ModelTraining - INFO : Training the model\n", - "2025-02-27 19:05:59,278 - ROOT - INFO : Analysis pipeline running\n", - "2025-02-27 19:05:59,281 - File Utils - INFO : Data Loaded from Feature subset datapaths\n", - "2025-02-27 19:05:59,676 - File Utils - INFO : Data Loaded from Feature subset datapaths\n", - "2025-02-27 19:05:59,805 - File Utils - INFO : Data Loaded from Feature subset datapaths\n", - "2025-02-27 19:06:00,379 - Eval&Analysis - INFO : Calculating accuracy and generating classification report on test set\n", - "/home/amit.samal/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", - "/home/amit.samal/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", - "/home/amit.samal/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", - "2025-02-27 19:06:03,433 - Eval&Analysis - INFO : Performing gene analysis\n", - "2025-02-27 19:06:03,433 - FeatureExtraction - INFO : Feature scoring\n", - "2025-02-27 19:06:03,471 - FeatureExtraction - INFO : Top features extraction\n", - "2025-02-27 19:06:03,540 - Eval&Analysis - INFO : Performing Downstream Analysis on test samples\n", - "2025-02-27 19:06:03,540 - Eval&Analysis - INFO : Performing GeneRecallCurve\n", - "2025-02-27 19:06:04,781 - Eval&Analysis - INFO : Performing Heatmap\n", - "2025-02-27 19:06:09,548 - Eval&Analysis - INFO : Performing RocAucCurve\n", - "2025-02-27 19:06:09,929 - ROOT - INFO : Total time taken: 198.401921749115 s\n", - "2025-02-27 19:06:09,929 - ROOT - INFO : Maximum memory usage: 1915.5625 MB\n" - ] - } - ], + "outputs": [], "source": [ "# Command to run end to end pipeline.\n", "# This shell will take approximately 00:21:15 (hh:mm:ss) on GPU to run.()\n", @@ -1744,7 +948,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "scaLR_env", "language": "python", "name": "python3" }, @@ -1758,7 +962,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.20" } }, "nbformat": 4,