diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index f500c00..ead7736 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -11,6 +11,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
+ with:
+ python-version: "3.10.20"
- name: Install scalr requirements
run: |
pip install -r requirements.txt
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
index 044cfe2..2a6a8ab 100644
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -15,7 +15,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
- python-version: "3.9"
+ python-version: "3.10.20"
- name: Install pypa/build
run: >-
python3 -m
diff --git a/.github/workflows/run_isort.yml b/.github/workflows/run_isort.yml
index 0d7c7b6..ee4332f 100644
--- a/.github/workflows/run_isort.yml
+++ b/.github/workflows/run_isort.yml
@@ -10,10 +10,10 @@ jobs:
steps:
- uses: actions/checkout@v2
- - name: Set up Python 3.9
+ - name: Set up Python 3.10.20
uses: actions/setup-python@v2
with:
- python-version: 3.9
+ python-version: "3.10.20"
- name: Install isort
run: pip install isort
- name: Run isort
diff --git a/.github/workflows/run_pytest.yml b/.github/workflows/run_pytest.yml
index 7474e62..be5af6f 100644
--- a/.github/workflows/run_pytest.yml
+++ b/.github/workflows/run_pytest.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.9"]
+ python-version: ["3.10.20"]
steps:
- uses: actions/checkout@v3
diff --git a/README.md b/README.md
index d4e5983..45d992a 100644
--- a/README.md
+++ b/README.md
@@ -27,10 +27,10 @@
## Pre-requisites and installation scaLR
-- ScaLR can be installed using git or pip. It is tested in Python 3.10 and it is recommended to use that environment.
+- ScaLR can be installed using git or pip. It is tested with Python 3.10.20, and using that version is recommended.
```
-conda create -n scaLR_env python=3.10
+conda create -n scaLR_env python=3.10.20
conda activate scaLR_env
```
@@ -374,5 +374,4 @@ Performs evaluation of best model trained on user-defined metrics on the test se
## Citation
-Jogani Saiyam, Anand Santosh Pol, Mayur Prajapati, Amit Samal, Kriti Bhatia, Jayendra Parmar, Urvik Patel, Falak Shah, Nisarg Vyas, and Saurabh Gupta. "scaLR: a low-resource deep neural network-based platform for single cell analysis and biomarker discovery." bioRxiv (2024): 2024-09.
-
+Jogani, S., Pol, A. S., Prajapati, M., Samal, A., Bhatia, K., Parmar, J., ... & Gupta, S. (2025). scaLR: a low-resource deep neural network-based platform for single cell analysis and biomarker discovery. Briefings in Bioinformatics, 26(3), bbaf243.
diff --git a/config/config.yaml b/config/config.yaml
index aebd971..f6d8f71 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,7 +1,7 @@
# Config file for pipeline run.
# DEVICE SETUP.
-device: 'cuda'
+device: 'cpu'
# EXPERIMENT.
experiment:
@@ -16,8 +16,7 @@ data:
num_workers: 1
train_val_test:
- full_datapath: '/path/to/anndata.h5ad'
-
+ full_datapath: 'path/to/adata.h5ad'
splitter_config:
name: GroupSplitter
params:
@@ -35,7 +34,7 @@ data:
# params:
# **args
- target: Cell_Type
+ target: cell_type
# FEATURE SELECTION.
diff --git a/pyproject.toml b/pyproject.toml
index ed979c0..b3dd290 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,14 +6,14 @@ build-backend = "hatchling.build"
name = "pyscaLR"
version = "1.1.0"
-requires-python = ">=3.10"
+requires-python = ">=3.10.20"
authors = [
{ name="Infocusp", email="saurabh@infocusp.com" },
]
description = "scaLR: Single cell analysis using low resource."
readme = "README.md"
classifiers = [
- "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research"
]
diff --git a/requirements.txt b/requirements.txt
index e0620e8..c3a2e56 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-anndata==0.10.9
+anndata>=0.11.2,<0.12
isort==5.13.2
loky==3.4.1
memory-profiler==0.61.0
@@ -15,4 +15,4 @@ tensorboard==2.17.0
toml==0.10.2
torch==2.4.1 --index-url https://download.pytorch.org/whl/cu118
tqdm==4.66.5
-yapf==0.40.2
+yapf==0.40.2
\ No newline at end of file
diff --git a/scalr/analysis/dge_lmem.py b/scalr/analysis/dge_lmem.py
index 5462435..9abfe2a 100644
--- a/scalr/analysis/dge_lmem.py
+++ b/scalr/analysis/dge_lmem.py
@@ -11,7 +11,6 @@
from anndata import AnnData
from anndata import ImplicitModificationWarning
-import anndata as ad
from anndata.experimental import AnnCollection
from joblib import delayed
from joblib import Parallel
diff --git a/scalr/analysis/dge_pseudobulk.py b/scalr/analysis/dge_pseudobulk.py
index 1ceaf70..d919727 100644
--- a/scalr/analysis/dge_pseudobulk.py
+++ b/scalr/analysis/dge_pseudobulk.py
@@ -4,8 +4,7 @@
from os import path
from typing import Optional, Tuple, Union
-from anndata import AnnData
-import anndata as ad
+from anndata import AnnData, concat
from anndata.experimental import AnnCollection
import matplotlib.pyplot as plt
import numpy as np
@@ -94,7 +93,7 @@ def _make_design_matrix(self, adata: AnnData, cell_type: str):
for sum_sample in condition_subset.obs[self.sum_column].unique():
sum_subset = condition_subset[condition_subset.obs[
self.sum_column] == sum_sample]
- subdata = ad.AnnData(
+ subdata = AnnData(
X=sum_subset[:].X.sum(axis=0).reshape(
1, len(sum_subset.var_names)),
var=DataFrame(index=sum_subset.var_names),
@@ -102,7 +101,7 @@ def _make_design_matrix(self, adata: AnnData, cell_type: str):
subdata.obs[self.design_factor_no_undrscr] = [condition]
design_matrix_list.append(subdata)
- design_matrix = ad.concat(design_matrix_list)
+ design_matrix = concat(design_matrix_list)
return design_matrix
def get_differential_expression_results(self, design_matrix: AnnData,
diff --git a/scalr/nn/dataloader/simple_metadataloader.py b/scalr/nn/dataloader/simple_metadataloader.py
index 4a3899b..04cd0e5 100644
--- a/scalr/nn/dataloader/simple_metadataloader.py
+++ b/scalr/nn/dataloader/simple_metadataloader.py
@@ -71,7 +71,7 @@ def collate_fn(
x = torch.cat(
(x,
torch.as_tensor(self.metadata_onehotencoder[col].transform(
- adata_batch.obs[col].values.reshape(-1, 1)).A,
+ adata_batch.obs[col].values.reshape(-1, 1)).toarray(),
dtype=torch.float32)),
dim=1)
return x, y
diff --git a/scalr/nn/dataloader/test_simple_metadataloader.py b/scalr/nn/dataloader/test_simple_metadataloader.py
index d4c9023..0e2647f 100644
--- a/scalr/nn/dataloader/test_simple_metadataloader.py
+++ b/scalr/nn/dataloader/test_simple_metadataloader.py
@@ -1,6 +1,5 @@
'''This is a test file for simplemetadataloader.'''
-import anndata
import numpy as np
import pandas as pd
diff --git a/scalr/utils/data_utils.py b/scalr/utils/data_utils.py
index b01c5ed..411daaa 100644
--- a/scalr/utils/data_utils.py
+++ b/scalr/utils/data_utils.py
@@ -46,7 +46,7 @@ def get_random_samples(
random_background_data = data[random_indices].X
if not isinstance(random_background_data, np.ndarray):
- random_background_data = random_background_data.A
+ random_background_data = random_background_data.toarray()
random_background_data = torch.as_tensor(random_background_data,
dtype=torch.float32)
diff --git a/scalr/utils/file_utils.py b/scalr/utils/file_utils.py
index b518ad8..b7aeab9 100644
--- a/scalr/utils/file_utils.py
+++ b/scalr/utils/file_utils.py
@@ -7,8 +7,8 @@
from typing import Union
from anndata import AnnData
-import anndata as ad
from anndata.experimental import AnnCollection
+from anndata.io import read_h5ad
from joblib import delayed
from joblib import Parallel
import numpy as np
@@ -142,7 +142,7 @@ def transform_and_write_data(data: AnnData, chunk_number: int):
if transform:
data = AnnData(data.X, obs=data.obs, var=data.var)
if not isinstance(data.X, np.ndarray):
- data.X = data.X.A
+ data.X = data.X.toarray()
data.X = transform(data.X)
write_data(data, path.join(dirpath, f'{chunk_number}.h5ad'))
@@ -262,7 +262,7 @@ def read_csv(filepath: str, index_col: int = 0) -> pd.DataFrame:
def read_anndata(filepath: str, backed: str = 'r') -> AnnData:
"""This file returns the Anndata object from filepath."""
- data = ad.read_h5ad(filepath, backed=backed)
+ data = read_h5ad(filepath, backed=backed)
return data
diff --git a/tutorials/analysis/differential_gene_expression/dge_lmem_main.py b/tutorials/analysis/differential_gene_expression/dge_lmem_main.py
index 769a19c..391ac13 100644
--- a/tutorials/analysis/differential_gene_expression/dge_lmem_main.py
+++ b/tutorials/analysis/differential_gene_expression/dge_lmem_main.py
@@ -13,14 +13,13 @@
from anndata import AnnData
from anndata import ImplicitModificationWarning
-import anndata as ad
from anndata.experimental import AnnCollection
+from anndata.io import read_h5ad
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame
-import scanpy as sc
from scipy.optimize import OptimizeWarning
import statsmodels.api as sm
import statsmodels.formula.api as smf
@@ -31,7 +30,7 @@
def main(config):
- test_data = sc.read_h5ad(config['full_datapath'], backed='r')
+ test_data = read_h5ad(config['full_datapath'], backed='r')
dirpath = config['dirpath']
dge_type = config['dge_type']
assert (dge_type == 'DgeLMEM') and ('lmem_params' in config), (
diff --git a/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py b/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py
index 30cdae9..a300f8f 100644
--- a/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py
+++ b/tutorials/analysis/differential_gene_expression/dge_pseudobulk_main.py
@@ -5,22 +5,21 @@
from typing import Optional, Union, Tuple
import yaml
-import anndata as ad
from anndata import AnnData
from anndata.experimental import AnnCollection
+from anndata.io import read_h5ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
-import scanpy as sc
from scalr.analysis import DgePseudoBulk
def main(config):
- test_data = sc.read_h5ad(config['full_datapath'], backed='r')
+ test_data = read_h5ad(config['full_datapath'], backed='r')
dirpath = config['dirpath']
dge_type = config['dge_type']
assert (dge_type == 'DgePseudoBulk') and ('psedobulk_params' in config), (
diff --git a/tutorials/pipeline/config_celltype.yaml b/tutorials/pipeline/config_celltype.yaml
index a2ef0af..bfe1dc1 100644
--- a/tutorials/pipeline/config_celltype.yaml
+++ b/tutorials/pipeline/config_celltype.yaml
@@ -1,7 +1,7 @@
# Config file for pipeline run for cell type classification.
# DEVICE SETUP.
-device: 'cuda'
+device: 'cpu'
# EXPERIMENT.
experiment:
@@ -15,8 +15,8 @@ data:
sample_chunksize: 20000
train_val_test:
- full_datapath: 'data/modified_adata.h5ad'
- num_workers: 2
+ full_datapath: 'path/to/adata.h5ad'
+ num_workers: 4
splitter_config:
name: GroupSplitter
diff --git a/tutorials/pipeline/config_clinical.yaml b/tutorials/pipeline/config_clinical.yaml
index f4ac2e8..e3417ce 100644
--- a/tutorials/pipeline/config_clinical.yaml
+++ b/tutorials/pipeline/config_clinical.yaml
@@ -1,7 +1,7 @@
# Config file for pipeline run for clinical condition specific biomarker identification.
# DEVICE SETUP.
-device: 'cuda'
+device: 'cpu'
# EXPERIMENT.
experiment:
@@ -15,7 +15,7 @@ data:
sample_chunksize: 20000
train_val_test:
- full_datapath: 'data/modified_adata.h5ad'
+ full_datapath: 'path/to/adata.h5ad'
num_workers: 2
splitter_config:
diff --git a/tutorials/pipeline/scalr_pipeline.ipynb b/tutorials/pipeline/scalr_pipeline.ipynb
index 23ed459..845e131 100644
--- a/tutorials/pipeline/scalr_pipeline.ipynb
+++ b/tutorials/pipeline/scalr_pipeline.ipynb
@@ -358,7 +358,7 @@
"outputs": [],
"source": [
"#Gene expression values of first 5 cells and 10 genes.\n",
- "adata.X[:5,:10].A"
+ "adata.X[:5,:10]\n"
]
},
{
@@ -385,7 +385,7 @@
"source": [
"# Verifying normalized values in X\n",
"# Getting the sum of gene expression values for the first 10 cells (should be floating-point values).\n",
- "adata.X[:10,:].A.sum(axis=1)"
+ "adata.X[:10,:].sum(axis=1)"
]
},
{
@@ -411,8 +411,8 @@
"outputs": [],
"source": [
"# Getting the maximum and minimum gene expression values for the first 1000 cells.\n",
- "max_val = np.max(adata.X[:1000, :].A)\n",
- "min_val = np.min(adata.X[:1000, :].A)\n",
+ "max_val = np.max(adata.X[:1000, :])\n",
+ "min_val = np.min(adata.X[:1000, :])\n",
"print(f'Max value : {max_val} | Min value : {min_val}')\n",
"# Raising a warning if the values are outside the 0-10 range\n",
"if max_val > 10 or min_val < 0:\n",
diff --git a/tutorials/pipeline/scalr_pipeline_local_run.ipynb b/tutorials/pipeline/scalr_pipeline_local_run.ipynb
index c9fe5ec..b98bef8 100644
--- a/tutorials/pipeline/scalr_pipeline_local_run.ipynb
+++ b/tutorials/pipeline/scalr_pipeline_local_run.ipynb
@@ -44,21 +44,7 @@
"metadata": {
"id": "CdutIWiy8xJb"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Cloning into 'scaLR'...\n",
- "remote: Enumerating objects: 3452, done.\u001b[K\n",
- "remote: Counting objects: 100% (372/372), done.\u001b[K\n",
- "remote: Compressing objects: 100% (181/181), done.\u001b[K\n",
- "remote: Total 3452 (delta 243), reused 261 (delta 189), pack-reused 3080 (from 1)\u001b[K\n",
- "Receiving objects: 100% (3452/3452), 170.03 MiB | 2.80 MiB/s, done.\n",
- "Resolving deltas: 100% (2073/2073), done.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!git clone https://github.com/infocusp/scaLR.git"
]
@@ -78,89 +64,7 @@
"metadata": {
"id": "9dQLPmLwPL0C"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Defaulting to user installation because normal site-packages is not writeable\n",
- "Requirement already satisfied: anndata==0.10.9 in /home/amit.samal/.local/lib/python3.10/site-packages (0.10.9)\n",
- "Requirement already satisfied: isort==5.13.2 in /home/amit.samal/.local/lib/python3.10/site-packages (5.13.2)\n",
- "Collecting loky==3.4.1\n",
- " Downloading loky-3.4.1-py3-none-any.whl.metadata (6.4 kB)\n",
- "Requirement already satisfied: pillow==10.4.0 in /home/amit.samal/.local/lib/python3.10/site-packages (10.4.0)\n",
- "Requirement already satisfied: pydeseq2==0.4.11 in /home/amit.samal/.local/lib/python3.10/site-packages (0.4.11)\n",
- "Requirement already satisfied: pyparsing==3.2.0 in /home/amit.samal/.local/lib/python3.10/site-packages (3.2.0)\n",
- "Requirement already satisfied: pytest==8.3.3 in /home/amit.samal/.local/lib/python3.10/site-packages (8.3.3)\n",
- "Requirement already satisfied: PyYAML==6.0.2 in /home/amit.samal/.local/lib/python3.10/site-packages (6.0.2)\n",
- "Requirement already satisfied: scanpy==1.10.3 in /home/amit.samal/.local/lib/python3.10/site-packages (1.10.3)\n",
- "Requirement already satisfied: scikit-learn==1.5.2 in /home/amit.samal/.local/lib/python3.10/site-packages (1.5.2)\n",
- "Requirement already satisfied: shap==0.46.0 in /home/amit.samal/.local/lib/python3.10/site-packages (0.46.0)\n",
- "Requirement already satisfied: tensorboard==2.17.0 in /home/amit.samal/.local/lib/python3.10/site-packages (2.17.0)\n",
- "Requirement already satisfied: toml==0.10.2 in /home/amit.samal/.local/lib/python3.10/site-packages (0.10.2)\n",
- "Requirement already satisfied: tqdm==4.66.5 in /home/amit.samal/.local/lib/python3.10/site-packages (4.66.5)\n",
- "Requirement already satisfied: yapf==0.40.2 in /home/amit.samal/.local/lib/python3.10/site-packages (0.40.2)\n",
- "Requirement already satisfied: array-api-compat!=1.5,>1.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.5.1)\n",
- "Requirement already satisfied: exceptiongroup in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.2.0)\n",
- "Requirement already satisfied: h5py>=3.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (3.10.0)\n",
- "Requirement already satisfied: natsort in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (8.4.0)\n",
- "Requirement already satisfied: numpy>=1.23 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.26.3)\n",
- "Requirement already satisfied: packaging>=20.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (24.0)\n",
- "Requirement already satisfied: pandas!=2.1.0rc0,!=2.1.2,>=1.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.5.3)\n",
- "Requirement already satisfied: scipy>1.8 in /home/amit.samal/.local/lib/python3.10/site-packages (from anndata==0.10.9) (1.12.0)\n",
- "Requirement already satisfied: cloudpickle in /home/amit.samal/.local/lib/python3.10/site-packages (from loky==3.4.1) (3.0.0)\n",
- "Requirement already satisfied: matplotlib>=3.6.2 in /home/amit.samal/.local/lib/python3.10/site-packages (from pydeseq2==0.4.11) (3.8.3)\n",
- "Requirement already satisfied: iniconfig in /home/amit.samal/.local/lib/python3.10/site-packages (from pytest==8.3.3) (2.0.0)\n",
- "Requirement already satisfied: pluggy<2,>=1.5 in /home/amit.samal/.local/lib/python3.10/site-packages (from pytest==8.3.3) (1.5.0)\n",
- "Requirement already satisfied: tomli>=1 in /home/amit.samal/.local/lib/python3.10/site-packages (from pytest==8.3.3) (2.1.0)\n",
- "Requirement already satisfied: joblib in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (1.3.2)\n",
- "Requirement already satisfied: legacy-api-wrap>=1.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (1.4)\n",
- "Requirement already satisfied: networkx>=2.7 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (3.2.1)\n",
- "Requirement already satisfied: numba>=0.56 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.59.1)\n",
- "Requirement already satisfied: patsy in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.5.6)\n",
- "Requirement already satisfied: pynndescent>=0.5 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.5.11)\n",
- "Requirement already satisfied: seaborn>=0.13 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.13.2)\n",
- "Requirement already satisfied: session-info in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (1.0.0)\n",
- "Requirement already satisfied: statsmodels>=0.13 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.14.1)\n",
- "Requirement already satisfied: umap-learn!=0.5.0,>=0.5 in /home/amit.samal/.local/lib/python3.10/site-packages (from scanpy==1.10.3) (0.5.5)\n",
- "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from scikit-learn==1.5.2) (3.4.0)\n",
- "Requirement already satisfied: slicer==0.0.8 in /home/amit.samal/.local/lib/python3.10/site-packages (from shap==0.46.0) (0.0.8)\n",
- "Requirement already satisfied: absl-py>=0.4 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (2.1.0)\n",
- "Requirement already satisfied: grpcio>=1.48.2 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (1.70.0)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (3.7)\n",
- "Requirement already satisfied: protobuf!=4.24.0,<5.0.0,>=3.19.6 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (4.25.6)\n",
- "Requirement already satisfied: setuptools>=41.0.0 in /usr/lib/python3/dist-packages (from tensorboard==2.17.0) (59.6.0)\n",
- "Requirement already satisfied: six>1.9 in /usr/lib/python3/dist-packages (from tensorboard==2.17.0) (1.16.0)\n",
- "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (0.7.2)\n",
- "Requirement already satisfied: werkzeug>=1.0.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from tensorboard==2.17.0) (3.1.3)\n",
- "Requirement already satisfied: importlib-metadata>=6.6.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from yapf==0.40.2) (8.6.1)\n",
- "Requirement already satisfied: platformdirs>=3.5.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from yapf==0.40.2) (4.2.0)\n",
- "Requirement already satisfied: zipp>=3.20 in /home/amit.samal/.local/lib/python3.10/site-packages (from importlib-metadata>=6.6.0->yapf==0.40.2) (3.21.0)\n",
- "Requirement already satisfied: contourpy>=1.0.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (1.2.0)\n",
- "Requirement already satisfied: cycler>=0.10 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (0.12.1)\n",
- "Requirement already satisfied: fonttools>=4.22.0 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (4.50.0)\n",
- "Requirement already satisfied: kiwisolver>=1.3.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (1.4.5)\n",
- "Requirement already satisfied: python-dateutil>=2.7 in /home/amit.samal/.local/lib/python3.10/site-packages (from matplotlib>=3.6.2->pydeseq2==0.4.11) (2.9.0.post0)\n",
- "Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in /home/amit.samal/.local/lib/python3.10/site-packages (from numba>=0.56->scanpy==1.10.3) (0.42.0)\n",
- "Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas!=2.1.0rc0,!=2.1.2,>=1.4->anndata==0.10.9) (2022.1)\n",
- "Requirement already satisfied: MarkupSafe>=2.1.1 in /home/amit.samal/.local/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard==2.17.0) (3.0.2)\n",
- "Requirement already satisfied: stdlib-list in /home/amit.samal/.local/lib/python3.10/site-packages (from session-info->scanpy==1.10.3) (0.10.0)\n",
- "Downloading loky-3.4.1-py3-none-any.whl (54 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.6/54.6 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
- "\u001b[?25hInstalling collected packages: loky\n",
- "Successfully installed loky-3.4.1\n",
- "\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
- "Defaulting to user installation because normal site-packages is not writeable\n",
- "Requirement already satisfied: memory-profiler==0.61.0 in /home/amit.samal/.local/lib/python3.10/site-packages (0.61.0)\n",
- "Requirement already satisfied: psutil in /home/amit.samal/.local/lib/python3.10/site-packages (from memory-profiler==0.61.0) (5.9.8)\n",
- "\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import sys\n",
"imported_packages = {pkg.split('.')[0] for pkg in sys.modules.keys()}\n",
@@ -199,29 +103,11 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {
"id": "loCfvnwt9ei1"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "--2025-02-27 18:52:02-- https://datasets.cellxgene.cziscience.com/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad\n",
- "Resolving datasets.cellxgene.cziscience.com (datasets.cellxgene.cziscience.com)... 18.239.111.15, 18.239.111.109, 18.239.111.30, ...\n",
- "Connecting to datasets.cellxgene.cziscience.com (datasets.cellxgene.cziscience.com)|18.239.111.15|:443... connected.\n",
- "HTTP request sent, awaiting response... 200 OK\n",
- "Length: 980103606 (935M) [binary/octet-stream]\n",
- "Saving to: ‘data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad’\n",
- "\n",
- "21ef2ea2-cbed-4b6c- 100%[===================>] 934.70M 3.21MB/s in 4m 48s \n",
- "\n",
- "2025-02-27 18:56:51 (3.25 MB/s) - ‘data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad’ saved [980103606/980103606]\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# This shell will take approximately 00:00:53 (hh:mm:ss) to run.\n",
"!wget -P data https://datasets.cellxgene.cziscience.com/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad"
@@ -238,7 +124,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {
"id": "23C87j3PR9ox"
},
@@ -254,277 +140,33 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {
"id": "eDH3GxXr-er6"
},
"outputs": [],
"source": [
- "adata = ad.read_h5ad(\"data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad\",backed='r')"
+ "adata = ad.read_h5ad(\"data/21ef2ea2-cbed-4b6c-a572-0ddd1d9020bc.h5ad\")"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"id": "SS4oTWW6Xn8c"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "The anndata has '125117' cells and '30695' genes\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"print(f\"\\nThe anndata has '{adata.n_obs}' cells and '{adata.n_vars}' genes\")"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {
"id": "z1u-kctbSStJ"
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " dsm_severity_score_group | \n",
- " disease_ontology_term_id | \n",
- " severity | \n",
- " tissue_ontology_term_id | \n",
- " timepoint | \n",
- " outcome | \n",
- " dsm_severity_score | \n",
- " days_since_hospitalized | \n",
- " age | \n",
- " donor_id | \n",
- " ... | \n",
- " tissue_type | \n",
- " cell_type | \n",
- " assay | \n",
- " disease | \n",
- " organism | \n",
- " sex | \n",
- " tissue | \n",
- " self_reported_ethnicity | \n",
- " development_stage | \n",
- " observation_joinid | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | AAACCTGAGAAACCTA-1_1 | \n",
- " DSM_low | \n",
- " MONDO:0100096 | \n",
- " Moderate | \n",
- " UBERON:0000178 | \n",
- " T0 | \n",
- " alive | \n",
- " -1.950858 | \n",
- " 1.0 | \n",
- " 55.0 | \n",
- " HGR0000083 | \n",
- " ... | \n",
- " tissue | \n",
- " non-classical monocyte | \n",
- " 10x 5' v1 | \n",
- " COVID-19 | \n",
- " Homo sapiens | \n",
- " male | \n",
- " blood | \n",
- " European | \n",
- " 55-year-old stage | \n",
- " !9L}G4hgnw | \n",
- "
\n",
- " \n",
- " | AAACCTGAGGGTTTCT-1_1 | \n",
- " DSM_high | \n",
- " MONDO:0100096 | \n",
- " Critical | \n",
- " UBERON:0000178 | \n",
- " T0 | \n",
- " alive | \n",
- " -0.092375 | \n",
- " 13.0 | \n",
- " 40.0 | \n",
- " HGR0000078 | \n",
- " ... | \n",
- " tissue | \n",
- " classical monocyte | \n",
- " 10x 5' v1 | \n",
- " COVID-19 | \n",
- " Homo sapiens | \n",
- " female | \n",
- " blood | \n",
- " European | \n",
- " 40-year-old stage | \n",
- " YRcUzlVyg0 | \n",
- "
\n",
- " \n",
- " | AAACCTGCACCTGGTG-1_1 | \n",
- " DSM_high | \n",
- " MONDO:0100096 | \n",
- " Critical | \n",
- " UBERON:0000178 | \n",
- " T0 | \n",
- " alive | \n",
- " 2.954350 | \n",
- " 1.0 | \n",
- " 60.0 | \n",
- " HGR0000098 | \n",
- " ... | \n",
- " tissue | \n",
- " CD16-positive, CD56-dim natural killer cell, h... | \n",
- " 10x 5' v1 | \n",
- " COVID-19 | \n",
- " Homo sapiens | \n",
- " male | \n",
- " blood | \n",
- " European | \n",
- " 60-year-old stage | \n",
- " )*azge@M0l | \n",
- "
\n",
- " \n",
- " | AAACCTGGTCCGAGTC-1_1 | \n",
- " DSM_high | \n",
- " MONDO:0100096 | \n",
- " Critical | \n",
- " UBERON:0000178 | \n",
- " T0 | \n",
- " deceased | \n",
- " 3.276233 | \n",
- " 6.0 | \n",
- " 76.0 | \n",
- " HGR0000141 | \n",
- " ... | \n",
- " tissue | \n",
- " classical monocyte | \n",
- " 10x 5' v1 | \n",
- " COVID-19 | \n",
- " Homo sapiens | \n",
- " male | \n",
- " blood | \n",
- " European | \n",
- " 76-year-old stage | \n",
- " E<FU`+QN&T | \n",
- "
\n",
- " \n",
- " | AAACCTGGTGCCTTGG-1_1 | \n",
- " DSM_low | \n",
- " MONDO:0100096 | \n",
- " Critical | \n",
- " UBERON:0000178 | \n",
- " T0 | \n",
- " alive | \n",
- " -0.348888 | \n",
- " 1.0 | \n",
- " 70.0 | \n",
- " HGR0000093 | \n",
- " ... | \n",
- " tissue | \n",
- " classical monocyte | \n",
- " 10x 5' v1 | \n",
- " COVID-19 | \n",
- " Homo sapiens | \n",
- " male | \n",
- " blood | \n",
- " European | \n",
- " 70-year-old stage | \n",
- " 2MZ#6SX}{g | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 32 columns
\n",
- "
"
- ],
- "text/plain": [
- " dsm_severity_score_group disease_ontology_term_id \\\n",
- "AAACCTGAGAAACCTA-1_1 DSM_low MONDO:0100096 \n",
- "AAACCTGAGGGTTTCT-1_1 DSM_high MONDO:0100096 \n",
- "AAACCTGCACCTGGTG-1_1 DSM_high MONDO:0100096 \n",
- "AAACCTGGTCCGAGTC-1_1 DSM_high MONDO:0100096 \n",
- "AAACCTGGTGCCTTGG-1_1 DSM_low MONDO:0100096 \n",
- "\n",
- " severity tissue_ontology_term_id timepoint outcome \\\n",
- "AAACCTGAGAAACCTA-1_1 Moderate UBERON:0000178 T0 alive \n",
- "AAACCTGAGGGTTTCT-1_1 Critical UBERON:0000178 T0 alive \n",
- "AAACCTGCACCTGGTG-1_1 Critical UBERON:0000178 T0 alive \n",
- "AAACCTGGTCCGAGTC-1_1 Critical UBERON:0000178 T0 deceased \n",
- "AAACCTGGTGCCTTGG-1_1 Critical UBERON:0000178 T0 alive \n",
- "\n",
- " dsm_severity_score days_since_hospitalized age \\\n",
- "AAACCTGAGAAACCTA-1_1 -1.950858 1.0 55.0 \n",
- "AAACCTGAGGGTTTCT-1_1 -0.092375 13.0 40.0 \n",
- "AAACCTGCACCTGGTG-1_1 2.954350 1.0 60.0 \n",
- "AAACCTGGTCCGAGTC-1_1 3.276233 6.0 76.0 \n",
- "AAACCTGGTGCCTTGG-1_1 -0.348888 1.0 70.0 \n",
- "\n",
- " donor_id ... tissue_type \\\n",
- "AAACCTGAGAAACCTA-1_1 HGR0000083 ... tissue \n",
- "AAACCTGAGGGTTTCT-1_1 HGR0000078 ... tissue \n",
- "AAACCTGCACCTGGTG-1_1 HGR0000098 ... tissue \n",
- "AAACCTGGTCCGAGTC-1_1 HGR0000141 ... tissue \n",
- "AAACCTGGTGCCTTGG-1_1 HGR0000093 ... tissue \n",
- "\n",
- " cell_type \\\n",
- "AAACCTGAGAAACCTA-1_1 non-classical monocyte \n",
- "AAACCTGAGGGTTTCT-1_1 classical monocyte \n",
- "AAACCTGCACCTGGTG-1_1 CD16-positive, CD56-dim natural killer cell, h... \n",
- "AAACCTGGTCCGAGTC-1_1 classical monocyte \n",
- "AAACCTGGTGCCTTGG-1_1 classical monocyte \n",
- "\n",
- " assay disease organism sex tissue \\\n",
- "AAACCTGAGAAACCTA-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n",
- "AAACCTGAGGGTTTCT-1_1 10x 5' v1 COVID-19 Homo sapiens female blood \n",
- "AAACCTGCACCTGGTG-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n",
- "AAACCTGGTCCGAGTC-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n",
- "AAACCTGGTGCCTTGG-1_1 10x 5' v1 COVID-19 Homo sapiens male blood \n",
- "\n",
- " self_reported_ethnicity development_stage \\\n",
- "AAACCTGAGAAACCTA-1_1 European 55-year-old stage \n",
- "AAACCTGAGGGTTTCT-1_1 European 40-year-old stage \n",
- "AAACCTGCACCTGGTG-1_1 European 60-year-old stage \n",
- "AAACCTGGTCCGAGTC-1_1 European 76-year-old stage \n",
- "AAACCTGGTGCCTTGG-1_1 European 70-year-old stage \n",
- "\n",
- " observation_joinid \n",
- "AAACCTGAGAAACCTA-1_1 !9L}G4hgnw \n",
- "AAACCTGAGGGTTTCT-1_1 YRcUzlVyg0 \n",
- "AAACCTGCACCTGGTG-1_1 )*azge@M0l \n",
- "AAACCTGGTCCGAGTC-1_1 E 10 or min_val < 0:\n",
@@ -721,149 +265,11 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {
"id": "bd2fTv0gdluU"
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " mvp.mean | \n",
- " mvp.dispersion | \n",
- " mvp.dispersion.scaled | \n",
- " mvp.variable | \n",
- " feature_is_filtered | \n",
- " feature_name | \n",
- " feature_reference | \n",
- " feature_biotype | \n",
- " feature_length | \n",
- " feature_type | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | ENSG00000168454 | \n",
- " 0.000380 | \n",
- " 1.168876 | \n",
- " 0.181734 | \n",
- " False | \n",
- " False | \n",
- " TXNDC2 | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 1703 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- " | ENSG00000197852 | \n",
- " 0.035995 | \n",
- " 1.634179 | \n",
- " 0.886458 | \n",
- " False | \n",
- " False | \n",
- " INKA2 | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 1217 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- " | ENSG00000196878 | \n",
- " 0.008862 | \n",
- " 1.617729 | \n",
- " 0.861545 | \n",
- " False | \n",
- " False | \n",
- " LAMB3 | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 3931 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- " | ENSG00000256540 | \n",
- " 0.000022 | \n",
- " 1.660993 | \n",
- " 0.927070 | \n",
- " False | \n",
- " False | \n",
- " IQSEC3-AS1 | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 1065 | \n",
- " lncRNA | \n",
- "
\n",
- " \n",
- " | ENSG00000139180 | \n",
- " 0.090100 | \n",
- " 1.184720 | \n",
- " 0.205731 | \n",
- " False | \n",
- " False | \n",
- " NDUFA9 | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 782 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " mvp.mean mvp.dispersion mvp.dispersion.scaled \\\n",
- "ENSG00000168454 0.000380 1.168876 0.181734 \n",
- "ENSG00000197852 0.035995 1.634179 0.886458 \n",
- "ENSG00000196878 0.008862 1.617729 0.861545 \n",
- "ENSG00000256540 0.000022 1.660993 0.927070 \n",
- "ENSG00000139180 0.090100 1.184720 0.205731 \n",
- "\n",
- " mvp.variable feature_is_filtered feature_name \\\n",
- "ENSG00000168454 False False TXNDC2 \n",
- "ENSG00000197852 False False INKA2 \n",
- "ENSG00000196878 False False LAMB3 \n",
- "ENSG00000256540 False False IQSEC3-AS1 \n",
- "ENSG00000139180 False False NDUFA9 \n",
- "\n",
- " feature_reference feature_biotype feature_length \\\n",
- "ENSG00000168454 NCBITaxon:9606 gene 1703 \n",
- "ENSG00000197852 NCBITaxon:9606 gene 1217 \n",
- "ENSG00000196878 NCBITaxon:9606 gene 3931 \n",
- "ENSG00000256540 NCBITaxon:9606 gene 1065 \n",
- "ENSG00000139180 NCBITaxon:9606 gene 782 \n",
- "\n",
- " feature_type \n",
- "ENSG00000168454 protein_coding \n",
- "ENSG00000197852 protein_coding \n",
- "ENSG00000196878 protein_coding \n",
- "ENSG00000256540 lncRNA \n",
- "ENSG00000139180 protein_coding "
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"#Gene metadata\n",
"adata.var.head()"
@@ -890,162 +296,23 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {
"id": "qoSHdJtwgPaA"
},
"outputs": [],
"source": [
- "adata.var.set_index('feature_name',inplace=True)"
+ "adata.var.set_index('feature_name',inplace=True)\n",
+ "\n"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {
"id": "p3LvDmZmhJ_c"
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " mvp.mean | \n",
- " mvp.dispersion | \n",
- " mvp.dispersion.scaled | \n",
- " mvp.variable | \n",
- " feature_is_filtered | \n",
- " feature_reference | \n",
- " feature_biotype | \n",
- " feature_length | \n",
- " feature_type | \n",
- "
\n",
- " \n",
- " | feature_name | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | TXNDC2 | \n",
- " 0.000380 | \n",
- " 1.168876 | \n",
- " 0.181734 | \n",
- " False | \n",
- " False | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 1703 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- " | INKA2 | \n",
- " 0.035995 | \n",
- " 1.634179 | \n",
- " 0.886458 | \n",
- " False | \n",
- " False | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 1217 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- " | LAMB3 | \n",
- " 0.008862 | \n",
- " 1.617729 | \n",
- " 0.861545 | \n",
- " False | \n",
- " False | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 3931 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- " | IQSEC3-AS1 | \n",
- " 0.000022 | \n",
- " 1.660993 | \n",
- " 0.927070 | \n",
- " False | \n",
- " False | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 1065 | \n",
- " lncRNA | \n",
- "
\n",
- " \n",
- " | NDUFA9 | \n",
- " 0.090100 | \n",
- " 1.184720 | \n",
- " 0.205731 | \n",
- " False | \n",
- " False | \n",
- " NCBITaxon:9606 | \n",
- " gene | \n",
- " 782 | \n",
- " protein_coding | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " mvp.mean mvp.dispersion mvp.dispersion.scaled mvp.variable \\\n",
- "feature_name \n",
- "TXNDC2 0.000380 1.168876 0.181734 False \n",
- "INKA2 0.035995 1.634179 0.886458 False \n",
- "LAMB3 0.008862 1.617729 0.861545 False \n",
- "IQSEC3-AS1 0.000022 1.660993 0.927070 False \n",
- "NDUFA9 0.090100 1.184720 0.205731 False \n",
- "\n",
- " feature_is_filtered feature_reference feature_biotype \\\n",
- "feature_name \n",
- "TXNDC2 False NCBITaxon:9606 gene \n",
- "INKA2 False NCBITaxon:9606 gene \n",
- "LAMB3 False NCBITaxon:9606 gene \n",
- "IQSEC3-AS1 False NCBITaxon:9606 gene \n",
- "NDUFA9 False NCBITaxon:9606 gene \n",
- "\n",
- " feature_length feature_type \n",
- "feature_name \n",
- "TXNDC2 1703 protein_coding \n",
- "INKA2 1217 protein_coding \n",
- "LAMB3 3931 protein_coding \n",
- "IQSEC3-AS1 1065 lncRNA \n",
- "NDUFA9 782 protein_coding "
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Now the index values are the gene symbols.\n",
"adata.var.head()"
@@ -1053,7 +320,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {
"id": "6yCi6UQ-kh0Q"
},
@@ -1063,7 +330,7 @@
"# This shell will take approximately 00:00:47 (hh:mm:ss) to run.\n",
"adata.obs.index = adata.obs.index.astype(str)\n",
"adata.var.index = adata.var.index.astype(str)\n",
- "AnnData(X=adata.X,obs=adata.obs,var=adata.var).write('data/modified_adata.h5ad',compression='gzip')"
+ "AnnData(X=adata.X,obs=adata.obs,var=adata.var).write('/home/jupyter/haritable_git/scaLR/data/modified_adataT.h5ad',compression='gzip')"
]
},
{
@@ -1220,19 +487,11 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"metadata": {
"id": "uLgN7MDv7hV-"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "/bin/bash: line 1: python: command not found\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Possible flags using 'scaLR/pipeline.py'\n",
"!python scaLR/pipeline.py --help"
@@ -1249,66 +508,11 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"metadata": {
"id": "xqvT9AiQFVGq"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2025-02-27 19:02:51,535 - ROOT - INFO : Experiment directory: `scalr_experiments/exp_name_0`\n",
- "2025-02-27 19:02:51,544 - ROOT - INFO : Data Ingestion pipeline running\n",
- "2025-02-27 19:02:51,544 - DataIngestion - INFO : Generating Train, Validation and Test sets\n",
- "2025-02-27 19:03:35,769 - DataIngestion - INFO : Generate label mappings for all columns in metadata\n",
- "2025-02-27 19:03:36,946 - ROOT - INFO : Feature Extraction pipeline running\n",
- "2025-02-27 19:03:36,946 - File Utils - INFO : Data Loaded from Final datapaths\n",
- "2025-02-27 19:03:37,467 - FeatureExtraction - INFO : Feature subset models training\n",
- "2025-02-27 19:05:09,181 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:09,253 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:09,295 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:09,393 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:09,750 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:09,751 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:09,770 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:09,881 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:16,105 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:16,106 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:16,153 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:16,154 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:16,168 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:16,174 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:20,327 - FeatureExtraction - INFO : Feature scoring\n",
- "2025-02-27 19:05:20,712 - FeatureExtraction - INFO : Top features extraction\n",
- "2025-02-27 19:05:20,719 - FeatureExtraction - INFO : Writing feature-subset data onto disk\n",
- "2025-02-27 19:05:51,902 - ROOT - INFO : Final Model Training pipeline running\n",
- "2025-02-27 19:05:51,905 - File Utils - INFO : Data Loaded from Feature subset datapaths\n",
- "2025-02-27 19:05:52,382 - ModelTraining - INFO : Building model training artifacts\n",
- "2025-02-27 19:05:52,841 - ModelTraining - INFO : Training the model\n",
- "2025-02-27 19:05:59,278 - ROOT - INFO : Analysis pipeline running\n",
- "2025-02-27 19:05:59,281 - File Utils - INFO : Data Loaded from Feature subset datapaths\n",
- "2025-02-27 19:05:59,676 - File Utils - INFO : Data Loaded from Feature subset datapaths\n",
- "2025-02-27 19:05:59,805 - File Utils - INFO : Data Loaded from Feature subset datapaths\n",
- "2025-02-27 19:06:00,379 - Eval&Analysis - INFO : Calculating accuracy and generating classification report on test set\n",
- "/home/amit.samal/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
- " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
- "/home/amit.samal/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
- " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
- "/home/amit.samal/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
- " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
- "2025-02-27 19:06:03,433 - Eval&Analysis - INFO : Performing gene analysis\n",
- "2025-02-27 19:06:03,433 - FeatureExtraction - INFO : Feature scoring\n",
- "2025-02-27 19:06:03,471 - FeatureExtraction - INFO : Top features extraction\n",
- "2025-02-27 19:06:03,540 - Eval&Analysis - INFO : Performing Downstream Analysis on test samples\n",
- "2025-02-27 19:06:03,540 - Eval&Analysis - INFO : Performing GeneRecallCurve\n",
- "2025-02-27 19:06:04,781 - Eval&Analysis - INFO : Performing Heatmap\n",
- "2025-02-27 19:06:09,548 - Eval&Analysis - INFO : Performing RocAucCurve\n",
- "2025-02-27 19:06:09,929 - ROOT - INFO : Total time taken: 198.401921749115 s\n",
- "2025-02-27 19:06:09,929 - ROOT - INFO : Maximum memory usage: 1915.5625 MB\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Command to run end to end pipeline.\n",
"# This shell will take approximately 00:21:15 (hh:mm:ss) on GPU to run.()\n",
@@ -1744,7 +948,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "scaLR_env",
"language": "python",
"name": "python3"
},
@@ -1758,7 +962,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "version": "3.10.20"
}
},
"nbformat": 4,