diff --git a/src/control_methods/random_labels/config.vsh.yaml b/src/control_methods/random_labels/config.vsh.yaml
new file mode 100644
index 0000000..b7386ef
--- /dev/null
+++ b/src/control_methods/random_labels/config.vsh.yaml
@@ -0,0 +1,27 @@
+# Base component API configuration
+__merge__: ../../api/comp_control_method.yaml
+
+# Component configuration
+name: "random_labels"
+label: Random Labels
+summary: "Negative control by randomly generating labels."
+description: "This method serves as a negative control, where random labels are generated for the data."
+info:
+  preferred_normalization: counts
+  variants:
+    random_features:
+
+# Script configuration
+resources:
+  - type: python_script
+    path: script.py
+
+# Platform configuration
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowtime, lowmem, lowcpu]
diff --git a/src/control_methods/random_labels/script.py b/src/control_methods/random_labels/script.py
new file mode 100644
index 0000000..5e091f5
--- /dev/null
+++ b/src/control_methods/random_labels/script.py
@@ -0,0 +1,42 @@
+
+import anndata as ad
+import random
+import pandas as pd
+
+## VIASH START
+par = {
+    "input": "resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad",
+    "output": "resources_test/task_spatial_segmentation/mouse_brain_combined/reference_prediction.h5ad",
+    "seed": 123,
+    "label": "cell_type"
+}
+meta = {
+    "name": "random_labels",
+}
+## VIASH END
+
+# Compare against None so that an explicit seed of 0 is still applied.
+if par["seed"] is not None:
+    print(f">> Setting seed to {par['seed']}")
+    random.seed(par["seed"])
+
+print("Load input data", flush=True)
+adata = ad.read_h5ad(par["input"])
+
+print("Create random labels", flush=True)
+# Negative control: every observation gets an integer label drawn from 1..10 (inclusive).
+adata.obs[par["label"]] = [random.randint(1, 10) for _ in range(adata.n_obs)]
+
+print("Create output AnnData", flush=True)
+# Only the label column and the identifying metadata are carried into the output.
+output = ad.AnnData(
+    obs=pd.DataFrame(adata.obs[par["label"]]),
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    },
+)
+
+print("Write output to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/data_processors/leiden/config.vsh.yaml b/src/data_processors/leiden/config.vsh.yaml
new file mode 100644
index 0000000..dcced68
--- /dev/null
+++ b/src/data_processors/leiden/config.vsh.yaml
@@ -0,0 +1,52 @@
+__merge__: ../../api/comp_data_processor.yaml
+
+name: process_dataset
+
+arguments:
+  - name: "--label"
+    type: "string"
+    default: "cell_type"
+    description: Label added to anndata for prediction.
+  - name: "--n_neighbors"
+    type: "integer"
+    default: 20
+    description: Number of neighbors to use for nearest neighbors distance matrix.
+  - name: "--min_dist"
+    type: "double"
+    default: 0.1
+    description: Effective minimum distance to use for UMAP.
+  - name: "--spread"
+    type: "double"
+    default: 1.2
+    description: The effective scale of embedded points to use for UMAP.
+  - name: "--resolution"
+    type: "double"
+    default: 1.0
+    description: The resolution to use for leiden clustering.
+  - name: "--seed"
+    type: "integer"
+    default: 123
+    description: Seed.
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    #image: openproblems/base_pytorch_nvidia:1 # TODO: ideally get gpu image to work
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        packages: scikit-learn
+      - type: python
+        packages: leidenalg
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highmem, midcpu, midtime]
diff --git a/src/data_processors/leiden/script.py b/src/data_processors/leiden/script.py
new file mode 100644
index 0000000..bb0b0dc
--- /dev/null
+++ b/src/data_processors/leiden/script.py
@@ -0,0 +1,45 @@
+
+import random
+import anndata as ad
+import scanpy as sc
+import pandas as pd
+
+## VIASH START
+par = {
+    'input': 'resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad',
+    'output': 'resources_test/task_spatial_segmentation/mouse_brain_combined/method_prediction.h5ad',
+    'label': 'cell_type',
+    'n_neighbors': 20,
+    'min_dist': 0.1,
+    'spread': 1.2,
+    'resolution': 1.0,
+    'seed': 123
+}
+## VIASH END
+
+# Seed Python's RNG; compare against None so that a seed of 0 is still applied.
+if par["seed"] is not None:
+    print(f">> Setting seed to {par['seed']}")
+    random.seed(par["seed"])
+
+print('>> Reading input files', flush=True)
+adata = ad.read_h5ad(par['input'])
+
+print('>> Perform Leiden clustering', flush=True)
+# The same seed is handed to every scanpy step below.
+sc.pp.neighbors(adata, n_neighbors=par['n_neighbors'], random_state=par['seed'])
+sc.tl.umap(adata, min_dist=par['min_dist'], spread=par['spread'], random_state=par['seed'])
+sc.tl.leiden(adata, resolution=par['resolution'], key_added=par["label"], random_state=par['seed'])
+
+print(">> Write output AnnData to file", flush=True)
+# Only the cluster-label column and the identifying metadata are written out.
+output = ad.AnnData(
+    obs=pd.DataFrame(adata.obs[par["label"]]),
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        #"method_id": adata.uns["method_id"], #TODO
+    },
+)
+
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/metrics/ari/config.vsh.yaml b/src/metrics/ari/config.vsh.yaml
index 8c1e3df..1b3d3b0 100644
--- a/src/metrics/ari/config.vsh.yaml
+++ b/src/metrics/ari/config.vsh.yaml
@@ -33,4 +33,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [midtime, midmem, midcpu]
\ No newline at end of file
+      label: [midtime, midmem, midcpu]
diff --git a/src/metrics/ari/script.py b/src/metrics/ari/script.py
index 6c5a848..22cbe06 100644
--- a/src/metrics/ari/script.py
+++ b/src/metrics/ari/script.py
@@ -67,4 +67,4 @@ def lookup_labels(label_element, transcripts_global):
         "metric_values": [float(ari_score)],
     }
 )
-output.write_h5ad(par["output"], compression="gzip")
\ No newline at end of file
+output.write_h5ad(par["output"], compression="gzip")