Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/control_methods/random_labels/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Base component API configuration
__merge__: ../../api/comp_control_method.yaml

# Component configuration
name: "random_labels"
label: Random Labels
summary: "Negative control by randomly generating labels."
description: "This method serves as a negative control, where random labels are generated for the data."
info:
  # The script draws labels with `random` and never looks at expression
  # values, so raw counts are sufficient.
  preferred_normalization: counts
  variants:
    random_features:

# Script configuration
resources:
  - type: python_script
    path: script.py

# Platform configuration
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      # Cheap component: no model fitting, only random label generation.
      label: [lowtime, lowmem, lowcpu]
39 changes: 39 additions & 0 deletions src/control_methods/random_labels/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

"""Negative-control method: assign uniformly random labels to each cell.

Reads the input AnnData, writes an output AnnData whose ``obs`` contains only
the randomly generated label column and whose ``uns`` carries the dataset,
normalization and method identifiers through unchanged.
"""

import random

import anndata as ad
import pandas as pd

## VIASH START
par = {
    "input": "resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad",
    "output": "resources_test/task_spatial_segmentation/mouse_brain_combined/reference_prediction.h5ad",
    "seed": 123,
    "label": "cell_type"
}
meta = {
    "name": "random_labels",
}
## VIASH END

# Explicit None check: a seed of 0 is falsy but is still a valid seed and
# must be honoured for reproducibility.
if par["seed"] is not None:
    print(f">> Setting seed to {par['seed']}", flush=True)
    random.seed(par["seed"])

print("Load input data", flush=True)
input = ad.read_h5ad(par["input"])

print("Create random labels", flush=True)
# Negative control: one random label in 1..10 per observation, ignoring the
# expression data entirely.
input.obs[par["label"]] = [random.randint(1, 10) for _ in range(input.n_obs)]

print("Create output AnnData", flush=True)
output = ad.AnnData(
    # Keep only the prediction column in obs; everything else is dropped.
    obs=pd.DataFrame(input.obs[par["label"]]),
    uns={
        "dataset_id": input.uns["dataset_id"],
        "normalization_id": input.uns["normalization_id"],
        "method_id": meta["name"],
    },
)

print("Write output to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
52 changes: 52 additions & 0 deletions src/data_processors/leiden/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
__merge__: ../../api/comp_data_processor.yaml

# NOTE(review): component lives in src/data_processors/leiden but is named
# "process_dataset" — confirm this is intentional and not a copy-paste leftover.
name: process_dataset

arguments:
  - name: "--label"
    type: "string"
    default: "cell_type"
    description: Label added to anndata for prediction.
  - name: "--n_neighbors"
    type: "integer"
    default: 20
    description: Number of neighbors to use for nearest neighbors distance matrix.
  - name: "--min_dist"
    type: "double"
    default: 0.1
    description: Effective minimum distance to use for UMAP.
  - name: "--spread"
    type: "double"
    default: 1.2
    description: The effective scale of embedded points to use for UMAP.
  - name: "--resolution"
    type: "double"
    default: 1.0
    description: The resolution to use for leiden clustering.
  - name: "--seed"
    type: "integer"
    default: 123
    description: Seed.

resources:
  - type: python_script
    path: script.py

engines:
  - type: docker
    #image: openproblems/base_pytorch_nvidia:1 # TODO: ideally get gpu image to work
    image: openproblems/base_python:1
    setup:
      # leidenalg is required by scanpy.tl.leiden; scikit-learn by
      # scanpy.pp.neighbors.
      - type: python
        packages: scikit-learn
      - type: python
        packages: leidenalg
    __merge__:
      - /src/base/setup_spatialdata_partial.yaml
  - type: native

runners:
  - type: executable
  - type: nextflow
    directives:
      label: [highmem, midcpu, midtime]
43 changes: 43 additions & 0 deletions src/data_processors/leiden/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

"""Leiden clustering baseline.

Builds a kNN graph, computes a UMAP embedding, runs Leiden community
detection, and writes an AnnData whose ``obs`` contains only the cluster
assignment column.
"""

import random

import anndata as ad
import pandas as pd
import scanpy as sc

## VIASH START
par = {
    'input': 'resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad',
    'output': 'resources_test/task_spatial_segmentation/mouse_brain_combined/method_prediction.h5ad',
    'label': 'cell_type',
    'n_neighbors': 20,
    'min_dist': 0.1,
    'spread': 1.2,
    'resolution': 1.0,
    'seed': 123
}
meta = {
    'name': 'process_dataset',
}
## VIASH END

# Explicit None check: a seed of 0 is falsy but still a valid seed. The
# scanpy calls below additionally receive the seed via `random_state`.
if par["seed"] is not None:
    print(f">> Setting seed to {par['seed']}", flush=True)
    random.seed(par["seed"])

print('>> Reading input files', flush=True)
input = ad.read_h5ad(par['input'])

print('>> Perform Leiden clustering', flush=True)
# kNN graph -> UMAP embedding -> Leiden communities; the cluster assignment
# is stored in input.obs under par["label"].
sc.pp.neighbors(input, n_neighbors=par['n_neighbors'], random_state=par['seed'])
sc.tl.umap(input, min_dist=par['min_dist'], spread=par['spread'], random_state=par['seed'])
sc.tl.leiden(input, resolution=par['resolution'], key_added=par["label"], random_state=par['seed'])

print(">> Write output AnnData to file", flush=True)
output = ad.AnnData(
    # Keep only the prediction column in obs; everything else is dropped.
    obs=pd.DataFrame(input.obs[par["label"]]),
    uns={
        "dataset_id": input.uns["dataset_id"],
        "normalization_id": input.uns["normalization_id"],
        # Resolves the earlier TODO: record which component produced the
        # prediction, consistent with the control-method scripts.
        "method_id": meta["name"],
    },
)

output.write_h5ad(par['output'], compression='gzip')
2 changes: 1 addition & 1 deletion src/metrics/ari/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ runners:
- type: executable
- type: nextflow
directives:
label: [midtime, midmem, midcpu]
label: [midtime, midmem, midcpu]
2 changes: 1 addition & 1 deletion src/metrics/ari/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ def lookup_labels(label_element, transcripts_global):
"metric_values": [float(ari_score)],
}
)
output.write_h5ad(par["output"], compression="gzip")
output.write_h5ad(par["output"], compression="gzip")
Loading