Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/control_methods/random_labels/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Base component API configuration
__merge__: ../../api/comp_control_method.yaml

# Component configuration
name: "random_labels"
label: Random Labels
summary: "Negative control by randomly generating labels."
description: "This method serves as a negative control, where random labels are generated for the data."
info:
  # The script draws labels with `random` and never looks at expression
  # values, so raw counts are sufficient.
  preferred_normalization: counts
  variants:
    random_features:

# Script configuration
resources:
  - type: python_script
    path: script.py

# Platform configuration
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      # Cheap component: no model fitting, only random label generation.
      label: [lowtime, lowmem, lowcpu]
39 changes: 39 additions & 0 deletions src/control_methods/random_labels/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

"""Negative-control method: assign uniformly random labels to each cell.

Reads the input AnnData, writes an output AnnData whose ``obs`` contains only
the randomly generated label column and whose ``uns`` carries the dataset,
normalization and method identifiers through unchanged.
"""

import random

import anndata as ad
import pandas as pd

## VIASH START
par = {
    "input": "resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad",
    "output": "resources_test/task_spatial_segmentation/mouse_brain_combined/reference_prediction.h5ad",
    "seed": 123,
    "label": "cell_type"
}
meta = {
    "name": "random_labels",
}
## VIASH END

# Explicit None check: a seed of 0 is falsy but is still a valid seed and
# must be honoured for reproducibility.
if par["seed"] is not None:
    print(f">> Setting seed to {par['seed']}", flush=True)
    random.seed(par["seed"])

print("Load input data", flush=True)
input = ad.read_h5ad(par["input"])

print("Create random labels", flush=True)
# Negative control: one random label in 1..10 per observation, ignoring the
# expression data entirely.
input.obs[par["label"]] = [random.randint(1, 10) for _ in range(input.n_obs)]

print("Create output AnnData", flush=True)
output = ad.AnnData(
    # Keep only the prediction column in obs; everything else is dropped.
    obs=pd.DataFrame(input.obs[par["label"]]),
    uns={
        "dataset_id": input.uns["dataset_id"],
        "normalization_id": input.uns["normalization_id"],
        "method_id": meta["name"],
    },
)

print("Write output to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
52 changes: 52 additions & 0 deletions src/data_processors/leiden/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
__merge__: ../../api/comp_data_processor.yaml

# NOTE(review): component lives in src/data_processors/leiden but is named
# "process_dataset" — confirm this is intentional and not a copy-paste leftover.
name: process_dataset

arguments:
  - name: "--label"
    type: "string"
    default: "cell_type"
    description: Label added to anndata for prediction.
  - name: "--n_neighbors"
    type: "integer"
    default: 20
    description: Number of neighbors to use for nearest neighbors distance matrix.
  - name: "--min_dist"
    type: "double"
    default: 0.1
    description: Effective minimum distance to use for UMAP.
  - name: "--spread"
    type: "double"
    default: 1.2
    description: The effective scale of embedded points to use for UMAP.
  - name: "--resolution"
    type: "double"
    default: 1.0
    description: The resolution to use for leiden clustering.
  - name: "--seed"
    type: "integer"
    default: 123
    description: Seed.

resources:
  - type: python_script
    path: script.py

engines:
  - type: docker
    #image: openproblems/base_pytorch_nvidia:1 # TODO: ideally get gpu image to work
    image: openproblems/base_python:1
    setup:
      # leidenalg is required by scanpy.tl.leiden; scikit-learn by
      # scanpy.pp.neighbors.
      - type: python
        packages: scikit-learn
      - type: python
        packages: leidenalg
    __merge__:
      - /src/base/setup_spatialdata_partial.yaml
  - type: native

runners:
  - type: executable
  - type: nextflow
    directives:
      label: [highmem, midcpu, midtime]
43 changes: 43 additions & 0 deletions src/data_processors/leiden/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

"""Leiden clustering baseline.

Builds a kNN graph, computes a UMAP embedding, runs Leiden community
detection, and writes an AnnData whose ``obs`` contains only the cluster
assignment column.
"""

import random

import anndata as ad
import pandas as pd
import scanpy as sc

## VIASH START
par = {
    'input': 'resources_test/task_spatial_segmentation/mouse_brain_combined/output_scrnaseq_reference.h5ad',
    'output': 'resources_test/task_spatial_segmentation/mouse_brain_combined/method_prediction.h5ad',
    'label': 'cell_type',
    'n_neighbors': 20,
    'min_dist': 0.1,
    'spread': 1.2,
    'resolution': 1.0,
    'seed': 123
}
meta = {
    'name': 'process_dataset',
}
## VIASH END

# Explicit None check: a seed of 0 is falsy but still a valid seed. The
# scanpy calls below additionally receive the seed via `random_state`.
if par["seed"] is not None:
    print(f">> Setting seed to {par['seed']}", flush=True)
    random.seed(par["seed"])

print('>> Reading input files', flush=True)
input = ad.read_h5ad(par['input'])

print('>> Perform Leiden clustering', flush=True)
# kNN graph -> UMAP embedding -> Leiden communities; the cluster assignment
# is stored in input.obs under par["label"].
sc.pp.neighbors(input, n_neighbors=par['n_neighbors'], random_state=par['seed'])
sc.tl.umap(input, min_dist=par['min_dist'], spread=par['spread'], random_state=par['seed'])
sc.tl.leiden(input, resolution=par['resolution'], key_added=par["label"], random_state=par['seed'])

print(">> Write output AnnData to file", flush=True)
output = ad.AnnData(
    # Keep only the prediction column in obs; everything else is dropped.
    obs=pd.DataFrame(input.obs[par["label"]]),
    uns={
        "dataset_id": input.uns["dataset_id"],
        "normalization_id": input.uns["normalization_id"],
        # Resolves the earlier TODO: record which component produced the
        # prediction, consistent with the control-method scripts.
        "method_id": meta["name"],
    },
)

output.write_h5ad(par['output'], compression='gzip')
2 changes: 1 addition & 1 deletion src/metrics/ari/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ runners:
- type: executable
- type: nextflow
directives:
label: [midtime, midmem, midcpu]
label: [midtime, midmem, midcpu]
2 changes: 1 addition & 1 deletion src/metrics/ari/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ def lookup_labels(label_element, transcripts_global):
"metric_values": [float(ari_score)],
}
)
output.write_h5ad(par["output"], compression="gzip")
output.write_h5ad(par["output"], compression="gzip")
Loading