Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/create_resources/test_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ viash run src/data_processors/process_dataset/config.vsh.yaml -- \
--dataset_id mouse_brain_combined \
--dataset_name "Test data mouse brain combined 2023 tenx Xenium replicate 1 2023 Yao scRNAseq" \
--dataset_url "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717" \
--dataset_reference "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;10.1038/s41586-023-06812-z" \
--dataset_reference "10.1038/s41586-023-06812-z" \
--dataset_summary "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1);A high-resolution scRNAseq atlas of cell types in the whole mouse brain" \
--dataset_description "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1). Replicate results demonstrate the high reproducibility of data generated by the platform. 10x Genomics obtained tissue from a C57BL/6 mouse from Charles River Laboratories. Three adjacent 10µm sections were placed on the same slide. Tissues were prepared following the demonstrated protocols Xenium In Situ for Fresh Frozen Tissues - Tissue Preparation Guide (CG000579) and Xenium In Situ for Fresh Frozen Tissues - Fixation & Permeabilization (CG000581).;See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset." \
--dataset_organism "mus_musculus"
Expand Down
23 changes: 9 additions & 14 deletions scripts/run_benchmark/run_test_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

# remove this when you have implemented the script
echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
echo " Step 3: replace the settings parameter to fit your run_benchmark outputs"
echo " Step 4: remove this message"
exit 1

set -e

echo "Running benchmark on test data"
Expand All @@ -23,14 +15,17 @@ echo " Make sure to run 'scripts/project/build_all_docker_containers.sh'!"
RUN_ID="testrun_$(date +%Y-%m-%d_%H-%M-%S)"
publish_dir="temp/results/${RUN_ID}"

nextflow run . \
NXF_VER=25.10.4 nextflow run . \
-main-script target/nextflow/workflows/run_benchmark/main.nf \
-profile docker \
-resume \
-c common/nextflow_helpers/labels_ci.config \
--id cxg_mouse_pancreas_atlas \
--input_train resources_test/task_spatial_segmentation/mouse_brain_combined/train.h5ad \
--input_test resources_test/task_spatial_segmentation/mouse_brain_combined/test.h5ad \
--input_solution resources_test/task_spatial_segmentation/mouse_brain_combined/solution.h5ad \
--id mouse_brain_combined \
--input_spatial_unlabelled resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_unlabelled.zarr \
--input_spatial_solution resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_solution.zarr \
--input_scrnaseq_reference resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad \
--output_state state.yaml \
--publish_dir "$publish_dir"
--publish_dir "$publish_dir" \
-with-trace "$publish_dir/trace.txt"

common/scripts/render_results_report local "$publish_dir" --output "$publish_dir/report/"
21 changes: 20 additions & 1 deletion src/data_processors/process_dataset/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
import spatialdata as sd
import scanpy as sc
import os
import shutil

## VIASH START
par = {
Expand Down Expand Up @@ -63,7 +65,7 @@ def sc_processing(adata):

print(">> Override dataset metadata in .uns", flush=True)
sc_data.uns["orig_dataset_id"] = sc_data.uns.get("dataset_id", None)
for key in ["dataset_id", "dataset_name", "dataset_url", "dataset_summary", "dataset_description", "dataset_reference", "dataset_organism"]:
for key in ["dataset_id", "dataset_name", "dataset_url", "dataset_summary", "dataset_description", "dataset_organism", "dataset_reference"]:
sc_data.uns[key] = par[key]

print(">> Writing scrnaseq reference", flush=True)
Expand Down Expand Up @@ -114,6 +116,12 @@ def sc_processing(adata):
)

print(">> Writing spatial unlabelled dataset", flush=True)
# remove any existing output path (directory tree or single file) before writing
if os.path.exists(par["output_spatial_unlabelled"]):
if os.path.isdir(par["output_spatial_unlabelled"]):
shutil.rmtree(par["output_spatial_unlabelled"])
else:
os.remove(par["output_spatial_unlabelled"])
output_spatial.write(par["output_spatial_unlabelled"], overwrite=True)

# ---------------------------------------------------------------
Expand All @@ -132,6 +140,12 @@ def sc_processing(adata):
var=var_df,
uns={
"dataset_id": par["dataset_id"],
"dataset_name": par["dataset_name"],
"dataset_url": par["dataset_url"],
"dataset_summary": par["dataset_summary"],
"dataset_description": par["dataset_description"],
"dataset_reference": par["dataset_reference"],
"dataset_organism": par["dataset_organism"],
"orig_dataset_id": sp_data.tables["table"].uns.get("dataset_id", None),
"spatialdata_attrs": ref_table.uns["spatialdata_attrs"],
},
Expand All @@ -151,4 +165,9 @@ def sc_processing(adata):
)

print(">> Writing spatial solution", flush=True)
if os.path.exists(par["output_spatial_solution"]):
if os.path.isdir(par["output_spatial_solution"]):
shutil.rmtree(par["output_spatial_solution"])
else:
os.remove(par["output_spatial_solution"])
output_solution.write(par["output_spatial_solution"], overwrite=True)
28 changes: 17 additions & 11 deletions src/data_processors/process_prediction/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,28 @@
table.layers["normalized_log_scaled"] = table.X.copy()

print(">> Computing highly variable genes", flush=True)
# Reset X to counts for HVG computation
table.X = table.layers["counts"].copy()
try:
sc.pp.highly_variable_genes(table, flavor="seurat_v3", layer="counts", n_top_genes=min(3000, table.n_vars))
except ValueError:
# seurat_v3 loess fitting can fail on small datasets; fall back to seurat flavor
sc.pp.normalize_total(table, target_sum=1e4)
sc.pp.log1p(table)
sc.pp.highly_variable_genes(table, flavor="seurat", n_top_genes=min(3000, table.n_vars))
table.var.rename(columns={"highly_variable": "hvg"}, inplace=True)
if table.n_vars == 0 or table.n_obs == 0:
# Empty table: no observations or no variables (e.g. the empty_labels negative control, where no cells were detected); mark all vars as non-HVG
table.var["hvg"] = False
else:
# Reset X to counts for HVG computation
table.X = table.layers["counts"].copy()
try:
sc.pp.highly_variable_genes(table, flavor="seurat_v3", layer="counts", n_top_genes=min(3000, table.n_vars))
except ValueError:
# seurat_v3 loess fitting can fail on small datasets; fall back to seurat flavor
sc.pp.normalize_total(table, target_sum=1e4)
sc.pp.log1p(table)
sc.pp.highly_variable_genes(table, flavor="seurat", n_top_genes=min(3000, table.n_vars))
table.var.rename(columns={"highly_variable": "hvg"}, inplace=True)

table.uns["dataset_id"] = dataset_id
table.uns["method_id"] = method_id
table.uns["spatialdata_attrs"] = {
"instance_key": "cell_id",
"region": ["segmentation"],
# Derive regions from actual obs to handle the empty-table case (e.g. empty_labels
# negative control) where no cells were detected and obs has 0 rows.
"region": list(table.obs["region"].unique()),
"region_key": "region",
}

Expand Down
9 changes: 6 additions & 3 deletions src/workflows/run_benchmark/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ workflow run_wf {

// extract the dataset metadata
| extract_uns_metadata.run(
fromState: [input: "input_spatial_unlabelled"],
fromState: [input: "input_scrnaseq_reference"],
toState: { id, output, state ->
state + [
dataset_uns: readYaml(output.output).uns
Expand Down Expand Up @@ -84,7 +84,10 @@ workflow run_wf {
)

| process_prediction.run(
fromState: [input: "method_output"],
fromState: [
input_prediction: "method_output",
input_spatial_unlabelled: "input_spatial_unlabelled"
],
toState: { id, output, state ->
state + [
input_prediction: output.output
Expand All @@ -100,7 +103,7 @@ workflow run_wf {
},
// use 'fromState' to fetch the arguments the component requires from the overall state
fromState: [
input_solution: "input_solution",
input_solution: "input_spatial_solution",
input_prediction: "input_prediction"
],
// use 'toState' to publish that component's outputs to the overall state
Expand Down
Loading