diff --git a/scripts/create_resources/test_resources.sh b/scripts/create_resources/test_resources.sh
index 59fc1c2..30694b5 100755
--- a/scripts/create_resources/test_resources.sh
+++ b/scripts/create_resources/test_resources.sh
@@ -30,7 +30,7 @@ viash run src/data_processors/process_dataset/config.vsh.yaml -- \
   --dataset_id mouse_brain_combined \
   --dataset_name "Test data mouse brain combined 2023 tenx Xenium replicate 1 2023 Yao scRNAseq" \
   --dataset_url "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717" \
-  --dataset_reference "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;10.1038/s41586-023-06812-z" \
+  --dataset_reference "10.1038/s41586-023-06812-z" \
   --dataset_summary "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1);A high-resolution scRNAseq atlas of cell types in the whole mouse brain" \
   --dataset_description "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1). Replicate results demonstrate the high reproducibility of data generated by the platform. 10x Genomics obtained tissue from a C57BL/6 mouse from Charles River Laboratories. Three adjacent 10µm sections were placed on the same slide. Tissues were prepared following the demonstrated protocols Xenium In Situ for Fresh Frozen Tissues - Tissue Preparation Guide (CG000579) and Xenium In Situ for Fresh Frozen Tissues - Fixation & Permeabilization (CG000581).;See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset." \
   --dataset_organism "mus_musculus"
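Aside on the convention above (illustration only, not part of the patch): for this combined dataset, `--dataset_url`, `--dataset_summary`, and `--dataset_description` each hold one entry per source dataset, joined with `;`. A downstream consumer could recover the per-source entries roughly as sketched here; `split_combined_field` is a hypothetical name, not a function in this repo:

```python
# Hypothetical sketch: recover per-source entries from the ";"-joined
# metadata fields of the combined dataset. Assumes no entry contains a
# literal semicolon, which holds for the values above.
def split_combined_field(value: str) -> list[str]:
    return [part.strip() for part in value.split(";")]

urls = split_combined_field(
    "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;"
    "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717"
)
assert len(urls) == 2  # one entry per source dataset
```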
diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
index 54d8e3d..adc34bb 100755
--- a/scripts/run_benchmark/run_test_local.sh
+++ b/scripts/run_benchmark/run_test_local.sh
@@ -6,14 +6,6 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
 # ensure that the command below is run from the root of the repository
 cd "$REPO_ROOT"
 
-# remove this when you have implemented the script
-echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
-echo "  Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
-echo "  Step 3: replace the settings parameter to fit your run_benchmark outputs"
-echo "  Step 4: remove this message"
-exit 1
-
 set -e
 
 echo "Running benchmark on test data"
@@ -23,14 +15,17 @@ echo "  Make sure to run 'scripts/project/build_all_docker_containers.sh'!"
 RUN_ID="testrun_$(date +%Y-%m-%d_%H-%M-%S)"
 publish_dir="temp/results/${RUN_ID}"
 
-nextflow run . \
+NXF_VER=25.10.4 nextflow run . \
   -main-script target/nextflow/workflows/run_benchmark/main.nf \
   -profile docker \
   -resume \
   -c common/nextflow_helpers/labels_ci.config \
-  --id cxg_mouse_pancreas_atlas \
-  --input_train resources_test/task_spatial_segmentation/mouse_brain_combined/train.h5ad \
-  --input_test resources_test/task_spatial_segmentation/mouse_brain_combined/test.h5ad \
-  --input_solution resources_test/task_spatial_segmentation/mouse_brain_combined/solution.h5ad \
+  --id mouse_brain_combined \
+  --input_spatial_unlabelled resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_unlabelled.zarr \
+  --input_spatial_solution resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_solution.zarr \
+  --input_scrnaseq_reference resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad \
   --output_state state.yaml \
-  --publish_dir "$publish_dir"
+  --publish_dir "$publish_dir" \
+  -with-trace "$publish_dir/trace.txt"
+
+common/scripts/render_results_report local "$publish_dir" --output "$publish_dir/report/"
\ No newline at end of file
diff --git a/src/data_processors/process_dataset/script.py b/src/data_processors/process_dataset/script.py
index ad85433..e1174a2 100644
--- a/src/data_processors/process_dataset/script.py
+++ b/src/data_processors/process_dataset/script.py
@@ -2,6 +2,8 @@
 import pandas as pd
 import spatialdata as sd
 import scanpy as sc
+import os
+import shutil
 
 ## VIASH START
 par = {
@@ -63,7 +65,7 @@ def sc_processing(adata):
 
 print(">> Override dataset metadata in .uns", flush=True)
 sc_data.uns["orig_dataset_id"] = sc_data.uns.get("dataset_id", None)
-for key in ["dataset_id", "dataset_name", "dataset_url", "dataset_summary", "dataset_description", "dataset_reference", "dataset_organism"]:
+for key in ["dataset_id", "dataset_name", "dataset_url", "dataset_summary", "dataset_description", "dataset_organism", "dataset_reference"]:
     sc_data.uns[key] = par[key]
 
 print(">> Writing scrnaseq reference", flush=True)
@@ -114,6 +116,12 @@ def sc_processing(adata):
 )
 
 print(">> Writing spatial unlabelled dataset", flush=True)
+# remove if output exists
+if os.path.exists(par["output_spatial_unlabelled"]):
+    if os.path.isdir(par["output_spatial_unlabelled"]):
+        shutil.rmtree(par["output_spatial_unlabelled"])
+    else:
+        os.remove(par["output_spatial_unlabelled"])
 output_spatial.write(par["output_spatial_unlabelled"], overwrite=True)
 
 # ---------------------------------------------------------------
@@ -132,6 +140,12 @@ def sc_processing(adata):
     var=var_df,
     uns={
         "dataset_id": par["dataset_id"],
+        "dataset_name": par["dataset_name"],
+        "dataset_url": par["dataset_url"],
+        "dataset_summary": par["dataset_summary"],
+        "dataset_description": par["dataset_description"],
+        "dataset_reference": par["dataset_reference"],
+        "dataset_organism": par["dataset_organism"],
         "orig_dataset_id": sp_data.tables["table"].uns.get("dataset_id", None),
         "spatialdata_attrs": ref_table.uns["spatialdata_attrs"],
     },
@@ -151,4 +165,9 @@ def sc_processing(adata):
 )
 
 print(">> Writing spatial solution", flush=True)
+if os.path.exists(par["output_spatial_solution"]):
+    if os.path.isdir(par["output_spatial_solution"]):
+        shutil.rmtree(par["output_spatial_solution"])
+    else:
+        os.remove(par["output_spatial_solution"])
 output_solution.write(par["output_spatial_solution"], overwrite=True)
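The remove-if-exists block above now appears twice in `process_dataset/script.py`, once per zarr output. A possible consolidation, sketched under the assumption that both call sites should behave identically; `remove_path` is a hypothetical helper, not part of the patch:

```python
import os
import shutil

# Hypothetical helper consolidating the duplicated cleanup logic:
# delete a stale output (zarr store directory or plain file) before
# the script rewrites it.
def remove_path(path: str) -> None:
    if os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.remove(path)

# In the script this would replace both if/else blocks:
#   remove_path(par["output_spatial_unlabelled"])
#   remove_path(par["output_spatial_solution"])
```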
diff --git a/src/data_processors/process_prediction/script.py b/src/data_processors/process_prediction/script.py
index 17c3728..5d1ae2a 100644
--- a/src/data_processors/process_prediction/script.py
+++ b/src/data_processors/process_prediction/script.py
@@ -83,22 +83,28 @@
 table.layers["normalized_log_scaled"] = table.X.copy()
 
 print(">> Computing highly variable genes", flush=True)
-# Reset X to counts for HVG computation
-table.X = table.layers["counts"].copy()
-try:
-    sc.pp.highly_variable_genes(table, flavor="seurat_v3", layer="counts", n_top_genes=min(3000, table.n_vars))
-except ValueError:
-    # seurat_v3 loess fitting can fail on small datasets; fall back to seurat flavor
-    sc.pp.normalize_total(table, target_sum=1e4)
-    sc.pp.log1p(table)
-    sc.pp.highly_variable_genes(table, flavor="seurat", n_top_genes=min(3000, table.n_vars))
-table.var.rename(columns={"highly_variable": "hvg"}, inplace=True)
+if table.n_vars == 0 or table.n_obs == 0:
+    # No cells detected (e.g. empty_labels negative control); mark all vars as non-HVG
+    table.var["hvg"] = False
+else:
+    # Reset X to counts for HVG computation
+    table.X = table.layers["counts"].copy()
+    try:
+        sc.pp.highly_variable_genes(table, flavor="seurat_v3", layer="counts", n_top_genes=min(3000, table.n_vars))
+    except ValueError:
+        # seurat_v3 loess fitting can fail on small datasets; fall back to seurat flavor
+        sc.pp.normalize_total(table, target_sum=1e4)
+        sc.pp.log1p(table)
+        sc.pp.highly_variable_genes(table, flavor="seurat", n_top_genes=min(3000, table.n_vars))
+    table.var.rename(columns={"highly_variable": "hvg"}, inplace=True)
 
 table.uns["dataset_id"] = dataset_id
 table.uns["method_id"] = method_id
 table.uns["spatialdata_attrs"] = {
     "instance_key": "cell_id",
-    "region": ["segmentation"],
+    # Derive regions from actual obs to handle the empty-table case (e.g. empty_labels
+    # negative control) where no cells were detected and obs has 0 rows.
+    "region": list(table.obs["region"].unique()),
     "region_key": "region",
 }
 
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index d39386c..a61478f 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -35,7 +35,7 @@ workflow run_wf {
 
     // extract the dataset metadata
     | extract_uns_metadata.run(
-      fromState: [input: "input_spatial_unlabelled"],
+      fromState: [input: "input_scrnaseq_reference"],
       toState: { id, output, state ->
         state + [
           dataset_uns: readYaml(output.output).uns
@@ -84,7 +84,10 @@ workflow run_wf {
 
 
     | process_prediction.run(
-      fromState: [input: "method_output"],
+      fromState: [
+        input_prediction: "method_output",
+        input_spatial_unlabelled: "input_spatial_unlabelled"
+      ],
       toState: { id, output, state ->
         state + [
           input_prediction: output.output
@@ -100,7 +103,7 @@ workflow run_wf {
       },
      // use 'fromState' to fetch the arguments the component requires from the overall state
       fromState: [
-        input_solution: "input_solution",
+        input_solution: "input_spatial_solution",
         input_prediction: "input_prediction"
      ],
      // use 'toState' to publish that component's outputs to the overall state
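On the `process_prediction` change above: deriving `region` from the table's own `obs` keeps `spatialdata_attrs` consistent even when a method detects no cells. A minimal sketch of that edge case in plain `anndata`, mirroring the expression used in the patch:

```python
import anndata as ad
import pandas as pd

# With zero observations, obs["region"].unique() is empty, so the patched
# code records no regions instead of the hard-coded ["segmentation"].
table = ad.AnnData(obs=pd.DataFrame({"region": pd.Series(dtype=str)}))
regions = list(table.obs["region"].unique())
assert regions == []
```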