Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 184 additions & 58 deletions README.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions scripts/create_resources/resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ exit 1

cat > /tmp/params.yaml << 'HERE'
input_states: s3://openproblems-data/resources/datasets/**/state.yaml
rename_keys: 'input:output_dataset'
rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
output_state: '$id/state.yaml'
settings: '{"output_spatial_dataset": "$id/output_spatial_dataset.zarr", "output_scrnaseq": "$id/output_scrnaseq.h5ad"}'
settings: '{"output_spatial_unlabelled": "$id/output_spatial_unlabelled.zarr", "output_spatial_solution": "$id/output_spatial_solution.zarr", "output_scrnaseq": "$id/output_scrnaseq.h5ad"}'
publish_dir: s3://openproblems-data/resources/task_template/datasets/
HERE

Expand Down
28 changes: 18 additions & 10 deletions scripts/create_resources/test_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ mkdir -p $DATASET_DIR
viash run src/data_processors/process_dataset/config.vsh.yaml -- \
--input_sp $RAW_DATA/2023_10x_mouse_brain_xenium_rep1/dataset.zarr \
--input_sc $RAW_DATA/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad \
--output_spatial_dataset $DATASET_DIR/spatial_dataset.zarr \
--output_spatial_unlabelled $DATASET_DIR/spatial_unlabelled.zarr \
--output_spatial_solution $DATASET_DIR/spatial_solution.zarr \
--output_scrnaseq_reference $DATASET_DIR/scrnaseq_reference.h5ad \
--dataset_id mouse_brain_combined \
--dataset_name "Test data mouse brain combined 2023 tenx Xenium replicate 1 2023 Yao scRNAseq" \
Expand All @@ -36,22 +37,29 @@ viash run src/data_processors/process_dataset/config.vsh.yaml -- \

# run one method
viash run src/methods/cellpose/config.vsh.yaml -- \
--input $DATASET_DIR/spatial_dataset.zarr \
--output $DATASET_DIR/prediction.h5ad
--input $DATASET_DIR/spatial_unlabelled.zarr \
--output $DATASET_DIR/prediction.zarr

# run prediction processor
viash run src/data_processors/process_prediction/config.vsh.yaml -- \
--input_prediction $DATASET_DIR/prediction.zarr \
--input_spatial_unlabelled $DATASET_DIR/spatial_unlabelled.zarr \
--output $DATASET_DIR/processed_prediction.zarr

# run one metric
# TODO: implement this!
# viash run src/metrics/ari/config.vsh.yaml -- \
# --input_prediction $DATASET_DIR/prediction.h5ad \
# --input_scrnaseq_reference $DATASET_DIR/scrnaseq_reference.h5ad \
# --output $DATASET_DIR/score.h5ad
viash run src/metrics/ari/config.vsh.yaml -- \
--input_prediction $DATASET_DIR/processed_prediction.zarr \
--input_solution $DATASET_DIR/spatial_solution.zarr \
--output $DATASET_DIR/score.h5ad

# Write a manual state.yaml. This is not strictly necessary, but it may be useful.
cat > $DATASET_DIR/state.yaml << HERE
id: $DATASET_ID
spatial_dataset: spatial_dataset.zarr
spatial_unlabelled: spatial_unlabelled.zarr
spatial_solution: spatial_solution.zarr
scrnaseq_reference: scrnaseq_reference.h5ad
prediction: prediction.h5ad
prediction: prediction.zarr
processed_prediction: processed_prediction.zarr
score: score.h5ad
HERE

Expand Down
2 changes: 1 addition & 1 deletion scripts/run_benchmark/run_full_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ publish_dir="resources/results/${RUN_ID}"
# write the parameters to file
cat > /tmp/params.yaml << HERE
input_states: resources/datasets/**/state.yaml
rename_keys: 'input_spatial_dataset:output_spatial_dataset,input_scrnaseq_reference:output_scrnaseq_reference'
rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_benchmark/run_full_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}"
# write the parameters to file
cat > /tmp/params.yaml << HERE
input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml
rename_keys: 'input_spatial_dataset:output_spatial_dataset,input_scrnaseq_reference:output_scrnaseq_reference'
rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE
Expand Down
6 changes: 3 additions & 3 deletions src/api/comp_control_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ info:
in the task.
arguments:
- name: --input
__merge__: file_spatial_dataset.yaml
__merge__: file_spatial_unlabelled.yaml
required: true
direction: input
- name: "--input_scrnaseq_reference"
__merge__: file_scrnaseq_reference.yaml
- name: "--input_solution"
__merge__: file_spatial_solution.yaml
direction: input
required: true
- name: --output
Expand Down
9 changes: 6 additions & 3 deletions src/api/comp_data_processor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ argument_groups:
direction: input
- name: Outputs
arguments:
- name: "--output_spatial_dataset"
__merge__: file_spatial_dataset.yaml
- name: "--output_spatial_unlabelled"
__merge__: file_spatial_unlabelled.yaml
direction: output
required: true
- name: "--output_spatial_solution"
__merge__: file_spatial_solution.yaml
direction: output
required: true
- name: "--output_scrnaseq_reference"
Expand Down Expand Up @@ -80,4 +84,3 @@ test_resources:
dest: resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2
- type: python_script
path: /common/component_tests/run_and_check_output.py

2 changes: 1 addition & 1 deletion src/api/comp_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ info:
A method to predict the task effects.
arguments:
- name: --input
__merge__: file_spatial_dataset.yaml
__merge__: file_spatial_unlabelled.yaml
required: true
direction: input
- name: --output
Expand Down
6 changes: 3 additions & 3 deletions src/api/comp_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ info:
A metric for evaluating method predictions.
arguments:
- name: "--input_prediction"
__merge__: file_prediction.yaml
__merge__: file_processed_prediction.yaml
direction: input
required: true
- name: "--input_scrnaseq_reference"
__merge__: file_scrnaseq_reference.yaml
- name: "--input_solution"
__merge__: file_spatial_solution.yaml
direction: input
required: true
- name: "--output"
Expand Down
30 changes: 30 additions & 0 deletions src/api/comp_output_processor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
namespace: "data_processors"
info:
type: data_processor
type_info:
label: Output processor
summary: An output processor for the prediction.
description: |
A component for converting a prediction dataset into a processed prediction dataset that can be evaluated by the metrics.
argument_groups:
- name: Inputs
arguments:
- name: "--input_prediction"
__merge__: file_prediction.yaml
required: true
direction: input
- name: "--input_spatial_unlabelled"
__merge__: file_spatial_unlabelled.yaml
required: true
direction: input
- name: Outputs
arguments:
- name: "--output"
__merge__: file_processed_prediction.yaml
direction: output
required: true
test_resources:
- type: python_script
path: /common/component_tests/run_and_check_output.py
- path: /resources_test/task_spatial_segmentation/mouse_brain_combined
dest: resources_test/task_spatial_segmentation/mouse_brain_combined
11 changes: 1 addition & 10 deletions src/api/file_prediction.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#TODO: Change to the required and/or optional fields of the anndata
type: file
example: "resources_test/task_spatial_segmentation/mouse_brain_combined/prediction.h5ad"
example: "resources_test/task_spatial_segmentation/mouse_brain_combined/prediction.zarr"
label: "Predicted data"
summary: A predicted dataset as output by a method.
info:
Expand All @@ -16,15 +16,6 @@ info:
name: table
description: AnnData table
required: true
obs:
- type: string
name: cell_id
description: Cell ID
required: true
- type: string
name: region
description: Region
required: true
uns:
- type: string
name: dataset_id
Expand Down
67 changes: 67 additions & 0 deletions src/api/file_processed_prediction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
type: file
example: "resources_test/task_spatial_segmentation/mouse_brain_combined/processed_prediction.zarr"
label: "Processed prediction"
summary: A processed predicted dataset, ready to be used as input for the evaluation.
info:
format:
type: spatialdata_zarr
labels:
- type: object
name: "segmentation"
description: Segmentation of the data
required: true
tables:
- type: anndata
name: table
description: AnnData table
required: true
# TODO: what is it that this component adds to the anndata?
layers:
- type: integer
name: counts
description: Raw counts
required: true
- type: double
name: normalized
description: Normalized expression values
required: true
- type: double
name: normalized_log
description: Log1p normalized expression values
required: true
- type: double
name: normalized_log_scaled
description: Log1p normalized expression values scaled to unit variance and zero mean
required: true
obs:
- type: string
name: cell_id
description: Cell ID
required: true
- type: string
name: region
description: Region
required: true
# .... cell info ... ?
var:
- type: string
name: feature_id
description: Unique identifier for the feature, usually an ENSEMBL gene ID.
required: false
- type: string
name: feature_name
description: A human-readable name for the feature, usually a gene symbol.
required: true
- type: boolean
name: hvg
description: Whether or not the feature is considered to be a 'highly variable gene'
required: true
uns:
- type: string
name: dataset_id
description: "A unique identifier for the dataset"
required: true
- type: string
name: method_id
description: "A unique identifier for the method"
required: true
2 changes: 0 additions & 2 deletions src/api/file_scrnaseq_reference.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
type: file
example: "resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad"
# TODO: revert to the original example once file exists
# example: "resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_dataset.h5ad"
label: "scRNA-seq Reference"
summary: A single-cell reference dataset, preprocessed for this benchmark.
description: |
Expand Down
108 changes: 108 additions & 0 deletions src/api/file_spatial_solution.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
type: file
example: "resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_solution.zarr"
label: "Solution"
summary: Ground truth segmentation labels and cell assignments for method evaluation.
description: |
This dataset contains the ground truth cell and nucleus segmentation labels,
cell boundaries, and a reference table matching each cell to its label region.
info:
format:
type: spatialdata_zarr
points:
- type: dataframe
name: transcripts
description: Point cloud data of transcripts with ground truth cell assignments
required: true
columns:
- type: float
name: "x"
required: true
description: x-coordinate of the point
- type: float
name: "y"
required: true
description: y-coordinate of the point
- type: float
name: "z"
required: false
description: z-coordinate of the point
- type: categorical
name: feature_name
required: true
description: Name of the feature
- type: integer
name: cell_id
required: true
description: Ground truth cell assignment (0 = background)
- type: long
name: transcript_id
required: true
description: Unique identifier of the transcript
labels:
- type: object
name: "cell_labels"
description: Ground truth cell segmentation labels
required: true
- type: object
name: "nucleus_labels"
description: Ground truth nucleus segmentation labels
required: false
shapes:
- type: dataframe
name: "cell_boundaries"
description: Ground truth cell boundary shapes
required: false
columns:
- type: object
name: "geometry"
required: true
description: Geometry of the cell boundary
- type: dataframe
name: "nucleus_boundaries"
description: Ground truth nucleus boundary shapes
required: false
columns:
- type: object
name: "geometry"
required: true
description: Geometry of the nucleus boundary
tables:
- type: anndata
name: "table"
description: Reference cell metadata table
required: true
obs:
- type: integer
name: cell_id
description: Unique cell identifier, matching instance IDs in the label images
required: true
- type: string
name: region
description: Name of the label image this cell belongs to (e.g. 'cell_labels')
required: true
- type: double
name: cell_area
description: Area of the cell in pixels
required: false
- type: integer
name: transcript_counts
description: Total number of transcripts assigned to this cell
required: false
uns:
- type: string
name: dataset_id
description: A unique identifier for the dataset
required: true
- type: string
name: orig_dataset_id
required: true
description: The identifier of the original dataset from which this dataset was derived (if applicable)
var:
- type: string
name: feature_id
required: false
description: Unique identifier for the feature, usually an ENSEMBL gene ID.
- type: string
name: feature_name
required: true
description: A human-readable name for the feature, usually a gene symbol.
Loading
Loading