From 70377e2274fa890801a2a504284a0e6b1b487162 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 20 Apr 2026 13:47:27 +0200 Subject: [PATCH 1/6] migrate doc rendering and component testing functions --- packages/python/openproblems/pyproject.toml | 3 +- .../src/openproblems/project/__init__.py | 10 +- .../project/component_tests/__init__.py | 41 +++ .../project/component_tests/check_config.py | 166 +++++++++++ .../component_tests/run_and_check_output.py | 282 ++++++++++++++++++ .../src/openproblems/project/docs/__init__.py | 17 ++ .../openproblems/project/docs/_markdown.py | 28 ++ .../project/docs/read_component_spec.py | 75 +++++ .../project/docs/read_file_format.py | 167 +++++++++++ .../project/docs/read_task_config.py | 11 + .../project/docs/read_task_metadata.py | 145 +++++++++ .../project/docs/render_component_spec.py | 64 ++++ .../project/docs/render_file_format.py | 211 +++++++++++++ .../project/docs/render_task_readme_qmd.py | 210 +++++++++++++ .../tests/data/example_project/_viash.yaml | 88 ++++++ .../api/comp_control_method.yaml | 37 +++ .../api/comp_data_processor.yaml | 31 ++ .../data/example_project/api/comp_method.yaml | 28 ++ .../data/example_project/api/comp_metric.yaml | 28 ++ .../api/file_common_dataset.yaml | 72 +++++ .../example_project/api/file_prediction.yaml | 26 ++ .../data/example_project/api/file_score.yaml | 30 ++ .../example_project/api/file_solution.yaml | 73 +++++ .../data/example_project/api/file_test.yaml | 45 +++ .../data/example_project/api/file_train.yaml | 49 +++ .../tests/test_docs_render_task_readme_qmd.py | 78 +++++ schemas/api_file_format.yaml | 4 + schemas/schema_openproblems_definitions.yaml | 80 +++++ 28 files changed, 2097 insertions(+), 2 deletions(-) create mode 100644 packages/python/openproblems/src/openproblems/project/component_tests/__init__.py create mode 100644 packages/python/openproblems/src/openproblems/project/component_tests/check_config.py create mode 100644 packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/__init__.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/_markdown.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_file_format.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_task_config.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/render_file_format.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py create mode 100644 packages/python/openproblems/tests/data/example_project/_viash.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_method.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml create mode 100644 
packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_score.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_solution.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_test.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_train.yaml create mode 100644 packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py diff --git a/packages/python/openproblems/pyproject.toml b/packages/python/openproblems/pyproject.toml index 7bffaa8..e381e1a 100644 --- a/packages/python/openproblems/pyproject.toml +++ b/packages/python/openproblems/pyproject.toml @@ -13,7 +13,8 @@ license = { text = "MIT" } readme = "README.md" requires-python = ">= 3.9" dependencies = [ - 'PyYAML' + 'PyYAML', + 'networkx', ] [project.optional-dependencies] diff --git a/packages/python/openproblems/src/openproblems/project/__init__.py b/packages/python/openproblems/src/openproblems/project/__init__.py index 04f8a64..10832fb 100644 --- a/packages/python/openproblems/src/openproblems/project/__init__.py +++ b/packages/python/openproblems/src/openproblems/project/__init__.py @@ -1,9 +1,17 @@ from .find_project_root import find_project_root from .read_viash_config import read_viash_config from .read_nested_yaml import read_nested_yaml +from .component_tests.check_config import run_check_config as check_config +from .component_tests.run_and_check_output import run_and_check_output +from .docs.read_task_metadata import read_task_metadata +from .docs.render_task_readme_qmd import render_task_readme_qmd __all__ = [ "find_project_root", "read_viash_config", "read_nested_yaml", -] \ No newline at end of file + "check_config", + "run_and_check_output", + "read_task_metadata", + "render_task_readme_qmd", +] diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/__init__.py b/packages/python/openproblems/src/openproblems/project/component_tests/__init__.py new file mode 100644 index 0000000..b72ae5e --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/component_tests/__init__.py @@ -0,0 +1,41 @@ +from .check_config import ( + check_info, + check_links, + check_references, + check_url, + run_check_config, +) +from .run_and_check_output import ( + check_anndata, + check_dataframe, + check_dictionary, + check_format, + check_input_files, + check_output_files, + check_spatialdata, + generate_cmd_args, + get_argument_sets, + run_and_check_output, + run_component, +) + +__all__ = [ + # check_config + "check_info", + "check_links", + "check_references", + "check_url", + "run_check_config", + # run_and_check_output + "check_anndata", + "check_dataframe", + "check_dictionary", + "check_format", + "check_input_files", + "check_output_files", + "check_spatialdata", + "generate_cmd_args", + "get_argument_sets", + "run_and_check_output", + "run_component", +] diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py new file mode 100644 index 0000000..dd45b49 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +import re +from typing import Dict, List, Union + +## CONSTANTS +NAME_MAXLEN = 50 +LABEL_MAXLEN = 50 +SUMMARY_MAXLEN = 400 
+DESCRIPTION_MAXLEN = 5000 + +TIME_LABELS = ["lowtime", "midtime", "hightime", "veryhightime"] +MEM_LABELS = ["lowmem", "midmem", "highmem", "veryhighmem"] +CPU_LABELS = ["lowcpu", "midcpu", "highcpu", "veryhighcpu"] + + +def check_url(url: str) -> bool: + import requests + from urllib3.util.retry import Retry + from requests.adapters import HTTPAdapter + + session = requests.Session() + retry = Retry(connect=3, backoff_factor=0.5) + adapter = HTTPAdapter(max_retries=retry) + session.mount("http://", adapter) + session.mount("https://", adapter) + + get = session.head(url) + + if get.ok or get.status_code == 429: # 429 rejected, too many requests + return True + else: + return False + + +def check_references(references: Dict[str, Union[str, List[str]]]) -> None: + doi = references.get("doi") + bibtex = references.get("bibtex") + + assert doi or bibtex, "One of .references.doi or .references.bibtex should be defined" + + if doi: + if not isinstance(doi, list): + doi = [doi] + for d in doi: + assert re.match(r"^10.\d{4,9}/[-._;()/:A-Za-z0-9]+$", d), f"Invalid DOI format: {doi}" + assert check_url(f"https://doi.org/{d}"), f"DOI '{d}' is not reachable" + + if bibtex: + if not isinstance(bibtex, list): + bibtex = [bibtex] + for b in bibtex: + assert re.match(r"^@.*{.*", b), f"Invalid bibtex format: {b}" + + +def check_links(links: Dict[str, Union[str, List[str]]], required: List[str] = []) -> None: + if not links: + return + + for expected_link in required: + assert expected_link in links, f"Link .links.{expected_link} is not defined" + + for link_type, link in links.items(): + if link_type != "docker_registry": + assert check_url(link), f"Link .links.{link_type} URL '{link}' is not reachable" + + +def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None: + metadata_field_lengths = { + "name": NAME_MAXLEN, + "label": LABEL_MAXLEN, + "summary": SUMMARY_MAXLEN, + "description": DESCRIPTION_MAXLEN, + } + + for field, max_length in metadata_field_lengths.items(): + value = this_info.get(field) + if comp_type != "metric": + value = this_config.get(field) or value + assert value, f"Metadata field '{field}' is not defined" + assert "FILL IN:" not in value, f"Metadata field '{field}' not filled in" + assert len(value) <= max_length, f"Metadata field '{field}' should not exceed {max_length} characters" + + links = this_info.get("links") or this_config.get("links") or {} + required_links: List[str] = [] + if comp_type == "method": + required_links = ["documentation", "repository"] + check_links(links, required_links) + + references = this_info.get("references") or {} + if comp_type != "metric": + references = this_config.get("references") or references + if comp_type != "control_method" or references: + print("Check references fields", flush=True) + check_references(references) + + +def run_check_config(meta: dict) -> None: + """Validate a viash component config. + + Checks namespace, info.type, component metadata, preferred_normalization, + variants, and Nextflow runner labels. + + Args: + meta: Viash meta dict with at least a ``"config"`` key pointing to the + ``.config.vsh.yaml`` path. 
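+
+    Example (a sketch; the config path is illustrative):
+
+        >>> run_check_config({"config": "target/executable/methods/my_method/.config.vsh.yaml"})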
+ """ + import openproblems + + print("Load config data", flush=True) + config = openproblems.project.read_viash_config(meta["config"]) + info = config.get("info", {}) + comp_type = info.get("type") + + print("Check .namespace", flush=True) + assert config.get("namespace"), ".namespace is not defined" + + print("Check .info.type", flush=True) + expected_types = ["method", "control_method", "metric"] + assert comp_type in expected_types, ".info.type should be equal to 'method' or 'control_method'" + + print("Check component metadata", flush=True) + if comp_type == "metric": + metric_infos = info.get("metrics", []) + assert metric_infos, ".info.metrics is not defined" + for metric_info in metric_infos: + check_info(metric_info, config, comp_type=comp_type) + else: + check_info(info, config, comp_type=comp_type) + + if "preferred_normalization" in info: + print("Checking contents of .info.preferred_normalization", flush=True) + norm_methods = ["log_cpm", "log_cp10k", "counts", "log_scran_pooling", "sqrt_cpm", "sqrt_cp10k", "l1_sqrt"] + assert info["preferred_normalization"] in norm_methods, ( + ".info['preferred_normalization'] not one of '" + "', '".join(norm_methods) + "'." + ) + + if "variants" in info: + print("Checking contents of .info.variants", flush=True) + arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + ["preferred_normalization"] + for paramset_id, paramset in info["variants"].items(): + if paramset: + for arg_id in paramset: + assert arg_id in arg_names, ( + f"Argument '{arg_id}' in `.info.variants['{paramset_id}']` " + "is not an argument in `.arguments`." + ) + + runners = config.get("runners", []) + + print("Check Nextflow runner", flush=True) + nextflow_runner = next( + (runner for runner in runners if runner["type"] == "nextflow"), + None, + ) + + assert nextflow_runner, ".runners does not contain a nextflow runner" + assert nextflow_runner.get("directives"), "directives not a field in nextflow runner" + nextflow_labels = nextflow_runner["directives"].get("label") + assert nextflow_labels, "label not a field in nextflow runner directives" + + assert [label for label in nextflow_labels if label in TIME_LABELS], "time label not filled in" + assert [label for label in nextflow_labels if label in MEM_LABELS], "mem label not filled in" + assert [label for label in nextflow_labels if label in CPU_LABELS], "cpu label not filled in" + + print("All checks succeeded!", flush=True) diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py new file mode 100644 index 0000000..3484abe --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import re + + +def run_component(cmd: list) -> None: + """Run a component executable and assert it exits successfully.""" + import subprocess + + print(">> Running script as test", flush=True) + out = subprocess.run(cmd) + assert out.returncode == 0, f"Script exited with an error. 
Return code: {out.returncode}" + + +def check_input_files(arguments: list) -> None: + """Assert that all required input files exist.""" + from os import path + + print(">> Checking whether input files exist", flush=True) + for arg in arguments: + if arg["type"] == "file" and arg["direction"] == "input" and arg["required"]: + assert not arg["must_exist"] or path.exists(arg["value"]), ( + f"Input file '{arg['value']}' does not exist" + ) + + +def check_output_files(arguments: list) -> None: + """Assert that all required output files exist and match their format spec.""" + from os import path + + print(">> Checking whether output file exists", flush=True) + for arg in arguments: + if arg["type"] == "file" and arg["direction"] == "output" and arg["required"]: + assert not arg["must_exist"] or path.exists(arg["value"]), ( + f"Output file '{arg['value']}' does not exist" + ) + + print(">> Reading output files and checking formats", flush=True) + for arg in arguments: + if arg["type"] != "file" or arg["direction"] != "output": + continue + check_format(arg) + + +def check_format(arg: dict) -> None: + """Read an output file and validate its contents against the format spec.""" + arg_info = arg.get("info") or {} + if arg["type"] == "file": + arg_format = arg_info.get("format", {}) + file_type = arg_format.get("type") or arg_info.get("file_type") + + # Tabular data + if file_type in ["parquet", "csv", "tsv"]: + import pandas as pd + + print(f"Reading and checking {arg['clean_name']}", flush=True) + if file_type == "csv": + df = pd.read_csv(arg["value"]) + elif file_type == "tsv": + df = pd.read_csv(arg["value"], sep="\t") + else: + df = pd.read_parquet(arg["value"]) + print(f" {df}") + + arg_columns = arg_format.get("columns") or arg_info.get("columns") or [] + check_dataframe(df, arg_columns, f"File '{arg['value']}'") + + # Hierarchical data + elif file_type == "json": + import json + + print(f"Reading and checking {arg['clean_name']}", flush=True) + with open(arg["value"]) as f: + data = json.load(f) + print( + f" {type(data).__name__} with {len(data)} entries" + if isinstance(data, (dict, list)) + else f" {data}" + ) + check_dictionary(data, arg) + + elif file_type == "yaml": + import yaml + + print(f"Reading and checking {arg['clean_name']}", flush=True) + with open(arg["value"]) as f: + data = yaml.safe_load(f) + print( + f" {type(data).__name__} with {len(data)} entries" + if isinstance(data, (dict, list)) + else f" {data}" + ) + check_dictionary(data, arg) + + # AnnData / SpatialData + elif file_type in ["h5ad", "anndata_hdf5"]: + import anndata as ad + + print(f"Reading and checking {arg['clean_name']}", flush=True) + adata = ad.read_h5ad(arg["value"]) + print(f" {adata}") + check_anndata(adata, arg_format, f"File '{arg['value']}'") + + elif file_type == "anndata_zarr": + import anndata as ad + + print(f"Reading and checking {arg['clean_name']}", flush=True) + store = ad.read_zarr(arg["value"]) + print(f" {store}") + check_anndata(store, arg_format, f"File '{arg['value']}'") + + elif file_type == "spatialdata_zarr": + import spatialdata + + print(f"Reading and checking {arg['clean_name']}", flush=True) + sdata = spatialdata.read_zarr(arg["value"]) + print(f" {sdata}") + check_spatialdata(sdata, arg) + + +def check_anndata(adata, format_spec: dict, label: str = "") -> None: + """Check whether an AnnData object contains all required slots + defined in the given format spec dict. 
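+
+    Example of a format spec, mirroring the ``file_*.yaml`` format blocks
+    (an illustrative sketch; ``adata`` is any AnnData object):
+
+        >>> spec = {"obs": [{"name": "label_pred", "required": True}],
+        ...         "uns": [{"name": "dataset_id"}]}
+        >>> check_anndata(adata, spec, label="File 'prediction.h5ad'")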
+ """ + for struc_name, items in format_spec.items(): + if not hasattr(adata, struc_name): + continue + + struc_x = getattr(adata, struc_name) + + if struc_name == "X": + if items.get("required", True): + assert struc_x is not None, f"{label} is missing slot .{struc_name}" + else: + for item in items: + if item.get("required", True): + assert item["name"] in struc_x, ( + f"{label} is missing slot .{struc_name}['{item['name']}']" + ) + + +def check_dataframe(df, columns: list, label: str = "") -> None: + """Check whether a DataFrame contains all required columns + defined in the given columns spec list. + """ + for item in columns: + if item.get("required", True): + assert item["name"] in df.columns, f"{label} is missing column '{item['name']}'" + + +def check_dictionary(data, arg: dict) -> None: + """Check whether a JSON/YAML object contains all required top-level keys + in the corresponding .info.format.keys field. + """ + arg_info = arg.get("info") or {} + arg_format = arg_info.get("format", {}) + arg_keys = arg_format.get("keys") or arg_info.get("keys") or [] + for item in arg_keys: + if item.get("required", True): + assert isinstance(data, dict) and item["name"] in data, ( + f"File '{arg['value']}' is missing key '{item['name']}'" + ) + + +def check_spatialdata(sdata, arg: dict) -> None: + """Check whether a SpatialData object contains all required elements + in the corresponding .info.format field. Supported element categories: + images, labels, points, shapes, tables. + """ + arg_info = arg.get("info") or {} + arg_format = arg_info.get("format") or {} + element_categories = ["images", "labels", "points", "shapes", "tables"] + for category in element_categories: + items = arg_format.get(category) or [] + category_store = getattr(sdata, category, {}) + for item in items: + if item.get("required", True): + assert item["name"] in category_store, ( + f"File '{arg['value']}' is missing {category}['{item['name']}']" + ) + + elem_name = item["name"] + if elem_name not in category_store: + continue + element = category_store[elem_name] + + if category in ["points", "shapes"]: + check_dataframe( + element, + item.get("columns") or [], + f"File '{arg['value']}' {category}['{elem_name}']", + ) + elif category == "tables": + check_anndata(element, item, f"File '{arg['value']}' tables['{elem_name}']") + + +def get_argument_sets(config: dict, resources_dir: str) -> dict: + """Build argument sets from a viash config, resolving input file paths. + + Args: + config: Parsed viash config dict (from ``read_viash_config``). + resources_dir: Directory where test resource files are located. + + Returns: + Dict mapping argument-set name to list of resolved argument dicts. 
+ """ + arguments = [] + + for arg in config["all_arguments"]: + new_arg = arg.copy() + arg_info = new_arg.get("info") or {} + example = arg.get("example", [None])[0] + + if example and arg["type"] == "file": + if arg["direction"] == "input": + value = f"{resources_dir}/{example}" + else: + ext_res = re.search(r"\.(\w+)$", example) + if ext_res: + value = f"{arg['clean_name']}.{ext_res.group(1)}" + else: + value = f"{arg['clean_name']}" + new_arg["value"] = value + elif "test_default" in arg_info: + new_arg["value"] = arg_info["test_default"] + + arguments.append(new_arg) + + config_info = config.get("info") or {} + if "test_setup" not in config_info: + argument_sets = {"run": arguments} + else: + test_setup = config_info["test_setup"] + argument_sets = {} + for name, test_instance in test_setup.items(): + new_arguments = [] + for arg in arguments: + new_arg = arg.copy() + if arg["clean_name"] in test_instance: + val = test_instance[arg["clean_name"]] + if new_arg["type"] == "file" and new_arg["direction"] == "input": + val = f"{resources_dir}/{val}" + new_arg["value"] = val + new_arguments.append(new_arg) + argument_sets[name] = new_arguments + + return argument_sets + + +def generate_cmd_args(argument_set: list) -> list: + """Convert a list of resolved argument dicts to a flat list of CLI args.""" + cmd_args = [] + for arg in argument_set: + if "value" in arg: + value = arg["value"] + if arg["multiple"] and isinstance(value, list): + value = arg["multiple_sep"].join(value) + cmd_args.extend([arg["name"], str(value)]) + return cmd_args + + +def run_and_check_output(meta: dict) -> None: + """Run a viash component with test resources and validate its outputs. + + Args: + meta: Viash meta dict with keys ``"executable"``, ``"config"``, and + ``"resources_dir"``. 
+ """ + import openproblems + + config = openproblems.project.read_viash_config(meta["config"]) + argument_sets = get_argument_sets(config, meta["resources_dir"]) + + for argset_name, argset_args in argument_sets.items(): + print(f">> Running test '{argset_name}'", flush=True) + cmd = [meta["executable"]] + generate_cmd_args(argset_args) + + check_input_files(argset_args) + run_component(cmd) + check_output_files(argset_args) + + print("All checks succeeded!", flush=True) diff --git a/packages/python/openproblems/src/openproblems/project/docs/__init__.py b/packages/python/openproblems/src/openproblems/project/docs/__init__.py new file mode 100644 index 0000000..74c1aff --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/__init__.py @@ -0,0 +1,17 @@ +from .read_task_config import read_task_config +from .read_component_spec import read_component_spec +from .read_file_format import read_file_format +from .read_task_metadata import read_task_metadata +from .render_component_spec import render_component_spec +from .render_file_format import render_file_format +from .render_task_readme_qmd import render_task_readme_qmd + +__all__ = [ + "read_task_config", + "read_component_spec", + "read_file_format", + "read_task_metadata", + "render_component_spec", + "render_file_format", + "render_task_readme_qmd", +] diff --git a/packages/python/openproblems/src/openproblems/project/docs/_markdown.py b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py new file mode 100644 index 0000000..a0dae31 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py @@ -0,0 +1,28 @@ +def format_markdown_table(headers, rows, col_widths=None): + """Format a GitHub-Flavored Markdown pipe table. + + Args: + headers: Column header names. + rows: List of rows, each a list of cell values. + col_widths: Optional list of exact dash-counts for the separator row + (matches R's ``align_kable_widths`` behaviour). + + Returns: + A GFM pipe table string, or an empty string when ``rows`` is empty. + """ + if not rows: + return "" + + header_line = "| " + " | ".join(str(h) for h in headers) + " |" + + if col_widths is not None: + sep_line = "|" + "".join(f":{'-' * w}|" for w in col_widths) + else: + sep_line = "| " + " | ".join(f":{'-' * max(len(str(h)), 3)}" for h in headers) + " |" + + data_lines = [ + "| " + " | ".join(str(cell) for cell in row) + " |" + for row in rows + ] + + return "\n".join([header_line, sep_line] + data_lines) diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py new file mode 100644 index 0000000..47ea6f2 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py @@ -0,0 +1,75 @@ +from __future__ import annotations +import os +import re + + +def read_component_spec(path: str) -> dict: + """Read a component spec from a ``comp_*.yaml`` file. + + Args: + path: Path to a component spec yaml (usually ``src/api/comp_*.yaml``). + + Returns: + A dict with keys ``info`` (dict) and ``args`` (list of dicts). + """ + from .. 
import read_nested_yaml + data = read_nested_yaml(path) + return { + "info": _process_info(data, path), + "args": _process_arguments(data, path), + } + + +def _process_info(data: dict, path: str) -> dict: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + info: dict = {"file_name": file_name} + + # Top-level fields + for key in ("label", "summary", "description", "namespace"): + info[key] = data.get(key) + + # Merge info block (may override Nones above) + for key, val in (data.get("info") or {}).items(): + if info.get(key) is None: + info[key] = val + + # Merge info.type_info + for key, val in ((data.get("info") or {}).get("type_info") or {}).items(): + if info.get(key) is None: + info[key] = val + + return info + + +def _process_arguments(data: dict, path: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + + arguments = list(data.get("arguments") or []) + for arg_group in data.get("argument_groups") or []: + arguments.extend(arg_group.get("arguments") or []) + + result = [] + for arg in arguments: + arg_info = arg.get("info") or {} + merge_ref = arg.get("__merge__") + parent = re.sub(r"\.ya?ml$", "", os.path.basename(merge_ref)) if merge_ref else None + + default = arg.get("default") + example = arg.get("example") + if isinstance(example, list): + example = example[0] if example else None + + result.append({ + "file_name": file_name, + "arg_name": re.sub(r"^-+", "", arg.get("name", "")), + "type": arg.get("type", ""), + "direction": arg.get("direction") or "input", + "required": bool(arg.get("required")) if arg.get("required") is not None else False, + "default": str(default) if default is not None else None, + "example": str(example) if example is not None else None, + "description": arg.get("description") or arg_info.get("description"), + "summary": arg.get("summary") or arg_info.get("summary"), + "parent": parent, + }) + + return result diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py new file mode 100644 index 0000000..1d1fa05 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py @@ -0,0 +1,167 @@ +from __future__ import annotations +import os +import re + +ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] +SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] + + +def read_file_format(path: str) -> dict: + """Read a file format spec from a ``file_*.yaml`` file. + + Args: + path: Path to a file format yaml (usually ``src/api/file_*.yaml``). + + Returns: + A dict with key ``info`` (dict) and optionally ``expected_format`` + (list of dicts) when the format type is known. + """ + from .. 
import read_nested_yaml + data = read_nested_yaml(path) + + out: dict = {"info": _process_info(data, path)} + + fmt = (data.get("info") or {}).get("format") or {} + format_type = fmt.get("type") + + if format_type == "h5ad": + out["expected_format"] = _process_h5ad(data, path, format_type) + elif format_type in ("anndata_hdf5", "anndata_zarr"): + out["expected_format"] = _process_h5ad(data, path, format_type) + elif format_type in ("tabular", "csv", "tsv", "parquet"): + out["expected_format"] = _process_tabular(data, path, format_type) + elif format_type in ("json", "yaml"): + out["expected_format"] = _process_keyed(data, path, format_type) + elif format_type == "spatialdata_zarr": + out["expected_format"] = _process_spatialdata(data, path) + + return out + + +def _process_info(data: dict, path: str) -> dict: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + fmt = (data.get("info") or {}).get("format") or {} + + label = data.get("label") + if label is None: + example = data.get("example") + if example: + label = os.path.basename(str(example)) + + return { + "file_name": file_name, + "file_type": fmt.get("type"), + "label": label, + "summary": data.get("summary"), + "description": data.get("description"), + "example": data.get("example"), + } + + +def _process_h5ad(data: dict, path: str, format_type: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + fmt = (data.get("info") or {}).get("format") or {} + + rows = [] + for struct_name in ANNDATA_STRUCT_NAMES: + fields = fmt.get(struct_name) + if not fields: + continue + if not isinstance(fields, list): + fields = [fields] + for field in fields: + rows.append({ + "file_name": file_name, + "struct": struct_name, + "name": field.get("name", struct_name), + "type": field.get("type", ""), + "required": field.get("required", True), + "multiple": field.get("multiple", False), + "description": field.get("description"), + "summary": field.get("summary"), + "data_type": format_type, + }) + return rows + + +def _process_tabular(data: dict, path: str, format_type: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + columns = (data.get("info") or {}).get("format", {}).get("columns") or [] + + return [ + { + "file_name": file_name, + "name": col.get("name", ""), + "type": col.get("type", ""), + "required": col.get("required", True), + "description": col.get("description"), + "summary": col.get("summary"), + "data_type": format_type, + } + for col in columns + ] + + +def _process_keyed(data: dict, path: str, format_type: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + keys = (data.get("info") or {}).get("format", {}).get("keys") or [] + + return [ + { + "file_name": file_name, + "name": k.get("name", ""), + "type": k.get("type", ""), + "required": k.get("required", True), + "description": k.get("description"), + "summary": k.get("summary"), + "data_type": format_type, + } + for k in keys + ] + + +def _process_spatialdata(data: dict, path: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + fmt = (data.get("info") or {}).get("format") or {} + rows = [] + for category in SPATIALDATA_ELEMENT_CATEGORIES: + elements = fmt.get(category) or [] + for elem in elements: + row: dict = { + "file_name": file_name, + "category": category, + "name": elem.get("name", ""), + "element_type": elem.get("type", ""), + "required": elem.get("required", True), + "description": elem.get("description"), + "data_type": "spatialdata_zarr", + 
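+                # points/shapes rows gain a "columns" spec below,
+                # and tables rows an "anndata_slots" spec.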
} + if category in ("points", "shapes"): + row["columns"] = [ + { + "name": col.get("name", ""), + "type": col.get("type", ""), + "required": col.get("required", True), + "description": col.get("description"), + } + for col in (elem.get("columns") or []) + ] + elif category == "tables": + slots = [] + for struct_name in ANNDATA_STRUCT_NAMES: + fields = elem.get(struct_name) + if not fields: + continue + if not isinstance(fields, list): + fields = [fields] + for f in fields: + slots.append({ + "struct": struct_name, + "name": f.get("name", struct_name), + "type": f.get("type", ""), + "required": f.get("required", True), + "description": f.get("description"), + }) + row["anndata_slots"] = slots + rows.append(row) + return rows diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py new file mode 100644 index 0000000..78d460a --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py @@ -0,0 +1,11 @@ +def read_task_config(path): + """Read and return a task config (_viash.yaml) file. + + Args: + path: Path to a ``_viash.yaml`` project config file. + + Returns: + The parsed config as a dict. + """ + from .. import read_nested_yaml + return read_nested_yaml(path) diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py b/packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py new file mode 100644 index 0000000..69412e6 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py @@ -0,0 +1,145 @@ +from __future__ import annotations +import glob +import os +import re +import warnings +from collections import deque + + +def read_task_metadata(path: str) -> dict: + """Read all API files in a task directory and return structured metadata. + + Scans ``path`` recursively for ``comp_*.yaml`` and ``file_*.yaml`` files, + builds a directed task graph, and runs a BFS to determine render order. + + Args: + path: Path to the task directory (or ``api/`` subdirectory). A + ``_viash.yaml`` must exist somewhere above this path. + + Returns: + A dict with the following keys: + + * ``proj_path`` – path to the project root + * ``proj_conf`` – parsed ``_viash.yaml`` + * ``files`` / ``comps`` – dicts keyed by ``file_name`` + * ``file_info`` / ``comp_info`` – flat lists of info dicts + * ``file_expected_format`` / ``comp_args`` – flat lists + * ``task_graph`` – ``networkx.DiGraph`` + * ``task_graph_root`` – name of the root node + * ``task_graph_order`` – BFS-ordered list of node names + """ + from .. 
import find_project_root + from .read_task_config import read_task_config + from .read_component_spec import read_component_spec + from .read_file_format import read_file_format + + project_path = find_project_root(path) + if project_path is None: + raise ValueError(f"No project root (_viash.yaml) found from '{path}'") + + proj_conf_file = os.path.join(project_path, "_viash.yaml") + if not os.path.exists(proj_conf_file): + raise ValueError(f"No _viash.yaml found in project root '{project_path}'") + + proj_conf = read_task_config(proj_conf_file) + + comp_paths = sorted( + glob.glob(os.path.join(path, "**/comp_*.yaml"), recursive=True) + + glob.glob(os.path.join(path, "**/comp_*.yml"), recursive=True) + ) + comps = { + re.sub(r"\.ya?ml$", "", os.path.basename(p)): read_component_spec(p) + for p in comp_paths + } + + file_paths = sorted( + glob.glob(os.path.join(path, "**/file_*.yaml"), recursive=True) + + glob.glob(os.path.join(path, "**/file_*.yml"), recursive=True) + ) + files = { + re.sub(r"\.ya?ml$", "", os.path.basename(p)): read_file_format(p) + for p in file_paths + } + + task_graph = _build_graph(files, comps) + task_graph_root = _get_root(task_graph) + task_graph_order = _bfs_order(task_graph, task_graph_root) + + comp_info = [c["info"] for c in comps.values()] + comp_args = [arg for c in comps.values() for arg in c["args"]] + file_info = [f["info"] for f in files.values()] + file_expected_format = [ + row for f in files.values() for row in (f.get("expected_format") or []) + ] + + return { + "proj_path": project_path, + "proj_conf": proj_conf, + "files": files, + "file_info": file_info, + "file_expected_format": file_expected_format, + "comps": comps, + "comp_info": comp_info, + "comp_args": comp_args, + "task_graph": task_graph, + "task_graph_root": task_graph_root, + "task_graph_order": task_graph_order, + } + + +def _build_graph(files: dict, comps: dict): + import networkx as nx + + G = nx.DiGraph() + + for name, f in files.items(): + G.add_node(name, label=f["info"].get("label") or name, is_comp=False) + + for name, c in comps.items(): + G.add_node(name, label=c["info"].get("label") or name, is_comp=True) + + for comp_name, c in comps.items(): + for arg in c["args"]: + if arg.get("type") != "file" or not arg.get("parent"): + continue + parent = arg["parent"] + if parent not in G: + continue + required = bool(arg.get("required", False)) + if arg.get("direction") == "input": + G.add_edge(parent, comp_name, from_to="file_to_comp", required=required) + elif arg.get("direction") == "output": + G.add_edge(comp_name, parent, from_to="comp_to_file", required=required) + + return G + + +def _get_root(G) -> str: + roots = [n for n, d in G.in_degree() if d == 0] + if not roots: + return next(iter(G.nodes())) + if len(roots) > 1: + warnings.warn( + f"Multiple root nodes with in-degree 0: {roots}. 
Using first.", + stacklevel=4, + ) + return roots[0] + + +def _bfs_order(G, root: str) -> list[str]: + """BFS from root; unreachable nodes are appended afterwards (mirrors igraph).""" + visited: list[str] = [] + seen: set[str] = set() + queue: deque[str] = deque([root]) + while queue: + node = queue.popleft() + if node not in seen: + seen.add(node) + visited.append(node) + for nbr in G.successors(node): + if nbr not in seen: + queue.append(nbr) + for node in G.nodes(): + if node not in seen: + visited.append(node) + return visited diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py new file mode 100644 index 0000000..60360df --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py @@ -0,0 +1,64 @@ +from __future__ import annotations +import re + + +def render_component_spec(spec: dict | str) -> str: + """Render a component spec as a Quarto/GFM markdown section. + + Args: + spec: Either a ``comp_*.yaml`` path or a dict returned by + :func:`read_component_spec`. + + Returns: + A markdown string with an ``## Component type:`` heading, summary, and + an arguments table. + """ + if isinstance(spec, str): + from .read_component_spec import read_component_spec + spec = read_component_spec(spec) + + info = spec["info"] + args_table = _format_arguments(spec["args"]) + + lines = [ + f"## Component type: {info.get('label', '')}", + "", + info.get("summary", "") or "", + "", + "Arguments:", + "", + ":::{.small}", + args_table, + ":::", + "", + ] + return "\n".join(lines) + + +def _format_arguments(args: list[dict]) -> str: + from ._markdown import format_markdown_table + + file_args = [a for a in args if a.get("type") == "file"] + if not file_args: + return "" + + rows = [] + for arg in file_args: + tags = [] + if not arg.get("required", True): + tags.append("Optional") + if arg.get("direction") == "output": + tags.append("Output") + tag_str = f"(_{', '.join(tags)}_) " if tags else "" + + summary = re.sub(r" *\n *", " ", (arg.get("summary") or "").strip()).rstrip(".") + default = arg.get("default") + default_str = f" Default: `{default}`." if default is not None else "" + + rows.append([ + f"`--{arg['arg_name']}`", + f"`{arg.get('type', '')}`", + f"{tag_str}{summary}.{default_str}", + ]) + + return format_markdown_table(["Name", "Type", "Description"], rows, col_widths=[25, 8, 60]) diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py new file mode 100644 index 0000000..ecab02b --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py @@ -0,0 +1,211 @@ +from __future__ import annotations +import re + +ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] +SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] + + +def render_file_format(spec: dict | str) -> str: + """Render a file format spec as a Quarto/GFM markdown section. + + Args: + spec: Either a ``file_*.yaml`` path or a dict returned by + :func:`read_file_format`. + + Returns: + A markdown string with a ``## File format:`` heading, summary, + example path, description, and a format/data-structure table. 
+ """ + if isinstance(spec, str): + from .read_file_format import read_file_format + spec = read_file_format(spec) + + info = spec["info"] + label = info.get("label") or "" + summary = (info.get("summary") or "").strip() + description = (info.get("description") or "").strip() + example = info.get("example") + + example_str = f"Example file: `{example}`" if example else "" + description_str = f"Description:\n\n{description}" if description else "" + + expected_format = spec.get("expected_format") + expected_format_str = "" + if expected_format: + format_example_lines = _render_format_example(spec) + format_table_lines = _render_format_table(spec) + expected_format_str = "\n".join([ + "Format:", + "", + ":::{.small}", + *format_example_lines, + ":::", + "", + "Data structure:", + "", + ":::{.small}", + *format_table_lines, + ":::", + ]) + + parts = [ + f"## File format: {label}", + "", + summary, + "", + example_str, + "", + description_str, + "", + expected_format_str, + ] + + # Trim trailing blank lines, keep one trailing newline + while parts and parts[-1] == "": + parts.pop() + return "\n".join(parts) + "\n" + + +def _render_format_example(spec: dict) -> list[str]: + fmt_type = spec["info"].get("file_type") + expected_format = spec.get("expected_format") or [] + + if fmt_type in ("h5ad", "anndata_hdf5", "anndata_zarr"): + structs: dict[str, list[str]] = {} + for row in expected_format: + structs.setdefault(row["struct"], []).append(f"'{row['name']}'") + lines = [" AnnData object"] + for struct_name in ANNDATA_STRUCT_NAMES: + if struct_name in structs: + lines.append(f" {struct_name}: {', '.join(structs[struct_name])}") + return lines + + if fmt_type in ("csv", "tsv", "parquet"): + names = ", ".join(f"'{row['name']}'" for row in expected_format) + return [" Tabular data", f" {names}"] + + if fmt_type in ("json", "yaml"): + names = ", ".join(f"'{row['name']}'" for row in expected_format) + ext = fmt_type.upper() + return [f" {ext} object", f" {names}"] + + if fmt_type == "spatialdata_zarr": + by_category: dict[str, list[str]] = {} + for row in expected_format: + by_category.setdefault(row["category"], []).append(f"'{row['name']}'") + lines = [" SpatialData object"] + for cat in SPATIALDATA_ELEMENT_CATEGORIES: + if cat in by_category: + lines.append(f" {cat}: {', '.join(by_category[cat])}") + return lines + + return [""] + + +def _render_format_table(spec: dict) -> list[str]: + from ._markdown import format_markdown_table + + fmt_type = spec["info"].get("file_type") + expected_format = spec.get("expected_format") or [] + + def _tag_str(row: dict) -> str: + tags = [] + if not row.get("required", True): + tags.append("Optional") + return f"(_{', '.join(tags)}_) " if tags else "" + + def _clean_desc(row: dict) -> str: + desc = re.sub(r" *\n *", " ", (row.get("description") or "").strip()).rstrip(".") + return desc + + if fmt_type in ("h5ad", "anndata_hdf5", "anndata_zarr"): + rows = [ + [ + f'`{row["struct"]}["{row["name"]}"]`', + f'`{row.get("type", "")}`', + f"{_tag_str(row)}{_clean_desc(row)}.", + ] + for row in expected_format + ] + return [format_markdown_table(["Slot", "Type", "Description"], rows, col_widths=[25, 8, 60])] + + if fmt_type in ("csv", "tsv", "parquet"): + rows = [ + [ + f'`{row["name"]}`', + f'`{row.get("type", "")}`', + f"{_tag_str(row)}{_clean_desc(row)}.", + ] + for row in expected_format + ] + return [format_markdown_table(["Column", "Type", "Description"], rows, col_widths=[25, 8, 60])] + + if fmt_type in ("json", "yaml"): + rows = [ + [ + f'`{row["name"]}`', + 
f'`{row.get("type", "")}`', + f"{_tag_str(row)}{_clean_desc(row)}.", + ] + for row in expected_format + ] + return [format_markdown_table(["Key", "Type", "Description"], rows, col_widths=[25, 8, 60])] + + if fmt_type == "spatialdata_zarr": + lines = [] + by_category: dict[str, list[dict]] = {} + for row in expected_format: + by_category.setdefault(row["category"], []).append(row) + + for cat in SPATIALDATA_ELEMENT_CATEGORIES: + elements = by_category.get(cat) + if not elements: + continue + lines.append(f"*{cat}*") + lines.append("") + + if cat in ("images", "labels", "coordinate_systems"): + elem_rows = [ + [f'`{e["name"]}`', f"{_tag_str(e)}{_clean_desc(e)}."] + for e in elements + ] + lines.append(format_markdown_table(["Name", "Description"], elem_rows, col_widths=[25, 68])) + + elif cat in ("points", "shapes"): + for elem in elements: + lines.append(f"`{elem['name']}`: {_clean_desc(elem)}.") + lines.append("") + col_rows = [ + [ + f'`{c["name"]}`', + f'`{c.get("type", "")}`', + f"{_tag_str(c)}{re.sub(r' *\n *', ' ', (c.get('description') or '').strip()).rstrip('.')}.", + ] + for c in (elem.get("columns") or []) + ] + if col_rows: + lines.append(format_markdown_table(["Column", "Type", "Description"], col_rows, col_widths=[25, 8, 60])) + + elif cat == "tables": + for elem in elements: + lines.append(f"`{elem['name']}`: {_clean_desc(elem)}.") + lines.append("") + slot_rows = [ + [ + f'`{s["struct"]}["{s["name"]}"]`', + f'`{s.get("type", "")}`', + f"{_tag_str(s)}{re.sub(r' *\n *', ' ', (s.get('description') or '').strip()).rstrip('.')}.", + ] + for s in (elem.get("anndata_slots") or []) + ] + if slot_rows: + lines.append(format_markdown_table(["Slot", "Type", "Description"], slot_rows, col_widths=[25, 8, 60])) + + lines.append("") + + # remove trailing blank line + while lines and lines[-1] == "": + lines.pop() + return lines + + return [""] diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py new file mode 100644 index 0000000..4c94c3e --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py @@ -0,0 +1,210 @@ +from __future__ import annotations +import re + + +def render_task_readme_qmd(task_metadata: dict | str, add_instructions: bool = False) -> str: + """Render the ``README.qmd`` for a task. + + Args: + task_metadata: Either a path to the task/API directory or a metadata + dict returned by :func:`read_task_metadata`. + add_instructions: When ``True``, prepend installation and usage + instructions (off by default). + + Returns: + A Quarto markdown string suitable for writing to ``README.qmd``. 
+ """ + if isinstance(task_metadata, str): + from .read_task_metadata import read_task_metadata + task_metadata = read_task_metadata(task_metadata) + + proj_conf = task_metadata["proj_conf"] + label = proj_conf.get("label", "") + summary = (proj_conf.get("summary") or "").strip() + description = (proj_conf.get("description") or "").strip() + + repository_url = (proj_conf.get("links") or {}).get("repository", "") + repo_match = re.search(r"https://github\.com/([^/]+/[^/]+)", repository_url) + repository_name = repo_match.group(1) if repo_match else repository_url + + authors_str = _render_authors(task_metadata) + task_graph = _render_task_graph(task_metadata) + task_api_parts = _render_task_parts(task_metadata) + instructions = _render_instructions(task_metadata) if add_instructions else "" + + lines = [ + "---", + f'title: "{label}"', + "format: gfm", + "---", + "", + "", + "", + summary, + "", + f"Repository: [{repository_name}]({repository_url})", + "", + ] + + if instructions: + lines += [instructions, ""] + + lines += [ + "## Description", + "", + description, + "", + authors_str, + "", + "## API", + "", + task_graph, + "", + "\n\n".join(task_api_parts), + ] + + return "\n".join(lines) + + +def _render_authors(task_metadata: dict) -> str: + from ._markdown import format_markdown_table + + authors = task_metadata["proj_conf"].get("authors") or [] + if not authors: + return "" + + # Collect columns: name, roles, then any info keys + all_keys: list[str] = ["name", "roles"] + for aut in authors: + for key in (aut.get("info") or {}): + if key not in all_keys: + all_keys.append(key) + + rows = [] + for aut in authors: + info = aut.get("info") or {} + roles = aut.get("roles") or [] + roles_str = ", ".join(roles) if isinstance(roles, list) else str(roles) + row = [aut.get("name", ""), roles_str] + [info.get(k, "") for k in all_keys[2:]] + rows.append(row) + + headers = [k.capitalize() for k in all_keys] + table = format_markdown_table(headers, rows) + return "\n## Authors & contributors\n\n" + table + "\n" + + +def _render_task_graph(task_metadata: dict) -> str: + G = task_metadata["task_graph"] + order = task_metadata["task_graph_order"] + repository_url = (task_metadata["proj_conf"].get("links") or {}).get("repository") + + def clean_id(node_id: str) -> str: + return node_id.replace("graph", "graaf") + + def make_label(node_id: str, label: str, is_comp: bool) -> str: + if not repository_url: + return label + slug = re.sub(r"[^a-z0-9]", "-", label.lower()) + anchor = f"component-type-{slug}" if is_comp else f"file-format-{slug}" + return f"{label}" + + node_order = {name: i for i, name in enumerate(order)} + + sorted_nodes = sorted(G.nodes(data=True), key=lambda x: node_order.get(x[0], len(order))) + node_lines = [] + for node_id, attrs in sorted_nodes: + label = make_label(node_id, attrs.get("label", node_id), attrs.get("is_comp", False)) + cid = clean_id(node_id) + if attrs.get("is_comp", False): + node_lines.append(f' {cid}[/"{label}"/]') + else: + node_lines.append(f' {cid}("{label}")') + + sorted_edges = sorted( + G.edges(data=True), + key=lambda e: (node_order.get(e[0], len(order)), node_order.get(e[1], len(order))), + ) + edge_lines = [] + for from_node, to_node, attrs in sorted_edges: + from_to = attrs.get("from_to", "comp_to_file") + required = attrs.get("required", True) + if from_to == "file_to_comp": + edge_type = "---" if required else "-.-" + else: + edge_type = "-->" if required else ".->" + edge_lines.append(f" {clean_id(from_node)}{edge_type}{clean_id(to_node)}") + + return 
"\n".join([ + "```mermaid", + "flowchart TB", + *node_lines, + *edge_lines, + "```", + ]) + + +def _render_task_parts(task_metadata: dict) -> list[str]: + from .render_component_spec import render_component_spec + from .render_file_format import render_file_format + + parts = [] + for name in task_metadata["task_graph_order"]: + if name in task_metadata["comps"]: + parts.append(render_component_spec(task_metadata["comps"][name])) + elif name in task_metadata["files"]: + parts.append(render_file_format(task_metadata["files"][name])) + return parts + + +def _render_instructions(task_metadata: dict) -> str: + proj_name = task_metadata["proj_conf"].get("name", "") + return "\n".join([ + "### Installation", + "", + "You need to have Docker, Java, and Viash installed. Follow", + "[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)", + "to install the required dependencies.", + "", + "### Add a method", + "", + "To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.", + "", + "### Initial setup", + "", + "To get started, you can run the following commands:", + "", + "```bash", + f"git clone git@github.com:openproblems-bio/{proj_name}.git", + "", + f"cd {proj_name}", + "", + "# initialise submodule", + "scripts/init_submodule.sh", + "", + "# download resources", + "scripts/download_resources.sh", + "```", + "", + "To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark:", + "", + "```bash", + "viash ns build --parallel --setup cachedbuild", + "", + "scripts/run_benchmark.sh", + "```", + "", + "After adding a component, it is recommended to run the tests to ensure that the component is working correctly:", + "", + "```bash", + "viash ns test --parallel", + "```", + "", + "Optionally, you can provide the `--query` argument to test only a subset of components:", + "", + "```bash", + "viash ns test --parallel --query 'component_name'", + "```", + ]) diff --git a/packages/python/openproblems/tests/data/example_project/_viash.yaml b/packages/python/openproblems/tests/data/example_project/_viash.yaml new file mode 100644 index 0000000..c171bca --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/_viash.yaml @@ -0,0 +1,88 @@ +viash_version: 0.9.4 + +# Step 1: Change the name of the task. +# example: task_name_of_this_task +name: task_template +organization: openproblems-bio +version: dev + +license: MIT +# Step 2: Add keywords to describe the task. +keywords: [single-cell, openproblems, benchmark] +# Step 3: Update the `task_template` to the name of the task from step 1. +links: + issue_tracker: https://github.com/openproblems-bio/task_template/issues + repository: https://github.com/openproblems-bio/task_template + docker_registry: ghcr.io + + +# Step 4: Update the label, summary and description. +# A unique, human-readable, short label. Used for creating summary tables and visualisations. +label: Template +summary: A one sentence summary of purpose and methodology. Used for creating an overview tables. +description: | + Provide a clear and concise description of your task, detailing the specific problem it aims + to solve. Outline the input data types, the expected output, and any assumptions or constraints. + Be sure to explain any terminology or concepts that are essential for understanding the task. + + Explain the motivation behind your proposed task. Describe the biological or computational + problem you aim to address and why it's important. 
Discuss the current state of research in + this area and any gaps or challenges that your task could help address. This section + should convince readers of the significance and relevance of your task. + +# A list of references to relevant literature. Each reference should be a DOI or a bibtex entry +references: + doi: + - 10.21203/rs.3.rs-4181617/v1 + # bibtex: + # - | + # @article{doe_2021_template, + # doi = {10.21203/rs.3.rs-4181617/v1}, + # url = {https://doi.org/10.21203/rs.3.rs-4181617/v1}, + # author = {Doe, John}, + # title = {A template for creating new tasks}, + # publisher = {Research Square}, + # year = {2021}, + # } + +info: + image: The name of the image file to use for the component on the website. + # Step 5: Replace the task_template to the name of the task. + test_resources: + - type: s3 + path: s3://openproblems-data/resources_test/common/ + dest: resources_test/common + - type: s3 + path: s3://openproblems-data/resources_test/task_template/ + dest: resources_test/task_template + +# Step 6: Update the authors of the task. +authors: + # Full name of the author, usually in the name of FirstName MiddleName LastName. + - name: John Doe + # Role of the author. Possible values: + # + # * `"author"`: Authors who have made substantial contributions to the component. + # * `"maintainer"`: The maintainer of the component. + # * `"contributor"`: Authors who have made smaller contributions (such as code patches etc.). + roles: [ "author", "maintainer" ] + # Additional information on the author + info: + github: johndoe + orcid: 0000-0000-0000-0000 + email: john@doe.me + twitter: johndoe + linkedin: johndoe + +# Step 7: Remove all of the comments of the steps you completed + +config_mods: | + .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } + +repositories: + - name: openproblems + type: github + repo: openproblems-bio/openproblems + tag: build/main # Step 8: Set this to the latest release + +# Step 9: High five yourself! diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml new file mode 100644 index 0000000..f637aed --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml @@ -0,0 +1,37 @@ +namespace: control_methods +info: + type: control_method + type_info: + label: Control Method + summary: Quality control methods for verifying the pipeline. + description: | + This folder contains control components for the task. + These components have the same interface as the regular methods + but also receive the solution object as input. It serves as a + starting point to test the relative accuracy of new methods in + the task, and also as a quality control for the metrics defined + in the task. 
+arguments: + - name: --input_train + __merge__: file_train.yaml + required: true + direction: input + - name: --input_test + __merge__: file_test.yaml + required: true + direction: input + - name: "--input_solution" + __merge__: file_solution.yaml + direction: input + required: true + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py + - path: /resources_test/task_template/cxg_mouse_pancreas_atlas + dest: resources_test/task_template/cxg_mouse_pancreas_atlas \ No newline at end of file diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml new file mode 100644 index 0000000..1ed53bd --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml @@ -0,0 +1,31 @@ +namespace: "data_processors" +info: + type: data_processor + type_info: + label: Data processor + summary: A data processor. + description: | + A component for processing a Common Dataset into a task-specific dataset. +arguments: + - name: "--input" + __merge__: file_common_dataset.yaml + direction: input + required: true + - name: "--output_train" + __merge__: file_train.yaml + direction: output + required: true + - name: "--output_test" + __merge__: file_test.yaml + direction: output + required: true + - name: "--output_solution" + __merge__: file_solution.yaml + direction: output + required: true +test_resources: + - path: /resources_test/common/cxg_mouse_pancreas_atlas + dest: resources_test/common/cxg_mouse_pancreas_atlas + - type: python_script + path: /common/component_tests/run_and_check_output.py + diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_method.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_method.yaml new file mode 100644 index 0000000..3a93846 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_method.yaml @@ -0,0 +1,28 @@ +namespace: "methods" +info: + type: method + type_info: + label: Method + summary: A method. + description: | + A method to predict the task effects. +arguments: + - name: --input_train + __merge__: file_train.yaml + required: true + direction: input + - name: "--input_test" + __merge__: file_test.yaml + direction: input + required: true + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py + - path: /resources_test/task_template/cxg_mouse_pancreas_atlas + dest: resources_test/task_template/cxg_mouse_pancreas_atlas \ No newline at end of file diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml new file mode 100644 index 0000000..1c76a3d --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml @@ -0,0 +1,28 @@ +namespace: "metrics" +info: + type: metric + type_info: + label: Metric + summary: A task template metric. + description: | + A metric for evaluating method predictions. 
+arguments: + - name: "--input_solution" + __merge__: file_solution.yaml + direction: input + required: true + - name: "--input_prediction" + __merge__: file_prediction.yaml + direction: input + required: true + - name: "--output" + __merge__: file_score.yaml + direction: output + required: true +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py + - path: /resources_test/task_template/cxg_mouse_pancreas_atlas + dest: resources_test/task_template/cxg_mouse_pancreas_atlas diff --git a/packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml b/packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml new file mode 100644 index 0000000..e8a74a0 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml @@ -0,0 +1,72 @@ +type: file +example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad" +label: "Common Dataset" +summary: A subset of the common dataset. +info: + format: + type: h5ad + layers: + - type: integer + name: counts + description: Raw counts + required: true + - type: double + name: normalized + description: Normalized expression values + required: true + obs: + - type: string + name: cell_type + description: Cell type information + required: true + - type: string + name: batch + description: Batch information + required: true + var: + - type: boolean + name: hvg + description: Whether or not the feature is considered to be a 'highly variable gene' + required: true + - type: double + name: hvg_score + description: A ranking of the features by hvg. + required: true + obsm: + - type: double + name: X_pca + description: The resulting PCA embedding. + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - name: dataset_name + type: string + description: Nicely formatted name. + required: true + - type: string + name: dataset_url + description: Link to the original source of the dataset. + required: false + - name: dataset_reference + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: dataset_summary + type: string + description: Short description of the dataset. + required: true + - name: dataset_description + type: string + description: Long description of the dataset. + required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. + required: false + - type: string + name: normalization_id + description: "Which normalization was used" + required: true diff --git a/packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml b/packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml new file mode 100644 index 0000000..26068ab --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml @@ -0,0 +1,26 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/task_template/cxg_mouse_pancreas_atlas/prediction.h5ad" +label: "Predicted data" +summary: A predicted dataset as output by a method. +info: + format: + type: h5ad + obs: + - type: string + name: label_pred + description: Predicted labels for the test cells. 
+        required: true
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - type: string
+        name: normalization_id
+        description: "Which normalization was used"
+        required: true
+      - type: string
+        name: method_id
+        description: "A unique identifier for the method"
+        required: true
\ No newline at end of file
diff --git a/packages/python/openproblems/tests/data/example_project/api/file_score.yaml b/packages/python/openproblems/tests/data/example_project/api/file_score.yaml
new file mode 100644
index 0000000..8bdad65
--- /dev/null
+++ b/packages/python/openproblems/tests/data/example_project/api/file_score.yaml
@@ -0,0 +1,30 @@
+type: file
+example: "resources_test/task_template/cxg_mouse_pancreas_atlas/score.h5ad"
+label: Score
+summary: "File indicating the score of a metric."
+info:
+  format:
+    type: h5ad
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - type: string
+        name: normalization_id
+        description: "Which normalization was used"
+        required: true
+      - type: string
+        name: method_id
+        description: "A unique identifier for the method"
+        required: true
+      - type: string
+        name: metric_ids
+        description: "One or more unique metric identifiers"
+        multiple: true
+        required: true
+      - type: double
+        name: metric_values
+        description: "The metric values obtained for the given prediction. Must be of the same length as 'metric_ids'."
+        multiple: true
+        required: true
\ No newline at end of file
diff --git a/packages/python/openproblems/tests/data/example_project/api/file_solution.yaml b/packages/python/openproblems/tests/data/example_project/api/file_solution.yaml
new file mode 100644
index 0000000..d2f6200
--- /dev/null
+++ b/packages/python/openproblems/tests/data/example_project/api/file_solution.yaml
@@ -0,0 +1,73 @@
+#TODO: Change to the required and/or optional fields of the anndata
+type: file
+example: "resources_test/task_template/cxg_mouse_pancreas_atlas/solution.h5ad"
+label: "Solution"
+summary: "The solution for the test data"
+info:
+  format:
+    type: h5ad
+    layers:
+      - type: integer
+        name: counts
+        description: Raw counts
+        required: true
+      - type: double
+        name: normalized
+        description: Normalized counts
+        required: true
+    obs:
+      - type: string
+        name: label
+        description: Ground truth cell type labels
+        required: true
+      - type: string
+        name: batch
+        description: Batch information
+        required: true
+    var:
+      - type: boolean
+        name: hvg
+        description: Whether or not the feature is considered to be a 'highly variable gene'
+        required: true
+      - type: double
+        name: hvg_score
+        description: A ranking of the features by hvg.
+        required: true
+    obsm:
+      - type: double
+        name: X_pca
+        description: The resulting PCA embedding.
+        required: true
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - name: dataset_name
+        type: string
+        description: Nicely formatted name.
+        required: true
+      - type: string
+        name: dataset_url
+        description: Link to the original source of the dataset.
+        required: false
+      - name: dataset_reference
+        type: string
+        description: Bibtex reference of the paper in which the dataset was published.
+        required: false
+      - name: dataset_summary
+        type: string
+        description: Short description of the dataset.
+        required: true
+      - name: dataset_description
+        type: string
+        description: Long description of the dataset.
+ required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. + required: false + - type: string + name: normalization_id + description: "Which normalization was used" + required: true diff --git a/packages/python/openproblems/tests/data/example_project/api/file_test.yaml b/packages/python/openproblems/tests/data/example_project/api/file_test.yaml new file mode 100644 index 0000000..cb9d9a6 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_test.yaml @@ -0,0 +1,45 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/task_template/cxg_mouse_pancreas_atlas/test.h5ad" +label: "Test data" +summary: The subset of molecules used for the test dataset +info: + format: + type: h5ad + layers: + - type: integer + name: counts + description: Raw counts + required: true + - type: double + name: normalized + description: Normalized counts + required: true + obs: + - type: string + name: batch + description: Batch information + required: true + var: + - type: boolean + name: hvg + description: Whether or not the feature is considered to be a 'highly variable gene' + required: true + - type: double + name: hvg_score + description: A ranking of the features by hvg. + required: true + obsm: + - type: double + name: X_pca + description: The resulting PCA embedding. + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - type: string + name: normalization_id + description: "Which normalization was used" + required: true \ No newline at end of file diff --git a/packages/python/openproblems/tests/data/example_project/api/file_train.yaml b/packages/python/openproblems/tests/data/example_project/api/file_train.yaml new file mode 100644 index 0000000..c01eda5 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_train.yaml @@ -0,0 +1,49 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/task_template/cxg_mouse_pancreas_atlas/train.h5ad" +label: "Training data" +summary: "The training data in h5ad format" +info: + format: + type: h5ad + layers: + - type: integer + name: counts + description: Raw counts + required: true + - type: double + name: normalized + description: Normalized counts + required: true + obs: + - type: string + name: label + description: Ground truth cell type labels + required: true + - type: string + name: batch + description: Batch information + required: true + var: + - type: boolean + name: hvg + description: Whether or not the feature is considered to be a 'highly variable gene' + required: true + - type: double + name: hvg_score + description: A ranking of the features by hvg. + required: true + obsm: + - type: double + name: X_pca + description: The resulting PCA embedding. 
+ required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - type: string + name: normalization_id + description: "Which normalization was used" + required: true \ No newline at end of file diff --git a/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py new file mode 100644 index 0000000..2ad4fc8 --- /dev/null +++ b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py @@ -0,0 +1,78 @@ +import os +import pytest + +EXAMPLE_PROJECT = os.path.normpath(os.path.join( + os.path.dirname(__file__), + "data/example_project", +)) + + +@pytest.fixture(scope="module") +def task_metadata(): + from openproblems.project.docs import read_task_metadata + return read_task_metadata(EXAMPLE_PROJECT) + + +def test_read_task_metadata_keys(task_metadata): + for key in ("proj_path", "proj_conf", "files", "comps", "task_graph", "task_graph_order"): + assert key in task_metadata + + +def test_read_task_metadata_graph_nodes(task_metadata): + G = task_metadata["task_graph"] + assert "comp_method" in G.nodes + assert "comp_metric" in G.nodes + assert "file_train" in G.nodes + assert "file_prediction" in G.nodes + + +def test_read_task_metadata_graph_edges(task_metadata): + G = task_metadata["task_graph"] + # file -> comp (input) + assert G.has_edge("file_train", "comp_method") + # comp -> file (output) + assert G.has_edge("comp_method", "file_prediction") + + +def test_render_task_readme_qmd_structure(task_metadata): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) + + assert '---\ntitle: "Template"\nformat: gfm\n---' in result + assert "## Description" in result + assert "## Authors & contributors" in result + assert "## API" in result + assert "```mermaid" in result + assert "flowchart TB" in result + assert "```" in result + + +def test_render_task_readme_qmd_components(task_metadata): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) + + assert "## Component type: Method" in result + assert "## Component type: Metric" in result + + +def test_render_task_readme_qmd_file_formats(task_metadata): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) + + assert "## File format: Training data" in result + assert "## File format: Predicted data" in result + + +def test_render_task_readme_qmd_instructions(task_metadata): + from openproblems.project import render_task_readme_qmd + without = render_task_readme_qmd(task_metadata, add_instructions=False) + with_inst = render_task_readme_qmd(task_metadata, add_instructions=True) + + assert "### Installation" not in without + assert "### Installation" in with_inst + + +def test_render_task_readme_qmd_from_path(): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(EXAMPLE_PROJECT) + assert "## API" in result diff --git a/schemas/api_file_format.yaml b/schemas/api_file_format.yaml index 26cce7d..eb9b793 100644 --- a/schemas/api_file_format.yaml +++ b/schemas/api_file_format.yaml @@ -25,7 +25,11 @@ properties: format: oneOf: - $ref: "schema_openproblems_definitions.yaml#/definitions/H5ADFormat" + - $ref: "schema_openproblems_definitions.yaml#/definitions/AnnDataHDF5Format" + - $ref: "schema_openproblems_definitions.yaml#/definitions/AnnDataZarrFormat" - $ref: 
"schema_openproblems_definitions.yaml#/definitions/CSVFormat" - $ref: "schema_openproblems_definitions.yaml#/definitions/TSVFormat" - $ref: "schema_openproblems_definitions.yaml#/definitions/ParquetFormat" + - $ref: "schema_openproblems_definitions.yaml#/definitions/JSONFormat" + - $ref: "schema_openproblems_definitions.yaml#/definitions/YAMLFormat" - $ref: "schema_openproblems_definitions.yaml#/definitions/SpatialDataZarrFormat" diff --git a/schemas/schema_openproblems_definitions.yaml b/schemas/schema_openproblems_definitions.yaml index 46e861a..7762f25 100644 --- a/schemas/schema_openproblems_definitions.yaml +++ b/schemas/schema_openproblems_definitions.yaml @@ -236,6 +236,86 @@ definitions: additionalProperties: false allOf: - $ref: "#/definitions/AnnDataObject" + AnnDataHDF5Format: + type: object + properties: + type: + const: anndata_hdf5 + description: The file format. + X: + type: object + layers: + type: array + obs: + type: array + obsm: + type: array + obsp: + type: array + var: + type: array + varm: + type: array + varp: + type: array + uns: + type: array + required: [type] + additionalProperties: false + allOf: + - $ref: "#/definitions/AnnDataObject" + AnnDataZarrFormat: + type: object + properties: + type: + const: anndata_zarr + description: The file format. + X: + type: object + layers: + type: array + obs: + type: array + obsm: + type: array + obsp: + type: array + var: + type: array + varm: + type: array + varp: + type: array + uns: + type: array + required: [type] + additionalProperties: false + allOf: + - $ref: "#/definitions/AnnDataObject" + JSONFormat: + type: object + required: [type] + additionalProperties: false + properties: + type: + const: json + description: The file format. + keys: + type: array + items: + $ref: "#/definitions/DataTypeSpec" + YAMLFormat: + type: object + required: [type] + additionalProperties: false + properties: + type: + const: yaml + description: The file format. 
+ keys: + type: array + items: + $ref: "#/definitions/DataTypeSpec" CSVFormat: type: object required: [type] From 41fc732d449d72e2ce72b30040b473266297e14a Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 20 Apr 2026 14:18:04 +0200 Subject: [PATCH 2/6] apply black formatting and fix f-string syntax error --- .../openproblems/src/openproblems/__init__.py | 4 +- .../project/component_tests/check_config.py | 58 ++++++-- .../component_tests/run_and_check_output.py | 42 +++--- .../openproblems/project/docs/_markdown.py | 9 +- .../project/docs/read_component_spec.py | 33 +++-- .../project/docs/read_file_format.py | 62 +++++--- .../project/docs/read_task_config.py | 1 + .../project/docs/render_component_spec.py | 17 ++- .../project/docs/render_file_format.py | 98 +++++++++---- .../project/docs/render_task_readme_qmd.py | 132 ++++++++++-------- .../openproblems/project/find_project_root.py | 7 +- .../openproblems/project/read_nested_yaml.py | 33 +++-- .../openproblems/project/read_viash_config.py | 13 +- .../src/openproblems/project/resolve_path.py | 8 +- .../src/openproblems/utils/__init__.py | 5 +- .../src/openproblems/utils/strip_margin.py | 37 ++--- .../tests/test_docs_render_task_readme_qmd.py | 25 +++- .../tests/test_project_find_project_root.py | 17 +-- 18 files changed, 383 insertions(+), 218 deletions(-) diff --git a/packages/python/openproblems/src/openproblems/__init__.py b/packages/python/openproblems/src/openproblems/__init__.py index 84bba52..ca915bb 100644 --- a/packages/python/openproblems/src/openproblems/__init__.py +++ b/packages/python/openproblems/src/openproblems/__init__.py @@ -2,6 +2,6 @@ from . import utils __all__ = [ - "project", - "utils", + "project", + "utils", ] diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py index dd45b49..181fcfa 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py +++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py @@ -37,13 +37,17 @@ def check_references(references: Dict[str, Union[str, List[str]]]) -> None: doi = references.get("doi") bibtex = references.get("bibtex") - assert doi or bibtex, "One of .references.doi or .references.bibtex should be defined" + assert ( + doi or bibtex + ), "One of .references.doi or .references.bibtex should be defined" if doi: if not isinstance(doi, list): doi = [doi] for d in doi: - assert re.match(r"^10.\d{4,9}/[-._;()/:A-Za-z0-9]+$", d), f"Invalid DOI format: {doi}" + assert re.match( + r"^10.\d{4,9}/[-._;()/:A-Za-z0-9]+$", d + ), f"Invalid DOI format: {doi}" assert check_url(f"https://doi.org/{d}"), f"DOI '{d}' is not reachable" if bibtex: @@ -53,7 +57,9 @@ def check_references(references: Dict[str, Union[str, List[str]]]) -> None: assert re.match(r"^@.*{.*", b), f"Invalid bibtex format: {b}" -def check_links(links: Dict[str, Union[str, List[str]]], required: List[str] = []) -> None: +def check_links( + links: Dict[str, Union[str, List[str]]], required: List[str] = [] +) -> None: if not links: return @@ -62,7 +68,9 @@ def check_links(links: Dict[str, Union[str, List[str]]], required: List[str] = [ for link_type, link in links.items(): if link_type != "docker_registry": - assert check_url(link), f"Link .links.{link_type} URL '{link}' is not reachable" + assert check_url( + link + ), f"Link .links.{link_type} URL '{link}' is not reachable" def check_info(this_info: Dict, 
this_config: Dict, comp_type: str) -> None: @@ -79,7 +87,9 @@ def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None: value = this_config.get(field) or value assert value, f"Metadata field '{field}' is not defined" assert "FILL IN:" not in value, f"Metadata field '{field}' not filled in" - assert len(value) <= max_length, f"Metadata field '{field}' should not exceed {max_length} characters" + assert ( + len(value) <= max_length + ), f"Metadata field '{field}' should not exceed {max_length} characters" links = this_info.get("links") or this_config.get("links") or {} required_links: List[str] = [] @@ -117,7 +127,9 @@ def run_check_config(meta: dict) -> None: print("Check .info.type", flush=True) expected_types = ["method", "control_method", "metric"] - assert comp_type in expected_types, ".info.type should be equal to 'method' or 'control_method'" + assert ( + comp_type in expected_types + ), ".info.type should be equal to 'method' or 'control_method'" print("Check component metadata", flush=True) if comp_type == "metric": @@ -130,14 +142,26 @@ def run_check_config(meta: dict) -> None: if "preferred_normalization" in info: print("Checking contents of .info.preferred_normalization", flush=True) - norm_methods = ["log_cpm", "log_cp10k", "counts", "log_scran_pooling", "sqrt_cpm", "sqrt_cp10k", "l1_sqrt"] + norm_methods = [ + "log_cpm", + "log_cp10k", + "counts", + "log_scran_pooling", + "sqrt_cpm", + "sqrt_cp10k", + "l1_sqrt", + ] assert info["preferred_normalization"] in norm_methods, ( - ".info['preferred_normalization'] not one of '" + "', '".join(norm_methods) + "'." + ".info['preferred_normalization'] not one of '" + + "', '".join(norm_methods) + + "'." ) if "variants" in info: print("Checking contents of .info.variants", flush=True) - arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + ["preferred_normalization"] + arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + [ + "preferred_normalization" + ] for paramset_id, paramset in info["variants"].items(): if paramset: for arg_id in paramset: @@ -155,12 +179,20 @@ def run_check_config(meta: dict) -> None: ) assert nextflow_runner, ".runners does not contain a nextflow runner" - assert nextflow_runner.get("directives"), "directives not a field in nextflow runner" + assert nextflow_runner.get( + "directives" + ), "directives not a field in nextflow runner" nextflow_labels = nextflow_runner["directives"].get("label") assert nextflow_labels, "label not a field in nextflow runner directives" - assert [label for label in nextflow_labels if label in TIME_LABELS], "time label not filled in" - assert [label for label in nextflow_labels if label in MEM_LABELS], "mem label not filled in" - assert [label for label in nextflow_labels if label in CPU_LABELS], "cpu label not filled in" + assert [ + label for label in nextflow_labels if label in TIME_LABELS + ], "time label not filled in" + assert [ + label for label in nextflow_labels if label in MEM_LABELS + ], "mem label not filled in" + assert [ + label for label in nextflow_labels if label in CPU_LABELS + ], "cpu label not filled in" print("All checks succeeded!", flush=True) diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py index 3484abe..2c96cb3 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py +++ 
b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py @@ -9,7 +9,9 @@ def run_component(cmd: list) -> None: print(">> Running script as test", flush=True) out = subprocess.run(cmd) - assert out.returncode == 0, f"Script exited with an error. Return code: {out.returncode}" + assert ( + out.returncode == 0 + ), f"Script exited with an error. Return code: {out.returncode}" def check_input_files(arguments: list) -> None: @@ -19,9 +21,9 @@ def check_input_files(arguments: list) -> None: print(">> Checking whether input files exist", flush=True) for arg in arguments: if arg["type"] == "file" and arg["direction"] == "input" and arg["required"]: - assert not arg["must_exist"] or path.exists(arg["value"]), ( - f"Input file '{arg['value']}' does not exist" - ) + assert not arg["must_exist"] or path.exists( + arg["value"] + ), f"Input file '{arg['value']}' does not exist" def check_output_files(arguments: list) -> None: @@ -31,9 +33,9 @@ def check_output_files(arguments: list) -> None: print(">> Checking whether output file exists", flush=True) for arg in arguments: if arg["type"] == "file" and arg["direction"] == "output" and arg["required"]: - assert not arg["must_exist"] or path.exists(arg["value"]), ( - f"Output file '{arg['value']}' does not exist" - ) + assert not arg["must_exist"] or path.exists( + arg["value"] + ), f"Output file '{arg['value']}' does not exist" print(">> Reading output files and checking formats", flush=True) for arg in arguments: @@ -134,9 +136,9 @@ def check_anndata(adata, format_spec: dict, label: str = "") -> None: else: for item in items: if item.get("required", True): - assert item["name"] in struc_x, ( - f"{label} is missing slot .{struc_name}['{item['name']}']" - ) + assert ( + item["name"] in struc_x + ), f"{label} is missing slot .{struc_name}['{item['name']}']" def check_dataframe(df, columns: list, label: str = "") -> None: @@ -145,7 +147,9 @@ def check_dataframe(df, columns: list, label: str = "") -> None: """ for item in columns: if item.get("required", True): - assert item["name"] in df.columns, f"{label} is missing column '{item['name']}'" + assert ( + item["name"] in df.columns + ), f"{label} is missing column '{item['name']}'" def check_dictionary(data, arg: dict) -> None: @@ -157,9 +161,9 @@ def check_dictionary(data, arg: dict) -> None: arg_keys = arg_format.get("keys") or arg_info.get("keys") or [] for item in arg_keys: if item.get("required", True): - assert isinstance(data, dict) and item["name"] in data, ( - f"File '{arg['value']}' is missing key '{item['name']}'" - ) + assert ( + isinstance(data, dict) and item["name"] in data + ), f"File '{arg['value']}' is missing key '{item['name']}'" def check_spatialdata(sdata, arg: dict) -> None: @@ -175,9 +179,9 @@ def check_spatialdata(sdata, arg: dict) -> None: category_store = getattr(sdata, category, {}) for item in items: if item.get("required", True): - assert item["name"] in category_store, ( - f"File '{arg['value']}' is missing {category}['{item['name']}']" - ) + assert ( + item["name"] in category_store + ), f"File '{arg['value']}' is missing {category}['{item['name']}']" elem_name = item["name"] if elem_name not in category_store: @@ -191,7 +195,9 @@ def check_spatialdata(sdata, arg: dict) -> None: f"File '{arg['value']}' {category}['{elem_name}']", ) elif category == "tables": - check_anndata(element, item, f"File '{arg['value']}' tables['{elem_name}']") + check_anndata( + element, item, f"File '{arg['value']}' tables['{elem_name}']" + ) def 
get_argument_sets(config: dict, resources_dir: str) -> dict: diff --git a/packages/python/openproblems/src/openproblems/project/docs/_markdown.py b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py index a0dae31..7be1b7e 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/_markdown.py +++ b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py @@ -18,11 +18,10 @@ def format_markdown_table(headers, rows, col_widths=None): if col_widths is not None: sep_line = "|" + "".join(f":{'-' * w}|" for w in col_widths) else: - sep_line = "| " + " | ".join(f":{'-' * max(len(str(h)), 3)}" for h in headers) + " |" + sep_line = ( + "| " + " | ".join(f":{'-' * max(len(str(h)), 3)}" for h in headers) + " |" + ) - data_lines = [ - "| " + " | ".join(str(cell) for cell in row) + " |" - for row in rows - ] + data_lines = ["| " + " | ".join(str(cell) for cell in row) + " |" for row in rows] return "\n".join([header_line, sep_line] + data_lines) diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py index 47ea6f2..01dae58 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py +++ b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py @@ -13,6 +13,7 @@ def read_component_spec(path: str) -> dict: A dict with keys ``info`` (dict) and ``args`` (list of dicts). """ from .. import read_nested_yaml + data = read_nested_yaml(path) return { "info": _process_info(data, path), @@ -52,24 +53,30 @@ def _process_arguments(data: dict, path: str) -> list[dict]: for arg in arguments: arg_info = arg.get("info") or {} merge_ref = arg.get("__merge__") - parent = re.sub(r"\.ya?ml$", "", os.path.basename(merge_ref)) if merge_ref else None + parent = ( + re.sub(r"\.ya?ml$", "", os.path.basename(merge_ref)) if merge_ref else None + ) default = arg.get("default") example = arg.get("example") if isinstance(example, list): example = example[0] if example else None - result.append({ - "file_name": file_name, - "arg_name": re.sub(r"^-+", "", arg.get("name", "")), - "type": arg.get("type", ""), - "direction": arg.get("direction") or "input", - "required": bool(arg.get("required")) if arg.get("required") is not None else False, - "default": str(default) if default is not None else None, - "example": str(example) if example is not None else None, - "description": arg.get("description") or arg_info.get("description"), - "summary": arg.get("summary") or arg_info.get("summary"), - "parent": parent, - }) + result.append( + { + "file_name": file_name, + "arg_name": re.sub(r"^-+", "", arg.get("name", "")), + "type": arg.get("type", ""), + "direction": arg.get("direction") or "input", + "required": bool(arg.get("required")) + if arg.get("required") is not None + else False, + "default": str(default) if default is not None else None, + "example": str(example) if example is not None else None, + "description": arg.get("description") or arg_info.get("description"), + "summary": arg.get("summary") or arg_info.get("summary"), + "parent": parent, + } + ) return result diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py index 1d1fa05..99fec60 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py +++ 
b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py @@ -2,8 +2,25 @@ import os import re -ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] -SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] +ANNDATA_STRUCT_NAMES = [ + "X", + "obs", + "var", + "obsm", + "obsp", + "varm", + "varp", + "layers", + "uns", +] +SPATIALDATA_ELEMENT_CATEGORIES = [ + "images", + "labels", + "points", + "shapes", + "tables", + "coordinate_systems", +] def read_file_format(path: str) -> dict: @@ -17,6 +34,7 @@ def read_file_format(path: str) -> dict: (list of dicts) when the format type is known. """ from .. import read_nested_yaml + data = read_nested_yaml(path) out: dict = {"info": _process_info(data, path)} @@ -70,17 +88,19 @@ def _process_h5ad(data: dict, path: str, format_type: str) -> list[dict]: if not isinstance(fields, list): fields = [fields] for field in fields: - rows.append({ - "file_name": file_name, - "struct": struct_name, - "name": field.get("name", struct_name), - "type": field.get("type", ""), - "required": field.get("required", True), - "multiple": field.get("multiple", False), - "description": field.get("description"), - "summary": field.get("summary"), - "data_type": format_type, - }) + rows.append( + { + "file_name": file_name, + "struct": struct_name, + "name": field.get("name", struct_name), + "type": field.get("type", ""), + "required": field.get("required", True), + "multiple": field.get("multiple", False), + "description": field.get("description"), + "summary": field.get("summary"), + "data_type": format_type, + } + ) return rows @@ -155,13 +175,15 @@ def _process_spatialdata(data: dict, path: str) -> list[dict]: if not isinstance(fields, list): fields = [fields] for f in fields: - slots.append({ - "struct": struct_name, - "name": f.get("name", struct_name), - "type": f.get("type", ""), - "required": f.get("required", True), - "description": f.get("description"), - }) + slots.append( + { + "struct": struct_name, + "name": f.get("name", struct_name), + "type": f.get("type", ""), + "required": f.get("required", True), + "description": f.get("description"), + } + ) row["anndata_slots"] = slots rows.append(row) return rows diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py index 78d460a..8b6e226 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py +++ b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py @@ -8,4 +8,5 @@ def read_task_config(path): The parsed config as a dict. """ from .. import read_nested_yaml + return read_nested_yaml(path) diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py index 60360df..5e9719e 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py +++ b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py @@ -15,6 +15,7 @@ def render_component_spec(spec: dict | str) -> str: """ if isinstance(spec, str): from .read_component_spec import read_component_spec + spec = read_component_spec(spec) info = spec["info"] @@ -55,10 +56,14 @@ def _format_arguments(args: list[dict]) -> str: default = arg.get("default") default_str = f" Default: `{default}`." 
if default is not None else "" - rows.append([ - f"`--{arg['arg_name']}`", - f"`{arg.get('type', '')}`", - f"{tag_str}{summary}.{default_str}", - ]) + rows.append( + [ + f"`--{arg['arg_name']}`", + f"`{arg.get('type', '')}`", + f"{tag_str}{summary}.{default_str}", + ] + ) - return format_markdown_table(["Name", "Type", "Description"], rows, col_widths=[25, 8, 60]) + return format_markdown_table( + ["Name", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py index ecab02b..7049233 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py +++ b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py @@ -1,8 +1,25 @@ from __future__ import annotations import re -ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] -SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] +ANNDATA_STRUCT_NAMES = [ + "X", + "obs", + "var", + "obsm", + "obsp", + "varm", + "varp", + "layers", + "uns", +] +SPATIALDATA_ELEMENT_CATEGORIES = [ + "images", + "labels", + "points", + "shapes", + "tables", + "coordinate_systems", +] def render_file_format(spec: dict | str) -> str: @@ -18,6 +35,7 @@ def render_file_format(spec: dict | str) -> str: """ if isinstance(spec, str): from .read_file_format import read_file_format + spec = read_file_format(spec) info = spec["info"] @@ -34,19 +52,21 @@ def render_file_format(spec: dict | str) -> str: if expected_format: format_example_lines = _render_format_example(spec) format_table_lines = _render_format_table(spec) - expected_format_str = "\n".join([ - "Format:", - "", - ":::{.small}", - *format_example_lines, - ":::", - "", - "Data structure:", - "", - ":::{.small}", - *format_table_lines, - ":::", - ]) + expected_format_str = "\n".join( + [ + "Format:", + "", + ":::{.small}", + *format_example_lines, + ":::", + "", + "Data structure:", + "", + ":::{.small}", + *format_table_lines, + ":::", + ] + ) parts = [ f"## File format: {label}", @@ -115,7 +135,9 @@ def _tag_str(row: dict) -> str: return f"(_{', '.join(tags)}_) " if tags else "" def _clean_desc(row: dict) -> str: - desc = re.sub(r" *\n *", " ", (row.get("description") or "").strip()).rstrip(".") + desc = re.sub(r" *\n *", " ", (row.get("description") or "").strip()).rstrip( + "." 
+ ) return desc if fmt_type in ("h5ad", "anndata_hdf5", "anndata_zarr"): @@ -127,7 +149,11 @@ def _clean_desc(row: dict) -> str: ] for row in expected_format ] - return [format_markdown_table(["Slot", "Type", "Description"], rows, col_widths=[25, 8, 60])] + return [ + format_markdown_table( + ["Slot", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) + ] if fmt_type in ("csv", "tsv", "parquet"): rows = [ @@ -138,7 +164,11 @@ def _clean_desc(row: dict) -> str: ] for row in expected_format ] - return [format_markdown_table(["Column", "Type", "Description"], rows, col_widths=[25, 8, 60])] + return [ + format_markdown_table( + ["Column", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) + ] if fmt_type in ("json", "yaml"): rows = [ @@ -149,7 +179,11 @@ def _clean_desc(row: dict) -> str: ] for row in expected_format ] - return [format_markdown_table(["Key", "Type", "Description"], rows, col_widths=[25, 8, 60])] + return [ + format_markdown_table( + ["Key", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) + ] if fmt_type == "spatialdata_zarr": lines = [] @@ -169,7 +203,11 @@ def _clean_desc(row: dict) -> str: [f'`{e["name"]}`', f"{_tag_str(e)}{_clean_desc(e)}."] for e in elements ] - lines.append(format_markdown_table(["Name", "Description"], elem_rows, col_widths=[25, 68])) + lines.append( + format_markdown_table( + ["Name", "Description"], elem_rows, col_widths=[25, 68] + ) + ) elif cat in ("points", "shapes"): for elem in elements: @@ -179,12 +217,18 @@ def _clean_desc(row: dict) -> str: [ f'`{c["name"]}`', f'`{c.get("type", "")}`', - f"{_tag_str(c)}{re.sub(r' *\n *', ' ', (c.get('description') or '').strip()).rstrip('.')}.", + f"{_tag_str(c)}{_clean_desc(c)}.", ] for c in (elem.get("columns") or []) ] if col_rows: - lines.append(format_markdown_table(["Column", "Type", "Description"], col_rows, col_widths=[25, 8, 60])) + lines.append( + format_markdown_table( + ["Column", "Type", "Description"], + col_rows, + col_widths=[25, 8, 60], + ) + ) elif cat == "tables": for elem in elements: @@ -194,12 +238,18 @@ def _clean_desc(row: dict) -> str: [ f'`{s["struct"]}["{s["name"]}"]`', f'`{s.get("type", "")}`', - f"{_tag_str(s)}{re.sub(r' *\n *', ' ', (s.get('description') or '').strip()).rstrip('.')}.", + f"{_tag_str(s)}{_clean_desc(s)}.", ] for s in (elem.get("anndata_slots") or []) ] if slot_rows: - lines.append(format_markdown_table(["Slot", "Type", "Description"], slot_rows, col_widths=[25, 8, 60])) + lines.append( + format_markdown_table( + ["Slot", "Type", "Description"], + slot_rows, + col_widths=[25, 8, 60], + ) + ) lines.append("") diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py index 4c94c3e..3c2601d 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py +++ b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py @@ -2,7 +2,9 @@ import re -def render_task_readme_qmd(task_metadata: dict | str, add_instructions: bool = False) -> str: +def render_task_readme_qmd( + task_metadata: dict | str, add_instructions: bool = False +) -> str: """Render the ``README.qmd`` for a task. 
Args: @@ -16,6 +18,7 @@ def render_task_readme_qmd(task_metadata: dict | str, add_instructions: bool = F """ if isinstance(task_metadata, str): from .read_task_metadata import read_task_metadata + task_metadata = read_task_metadata(task_metadata) proj_conf = task_metadata["proj_conf"] @@ -79,7 +82,7 @@ def _render_authors(task_metadata: dict) -> str: # Collect columns: name, roles, then any info keys all_keys: list[str] = ["name", "roles"] for aut in authors: - for key in (aut.get("info") or {}): + for key in aut.get("info") or {}: if key not in all_keys: all_keys.append(key) @@ -113,10 +116,14 @@ def make_label(node_id: str, label: str, is_comp: bool) -> str: node_order = {name: i for i, name in enumerate(order)} - sorted_nodes = sorted(G.nodes(data=True), key=lambda x: node_order.get(x[0], len(order))) + sorted_nodes = sorted( + G.nodes(data=True), key=lambda x: node_order.get(x[0], len(order)) + ) node_lines = [] for node_id, attrs in sorted_nodes: - label = make_label(node_id, attrs.get("label", node_id), attrs.get("is_comp", False)) + label = make_label( + node_id, attrs.get("label", node_id), attrs.get("is_comp", False) + ) cid = clean_id(node_id) if attrs.get("is_comp", False): node_lines.append(f' {cid}[/"{label}"/]') @@ -125,7 +132,10 @@ def make_label(node_id: str, label: str, is_comp: bool) -> str: sorted_edges = sorted( G.edges(data=True), - key=lambda e: (node_order.get(e[0], len(order)), node_order.get(e[1], len(order))), + key=lambda e: ( + node_order.get(e[0], len(order)), + node_order.get(e[1], len(order)), + ), ) edge_lines = [] for from_node, to_node, attrs in sorted_edges: @@ -137,13 +147,15 @@ def make_label(node_id: str, label: str, is_comp: bool) -> str: edge_type = "-->" if required else ".->" edge_lines.append(f" {clean_id(from_node)}{edge_type}{clean_id(to_node)}") - return "\n".join([ - "```mermaid", - "flowchart TB", - *node_lines, - *edge_lines, - "```", - ]) + return "\n".join( + [ + "```mermaid", + "flowchart TB", + *node_lines, + *edge_lines, + "```", + ] + ) def _render_task_parts(task_metadata: dict) -> list[str]: @@ -161,50 +173,52 @@ def _render_task_parts(task_metadata: dict) -> list[str]: def _render_instructions(task_metadata: dict) -> str: proj_name = task_metadata["proj_conf"].get("name", "") - return "\n".join([ - "### Installation", - "", - "You need to have Docker, Java, and Viash installed. Follow", - "[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)", - "to install the required dependencies.", - "", - "### Add a method", - "", - "To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.", - "", - "### Initial setup", - "", - "To get started, you can run the following commands:", - "", - "```bash", - f"git clone git@github.com:openproblems-bio/{proj_name}.git", - "", - f"cd {proj_name}", - "", - "# initialise submodule", - "scripts/init_submodule.sh", - "", - "# download resources", - "scripts/download_resources.sh", - "```", - "", - "To run the benchmark, you first need to build the components. 
Afterwards, you can run the benchmark:", - "", - "```bash", - "viash ns build --parallel --setup cachedbuild", - "", - "scripts/run_benchmark.sh", - "```", - "", - "After adding a component, it is recommended to run the tests to ensure that the component is working correctly:", - "", - "```bash", - "viash ns test --parallel", - "```", - "", - "Optionally, you can provide the `--query` argument to test only a subset of components:", - "", - "```bash", - "viash ns test --parallel --query 'component_name'", - "```", - ]) + return "\n".join( + [ + "### Installation", + "", + "You need to have Docker, Java, and Viash installed. Follow", + "[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)", + "to install the required dependencies.", + "", + "### Add a method", + "", + "To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.", + "", + "### Initial setup", + "", + "To get started, you can run the following commands:", + "", + "```bash", + f"git clone git@github.com:openproblems-bio/{proj_name}.git", + "", + f"cd {proj_name}", + "", + "# initialise submodule", + "scripts/init_submodule.sh", + "", + "# download resources", + "scripts/download_resources.sh", + "```", + "", + "To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark:", + "", + "```bash", + "viash ns build --parallel --setup cachedbuild", + "", + "scripts/run_benchmark.sh", + "```", + "", + "After adding a component, it is recommended to run the tests to ensure that the component is working correctly:", + "", + "```bash", + "viash ns test --parallel", + "```", + "", + "Optionally, you can provide the `--query` argument to test only a subset of components:", + "", + "```bash", + "viash ns test --parallel --query 'component_name'", + "```", + ] + ) diff --git a/packages/python/openproblems/src/openproblems/project/find_project_root.py b/packages/python/openproblems/src/openproblems/project/find_project_root.py index 700952b..ac29ef9 100644 --- a/packages/python/openproblems/src/openproblems/project/find_project_root.py +++ b/packages/python/openproblems/src/openproblems/project/find_project_root.py @@ -1,5 +1,6 @@ from __future__ import annotations + def find_project_root(path: str = ".") -> str | None: """ Find the root of a Viash project @@ -15,8 +16,8 @@ def find_project_root(path: str = ".") -> str | None: """ import os - - path = os.path.abspath(path) + + path = os.path.abspath(path) while path != "/" and not os.path.exists(os.path.join(path, "_viash.yaml")): path = os.path.dirname(path) @@ -24,4 +25,4 @@ def find_project_root(path: str = ".") -> str | None: if path == "/": return None - return path \ No newline at end of file + return path diff --git a/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py b/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py index b750861..f3f13fd 100644 --- a/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py +++ b/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py @@ -1,5 +1,6 @@ from __future__ import annotations + def read_nested_yaml(path: str, project_path: str | None = None) -> dict: """ Read a nested YAML @@ -29,10 +30,13 @@ def read_nested_yaml(path: str, project_path: str | None = None) -> dict: data = yaml.safe_load(f) except Exception as e: raise ValueError(f"Could not read {path}. 
Error: {e}") - + return process_nested_yaml(data, data, path, project_path) -def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str) -> dict: + +def process_nested_yaml( + data: any, root_data: dict, path: str, project_path: str +) -> dict: """ Process the merge keys in a YAML @@ -53,11 +57,18 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str from ..utils.deep_merge import deep_merge if isinstance(data, dict): - processed_data = {k: process_nested_yaml(v, root_data, path, project_path) for k, v in data.items()} + processed_data = { + k: process_nested_yaml(v, root_data, path, project_path) + for k, v in data.items() + } new_data = {} - if "__merge__" in processed_data and not isinstance(processed_data["__merge__"], dict): - new_data_path = resolve_path(processed_data["__merge__"], project_path, os.path.dirname(path)) + if "__merge__" in processed_data and not isinstance( + processed_data["__merge__"], dict + ): + new_data_path = resolve_path( + processed_data["__merge__"], project_path, os.path.dirname(path) + ) new_data = read_nested_yaml(new_data_path, project_path) elif "$ref" in processed_data and not isinstance(processed_data["$ref"], dict): ref_parts = processed_data["$ref"].split("#") @@ -65,7 +76,9 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str if ref_parts[0] == "": x = root_data else: - new_data_path = resolve_path(ref_parts[0], project_path, os.path.dirname(path)) + new_data_path = resolve_path( + ref_parts[0], project_path, os.path.dirname(path) + ) new_data_path = os.path.normpath(new_data_path) try: @@ -73,7 +86,7 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str x = yaml.safe_load(f) except Exception as e: raise ValueError(f"Could not read {new_data_path}. Error: {e}") - + x_root = x ref_path_parts = ref_parts[1].split("/") @@ -83,8 +96,10 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str elif part in x: x = x[part] else: - raise ValueError(f"Could not find {processed_data['$ref']} in {path}") - + raise ValueError( + f"Could not find {processed_data['$ref']} in {path}" + ) + if ref_parts[0] == "": new_data = x else: diff --git a/packages/python/openproblems/src/openproblems/project/read_viash_config.py b/packages/python/openproblems/src/openproblems/project/read_viash_config.py index 81c2e23..8e7ae86 100644 --- a/packages/python/openproblems/src/openproblems/project/read_viash_config.py +++ b/packages/python/openproblems/src/openproblems/project/read_viash_config.py @@ -2,6 +2,7 @@ from .read_nested_yaml import read_nested_yaml from .find_project_root import find_project_root + def read_viash_config(target_config_path, project_root_dir=None): """ Process a Viash config file. 
@@ -41,7 +42,9 @@ def read_viash_config(target_config_path, project_root_dir=None): # Fix 'build_info' if present if config["build_info"]: rel_config_dir = os.path.dirname(rel_target_config_path) - abs_build_dir = config["build_info"]["output"].replace(f"/{rel_config_dir}", "") + abs_build_dir = config["build_info"]["output"].replace( + f"/{rel_config_dir}", "" + ) # Get platform types (assuming 'platforms' is a list of dicts) platform_types = [platform["type"] for platform in config["platforms"]] @@ -66,11 +69,7 @@ def read_viash_config(target_config_path, project_root_dir=None): ) config["build_info"]["runner"] = "executable" config["build_info"]["engine"] = "|".join( - [ - pt - for pt in platform_types - if pt in ["docker", "native"] - ] + [pt for pt in platform_types if pt in ["docker", "native"]] ) # Move 'functionality' to top level @@ -87,7 +86,7 @@ def read_viash_config(target_config_path, project_root_dir=None): for i, grp in enumerate(config["argument_groups"]) if grp["name"] == "Arguments" ), - None + None, ) if existing_ix is not None: diff --git a/packages/python/openproblems/src/openproblems/project/resolve_path.py b/packages/python/openproblems/src/openproblems/project/resolve_path.py index 7e2fb50..20b1e12 100644 --- a/packages/python/openproblems/src/openproblems/project/resolve_path.py +++ b/packages/python/openproblems/src/openproblems/project/resolve_path.py @@ -14,9 +14,9 @@ def resolve_path(path: str, project_path: str, parent_path: str) -> str: Returns: str: The resolved path - + Example: - + ```python project_path <- "/path/to/project" parent_path <- "/path/to/project/subdir" @@ -28,9 +28,9 @@ def resolve_path(path: str, project_path: str, parent_path: str) -> str: # "/path/to/project/file.yaml" ``` """ - + import os - + if path.startswith("/"): return os.path.join(project_path, path) else: diff --git a/packages/python/openproblems/src/openproblems/utils/__init__.py b/packages/python/openproblems/src/openproblems/utils/__init__.py index e7d0600..7042aa3 100644 --- a/packages/python/openproblems/src/openproblems/utils/__init__.py +++ b/packages/python/openproblems/src/openproblems/utils/__init__.py @@ -1,7 +1,4 @@ from .strip_margin import strip_margin from .deep_merge import deep_merge -__all__ = [ - "strip_margin", - "deep_merge" -] +__all__ = ["strip_margin", "deep_merge"] diff --git a/packages/python/openproblems/src/openproblems/utils/strip_margin.py b/packages/python/openproblems/src/openproblems/utils/strip_margin.py index 524fa7f..37a96e7 100644 --- a/packages/python/openproblems/src/openproblems/utils/strip_margin.py +++ b/packages/python/openproblems/src/openproblems/utils/strip_margin.py @@ -1,23 +1,24 @@ def strip_margin(text: str, symbol: str = "\\|") -> str: - """ - Strip margin from a string + """ + Strip margin from a string - Args: - text (str): A character vector. - symbol (str): The margin symbol to strip. - - Returns: - str: A character vector with the margin stripped. + Args: + text (str): A character vector. + symbol (str): The margin symbol to strip. - Example: + Returns: + str: A character vector with the margin stripped. 
- ```python - strip_margin(" - |hello_world: - | this_is: "a yaml" - |") - ``` - """ + Example: - import re - return re.sub("(^|\n)[ \t]*" + symbol, "\\1", text) + ```python + strip_margin(" + |hello_world: + | this_is: "a yaml" + |") + ``` + """ + + import re + + return re.sub("(^|\n)[ \t]*" + symbol, "\\1", text) diff --git a/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py index 2ad4fc8..c1ea848 100644 --- a/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py +++ b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py @@ -1,20 +1,30 @@ import os import pytest -EXAMPLE_PROJECT = os.path.normpath(os.path.join( - os.path.dirname(__file__), - "data/example_project", -)) +EXAMPLE_PROJECT = os.path.normpath( + os.path.join( + os.path.dirname(__file__), + "data/example_project", + ) +) @pytest.fixture(scope="module") def task_metadata(): from openproblems.project.docs import read_task_metadata + return read_task_metadata(EXAMPLE_PROJECT) def test_read_task_metadata_keys(task_metadata): - for key in ("proj_path", "proj_conf", "files", "comps", "task_graph", "task_graph_order"): + for key in ( + "proj_path", + "proj_conf", + "files", + "comps", + "task_graph", + "task_graph_order", + ): assert key in task_metadata @@ -36,6 +46,7 @@ def test_read_task_metadata_graph_edges(task_metadata): def test_render_task_readme_qmd_structure(task_metadata): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) assert '---\ntitle: "Template"\nformat: gfm\n---' in result @@ -49,6 +60,7 @@ def test_render_task_readme_qmd_structure(task_metadata): def test_render_task_readme_qmd_components(task_metadata): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) assert "## Component type: Method" in result @@ -57,6 +69,7 @@ def test_render_task_readme_qmd_components(task_metadata): def test_render_task_readme_qmd_file_formats(task_metadata): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) assert "## File format: Training data" in result @@ -65,6 +78,7 @@ def test_render_task_readme_qmd_file_formats(task_metadata): def test_render_task_readme_qmd_instructions(task_metadata): from openproblems.project import render_task_readme_qmd + without = render_task_readme_qmd(task_metadata, add_instructions=False) with_inst = render_task_readme_qmd(task_metadata, add_instructions=True) @@ -74,5 +88,6 @@ def test_render_task_readme_qmd_instructions(task_metadata): def test_render_task_readme_qmd_from_path(): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(EXAMPLE_PROJECT) assert "## API" in result diff --git a/packages/python/openproblems/tests/test_project_find_project_root.py b/packages/python/openproblems/tests/test_project_find_project_root.py index 2fdd88f..5841727 100644 --- a/packages/python/openproblems/tests/test_project_find_project_root.py +++ b/packages/python/openproblems/tests/test_project_find_project_root.py @@ -1,22 +1,23 @@ import os from openproblems.project import find_project_root + def test_find_project_root(tmpdir): # Create project directory and subdirectories - proj_dir = os.path.join(tmpdir, 'project') + proj_dir = os.path.join(tmpdir, "project") os.makedirs(proj_dir, exist_ok=True) - src_dir = os.path.join(proj_dir, 'src') + src_dir = os.path.join(proj_dir, "src") 
os.makedirs(src_dir, exist_ok=True) # Create files - proj_config = os.path.join(proj_dir, '_viash.yaml') - open(proj_config, 'w').close() + proj_config = os.path.join(proj_dir, "_viash.yaml") + open(proj_config, "w").close() - comp_config = os.path.join(src_dir, 'config.vsh.yaml') - open(comp_config, 'w').close() + comp_config = os.path.join(src_dir, "config.vsh.yaml") + open(comp_config, "w").close() - comp_script = os.path.join(src_dir, 'script.R') - open(comp_script, 'w').close() + comp_script = os.path.join(src_dir, "script.R") + open(comp_script, "w").close() # Perform assertions assert find_project_root(comp_script) == proj_dir From ff10ba13cabae2d0e563f6806925f120c0493f40 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 20 Apr 2026 14:47:30 +0200 Subject: [PATCH 3/6] simplify functions --- .../openproblems/project/component_tests/check_config.py | 9 ++------- .../project/component_tests/run_and_check_output.py | 9 +++------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py index 181fcfa..6906b3c 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py +++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py @@ -105,20 +105,15 @@ def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None: check_references(references) -def run_check_config(meta: dict) -> None: +def run_check_config(config: dict) -> None: """Validate a viash component config. Checks namespace, info.type, component metadata, preferred_normalization, variants, and Nextflow runner labels. Args: - meta: Viash meta dict with at least a ``"config"`` key pointing to the - ``.config.vsh.yaml`` path. + config: Parsed viash config dict (from ``read_viash_config``). """ - import openproblems - - print("Load config data", flush=True) - config = openproblems.project.read_viash_config(meta["config"]) info = config.get("info", {}) comp_type = info.get("type") diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py index 2c96cb3..1a72635 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py +++ b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py @@ -265,16 +265,13 @@ def generate_cmd_args(argument_set: list) -> list: return cmd_args -def run_and_check_output(meta: dict) -> None: +def run_and_check_output(meta: dict, config: dict) -> None: """Run a viash component with test resources and validate its outputs. Args: - meta: Viash meta dict with keys ``"executable"``, ``"config"``, and - ``"resources_dir"``. + meta: Viash meta dict with keys ``"executable"`` and ``"resources_dir"``. + config: Parsed viash config dict (from ``read_viash_config``). 
""" - import openproblems - - config = openproblems.project.read_viash_config(meta["config"]) argument_sets = get_argument_sets(config, meta["resources_dir"]) for argset_name, argset_args in argument_sets.items(): From 0fcb2258ec0148030ea62c6d660db97fa3ed8f1b Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 21 Apr 2026 11:51:53 +0200 Subject: [PATCH 4/6] add python publish workflow --- .github/workflows/python-publish.yaml | 54 +++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/python-publish.yaml diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml new file mode 100644 index 0000000..e0e13e1 --- /dev/null +++ b/.github/workflows/python-publish.yaml @@ -0,0 +1,54 @@ +name: Publish Python packages to PyPI + +on: + release: + types: [published] + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # required for setuptools_scm to determine version from git tags + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install build + run: python -m pip install build + + - name: Build package + run: | + cd packages/python/openproblems + python -m build + + - name: Upload distribution artifacts + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: packages/python/openproblems/dist/ + + publish-to-pypi: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/openproblems + permissions: + id-token: write # required for OIDC trusted publishing + + steps: + - name: Download distribution artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From bce4261f72e1f9390c1b055538d6db4fa847fba6 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 21 Apr 2026 11:52:11 +0200 Subject: [PATCH 5/6] migrate more functions --- packages/python/openproblems/CHANGELOG.md | 24 +++++++++++++++++++ packages/python/openproblems/pyproject.toml | 20 ++++++++++++++-- .../project/component_tests/check_config.py | 16 ++++++------- .../component_tests/run_and_check_output.py | 8 +++---- 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/packages/python/openproblems/CHANGELOG.md b/packages/python/openproblems/CHANGELOG.md index 9ceafc5..a9bab40 100644 --- a/packages/python/openproblems/CHANGELOG.md +++ b/packages/python/openproblems/CHANGELOG.md @@ -1,3 +1,27 @@ +# openproblems core Python v0.2.0 + +## NEW FUNCTIONALITY + +* `project`: + - `resolve_path`: Resolve a path relative to a parent path or project root. + +* `project.component_tests`: + - `run_check_config` / `check_config`: Validate a component's Viash config (namespace, type, metadata, normalization, variants, Nextflow runner). + - `run_and_check_output`: Run a component executable and validate its output files against format specifications. + +* `project.docs`: + - `read_task_config`: Read a task-level configuration file. + - `read_task_metadata`: Read and assemble full task metadata by traversing the task's component graph. + - `read_component_spec`: Read a component API specification. + - `read_file_format`: Read a file format specification. + - `render_task_readme_qmd`: Render a Quarto README document for a task. + - `render_component_spec`: Render a component specification as a Markdown section. 
+  - `render_file_format`: Render a file format specification as a Markdown section.
+
+## MINOR CHANGES
+
+* Improve diagnostic print messages in `check_config` and `run_and_check_output` to be more descriptive.
+
 # openproblems core Python v0.1.1
 
 ## NEW FUNCTIONALITY
diff --git a/packages/python/openproblems/pyproject.toml b/packages/python/openproblems/pyproject.toml
index e381e1a..a31f416 100644
--- a/packages/python/openproblems/pyproject.toml
+++ b/packages/python/openproblems/pyproject.toml
@@ -12,6 +12,20 @@ authors = [
 license = { text = "MIT" }
 readme = "README.md"
 requires-python = ">= 3.9"
+keywords = ["openproblems", "benchmarking", "bioinformatics", "viash"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+]
 dependencies = [
     'PyYAML',
     'networkx',
@@ -23,8 +37,10 @@ test = [
 ]
 
 [project.urls]
-homepage = "https://openproblems.bio/documentation"
-repository = "https://github.com/openproblems-bio/core"
+Homepage = "https://openproblems.bio/documentation"
+Repository = "https://github.com/openproblems-bio/core"
+"Bug Tracker" = "https://github.com/openproblems-bio/core/issues"
+Changelog = "https://github.com/openproblems-bio/core/blob/main/packages/python/openproblems/CHANGELOG.md"
 
 [tool.setuptools.packages.find]
 where = ["src"]
diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py
index 6906b3c..14d3cce 100644
--- a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py
+++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py
@@ -101,7 +101,7 @@ def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None:
     if comp_type != "metric":
         references = this_config.get("references") or references
         if comp_type != "control_method" or references:
-            print("Check references fields", flush=True)
+            print("Check references fields (doi or bibtex)", flush=True)
             check_references(references)
 
 
@@ -117,16 +117,16 @@ def run_check_config(config: dict) -> None:
     info = config.get("info", {})
     comp_type = info.get("type")
 
-    print("Check .namespace", flush=True)
+    print("Check that .namespace is defined", flush=True)
     assert config.get("namespace"), ".namespace is not defined"
 
-    print("Check .info.type", flush=True)
+    print("Check that .info.type is 'method', 'control_method', or 'metric'", flush=True)
     expected_types = ["method", "control_method", "metric"]
     assert (
         comp_type in expected_types
-    ), ".info.type should be equal to 'method' or 'control_method'"
+    ), f".info.type is '{comp_type}' but should be one of: {', '.join(expected_types)}"
 
-    print("Check component metadata", flush=True)
+    print("Check component metadata fields (name, label, summary, description)", flush=True)
     if comp_type == "metric":
         metric_infos = info.get("metrics", [])
         assert metric_infos, ".info.metrics is not defined"
@@ -136,7 +136,7 @@ def run_check_config(config: dict) -> None:
         check_info(info, config, comp_type=comp_type)
 
     if "preferred_normalization" in info:
-        print("Checking contents of .info.preferred_normalization", flush=True)
+        print("Check that .info.preferred_normalization is a valid normalization method", flush=True)
         norm_methods = [
             "log_cpm",
            "log_cp10k",
@@ -153,7 +153,7 @@ def run_check_config(config: dict) -> None:
         )
 
     if "variants" in info:
-        print("Checking contents of .info.variants", flush=True)
+        print("Check that .info.variants only references valid argument names", flush=True)
         arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + [
             "preferred_normalization"
         ]
@@ -167,7 +167,7 @@ def run_check_config(config: dict) -> None:
 
     runners = config.get("runners", [])
 
-    print("Check Nextflow runner", flush=True)
+    print("Check that a Nextflow runner with time, mem, and cpu labels is defined", flush=True)
     nextflow_runner = next(
         (runner for runner in runners if runner["type"] == "nextflow"),
         None,
diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py
index 1a72635..75fdfe3 100644
--- a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py
+++ b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py
@@ -7,7 +7,7 @@ def run_component(cmd: list) -> None:
     """Run a component executable and assert it exits successfully."""
     import subprocess
 
-    print(">> Running script as test", flush=True)
+    print(">> Running the component executable", flush=True)
     out = subprocess.run(cmd)
     assert (
         out.returncode == 0
@@ -18,7 +18,7 @@ def check_input_files(arguments: list) -> None:
     """Assert that all required input files exist."""
     from os import path
 
-    print(">> Checking whether input files exist", flush=True)
+    print(">> Checking that all required input files exist", flush=True)
     for arg in arguments:
         if arg["type"] == "file" and arg["direction"] == "input" and arg["required"]:
             assert not arg["must_exist"] or path.exists(
@@ -30,14 +30,14 @@ def check_output_files(arguments: list) -> None:
     """Assert that all required output files exist and match their format spec."""
     from os import path
 
-    print(">> Checking whether output file exists", flush=True)
+    print(">> Checking that all required output files were created", flush=True)
     for arg in arguments:
         if arg["type"] == "file" and arg["direction"] == "output" and arg["required"]:
             assert not arg["must_exist"] or path.exists(
                 arg["value"]
             ), f"Output file '{arg['value']}' does not exist"
 
-    print(">> Reading output files and checking formats", flush=True)
+    print(">> Validating the contents and format of output files", flush=True)
     for arg in arguments:
         if arg["type"] != "file" or arg["direction"] != "output":
             continue

From 073936f3603017957c015083a6a0455b6ad1eefa Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Tue, 21 Apr 2026 11:52:46 +0200
Subject: [PATCH 6/6] add readme

---
 packages/python/openproblems/README.md | 56 ++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 packages/python/openproblems/README.md

diff --git a/packages/python/openproblems/README.md b/packages/python/openproblems/README.md
new file mode 100644
index 0000000..508ee13
--- /dev/null
+++ b/packages/python/openproblems/README.md
@@ -0,0 +1,56 @@
+# openproblems
+
+[![PyPI](https://img.shields.io/pypi/v/openproblems)](https://pypi.org/project/openproblems/)
+[![Python Versions](https://img.shields.io/pypi/pyversions/openproblems)](https://pypi.org/project/openproblems/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+Core Python helper functions for [OpenProblems](https://openproblems.bio) benchmarking tasks.
+
+## Installation
+
+```bash
+pip install openproblems
+```
+
+## Modules
+
+### `openproblems.project`
+
+Utilities for working with Viash projects.
+
+- `find_project_root`: Find the root of a Viash project.
+- `read_nested_yaml`: Read a nested YAML file.
+- `read_viash_config`: Read a Viash configuration file.
+- `resolve_path`: Resolve a path relative to a parent or project path.
+
+#### `openproblems.project.component_tests`
+
+Helpers for writing component tests.
+
+- `run_check_config` / `check_config`: Validate a component's Viash configuration.
+- `run_and_check_output`: Run a component and validate its output files against format specs.
+
+#### `openproblems.project.docs`
+
+Utilities for generating task documentation.
+
+- `read_task_config`: Read a task-level configuration file.
+- `read_task_metadata`: Read and assemble full task metadata.
+- `read_component_spec`: Read a component API specification.
+- `read_file_format`: Read a file format specification.
+- `render_task_readme_qmd`: Render a Quarto README for a task.
+- `render_component_spec`: Render a component specification as Markdown.
+- `render_file_format`: Render a file format specification as Markdown.
+
+### `openproblems.utils`
+
+General-purpose utilities.
+
+- `strip_margin`: Strip leading margin characters from a multiline string.
+- `deep_merge`: Recursively merge two dictionaries.
+
+## Links
+
+- **Documentation**: <https://openproblems.bio/documentation>
+- **Repository**: <https://github.com/openproblems-bio/core>
+- **Issue tracker**: <https://github.com/openproblems-bio/core/issues>
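+
+## Example usage
+
+A minimal sketch of how the component-test helpers fit together in a test script. The `meta` dictionary is normally injected by viash when it runs component tests; the paths below are illustrative placeholders, not real project files.
+
+```python
+from openproblems.project import (
+    check_config,
+    read_viash_config,
+    run_and_check_output,
+)
+
+# In viash component tests, `meta` is provided by the test runner;
+# placeholder values are shown here for illustration.
+meta = {
+    "executable": "target/executable/my_method/my_method",
+    "config": "target/executable/my_method/.config.vsh.yaml",
+    "resources_dir": "resources_test/my_task",
+}
+
+# Validate the component configuration, then run the component against
+# its test resources and check the produced output files.
+config = read_viash_config(meta["config"])
+check_config(config)
+run_and_check_output(meta, config)
+```
+
+Task documentation can be rendered directly from a task directory (again a sketch; the path is a placeholder):
+
+```python
+from openproblems.project import render_task_readme_qmd
+
+# Accepts a task directory or a metadata dict from `read_task_metadata`
+# and returns the rendered Quarto document as a string.
+qmd = render_task_readme_qmd("path/to/task_project", add_instructions=True)
+```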