From 70377e2274fa890801a2a504284a0e6b1b487162 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 20 Apr 2026 13:47:27 +0200 Subject: [PATCH 1/6] migrate doc rendering and component testing functions --- packages/python/openproblems/pyproject.toml | 3 +- .../src/openproblems/project/__init__.py | 10 +- .../project/component_tests/__init__.py | 41 +++ .../project/component_tests/check_config.py | 166 +++++++++++ .../component_tests/run_and_check_output.py | 282 ++++++++++++++++++ .../src/openproblems/project/docs/__init__.py | 17 ++ .../openproblems/project/docs/_markdown.py | 28 ++ .../project/docs/read_component_spec.py | 75 +++++ .../project/docs/read_file_format.py | 167 +++++++++++ .../project/docs/read_task_config.py | 11 + .../project/docs/read_task_metadata.py | 145 +++++++++ .../project/docs/render_component_spec.py | 64 ++++ .../project/docs/render_file_format.py | 211 +++++++++++++ .../project/docs/render_task_readme_qmd.py | 210 +++++++++++++ .../tests/data/example_project/_viash.yaml | 88 ++++++ .../api/comp_control_method.yaml | 37 +++ .../api/comp_data_processor.yaml | 31 ++ .../data/example_project/api/comp_method.yaml | 28 ++ .../data/example_project/api/comp_metric.yaml | 28 ++ .../api/file_common_dataset.yaml | 72 +++++ .../example_project/api/file_prediction.yaml | 26 ++ .../data/example_project/api/file_score.yaml | 30 ++ .../example_project/api/file_solution.yaml | 73 +++++ .../data/example_project/api/file_test.yaml | 45 +++ .../data/example_project/api/file_train.yaml | 49 +++ .../tests/test_docs_render_task_readme_qmd.py | 78 +++++ schemas/api_file_format.yaml | 4 + schemas/schema_openproblems_definitions.yaml | 80 +++++ 28 files changed, 2097 insertions(+), 2 deletions(-) create mode 100644 packages/python/openproblems/src/openproblems/project/component_tests/__init__.py create mode 100644 packages/python/openproblems/src/openproblems/project/component_tests/check_config.py create mode 100644 packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/__init__.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/_markdown.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_file_format.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_task_config.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/render_file_format.py create mode 100644 packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py create mode 100644 packages/python/openproblems/tests/data/example_project/_viash.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_method.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml create mode 100644 
packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_score.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_solution.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_test.yaml create mode 100644 packages/python/openproblems/tests/data/example_project/api/file_train.yaml create mode 100644 packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py diff --git a/packages/python/openproblems/pyproject.toml b/packages/python/openproblems/pyproject.toml index 7bffaa8..e381e1a 100644 --- a/packages/python/openproblems/pyproject.toml +++ b/packages/python/openproblems/pyproject.toml @@ -13,7 +13,8 @@ license = { text = "MIT" } readme = "README.md" requires-python = ">= 3.9" dependencies = [ - 'PyYAML' + 'PyYAML', + 'networkx', ] [project.optional-dependencies] diff --git a/packages/python/openproblems/src/openproblems/project/__init__.py b/packages/python/openproblems/src/openproblems/project/__init__.py index 04f8a64..10832fb 100644 --- a/packages/python/openproblems/src/openproblems/project/__init__.py +++ b/packages/python/openproblems/src/openproblems/project/__init__.py @@ -1,9 +1,17 @@ from .find_project_root import find_project_root from .read_viash_config import read_viash_config from .read_nested_yaml import read_nested_yaml +from .component_tests.check_config import run_check_config as check_config +from .component_tests.run_and_check_output import run_and_check_output +from .docs.read_task_metadata import read_task_metadata +from .docs.render_task_readme_qmd import render_task_readme_qmd __all__ = [ "find_project_root", "read_viash_config", "read_nested_yaml", -] \ No newline at end of file + "check_config", + "run_and_check_output", + "read_task_metadata", + "render_task_readme_qmd", +] diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/__init__.py b/packages/python/openproblems/src/openproblems/project/component_tests/__init__.py new file mode 100644 index 0000000..b72ae5e --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/component_tests/__init__.py @@ -0,0 +1,41 @@ +from .check_config import ( + check_info, + check_links, + check_references, + check_url, + run_check_config, +) +from .run_and_check_output import ( + check_anndata, + check_dataframe, + check_dictionary, + check_format, + check_input_files, + check_output_files, + check_spatialdata, + generate_cmd_args, + get_argument_sets, + run_and_check_output, + run_component, +) + +__all__ = [ + # check_config + "check_info", + "check_links", + "check_references", + "check_url", + "run_check_config", + # run_and_check_output + "check_anndata", + "check_dataframe", + "check_dictionary", + "check_format", + "check_input_files", + "check_output_files", + "check_spatialdata", + "generate_cmd_args", + "get_argument_sets", + "run_and_check_output", + "run_component", +] diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py new file mode 100644 index 0000000..dd45b49 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +import re +from typing import Dict, List, Union + +## CONSTANTS +NAME_MAXLEN = 50 +LABEL_MAXLEN = 50 +SUMMARY_MAXLEN = 400 
+DESCRIPTION_MAXLEN = 5000 + +TIME_LABELS = ["lowtime", "midtime", "hightime", "veryhightime"] +MEM_LABELS = ["lowmem", "midmem", "highmem", "veryhighmem"] +CPU_LABELS = ["lowcpu", "midcpu", "highcpu", "veryhighcpu"] + + +def check_url(url: str) -> bool: + import requests + from urllib3.util.retry import Retry + from requests.adapters import HTTPAdapter + + session = requests.Session() + retry = Retry(connect=3, backoff_factor=0.5) + adapter = HTTPAdapter(max_retries=retry) + session.mount("http://", adapter) + session.mount("https://", adapter) + + get = session.head(url) + + if get.ok or get.status_code == 429: # 429 rejected, too many requests + return True + else: + return False + + +def check_references(references: Dict[str, Union[str, List[str]]]) -> None: + doi = references.get("doi") + bibtex = references.get("bibtex") + + assert doi or bibtex, "One of .references.doi or .references.bibtex should be defined" + + if doi: + if not isinstance(doi, list): + doi = [doi] + for d in doi: + assert re.match(r"^10.\d{4,9}/[-._;()/:A-Za-z0-9]+$", d), f"Invalid DOI format: {doi}" + assert check_url(f"https://doi.org/{d}"), f"DOI '{d}' is not reachable" + + if bibtex: + if not isinstance(bibtex, list): + bibtex = [bibtex] + for b in bibtex: + assert re.match(r"^@.*{.*", b), f"Invalid bibtex format: {b}" + + +def check_links(links: Dict[str, Union[str, List[str]]], required: List[str] = []) -> None: + if not links: + return + + for expected_link in required: + assert expected_link in links, f"Link .links.{expected_link} is not defined" + + for link_type, link in links.items(): + if link_type != "docker_registry": + assert check_url(link), f"Link .links.{link_type} URL '{link}' is not reachable" + + +def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None: + metadata_field_lengths = { + "name": NAME_MAXLEN, + "label": LABEL_MAXLEN, + "summary": SUMMARY_MAXLEN, + "description": DESCRIPTION_MAXLEN, + } + + for field, max_length in metadata_field_lengths.items(): + value = this_info.get(field) + if comp_type != "metric": + value = this_config.get(field) or value + assert value, f"Metadata field '{field}' is not defined" + assert "FILL IN:" not in value, f"Metadata field '{field}' not filled in" + assert len(value) <= max_length, f"Metadata field '{field}' should not exceed {max_length} characters" + + links = this_info.get("links") or this_config.get("links") or {} + required_links: List[str] = [] + if comp_type == "method": + required_links = ["documentation", "repository"] + check_links(links, required_links) + + references = this_info.get("references") or {} + if comp_type != "metric": + references = this_config.get("references") or references + if comp_type != "control_method" or references: + print("Check references fields", flush=True) + check_references(references) + + +def run_check_config(meta: dict) -> None: + """Validate a viash component config. + + Checks namespace, info.type, component metadata, preferred_normalization, + variants, and Nextflow runner labels. + + Args: + meta: Viash meta dict with at least a ``"config"`` key pointing to the + ``.config.vsh.yaml`` path. 
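+
+    Example (a sketch; the config path is illustrative):
+
+        >>> run_check_config({"config": "target/executable/methods/my_method/.config.vsh.yaml"})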
+ """ + import openproblems + + print("Load config data", flush=True) + config = openproblems.project.read_viash_config(meta["config"]) + info = config.get("info", {}) + comp_type = info.get("type") + + print("Check .namespace", flush=True) + assert config.get("namespace"), ".namespace is not defined" + + print("Check .info.type", flush=True) + expected_types = ["method", "control_method", "metric"] + assert comp_type in expected_types, ".info.type should be equal to 'method' or 'control_method'" + + print("Check component metadata", flush=True) + if comp_type == "metric": + metric_infos = info.get("metrics", []) + assert metric_infos, ".info.metrics is not defined" + for metric_info in metric_infos: + check_info(metric_info, config, comp_type=comp_type) + else: + check_info(info, config, comp_type=comp_type) + + if "preferred_normalization" in info: + print("Checking contents of .info.preferred_normalization", flush=True) + norm_methods = ["log_cpm", "log_cp10k", "counts", "log_scran_pooling", "sqrt_cpm", "sqrt_cp10k", "l1_sqrt"] + assert info["preferred_normalization"] in norm_methods, ( + ".info['preferred_normalization'] not one of '" + "', '".join(norm_methods) + "'." + ) + + if "variants" in info: + print("Checking contents of .info.variants", flush=True) + arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + ["preferred_normalization"] + for paramset_id, paramset in info["variants"].items(): + if paramset: + for arg_id in paramset: + assert arg_id in arg_names, ( + f"Argument '{arg_id}' in `.info.variants['{paramset_id}']` " + "is not an argument in `.arguments`." + ) + + runners = config.get("runners", []) + + print("Check Nextflow runner", flush=True) + nextflow_runner = next( + (runner for runner in runners if runner["type"] == "nextflow"), + None, + ) + + assert nextflow_runner, ".runners does not contain a nextflow runner" + assert nextflow_runner.get("directives"), "directives not a field in nextflow runner" + nextflow_labels = nextflow_runner["directives"].get("label") + assert nextflow_labels, "label not a field in nextflow runner directives" + + assert [label for label in nextflow_labels if label in TIME_LABELS], "time label not filled in" + assert [label for label in nextflow_labels if label in MEM_LABELS], "mem label not filled in" + assert [label for label in nextflow_labels if label in CPU_LABELS], "cpu label not filled in" + + print("All checks succeeded!", flush=True) diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py new file mode 100644 index 0000000..3484abe --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import re + + +def run_component(cmd: list) -> None: + """Run a component executable and assert it exits successfully.""" + import subprocess + + print(">> Running script as test", flush=True) + out = subprocess.run(cmd) + assert out.returncode == 0, f"Script exited with an error. 
Return code: {out.returncode}" + + +def check_input_files(arguments: list) -> None: + """Assert that all required input files exist.""" + from os import path + + print(">> Checking whether input files exist", flush=True) + for arg in arguments: + if arg["type"] == "file" and arg["direction"] == "input" and arg["required"]: + assert not arg["must_exist"] or path.exists(arg["value"]), ( + f"Input file '{arg['value']}' does not exist" + ) + + +def check_output_files(arguments: list) -> None: + """Assert that all required output files exist and match their format spec.""" + from os import path + + print(">> Checking whether output file exists", flush=True) + for arg in arguments: + if arg["type"] == "file" and arg["direction"] == "output" and arg["required"]: + assert not arg["must_exist"] or path.exists(arg["value"]), ( + f"Output file '{arg['value']}' does not exist" + ) + + print(">> Reading output files and checking formats", flush=True) + for arg in arguments: + if arg["type"] != "file" or arg["direction"] != "output": + continue + check_format(arg) + + +def check_format(arg: dict) -> None: + """Read an output file and validate its contents against the format spec.""" + arg_info = arg.get("info") or {} + if arg["type"] == "file": + arg_format = arg_info.get("format", {}) + file_type = arg_format.get("type") or arg_info.get("file_type") + + # Tabular data + if file_type in ["parquet", "csv", "tsv"]: + import pandas as pd + + print(f"Reading and checking {arg['clean_name']}", flush=True) + if file_type == "csv": + df = pd.read_csv(arg["value"]) + elif file_type == "tsv": + df = pd.read_csv(arg["value"], sep="\t") + else: + df = pd.read_parquet(arg["value"]) + print(f" {df}") + + arg_columns = arg_format.get("columns") or arg_info.get("columns") or [] + check_dataframe(df, arg_columns, f"File '{arg['value']}'") + + # Hierarchical data + elif file_type == "json": + import json + + print(f"Reading and checking {arg['clean_name']}", flush=True) + with open(arg["value"]) as f: + data = json.load(f) + print( + f" {type(data).__name__} with {len(data)} entries" + if isinstance(data, (dict, list)) + else f" {data}" + ) + check_dictionary(data, arg) + + elif file_type == "yaml": + import yaml + + print(f"Reading and checking {arg['clean_name']}", flush=True) + with open(arg["value"]) as f: + data = yaml.safe_load(f) + print( + f" {type(data).__name__} with {len(data)} entries" + if isinstance(data, (dict, list)) + else f" {data}" + ) + check_dictionary(data, arg) + + # AnnData / SpatialData + elif file_type in ["h5ad", "anndata_hdf5"]: + import anndata as ad + + print(f"Reading and checking {arg['clean_name']}", flush=True) + adata = ad.read_h5ad(arg["value"]) + print(f" {adata}") + check_anndata(adata, arg_format, f"File '{arg['value']}'") + + elif file_type == "anndata_zarr": + import anndata as ad + + print(f"Reading and checking {arg['clean_name']}", flush=True) + store = ad.read_zarr(arg["value"]) + print(f" {store}") + check_anndata(store, arg_format, f"File '{arg['value']}'") + + elif file_type == "spatialdata_zarr": + import spatialdata + + print(f"Reading and checking {arg['clean_name']}", flush=True) + sdata = spatialdata.read_zarr(arg["value"]) + print(f" {sdata}") + check_spatialdata(sdata, arg) + + +def check_anndata(adata, format_spec: dict, label: str = "") -> None: + """Check whether an AnnData object contains all required slots + defined in the given format spec dict. 
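+
+    Example of a format spec, mirroring the ``file_*.yaml`` format blocks
+    (an illustrative sketch; ``adata`` is any AnnData object):
+
+        >>> spec = {"obs": [{"name": "label_pred", "required": True}],
+        ...         "uns": [{"name": "dataset_id"}]}
+        >>> check_anndata(adata, spec, label="File 'prediction.h5ad'")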
+ """ + for struc_name, items in format_spec.items(): + if not hasattr(adata, struc_name): + continue + + struc_x = getattr(adata, struc_name) + + if struc_name == "X": + if items.get("required", True): + assert struc_x is not None, f"{label} is missing slot .{struc_name}" + else: + for item in items: + if item.get("required", True): + assert item["name"] in struc_x, ( + f"{label} is missing slot .{struc_name}['{item['name']}']" + ) + + +def check_dataframe(df, columns: list, label: str = "") -> None: + """Check whether a DataFrame contains all required columns + defined in the given columns spec list. + """ + for item in columns: + if item.get("required", True): + assert item["name"] in df.columns, f"{label} is missing column '{item['name']}'" + + +def check_dictionary(data, arg: dict) -> None: + """Check whether a JSON/YAML object contains all required top-level keys + in the corresponding .info.format.keys field. + """ + arg_info = arg.get("info") or {} + arg_format = arg_info.get("format", {}) + arg_keys = arg_format.get("keys") or arg_info.get("keys") or [] + for item in arg_keys: + if item.get("required", True): + assert isinstance(data, dict) and item["name"] in data, ( + f"File '{arg['value']}' is missing key '{item['name']}'" + ) + + +def check_spatialdata(sdata, arg: dict) -> None: + """Check whether a SpatialData object contains all required elements + in the corresponding .info.format field. Supported element categories: + images, labels, points, shapes, tables. + """ + arg_info = arg.get("info") or {} + arg_format = arg_info.get("format") or {} + element_categories = ["images", "labels", "points", "shapes", "tables"] + for category in element_categories: + items = arg_format.get(category) or [] + category_store = getattr(sdata, category, {}) + for item in items: + if item.get("required", True): + assert item["name"] in category_store, ( + f"File '{arg['value']}' is missing {category}['{item['name']}']" + ) + + elem_name = item["name"] + if elem_name not in category_store: + continue + element = category_store[elem_name] + + if category in ["points", "shapes"]: + check_dataframe( + element, + item.get("columns") or [], + f"File '{arg['value']}' {category}['{elem_name}']", + ) + elif category == "tables": + check_anndata(element, item, f"File '{arg['value']}' tables['{elem_name}']") + + +def get_argument_sets(config: dict, resources_dir: str) -> dict: + """Build argument sets from a viash config, resolving input file paths. + + Args: + config: Parsed viash config dict (from ``read_viash_config``). + resources_dir: Directory where test resource files are located. + + Returns: + Dict mapping argument-set name to list of resolved argument dicts. 
+ """ + arguments = [] + + for arg in config["all_arguments"]: + new_arg = arg.copy() + arg_info = new_arg.get("info") or {} + example = arg.get("example", [None])[0] + + if example and arg["type"] == "file": + if arg["direction"] == "input": + value = f"{resources_dir}/{example}" + else: + ext_res = re.search(r"\.(\w+)$", example) + if ext_res: + value = f"{arg['clean_name']}.{ext_res.group(1)}" + else: + value = f"{arg['clean_name']}" + new_arg["value"] = value + elif "test_default" in arg_info: + new_arg["value"] = arg_info["test_default"] + + arguments.append(new_arg) + + config_info = config.get("info") or {} + if "test_setup" not in config_info: + argument_sets = {"run": arguments} + else: + test_setup = config_info["test_setup"] + argument_sets = {} + for name, test_instance in test_setup.items(): + new_arguments = [] + for arg in arguments: + new_arg = arg.copy() + if arg["clean_name"] in test_instance: + val = test_instance[arg["clean_name"]] + if new_arg["type"] == "file" and new_arg["direction"] == "input": + val = f"{resources_dir}/{val}" + new_arg["value"] = val + new_arguments.append(new_arg) + argument_sets[name] = new_arguments + + return argument_sets + + +def generate_cmd_args(argument_set: list) -> list: + """Convert a list of resolved argument dicts to a flat list of CLI args.""" + cmd_args = [] + for arg in argument_set: + if "value" in arg: + value = arg["value"] + if arg["multiple"] and isinstance(value, list): + value = arg["multiple_sep"].join(value) + cmd_args.extend([arg["name"], str(value)]) + return cmd_args + + +def run_and_check_output(meta: dict) -> None: + """Run a viash component with test resources and validate its outputs. + + Args: + meta: Viash meta dict with keys ``"executable"``, ``"config"``, and + ``"resources_dir"``. 
+ """ + import openproblems + + config = openproblems.project.read_viash_config(meta["config"]) + argument_sets = get_argument_sets(config, meta["resources_dir"]) + + for argset_name, argset_args in argument_sets.items(): + print(f">> Running test '{argset_name}'", flush=True) + cmd = [meta["executable"]] + generate_cmd_args(argset_args) + + check_input_files(argset_args) + run_component(cmd) + check_output_files(argset_args) + + print("All checks succeeded!", flush=True) diff --git a/packages/python/openproblems/src/openproblems/project/docs/__init__.py b/packages/python/openproblems/src/openproblems/project/docs/__init__.py new file mode 100644 index 0000000..74c1aff --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/__init__.py @@ -0,0 +1,17 @@ +from .read_task_config import read_task_config +from .read_component_spec import read_component_spec +from .read_file_format import read_file_format +from .read_task_metadata import read_task_metadata +from .render_component_spec import render_component_spec +from .render_file_format import render_file_format +from .render_task_readme_qmd import render_task_readme_qmd + +__all__ = [ + "read_task_config", + "read_component_spec", + "read_file_format", + "read_task_metadata", + "render_component_spec", + "render_file_format", + "render_task_readme_qmd", +] diff --git a/packages/python/openproblems/src/openproblems/project/docs/_markdown.py b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py new file mode 100644 index 0000000..a0dae31 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py @@ -0,0 +1,28 @@ +def format_markdown_table(headers, rows, col_widths=None): + """Format a GitHub-Flavored Markdown pipe table. + + Args: + headers: Column header names. + rows: List of rows, each a list of cell values. + col_widths: Optional list of exact dash-counts for the separator row + (matches R's ``align_kable_widths`` behaviour). + + Returns: + A GFM pipe table string, or an empty string when ``rows`` is empty. + """ + if not rows: + return "" + + header_line = "| " + " | ".join(str(h) for h in headers) + " |" + + if col_widths is not None: + sep_line = "|" + "".join(f":{'-' * w}|" for w in col_widths) + else: + sep_line = "| " + " | ".join(f":{'-' * max(len(str(h)), 3)}" for h in headers) + " |" + + data_lines = [ + "| " + " | ".join(str(cell) for cell in row) + " |" + for row in rows + ] + + return "\n".join([header_line, sep_line] + data_lines) diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py new file mode 100644 index 0000000..47ea6f2 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py @@ -0,0 +1,75 @@ +from __future__ import annotations +import os +import re + + +def read_component_spec(path: str) -> dict: + """Read a component spec from a ``comp_*.yaml`` file. + + Args: + path: Path to a component spec yaml (usually ``src/api/comp_*.yaml``). + + Returns: + A dict with keys ``info`` (dict) and ``args`` (list of dicts). + """ + from .. 
import read_nested_yaml + data = read_nested_yaml(path) + return { + "info": _process_info(data, path), + "args": _process_arguments(data, path), + } + + +def _process_info(data: dict, path: str) -> dict: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + info: dict = {"file_name": file_name} + + # Top-level fields + for key in ("label", "summary", "description", "namespace"): + info[key] = data.get(key) + + # Merge info block (may override Nones above) + for key, val in (data.get("info") or {}).items(): + if info.get(key) is None: + info[key] = val + + # Merge info.type_info + for key, val in ((data.get("info") or {}).get("type_info") or {}).items(): + if info.get(key) is None: + info[key] = val + + return info + + +def _process_arguments(data: dict, path: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + + arguments = list(data.get("arguments") or []) + for arg_group in data.get("argument_groups") or []: + arguments.extend(arg_group.get("arguments") or []) + + result = [] + for arg in arguments: + arg_info = arg.get("info") or {} + merge_ref = arg.get("__merge__") + parent = re.sub(r"\.ya?ml$", "", os.path.basename(merge_ref)) if merge_ref else None + + default = arg.get("default") + example = arg.get("example") + if isinstance(example, list): + example = example[0] if example else None + + result.append({ + "file_name": file_name, + "arg_name": re.sub(r"^-+", "", arg.get("name", "")), + "type": arg.get("type", ""), + "direction": arg.get("direction") or "input", + "required": bool(arg.get("required")) if arg.get("required") is not None else False, + "default": str(default) if default is not None else None, + "example": str(example) if example is not None else None, + "description": arg.get("description") or arg_info.get("description"), + "summary": arg.get("summary") or arg_info.get("summary"), + "parent": parent, + }) + + return result diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py new file mode 100644 index 0000000..1d1fa05 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py @@ -0,0 +1,167 @@ +from __future__ import annotations +import os +import re + +ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] +SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] + + +def read_file_format(path: str) -> dict: + """Read a file format spec from a ``file_*.yaml`` file. + + Args: + path: Path to a file format yaml (usually ``src/api/file_*.yaml``). + + Returns: + A dict with key ``info`` (dict) and optionally ``expected_format`` + (list of dicts) when the format type is known. + """ + from .. 
import read_nested_yaml + data = read_nested_yaml(path) + + out: dict = {"info": _process_info(data, path)} + + fmt = (data.get("info") or {}).get("format") or {} + format_type = fmt.get("type") + + if format_type == "h5ad": + out["expected_format"] = _process_h5ad(data, path, format_type) + elif format_type in ("anndata_hdf5", "anndata_zarr"): + out["expected_format"] = _process_h5ad(data, path, format_type) + elif format_type in ("tabular", "csv", "tsv", "parquet"): + out["expected_format"] = _process_tabular(data, path, format_type) + elif format_type in ("json", "yaml"): + out["expected_format"] = _process_keyed(data, path, format_type) + elif format_type == "spatialdata_zarr": + out["expected_format"] = _process_spatialdata(data, path) + + return out + + +def _process_info(data: dict, path: str) -> dict: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + fmt = (data.get("info") or {}).get("format") or {} + + label = data.get("label") + if label is None: + example = data.get("example") + if example: + label = os.path.basename(str(example)) + + return { + "file_name": file_name, + "file_type": fmt.get("type"), + "label": label, + "summary": data.get("summary"), + "description": data.get("description"), + "example": data.get("example"), + } + + +def _process_h5ad(data: dict, path: str, format_type: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + fmt = (data.get("info") or {}).get("format") or {} + + rows = [] + for struct_name in ANNDATA_STRUCT_NAMES: + fields = fmt.get(struct_name) + if not fields: + continue + if not isinstance(fields, list): + fields = [fields] + for field in fields: + rows.append({ + "file_name": file_name, + "struct": struct_name, + "name": field.get("name", struct_name), + "type": field.get("type", ""), + "required": field.get("required", True), + "multiple": field.get("multiple", False), + "description": field.get("description"), + "summary": field.get("summary"), + "data_type": format_type, + }) + return rows + + +def _process_tabular(data: dict, path: str, format_type: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + columns = (data.get("info") or {}).get("format", {}).get("columns") or [] + + return [ + { + "file_name": file_name, + "name": col.get("name", ""), + "type": col.get("type", ""), + "required": col.get("required", True), + "description": col.get("description"), + "summary": col.get("summary"), + "data_type": format_type, + } + for col in columns + ] + + +def _process_keyed(data: dict, path: str, format_type: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + keys = (data.get("info") or {}).get("format", {}).get("keys") or [] + + return [ + { + "file_name": file_name, + "name": k.get("name", ""), + "type": k.get("type", ""), + "required": k.get("required", True), + "description": k.get("description"), + "summary": k.get("summary"), + "data_type": format_type, + } + for k in keys + ] + + +def _process_spatialdata(data: dict, path: str) -> list[dict]: + file_name = re.sub(r"\.ya?ml$", "", os.path.basename(path)) + fmt = (data.get("info") or {}).get("format") or {} + rows = [] + for category in SPATIALDATA_ELEMENT_CATEGORIES: + elements = fmt.get(category) or [] + for elem in elements: + row: dict = { + "file_name": file_name, + "category": category, + "name": elem.get("name", ""), + "element_type": elem.get("type", ""), + "required": elem.get("required", True), + "description": elem.get("description"), + "data_type": "spatialdata_zarr", + 
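+                # points/shapes rows gain a "columns" spec below,
+                # and tables rows an "anndata_slots" spec.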
} + if category in ("points", "shapes"): + row["columns"] = [ + { + "name": col.get("name", ""), + "type": col.get("type", ""), + "required": col.get("required", True), + "description": col.get("description"), + } + for col in (elem.get("columns") or []) + ] + elif category == "tables": + slots = [] + for struct_name in ANNDATA_STRUCT_NAMES: + fields = elem.get(struct_name) + if not fields: + continue + if not isinstance(fields, list): + fields = [fields] + for f in fields: + slots.append({ + "struct": struct_name, + "name": f.get("name", struct_name), + "type": f.get("type", ""), + "required": f.get("required", True), + "description": f.get("description"), + }) + row["anndata_slots"] = slots + rows.append(row) + return rows diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py new file mode 100644 index 0000000..78d460a --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py @@ -0,0 +1,11 @@ +def read_task_config(path): + """Read and return a task config (_viash.yaml) file. + + Args: + path: Path to a ``_viash.yaml`` project config file. + + Returns: + The parsed config as a dict. + """ + from .. import read_nested_yaml + return read_nested_yaml(path) diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py b/packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py new file mode 100644 index 0000000..69412e6 --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/read_task_metadata.py @@ -0,0 +1,145 @@ +from __future__ import annotations +import glob +import os +import re +import warnings +from collections import deque + + +def read_task_metadata(path: str) -> dict: + """Read all API files in a task directory and return structured metadata. + + Scans ``path`` recursively for ``comp_*.yaml`` and ``file_*.yaml`` files, + builds a directed task graph, and runs a BFS to determine render order. + + Args: + path: Path to the task directory (or ``api/`` subdirectory). A + ``_viash.yaml`` must exist somewhere above this path. + + Returns: + A dict with the following keys: + + * ``proj_path`` – path to the project root + * ``proj_conf`` – parsed ``_viash.yaml`` + * ``files`` / ``comps`` – dicts keyed by ``file_name`` + * ``file_info`` / ``comp_info`` – flat lists of info dicts + * ``file_expected_format`` / ``comp_args`` – flat lists + * ``task_graph`` – ``networkx.DiGraph`` + * ``task_graph_root`` – name of the root node + * ``task_graph_order`` – BFS-ordered list of node names + """ + from .. 
import find_project_root + from .read_task_config import read_task_config + from .read_component_spec import read_component_spec + from .read_file_format import read_file_format + + project_path = find_project_root(path) + if project_path is None: + raise ValueError(f"No project root (_viash.yaml) found from '{path}'") + + proj_conf_file = os.path.join(project_path, "_viash.yaml") + if not os.path.exists(proj_conf_file): + raise ValueError(f"No _viash.yaml found in project root '{project_path}'") + + proj_conf = read_task_config(proj_conf_file) + + comp_paths = sorted( + glob.glob(os.path.join(path, "**/comp_*.yaml"), recursive=True) + + glob.glob(os.path.join(path, "**/comp_*.yml"), recursive=True) + ) + comps = { + re.sub(r"\.ya?ml$", "", os.path.basename(p)): read_component_spec(p) + for p in comp_paths + } + + file_paths = sorted( + glob.glob(os.path.join(path, "**/file_*.yaml"), recursive=True) + + glob.glob(os.path.join(path, "**/file_*.yml"), recursive=True) + ) + files = { + re.sub(r"\.ya?ml$", "", os.path.basename(p)): read_file_format(p) + for p in file_paths + } + + task_graph = _build_graph(files, comps) + task_graph_root = _get_root(task_graph) + task_graph_order = _bfs_order(task_graph, task_graph_root) + + comp_info = [c["info"] for c in comps.values()] + comp_args = [arg for c in comps.values() for arg in c["args"]] + file_info = [f["info"] for f in files.values()] + file_expected_format = [ + row for f in files.values() for row in (f.get("expected_format") or []) + ] + + return { + "proj_path": project_path, + "proj_conf": proj_conf, + "files": files, + "file_info": file_info, + "file_expected_format": file_expected_format, + "comps": comps, + "comp_info": comp_info, + "comp_args": comp_args, + "task_graph": task_graph, + "task_graph_root": task_graph_root, + "task_graph_order": task_graph_order, + } + + +def _build_graph(files: dict, comps: dict): + import networkx as nx + + G = nx.DiGraph() + + for name, f in files.items(): + G.add_node(name, label=f["info"].get("label") or name, is_comp=False) + + for name, c in comps.items(): + G.add_node(name, label=c["info"].get("label") or name, is_comp=True) + + for comp_name, c in comps.items(): + for arg in c["args"]: + if arg.get("type") != "file" or not arg.get("parent"): + continue + parent = arg["parent"] + if parent not in G: + continue + required = bool(arg.get("required", False)) + if arg.get("direction") == "input": + G.add_edge(parent, comp_name, from_to="file_to_comp", required=required) + elif arg.get("direction") == "output": + G.add_edge(comp_name, parent, from_to="comp_to_file", required=required) + + return G + + +def _get_root(G) -> str: + roots = [n for n, d in G.in_degree() if d == 0] + if not roots: + return next(iter(G.nodes())) + if len(roots) > 1: + warnings.warn( + f"Multiple root nodes with in-degree 0: {roots}. 
Using first.", + stacklevel=4, + ) + return roots[0] + + +def _bfs_order(G, root: str) -> list[str]: + """BFS from root; unreachable nodes are appended afterwards (mirrors igraph).""" + visited: list[str] = [] + seen: set[str] = set() + queue: deque[str] = deque([root]) + while queue: + node = queue.popleft() + if node not in seen: + seen.add(node) + visited.append(node) + for nbr in G.successors(node): + if nbr not in seen: + queue.append(nbr) + for node in G.nodes(): + if node not in seen: + visited.append(node) + return visited diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py new file mode 100644 index 0000000..60360df --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py @@ -0,0 +1,64 @@ +from __future__ import annotations +import re + + +def render_component_spec(spec: dict | str) -> str: + """Render a component spec as a Quarto/GFM markdown section. + + Args: + spec: Either a ``comp_*.yaml`` path or a dict returned by + :func:`read_component_spec`. + + Returns: + A markdown string with an ``## Component type:`` heading, summary, and + an arguments table. + """ + if isinstance(spec, str): + from .read_component_spec import read_component_spec + spec = read_component_spec(spec) + + info = spec["info"] + args_table = _format_arguments(spec["args"]) + + lines = [ + f"## Component type: {info.get('label', '')}", + "", + info.get("summary", "") or "", + "", + "Arguments:", + "", + ":::{.small}", + args_table, + ":::", + "", + ] + return "\n".join(lines) + + +def _format_arguments(args: list[dict]) -> str: + from ._markdown import format_markdown_table + + file_args = [a for a in args if a.get("type") == "file"] + if not file_args: + return "" + + rows = [] + for arg in file_args: + tags = [] + if not arg.get("required", True): + tags.append("Optional") + if arg.get("direction") == "output": + tags.append("Output") + tag_str = f"(_{', '.join(tags)}_) " if tags else "" + + summary = re.sub(r" *\n *", " ", (arg.get("summary") or "").strip()).rstrip(".") + default = arg.get("default") + default_str = f" Default: `{default}`." if default is not None else "" + + rows.append([ + f"`--{arg['arg_name']}`", + f"`{arg.get('type', '')}`", + f"{tag_str}{summary}.{default_str}", + ]) + + return format_markdown_table(["Name", "Type", "Description"], rows, col_widths=[25, 8, 60]) diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py new file mode 100644 index 0000000..ecab02b --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py @@ -0,0 +1,211 @@ +from __future__ import annotations +import re + +ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] +SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] + + +def render_file_format(spec: dict | str) -> str: + """Render a file format spec as a Quarto/GFM markdown section. + + Args: + spec: Either a ``file_*.yaml`` path or a dict returned by + :func:`read_file_format`. + + Returns: + A markdown string with a ``## File format:`` heading, summary, + example path, description, and a format/data-structure table. 
+ """ + if isinstance(spec, str): + from .read_file_format import read_file_format + spec = read_file_format(spec) + + info = spec["info"] + label = info.get("label") or "" + summary = (info.get("summary") or "").strip() + description = (info.get("description") or "").strip() + example = info.get("example") + + example_str = f"Example file: `{example}`" if example else "" + description_str = f"Description:\n\n{description}" if description else "" + + expected_format = spec.get("expected_format") + expected_format_str = "" + if expected_format: + format_example_lines = _render_format_example(spec) + format_table_lines = _render_format_table(spec) + expected_format_str = "\n".join([ + "Format:", + "", + ":::{.small}", + *format_example_lines, + ":::", + "", + "Data structure:", + "", + ":::{.small}", + *format_table_lines, + ":::", + ]) + + parts = [ + f"## File format: {label}", + "", + summary, + "", + example_str, + "", + description_str, + "", + expected_format_str, + ] + + # Trim trailing blank lines, keep one trailing newline + while parts and parts[-1] == "": + parts.pop() + return "\n".join(parts) + "\n" + + +def _render_format_example(spec: dict) -> list[str]: + fmt_type = spec["info"].get("file_type") + expected_format = spec.get("expected_format") or [] + + if fmt_type in ("h5ad", "anndata_hdf5", "anndata_zarr"): + structs: dict[str, list[str]] = {} + for row in expected_format: + structs.setdefault(row["struct"], []).append(f"'{row['name']}'") + lines = [" AnnData object"] + for struct_name in ANNDATA_STRUCT_NAMES: + if struct_name in structs: + lines.append(f" {struct_name}: {', '.join(structs[struct_name])}") + return lines + + if fmt_type in ("csv", "tsv", "parquet"): + names = ", ".join(f"'{row['name']}'" for row in expected_format) + return [" Tabular data", f" {names}"] + + if fmt_type in ("json", "yaml"): + names = ", ".join(f"'{row['name']}'" for row in expected_format) + ext = fmt_type.upper() + return [f" {ext} object", f" {names}"] + + if fmt_type == "spatialdata_zarr": + by_category: dict[str, list[str]] = {} + for row in expected_format: + by_category.setdefault(row["category"], []).append(f"'{row['name']}'") + lines = [" SpatialData object"] + for cat in SPATIALDATA_ELEMENT_CATEGORIES: + if cat in by_category: + lines.append(f" {cat}: {', '.join(by_category[cat])}") + return lines + + return [""] + + +def _render_format_table(spec: dict) -> list[str]: + from ._markdown import format_markdown_table + + fmt_type = spec["info"].get("file_type") + expected_format = spec.get("expected_format") or [] + + def _tag_str(row: dict) -> str: + tags = [] + if not row.get("required", True): + tags.append("Optional") + return f"(_{', '.join(tags)}_) " if tags else "" + + def _clean_desc(row: dict) -> str: + desc = re.sub(r" *\n *", " ", (row.get("description") or "").strip()).rstrip(".") + return desc + + if fmt_type in ("h5ad", "anndata_hdf5", "anndata_zarr"): + rows = [ + [ + f'`{row["struct"]}["{row["name"]}"]`', + f'`{row.get("type", "")}`', + f"{_tag_str(row)}{_clean_desc(row)}.", + ] + for row in expected_format + ] + return [format_markdown_table(["Slot", "Type", "Description"], rows, col_widths=[25, 8, 60])] + + if fmt_type in ("csv", "tsv", "parquet"): + rows = [ + [ + f'`{row["name"]}`', + f'`{row.get("type", "")}`', + f"{_tag_str(row)}{_clean_desc(row)}.", + ] + for row in expected_format + ] + return [format_markdown_table(["Column", "Type", "Description"], rows, col_widths=[25, 8, 60])] + + if fmt_type in ("json", "yaml"): + rows = [ + [ + f'`{row["name"]}`', + 
f'`{row.get("type", "")}`', + f"{_tag_str(row)}{_clean_desc(row)}.", + ] + for row in expected_format + ] + return [format_markdown_table(["Key", "Type", "Description"], rows, col_widths=[25, 8, 60])] + + if fmt_type == "spatialdata_zarr": + lines = [] + by_category: dict[str, list[dict]] = {} + for row in expected_format: + by_category.setdefault(row["category"], []).append(row) + + for cat in SPATIALDATA_ELEMENT_CATEGORIES: + elements = by_category.get(cat) + if not elements: + continue + lines.append(f"*{cat}*") + lines.append("") + + if cat in ("images", "labels", "coordinate_systems"): + elem_rows = [ + [f'`{e["name"]}`', f"{_tag_str(e)}{_clean_desc(e)}."] + for e in elements + ] + lines.append(format_markdown_table(["Name", "Description"], elem_rows, col_widths=[25, 68])) + + elif cat in ("points", "shapes"): + for elem in elements: + lines.append(f"`{elem['name']}`: {_clean_desc(elem)}.") + lines.append("") + col_rows = [ + [ + f'`{c["name"]}`', + f'`{c.get("type", "")}`', + f"{_tag_str(c)}{re.sub(r' *\n *', ' ', (c.get('description') or '').strip()).rstrip('.')}.", + ] + for c in (elem.get("columns") or []) + ] + if col_rows: + lines.append(format_markdown_table(["Column", "Type", "Description"], col_rows, col_widths=[25, 8, 60])) + + elif cat == "tables": + for elem in elements: + lines.append(f"`{elem['name']}`: {_clean_desc(elem)}.") + lines.append("") + slot_rows = [ + [ + f'`{s["struct"]}["{s["name"]}"]`', + f'`{s.get("type", "")}`', + f"{_tag_str(s)}{re.sub(r' *\n *', ' ', (s.get('description') or '').strip()).rstrip('.')}.", + ] + for s in (elem.get("anndata_slots") or []) + ] + if slot_rows: + lines.append(format_markdown_table(["Slot", "Type", "Description"], slot_rows, col_widths=[25, 8, 60])) + + lines.append("") + + # remove trailing blank line + while lines and lines[-1] == "": + lines.pop() + return lines + + return [""] diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py new file mode 100644 index 0000000..4c94c3e --- /dev/null +++ b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py @@ -0,0 +1,210 @@ +from __future__ import annotations +import re + + +def render_task_readme_qmd(task_metadata: dict | str, add_instructions: bool = False) -> str: + """Render the ``README.qmd`` for a task. + + Args: + task_metadata: Either a path to the task/API directory or a metadata + dict returned by :func:`read_task_metadata`. + add_instructions: When ``True``, prepend installation and usage + instructions (off by default). + + Returns: + A Quarto markdown string suitable for writing to ``README.qmd``. 
+ """ + if isinstance(task_metadata, str): + from .read_task_metadata import read_task_metadata + task_metadata = read_task_metadata(task_metadata) + + proj_conf = task_metadata["proj_conf"] + label = proj_conf.get("label", "") + summary = (proj_conf.get("summary") or "").strip() + description = (proj_conf.get("description") or "").strip() + + repository_url = (proj_conf.get("links") or {}).get("repository", "") + repo_match = re.search(r"https://github\.com/([^/]+/[^/]+)", repository_url) + repository_name = repo_match.group(1) if repo_match else repository_url + + authors_str = _render_authors(task_metadata) + task_graph = _render_task_graph(task_metadata) + task_api_parts = _render_task_parts(task_metadata) + instructions = _render_instructions(task_metadata) if add_instructions else "" + + lines = [ + "---", + f'title: "{label}"', + "format: gfm", + "---", + "", + "", + "", + summary, + "", + f"Repository: [{repository_name}]({repository_url})", + "", + ] + + if instructions: + lines += [instructions, ""] + + lines += [ + "## Description", + "", + description, + "", + authors_str, + "", + "## API", + "", + task_graph, + "", + "\n\n".join(task_api_parts), + ] + + return "\n".join(lines) + + +def _render_authors(task_metadata: dict) -> str: + from ._markdown import format_markdown_table + + authors = task_metadata["proj_conf"].get("authors") or [] + if not authors: + return "" + + # Collect columns: name, roles, then any info keys + all_keys: list[str] = ["name", "roles"] + for aut in authors: + for key in (aut.get("info") or {}): + if key not in all_keys: + all_keys.append(key) + + rows = [] + for aut in authors: + info = aut.get("info") or {} + roles = aut.get("roles") or [] + roles_str = ", ".join(roles) if isinstance(roles, list) else str(roles) + row = [aut.get("name", ""), roles_str] + [info.get(k, "") for k in all_keys[2:]] + rows.append(row) + + headers = [k.capitalize() for k in all_keys] + table = format_markdown_table(headers, rows) + return "\n## Authors & contributors\n\n" + table + "\n" + + +def _render_task_graph(task_metadata: dict) -> str: + G = task_metadata["task_graph"] + order = task_metadata["task_graph_order"] + repository_url = (task_metadata["proj_conf"].get("links") or {}).get("repository") + + def clean_id(node_id: str) -> str: + return node_id.replace("graph", "graaf") + + def make_label(node_id: str, label: str, is_comp: bool) -> str: + if not repository_url: + return label + slug = re.sub(r"[^a-z0-9]", "-", label.lower()) + anchor = f"component-type-{slug}" if is_comp else f"file-format-{slug}" + return f"{label}" + + node_order = {name: i for i, name in enumerate(order)} + + sorted_nodes = sorted(G.nodes(data=True), key=lambda x: node_order.get(x[0], len(order))) + node_lines = [] + for node_id, attrs in sorted_nodes: + label = make_label(node_id, attrs.get("label", node_id), attrs.get("is_comp", False)) + cid = clean_id(node_id) + if attrs.get("is_comp", False): + node_lines.append(f' {cid}[/"{label}"/]') + else: + node_lines.append(f' {cid}("{label}")') + + sorted_edges = sorted( + G.edges(data=True), + key=lambda e: (node_order.get(e[0], len(order)), node_order.get(e[1], len(order))), + ) + edge_lines = [] + for from_node, to_node, attrs in sorted_edges: + from_to = attrs.get("from_to", "comp_to_file") + required = attrs.get("required", True) + if from_to == "file_to_comp": + edge_type = "---" if required else "-.-" + else: + edge_type = "-->" if required else ".->" + edge_lines.append(f" {clean_id(from_node)}{edge_type}{clean_id(to_node)}") + + return 
"\n".join([ + "```mermaid", + "flowchart TB", + *node_lines, + *edge_lines, + "```", + ]) + + +def _render_task_parts(task_metadata: dict) -> list[str]: + from .render_component_spec import render_component_spec + from .render_file_format import render_file_format + + parts = [] + for name in task_metadata["task_graph_order"]: + if name in task_metadata["comps"]: + parts.append(render_component_spec(task_metadata["comps"][name])) + elif name in task_metadata["files"]: + parts.append(render_file_format(task_metadata["files"][name])) + return parts + + +def _render_instructions(task_metadata: dict) -> str: + proj_name = task_metadata["proj_conf"].get("name", "") + return "\n".join([ + "### Installation", + "", + "You need to have Docker, Java, and Viash installed. Follow", + "[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)", + "to install the required dependencies.", + "", + "### Add a method", + "", + "To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.", + "", + "### Initial setup", + "", + "To get started, you can run the following commands:", + "", + "```bash", + f"git clone git@github.com:openproblems-bio/{proj_name}.git", + "", + f"cd {proj_name}", + "", + "# initialise submodule", + "scripts/init_submodule.sh", + "", + "# download resources", + "scripts/download_resources.sh", + "```", + "", + "To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark:", + "", + "```bash", + "viash ns build --parallel --setup cachedbuild", + "", + "scripts/run_benchmark.sh", + "```", + "", + "After adding a component, it is recommended to run the tests to ensure that the component is working correctly:", + "", + "```bash", + "viash ns test --parallel", + "```", + "", + "Optionally, you can provide the `--query` argument to test only a subset of components:", + "", + "```bash", + "viash ns test --parallel --query 'component_name'", + "```", + ]) diff --git a/packages/python/openproblems/tests/data/example_project/_viash.yaml b/packages/python/openproblems/tests/data/example_project/_viash.yaml new file mode 100644 index 0000000..c171bca --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/_viash.yaml @@ -0,0 +1,88 @@ +viash_version: 0.9.4 + +# Step 1: Change the name of the task. +# example: task_name_of_this_task +name: task_template +organization: openproblems-bio +version: dev + +license: MIT +# Step 2: Add keywords to describe the task. +keywords: [single-cell, openproblems, benchmark] +# Step 3: Update the `task_template` to the name of the task from step 1. +links: + issue_tracker: https://github.com/openproblems-bio/task_template/issues + repository: https://github.com/openproblems-bio/task_template + docker_registry: ghcr.io + + +# Step 4: Update the label, summary and description. +# A unique, human-readable, short label. Used for creating summary tables and visualisations. +label: Template +summary: A one sentence summary of purpose and methodology. Used for creating an overview tables. +description: | + Provide a clear and concise description of your task, detailing the specific problem it aims + to solve. Outline the input data types, the expected output, and any assumptions or constraints. + Be sure to explain any terminology or concepts that are essential for understanding the task. + + Explain the motivation behind your proposed task. Describe the biological or computational + problem you aim to address and why it's important. 
Discuss the current state of research in + this area and any gaps or challenges that your task could help address. This section + should convince readers of the significance and relevance of your task. + +# A list of references to relevant literature. Each reference should be a DOI or a bibtex entry +references: + doi: + - 10.21203/rs.3.rs-4181617/v1 + # bibtex: + # - | + # @article{doe_2021_template, + # doi = {10.21203/rs.3.rs-4181617/v1}, + # url = {https://doi.org/10.21203/rs.3.rs-4181617/v1}, + # author = {Doe, John}, + # title = {A template for creating new tasks}, + # publisher = {Research Square}, + # year = {2021}, + # } + +info: + image: The name of the image file to use for the component on the website. + # Step 5: Replace the task_template to the name of the task. + test_resources: + - type: s3 + path: s3://openproblems-data/resources_test/common/ + dest: resources_test/common + - type: s3 + path: s3://openproblems-data/resources_test/task_template/ + dest: resources_test/task_template + +# Step 6: Update the authors of the task. +authors: + # Full name of the author, usually in the name of FirstName MiddleName LastName. + - name: John Doe + # Role of the author. Possible values: + # + # * `"author"`: Authors who have made substantial contributions to the component. + # * `"maintainer"`: The maintainer of the component. + # * `"contributor"`: Authors who have made smaller contributions (such as code patches etc.). + roles: [ "author", "maintainer" ] + # Additional information on the author + info: + github: johndoe + orcid: 0000-0000-0000-0000 + email: john@doe.me + twitter: johndoe + linkedin: johndoe + +# Step 7: Remove all of the comments of the steps you completed + +config_mods: | + .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" } + +repositories: + - name: openproblems + type: github + repo: openproblems-bio/openproblems + tag: build/main # Step 8: Set this to the latest release + +# Step 9: High five yourself! diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml new file mode 100644 index 0000000..f637aed --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_control_method.yaml @@ -0,0 +1,37 @@ +namespace: control_methods +info: + type: control_method + type_info: + label: Control Method + summary: Quality control methods for verifying the pipeline. + description: | + This folder contains control components for the task. + These components have the same interface as the regular methods + but also receive the solution object as input. It serves as a + starting point to test the relative accuracy of new methods in + the task, and also as a quality control for the metrics defined + in the task. 
+arguments: + - name: --input_train + __merge__: file_train.yaml + required: true + direction: input + - name: --input_test + __merge__: file_test.yaml + required: true + direction: input + - name: "--input_solution" + __merge__: file_solution.yaml + direction: input + required: true + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py + - path: /resources_test/task_template/cxg_mouse_pancreas_atlas + dest: resources_test/task_template/cxg_mouse_pancreas_atlas \ No newline at end of file diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml new file mode 100644 index 0000000..1ed53bd --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_data_processor.yaml @@ -0,0 +1,31 @@ +namespace: "data_processors" +info: + type: data_processor + type_info: + label: Data processor + summary: A data processor. + description: | + A component for processing a Common Dataset into a task-specific dataset. +arguments: + - name: "--input" + __merge__: file_common_dataset.yaml + direction: input + required: true + - name: "--output_train" + __merge__: file_train.yaml + direction: output + required: true + - name: "--output_test" + __merge__: file_test.yaml + direction: output + required: true + - name: "--output_solution" + __merge__: file_solution.yaml + direction: output + required: true +test_resources: + - path: /resources_test/common/cxg_mouse_pancreas_atlas + dest: resources_test/common/cxg_mouse_pancreas_atlas + - type: python_script + path: /common/component_tests/run_and_check_output.py + diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_method.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_method.yaml new file mode 100644 index 0000000..3a93846 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_method.yaml @@ -0,0 +1,28 @@ +namespace: "methods" +info: + type: method + type_info: + label: Method + summary: A method. + description: | + A method to predict the task effects. +arguments: + - name: --input_train + __merge__: file_train.yaml + required: true + direction: input + - name: "--input_test" + __merge__: file_test.yaml + direction: input + required: true + - name: --output + __merge__: file_prediction.yaml + required: true + direction: output +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py + - path: /resources_test/task_template/cxg_mouse_pancreas_atlas + dest: resources_test/task_template/cxg_mouse_pancreas_atlas \ No newline at end of file diff --git a/packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml b/packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml new file mode 100644 index 0000000..1c76a3d --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/comp_metric.yaml @@ -0,0 +1,28 @@ +namespace: "metrics" +info: + type: metric + type_info: + label: Metric + summary: A task template metric. + description: | + A metric for evaluating method predictions. 
+arguments: + - name: "--input_solution" + __merge__: file_solution.yaml + direction: input + required: true + - name: "--input_prediction" + __merge__: file_prediction.yaml + direction: input + required: true + - name: "--output" + __merge__: file_score.yaml + direction: output + required: true +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - type: python_script + path: /common/component_tests/check_config.py + - path: /resources_test/task_template/cxg_mouse_pancreas_atlas + dest: resources_test/task_template/cxg_mouse_pancreas_atlas diff --git a/packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml b/packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml new file mode 100644 index 0000000..e8a74a0 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_common_dataset.yaml @@ -0,0 +1,72 @@ +type: file +example: "resources_test/common/cxg_mouse_pancreas_atlas/dataset.h5ad" +label: "Common Dataset" +summary: A subset of the common dataset. +info: + format: + type: h5ad + layers: + - type: integer + name: counts + description: Raw counts + required: true + - type: double + name: normalized + description: Normalized expression values + required: true + obs: + - type: string + name: cell_type + description: Cell type information + required: true + - type: string + name: batch + description: Batch information + required: true + var: + - type: boolean + name: hvg + description: Whether or not the feature is considered to be a 'highly variable gene' + required: true + - type: double + name: hvg_score + description: A ranking of the features by hvg. + required: true + obsm: + - type: double + name: X_pca + description: The resulting PCA embedding. + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - name: dataset_name + type: string + description: Nicely formatted name. + required: true + - type: string + name: dataset_url + description: Link to the original source of the dataset. + required: false + - name: dataset_reference + type: string + description: Bibtex reference of the paper in which the dataset was published. + required: false + - name: dataset_summary + type: string + description: Short description of the dataset. + required: true + - name: dataset_description + type: string + description: Long description of the dataset. + required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. + required: false + - type: string + name: normalization_id + description: "Which normalization was used" + required: true diff --git a/packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml b/packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml new file mode 100644 index 0000000..26068ab --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_prediction.yaml @@ -0,0 +1,26 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/task_template/cxg_mouse_pancreas_atlas/prediction.h5ad" +label: "Predicted data" +summary: A predicted dataset as output by a method. +info: + format: + type: h5ad + obs: + - type: string + name: label_pred + description: Predicted labels for the test cells. 
+        required: true
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - type: string
+        name: normalization_id
+        description: "Which normalization was used"
+        required: true
+      - type: string
+        name: method_id
+        description: "A unique identifier for the method"
+        required: true
\ No newline at end of file
diff --git a/packages/python/openproblems/tests/data/example_project/api/file_score.yaml b/packages/python/openproblems/tests/data/example_project/api/file_score.yaml
new file mode 100644
index 0000000..8bdad65
--- /dev/null
+++ b/packages/python/openproblems/tests/data/example_project/api/file_score.yaml
@@ -0,0 +1,30 @@
+type: file
+example: "resources_test/task_template/cxg_mouse_pancreas_atlas/score.h5ad"
+label: Score
+summary: "File indicating the score of a metric."
+info:
+  format:
+    type: h5ad
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - type: string
+        name: normalization_id
+        description: "Which normalization was used"
+        required: true
+      - type: string
+        name: method_id
+        description: "A unique identifier for the method"
+        required: true
+      - type: string
+        name: metric_ids
+        description: "One or more unique metric identifiers"
+        multiple: true
+        required: true
+      - type: double
+        name: metric_values
+        description: "The metric values obtained for the given prediction. Must be of the same length as 'metric_ids'."
+        multiple: true
+        required: true
\ No newline at end of file
diff --git a/packages/python/openproblems/tests/data/example_project/api/file_solution.yaml b/packages/python/openproblems/tests/data/example_project/api/file_solution.yaml
new file mode 100644
index 0000000..d2f6200
--- /dev/null
+++ b/packages/python/openproblems/tests/data/example_project/api/file_solution.yaml
@@ -0,0 +1,73 @@
+#TODO: Change to the required and/or optional fields of the anndata
+type: file
+example: "resources_test/task_template/cxg_mouse_pancreas_atlas/solution.h5ad"
+label: "Solution"
+summary: "The solution for the test data"
+info:
+  format:
+    type: h5ad
+    layers:
+      - type: integer
+        name: counts
+        description: Raw counts
+        required: true
+      - type: double
+        name: normalized
+        description: Normalized counts
+        required: true
+    obs:
+      - type: string
+        name: label
+        description: Ground truth cell type labels
+        required: true
+      - type: string
+        name: batch
+        description: Batch information
+        required: true
+    var:
+      - type: boolean
+        name: hvg
+        description: Whether or not the feature is considered to be a 'highly variable gene'
+        required: true
+      - type: double
+        name: hvg_score
+        description: A ranking of the features by hvg.
+        required: true
+    obsm:
+      - type: double
+        name: X_pca
+        description: The resulting PCA embedding.
+        required: true
+    uns:
+      - type: string
+        name: dataset_id
+        description: "A unique identifier for the dataset"
+        required: true
+      - name: dataset_name
+        type: string
+        description: Nicely formatted name.
+        required: true
+      - type: string
+        name: dataset_url
+        description: Link to the original source of the dataset.
+        required: false
+      - name: dataset_reference
+        type: string
+        description: Bibtex reference of the paper in which the dataset was published.
+        required: false
+      - name: dataset_summary
+        type: string
+        description: Short description of the dataset.
+        required: true
+      - name: dataset_description
+        type: string
+        description: Long description of the dataset.
+ required: true + - name: dataset_organism + type: string + description: The organism of the sample in the dataset. + required: false + - type: string + name: normalization_id + description: "Which normalization was used" + required: true diff --git a/packages/python/openproblems/tests/data/example_project/api/file_test.yaml b/packages/python/openproblems/tests/data/example_project/api/file_test.yaml new file mode 100644 index 0000000..cb9d9a6 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_test.yaml @@ -0,0 +1,45 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/task_template/cxg_mouse_pancreas_atlas/test.h5ad" +label: "Test data" +summary: The subset of molecules used for the test dataset +info: + format: + type: h5ad + layers: + - type: integer + name: counts + description: Raw counts + required: true + - type: double + name: normalized + description: Normalized counts + required: true + obs: + - type: string + name: batch + description: Batch information + required: true + var: + - type: boolean + name: hvg + description: Whether or not the feature is considered to be a 'highly variable gene' + required: true + - type: double + name: hvg_score + description: A ranking of the features by hvg. + required: true + obsm: + - type: double + name: X_pca + description: The resulting PCA embedding. + required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - type: string + name: normalization_id + description: "Which normalization was used" + required: true \ No newline at end of file diff --git a/packages/python/openproblems/tests/data/example_project/api/file_train.yaml b/packages/python/openproblems/tests/data/example_project/api/file_train.yaml new file mode 100644 index 0000000..c01eda5 --- /dev/null +++ b/packages/python/openproblems/tests/data/example_project/api/file_train.yaml @@ -0,0 +1,49 @@ +#TODO: Change to the required and/or optional fields of the anndata +type: file +example: "resources_test/task_template/cxg_mouse_pancreas_atlas/train.h5ad" +label: "Training data" +summary: "The training data in h5ad format" +info: + format: + type: h5ad + layers: + - type: integer + name: counts + description: Raw counts + required: true + - type: double + name: normalized + description: Normalized counts + required: true + obs: + - type: string + name: label + description: Ground truth cell type labels + required: true + - type: string + name: batch + description: Batch information + required: true + var: + - type: boolean + name: hvg + description: Whether or not the feature is considered to be a 'highly variable gene' + required: true + - type: double + name: hvg_score + description: A ranking of the features by hvg. + required: true + obsm: + - type: double + name: X_pca + description: The resulting PCA embedding. 
+ required: true + uns: + - type: string + name: dataset_id + description: "A unique identifier for the dataset" + required: true + - type: string + name: normalization_id + description: "Which normalization was used" + required: true \ No newline at end of file diff --git a/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py new file mode 100644 index 0000000..2ad4fc8 --- /dev/null +++ b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py @@ -0,0 +1,78 @@ +import os +import pytest + +EXAMPLE_PROJECT = os.path.normpath(os.path.join( + os.path.dirname(__file__), + "data/example_project", +)) + + +@pytest.fixture(scope="module") +def task_metadata(): + from openproblems.project.docs import read_task_metadata + return read_task_metadata(EXAMPLE_PROJECT) + + +def test_read_task_metadata_keys(task_metadata): + for key in ("proj_path", "proj_conf", "files", "comps", "task_graph", "task_graph_order"): + assert key in task_metadata + + +def test_read_task_metadata_graph_nodes(task_metadata): + G = task_metadata["task_graph"] + assert "comp_method" in G.nodes + assert "comp_metric" in G.nodes + assert "file_train" in G.nodes + assert "file_prediction" in G.nodes + + +def test_read_task_metadata_graph_edges(task_metadata): + G = task_metadata["task_graph"] + # file -> comp (input) + assert G.has_edge("file_train", "comp_method") + # comp -> file (output) + assert G.has_edge("comp_method", "file_prediction") + + +def test_render_task_readme_qmd_structure(task_metadata): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) + + assert '---\ntitle: "Template"\nformat: gfm\n---' in result + assert "## Description" in result + assert "## Authors & contributors" in result + assert "## API" in result + assert "```mermaid" in result + assert "flowchart TB" in result + assert "```" in result + + +def test_render_task_readme_qmd_components(task_metadata): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) + + assert "## Component type: Method" in result + assert "## Component type: Metric" in result + + +def test_render_task_readme_qmd_file_formats(task_metadata): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) + + assert "## File format: Training data" in result + assert "## File format: Predicted data" in result + + +def test_render_task_readme_qmd_instructions(task_metadata): + from openproblems.project import render_task_readme_qmd + without = render_task_readme_qmd(task_metadata, add_instructions=False) + with_inst = render_task_readme_qmd(task_metadata, add_instructions=True) + + assert "### Installation" not in without + assert "### Installation" in with_inst + + +def test_render_task_readme_qmd_from_path(): + from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(EXAMPLE_PROJECT) + assert "## API" in result diff --git a/schemas/api_file_format.yaml b/schemas/api_file_format.yaml index 26cce7d..eb9b793 100644 --- a/schemas/api_file_format.yaml +++ b/schemas/api_file_format.yaml @@ -25,7 +25,11 @@ properties: format: oneOf: - $ref: "schema_openproblems_definitions.yaml#/definitions/H5ADFormat" + - $ref: "schema_openproblems_definitions.yaml#/definitions/AnnDataHDF5Format" + - $ref: "schema_openproblems_definitions.yaml#/definitions/AnnDataZarrFormat" - $ref: 
"schema_openproblems_definitions.yaml#/definitions/CSVFormat" - $ref: "schema_openproblems_definitions.yaml#/definitions/TSVFormat" - $ref: "schema_openproblems_definitions.yaml#/definitions/ParquetFormat" + - $ref: "schema_openproblems_definitions.yaml#/definitions/JSONFormat" + - $ref: "schema_openproblems_definitions.yaml#/definitions/YAMLFormat" - $ref: "schema_openproblems_definitions.yaml#/definitions/SpatialDataZarrFormat" diff --git a/schemas/schema_openproblems_definitions.yaml b/schemas/schema_openproblems_definitions.yaml index 46e861a..7762f25 100644 --- a/schemas/schema_openproblems_definitions.yaml +++ b/schemas/schema_openproblems_definitions.yaml @@ -236,6 +236,86 @@ definitions: additionalProperties: false allOf: - $ref: "#/definitions/AnnDataObject" + AnnDataHDF5Format: + type: object + properties: + type: + const: anndata_hdf5 + description: The file format. + X: + type: object + layers: + type: array + obs: + type: array + obsm: + type: array + obsp: + type: array + var: + type: array + varm: + type: array + varp: + type: array + uns: + type: array + required: [type] + additionalProperties: false + allOf: + - $ref: "#/definitions/AnnDataObject" + AnnDataZarrFormat: + type: object + properties: + type: + const: anndata_zarr + description: The file format. + X: + type: object + layers: + type: array + obs: + type: array + obsm: + type: array + obsp: + type: array + var: + type: array + varm: + type: array + varp: + type: array + uns: + type: array + required: [type] + additionalProperties: false + allOf: + - $ref: "#/definitions/AnnDataObject" + JSONFormat: + type: object + required: [type] + additionalProperties: false + properties: + type: + const: json + description: The file format. + keys: + type: array + items: + $ref: "#/definitions/DataTypeSpec" + YAMLFormat: + type: object + required: [type] + additionalProperties: false + properties: + type: + const: yaml + description: The file format. 
+ keys: + type: array + items: + $ref: "#/definitions/DataTypeSpec" CSVFormat: type: object required: [type] From 41fc732d449d72e2ce72b30040b473266297e14a Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 20 Apr 2026 14:18:04 +0200 Subject: [PATCH 2/6] apply black formatting and fix f-string syntax error --- .../openproblems/src/openproblems/__init__.py | 4 +- .../project/component_tests/check_config.py | 58 ++++++-- .../component_tests/run_and_check_output.py | 42 +++--- .../openproblems/project/docs/_markdown.py | 9 +- .../project/docs/read_component_spec.py | 33 +++-- .../project/docs/read_file_format.py | 62 +++++--- .../project/docs/read_task_config.py | 1 + .../project/docs/render_component_spec.py | 17 ++- .../project/docs/render_file_format.py | 98 +++++++++---- .../project/docs/render_task_readme_qmd.py | 132 ++++++++++-------- .../openproblems/project/find_project_root.py | 7 +- .../openproblems/project/read_nested_yaml.py | 33 +++-- .../openproblems/project/read_viash_config.py | 13 +- .../src/openproblems/project/resolve_path.py | 8 +- .../src/openproblems/utils/__init__.py | 5 +- .../src/openproblems/utils/strip_margin.py | 37 ++--- .../tests/test_docs_render_task_readme_qmd.py | 25 +++- .../tests/test_project_find_project_root.py | 17 +-- 18 files changed, 383 insertions(+), 218 deletions(-) diff --git a/packages/python/openproblems/src/openproblems/__init__.py b/packages/python/openproblems/src/openproblems/__init__.py index 84bba52..ca915bb 100644 --- a/packages/python/openproblems/src/openproblems/__init__.py +++ b/packages/python/openproblems/src/openproblems/__init__.py @@ -2,6 +2,6 @@ from . import utils __all__ = [ - "project", - "utils", + "project", + "utils", ] diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py index dd45b49..181fcfa 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py +++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py @@ -37,13 +37,17 @@ def check_references(references: Dict[str, Union[str, List[str]]]) -> None: doi = references.get("doi") bibtex = references.get("bibtex") - assert doi or bibtex, "One of .references.doi or .references.bibtex should be defined" + assert ( + doi or bibtex + ), "One of .references.doi or .references.bibtex should be defined" if doi: if not isinstance(doi, list): doi = [doi] for d in doi: - assert re.match(r"^10.\d{4,9}/[-._;()/:A-Za-z0-9]+$", d), f"Invalid DOI format: {doi}" + assert re.match( + r"^10.\d{4,9}/[-._;()/:A-Za-z0-9]+$", d + ), f"Invalid DOI format: {doi}" assert check_url(f"https://doi.org/{d}"), f"DOI '{d}' is not reachable" if bibtex: @@ -53,7 +57,9 @@ def check_references(references: Dict[str, Union[str, List[str]]]) -> None: assert re.match(r"^@.*{.*", b), f"Invalid bibtex format: {b}" -def check_links(links: Dict[str, Union[str, List[str]]], required: List[str] = []) -> None: +def check_links( + links: Dict[str, Union[str, List[str]]], required: List[str] = [] +) -> None: if not links: return @@ -62,7 +68,9 @@ def check_links(links: Dict[str, Union[str, List[str]]], required: List[str] = [ for link_type, link in links.items(): if link_type != "docker_registry": - assert check_url(link), f"Link .links.{link_type} URL '{link}' is not reachable" + assert check_url( + link + ), f"Link .links.{link_type} URL '{link}' is not reachable" def check_info(this_info: Dict, 
this_config: Dict, comp_type: str) -> None: @@ -79,7 +87,9 @@ def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None: value = this_config.get(field) or value assert value, f"Metadata field '{field}' is not defined" assert "FILL IN:" not in value, f"Metadata field '{field}' not filled in" - assert len(value) <= max_length, f"Metadata field '{field}' should not exceed {max_length} characters" + assert ( + len(value) <= max_length + ), f"Metadata field '{field}' should not exceed {max_length} characters" links = this_info.get("links") or this_config.get("links") or {} required_links: List[str] = [] @@ -117,7 +127,9 @@ def run_check_config(meta: dict) -> None: print("Check .info.type", flush=True) expected_types = ["method", "control_method", "metric"] - assert comp_type in expected_types, ".info.type should be equal to 'method' or 'control_method'" + assert ( + comp_type in expected_types + ), ".info.type should be equal to 'method' or 'control_method'" print("Check component metadata", flush=True) if comp_type == "metric": @@ -130,14 +142,26 @@ def run_check_config(meta: dict) -> None: if "preferred_normalization" in info: print("Checking contents of .info.preferred_normalization", flush=True) - norm_methods = ["log_cpm", "log_cp10k", "counts", "log_scran_pooling", "sqrt_cpm", "sqrt_cp10k", "l1_sqrt"] + norm_methods = [ + "log_cpm", + "log_cp10k", + "counts", + "log_scran_pooling", + "sqrt_cpm", + "sqrt_cp10k", + "l1_sqrt", + ] assert info["preferred_normalization"] in norm_methods, ( - ".info['preferred_normalization'] not one of '" + "', '".join(norm_methods) + "'." + ".info['preferred_normalization'] not one of '" + + "', '".join(norm_methods) + + "'." ) if "variants" in info: print("Checking contents of .info.variants", flush=True) - arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + ["preferred_normalization"] + arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + [ + "preferred_normalization" + ] for paramset_id, paramset in info["variants"].items(): if paramset: for arg_id in paramset: @@ -155,12 +179,20 @@ def run_check_config(meta: dict) -> None: ) assert nextflow_runner, ".runners does not contain a nextflow runner" - assert nextflow_runner.get("directives"), "directives not a field in nextflow runner" + assert nextflow_runner.get( + "directives" + ), "directives not a field in nextflow runner" nextflow_labels = nextflow_runner["directives"].get("label") assert nextflow_labels, "label not a field in nextflow runner directives" - assert [label for label in nextflow_labels if label in TIME_LABELS], "time label not filled in" - assert [label for label in nextflow_labels if label in MEM_LABELS], "mem label not filled in" - assert [label for label in nextflow_labels if label in CPU_LABELS], "cpu label not filled in" + assert [ + label for label in nextflow_labels if label in TIME_LABELS + ], "time label not filled in" + assert [ + label for label in nextflow_labels if label in MEM_LABELS + ], "mem label not filled in" + assert [ + label for label in nextflow_labels if label in CPU_LABELS + ], "cpu label not filled in" print("All checks succeeded!", flush=True) diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py index 3484abe..2c96cb3 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py +++ 
b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py @@ -9,7 +9,9 @@ def run_component(cmd: list) -> None: print(">> Running script as test", flush=True) out = subprocess.run(cmd) - assert out.returncode == 0, f"Script exited with an error. Return code: {out.returncode}" + assert ( + out.returncode == 0 + ), f"Script exited with an error. Return code: {out.returncode}" def check_input_files(arguments: list) -> None: @@ -19,9 +21,9 @@ def check_input_files(arguments: list) -> None: print(">> Checking whether input files exist", flush=True) for arg in arguments: if arg["type"] == "file" and arg["direction"] == "input" and arg["required"]: - assert not arg["must_exist"] or path.exists(arg["value"]), ( - f"Input file '{arg['value']}' does not exist" - ) + assert not arg["must_exist"] or path.exists( + arg["value"] + ), f"Input file '{arg['value']}' does not exist" def check_output_files(arguments: list) -> None: @@ -31,9 +33,9 @@ def check_output_files(arguments: list) -> None: print(">> Checking whether output file exists", flush=True) for arg in arguments: if arg["type"] == "file" and arg["direction"] == "output" and arg["required"]: - assert not arg["must_exist"] or path.exists(arg["value"]), ( - f"Output file '{arg['value']}' does not exist" - ) + assert not arg["must_exist"] or path.exists( + arg["value"] + ), f"Output file '{arg['value']}' does not exist" print(">> Reading output files and checking formats", flush=True) for arg in arguments: @@ -134,9 +136,9 @@ def check_anndata(adata, format_spec: dict, label: str = "") -> None: else: for item in items: if item.get("required", True): - assert item["name"] in struc_x, ( - f"{label} is missing slot .{struc_name}['{item['name']}']" - ) + assert ( + item["name"] in struc_x + ), f"{label} is missing slot .{struc_name}['{item['name']}']" def check_dataframe(df, columns: list, label: str = "") -> None: @@ -145,7 +147,9 @@ def check_dataframe(df, columns: list, label: str = "") -> None: """ for item in columns: if item.get("required", True): - assert item["name"] in df.columns, f"{label} is missing column '{item['name']}'" + assert ( + item["name"] in df.columns + ), f"{label} is missing column '{item['name']}'" def check_dictionary(data, arg: dict) -> None: @@ -157,9 +161,9 @@ def check_dictionary(data, arg: dict) -> None: arg_keys = arg_format.get("keys") or arg_info.get("keys") or [] for item in arg_keys: if item.get("required", True): - assert isinstance(data, dict) and item["name"] in data, ( - f"File '{arg['value']}' is missing key '{item['name']}'" - ) + assert ( + isinstance(data, dict) and item["name"] in data + ), f"File '{arg['value']}' is missing key '{item['name']}'" def check_spatialdata(sdata, arg: dict) -> None: @@ -175,9 +179,9 @@ def check_spatialdata(sdata, arg: dict) -> None: category_store = getattr(sdata, category, {}) for item in items: if item.get("required", True): - assert item["name"] in category_store, ( - f"File '{arg['value']}' is missing {category}['{item['name']}']" - ) + assert ( + item["name"] in category_store + ), f"File '{arg['value']}' is missing {category}['{item['name']}']" elem_name = item["name"] if elem_name not in category_store: @@ -191,7 +195,9 @@ def check_spatialdata(sdata, arg: dict) -> None: f"File '{arg['value']}' {category}['{elem_name}']", ) elif category == "tables": - check_anndata(element, item, f"File '{arg['value']}' tables['{elem_name}']") + check_anndata( + element, item, f"File '{arg['value']}' tables['{elem_name}']" + ) def 
get_argument_sets(config: dict, resources_dir: str) -> dict: diff --git a/packages/python/openproblems/src/openproblems/project/docs/_markdown.py b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py index a0dae31..7be1b7e 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/_markdown.py +++ b/packages/python/openproblems/src/openproblems/project/docs/_markdown.py @@ -18,11 +18,10 @@ def format_markdown_table(headers, rows, col_widths=None): if col_widths is not None: sep_line = "|" + "".join(f":{'-' * w}|" for w in col_widths) else: - sep_line = "| " + " | ".join(f":{'-' * max(len(str(h)), 3)}" for h in headers) + " |" + sep_line = ( + "| " + " | ".join(f":{'-' * max(len(str(h)), 3)}" for h in headers) + " |" + ) - data_lines = [ - "| " + " | ".join(str(cell) for cell in row) + " |" - for row in rows - ] + data_lines = ["| " + " | ".join(str(cell) for cell in row) + " |" for row in rows] return "\n".join([header_line, sep_line] + data_lines) diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py index 47ea6f2..01dae58 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py +++ b/packages/python/openproblems/src/openproblems/project/docs/read_component_spec.py @@ -13,6 +13,7 @@ def read_component_spec(path: str) -> dict: A dict with keys ``info`` (dict) and ``args`` (list of dicts). """ from .. import read_nested_yaml + data = read_nested_yaml(path) return { "info": _process_info(data, path), @@ -52,24 +53,30 @@ def _process_arguments(data: dict, path: str) -> list[dict]: for arg in arguments: arg_info = arg.get("info") or {} merge_ref = arg.get("__merge__") - parent = re.sub(r"\.ya?ml$", "", os.path.basename(merge_ref)) if merge_ref else None + parent = ( + re.sub(r"\.ya?ml$", "", os.path.basename(merge_ref)) if merge_ref else None + ) default = arg.get("default") example = arg.get("example") if isinstance(example, list): example = example[0] if example else None - result.append({ - "file_name": file_name, - "arg_name": re.sub(r"^-+", "", arg.get("name", "")), - "type": arg.get("type", ""), - "direction": arg.get("direction") or "input", - "required": bool(arg.get("required")) if arg.get("required") is not None else False, - "default": str(default) if default is not None else None, - "example": str(example) if example is not None else None, - "description": arg.get("description") or arg_info.get("description"), - "summary": arg.get("summary") or arg_info.get("summary"), - "parent": parent, - }) + result.append( + { + "file_name": file_name, + "arg_name": re.sub(r"^-+", "", arg.get("name", "")), + "type": arg.get("type", ""), + "direction": arg.get("direction") or "input", + "required": bool(arg.get("required")) + if arg.get("required") is not None + else False, + "default": str(default) if default is not None else None, + "example": str(example) if example is not None else None, + "description": arg.get("description") or arg_info.get("description"), + "summary": arg.get("summary") or arg_info.get("summary"), + "parent": parent, + } + ) return result diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py index 1d1fa05..99fec60 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py +++ 
b/packages/python/openproblems/src/openproblems/project/docs/read_file_format.py @@ -2,8 +2,25 @@ import os import re -ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] -SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] +ANNDATA_STRUCT_NAMES = [ + "X", + "obs", + "var", + "obsm", + "obsp", + "varm", + "varp", + "layers", + "uns", +] +SPATIALDATA_ELEMENT_CATEGORIES = [ + "images", + "labels", + "points", + "shapes", + "tables", + "coordinate_systems", +] def read_file_format(path: str) -> dict: @@ -17,6 +34,7 @@ def read_file_format(path: str) -> dict: (list of dicts) when the format type is known. """ from .. import read_nested_yaml + data = read_nested_yaml(path) out: dict = {"info": _process_info(data, path)} @@ -70,17 +88,19 @@ def _process_h5ad(data: dict, path: str, format_type: str) -> list[dict]: if not isinstance(fields, list): fields = [fields] for field in fields: - rows.append({ - "file_name": file_name, - "struct": struct_name, - "name": field.get("name", struct_name), - "type": field.get("type", ""), - "required": field.get("required", True), - "multiple": field.get("multiple", False), - "description": field.get("description"), - "summary": field.get("summary"), - "data_type": format_type, - }) + rows.append( + { + "file_name": file_name, + "struct": struct_name, + "name": field.get("name", struct_name), + "type": field.get("type", ""), + "required": field.get("required", True), + "multiple": field.get("multiple", False), + "description": field.get("description"), + "summary": field.get("summary"), + "data_type": format_type, + } + ) return rows @@ -155,13 +175,15 @@ def _process_spatialdata(data: dict, path: str) -> list[dict]: if not isinstance(fields, list): fields = [fields] for f in fields: - slots.append({ - "struct": struct_name, - "name": f.get("name", struct_name), - "type": f.get("type", ""), - "required": f.get("required", True), - "description": f.get("description"), - }) + slots.append( + { + "struct": struct_name, + "name": f.get("name", struct_name), + "type": f.get("type", ""), + "required": f.get("required", True), + "description": f.get("description"), + } + ) row["anndata_slots"] = slots rows.append(row) return rows diff --git a/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py index 78d460a..8b6e226 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py +++ b/packages/python/openproblems/src/openproblems/project/docs/read_task_config.py @@ -8,4 +8,5 @@ def read_task_config(path): The parsed config as a dict. """ from .. import read_nested_yaml + return read_nested_yaml(path) diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py index 60360df..5e9719e 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py +++ b/packages/python/openproblems/src/openproblems/project/docs/render_component_spec.py @@ -15,6 +15,7 @@ def render_component_spec(spec: dict | str) -> str: """ if isinstance(spec, str): from .read_component_spec import read_component_spec + spec = read_component_spec(spec) info = spec["info"] @@ -55,10 +56,14 @@ def _format_arguments(args: list[dict]) -> str: default = arg.get("default") default_str = f" Default: `{default}`." 
if default is not None else "" - rows.append([ - f"`--{arg['arg_name']}`", - f"`{arg.get('type', '')}`", - f"{tag_str}{summary}.{default_str}", - ]) + rows.append( + [ + f"`--{arg['arg_name']}`", + f"`{arg.get('type', '')}`", + f"{tag_str}{summary}.{default_str}", + ] + ) - return format_markdown_table(["Name", "Type", "Description"], rows, col_widths=[25, 8, 60]) + return format_markdown_table( + ["Name", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py index ecab02b..7049233 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py +++ b/packages/python/openproblems/src/openproblems/project/docs/render_file_format.py @@ -1,8 +1,25 @@ from __future__ import annotations import re -ANNDATA_STRUCT_NAMES = ["X", "obs", "var", "obsm", "obsp", "varm", "varp", "layers", "uns"] -SPATIALDATA_ELEMENT_CATEGORIES = ["images", "labels", "points", "shapes", "tables", "coordinate_systems"] +ANNDATA_STRUCT_NAMES = [ + "X", + "obs", + "var", + "obsm", + "obsp", + "varm", + "varp", + "layers", + "uns", +] +SPATIALDATA_ELEMENT_CATEGORIES = [ + "images", + "labels", + "points", + "shapes", + "tables", + "coordinate_systems", +] def render_file_format(spec: dict | str) -> str: @@ -18,6 +35,7 @@ def render_file_format(spec: dict | str) -> str: """ if isinstance(spec, str): from .read_file_format import read_file_format + spec = read_file_format(spec) info = spec["info"] @@ -34,19 +52,21 @@ def render_file_format(spec: dict | str) -> str: if expected_format: format_example_lines = _render_format_example(spec) format_table_lines = _render_format_table(spec) - expected_format_str = "\n".join([ - "Format:", - "", - ":::{.small}", - *format_example_lines, - ":::", - "", - "Data structure:", - "", - ":::{.small}", - *format_table_lines, - ":::", - ]) + expected_format_str = "\n".join( + [ + "Format:", + "", + ":::{.small}", + *format_example_lines, + ":::", + "", + "Data structure:", + "", + ":::{.small}", + *format_table_lines, + ":::", + ] + ) parts = [ f"## File format: {label}", @@ -115,7 +135,9 @@ def _tag_str(row: dict) -> str: return f"(_{', '.join(tags)}_) " if tags else "" def _clean_desc(row: dict) -> str: - desc = re.sub(r" *\n *", " ", (row.get("description") or "").strip()).rstrip(".") + desc = re.sub(r" *\n *", " ", (row.get("description") or "").strip()).rstrip( + "." 
+ ) return desc if fmt_type in ("h5ad", "anndata_hdf5", "anndata_zarr"): @@ -127,7 +149,11 @@ def _clean_desc(row: dict) -> str: ] for row in expected_format ] - return [format_markdown_table(["Slot", "Type", "Description"], rows, col_widths=[25, 8, 60])] + return [ + format_markdown_table( + ["Slot", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) + ] if fmt_type in ("csv", "tsv", "parquet"): rows = [ @@ -138,7 +164,11 @@ def _clean_desc(row: dict) -> str: ] for row in expected_format ] - return [format_markdown_table(["Column", "Type", "Description"], rows, col_widths=[25, 8, 60])] + return [ + format_markdown_table( + ["Column", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) + ] if fmt_type in ("json", "yaml"): rows = [ @@ -149,7 +179,11 @@ def _clean_desc(row: dict) -> str: ] for row in expected_format ] - return [format_markdown_table(["Key", "Type", "Description"], rows, col_widths=[25, 8, 60])] + return [ + format_markdown_table( + ["Key", "Type", "Description"], rows, col_widths=[25, 8, 60] + ) + ] if fmt_type == "spatialdata_zarr": lines = [] @@ -169,7 +203,11 @@ def _clean_desc(row: dict) -> str: [f'`{e["name"]}`', f"{_tag_str(e)}{_clean_desc(e)}."] for e in elements ] - lines.append(format_markdown_table(["Name", "Description"], elem_rows, col_widths=[25, 68])) + lines.append( + format_markdown_table( + ["Name", "Description"], elem_rows, col_widths=[25, 68] + ) + ) elif cat in ("points", "shapes"): for elem in elements: @@ -179,12 +217,18 @@ def _clean_desc(row: dict) -> str: [ f'`{c["name"]}`', f'`{c.get("type", "")}`', - f"{_tag_str(c)}{re.sub(r' *\n *', ' ', (c.get('description') or '').strip()).rstrip('.')}.", + f"{_tag_str(c)}{_clean_desc(c)}.", ] for c in (elem.get("columns") or []) ] if col_rows: - lines.append(format_markdown_table(["Column", "Type", "Description"], col_rows, col_widths=[25, 8, 60])) + lines.append( + format_markdown_table( + ["Column", "Type", "Description"], + col_rows, + col_widths=[25, 8, 60], + ) + ) elif cat == "tables": for elem in elements: @@ -194,12 +238,18 @@ def _clean_desc(row: dict) -> str: [ f'`{s["struct"]}["{s["name"]}"]`', f'`{s.get("type", "")}`', - f"{_tag_str(s)}{re.sub(r' *\n *', ' ', (s.get('description') or '').strip()).rstrip('.')}.", + f"{_tag_str(s)}{_clean_desc(s)}.", ] for s in (elem.get("anndata_slots") or []) ] if slot_rows: - lines.append(format_markdown_table(["Slot", "Type", "Description"], slot_rows, col_widths=[25, 8, 60])) + lines.append( + format_markdown_table( + ["Slot", "Type", "Description"], + slot_rows, + col_widths=[25, 8, 60], + ) + ) lines.append("") diff --git a/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py index 4c94c3e..3c2601d 100644 --- a/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py +++ b/packages/python/openproblems/src/openproblems/project/docs/render_task_readme_qmd.py @@ -2,7 +2,9 @@ import re -def render_task_readme_qmd(task_metadata: dict | str, add_instructions: bool = False) -> str: +def render_task_readme_qmd( + task_metadata: dict | str, add_instructions: bool = False +) -> str: """Render the ``README.qmd`` for a task. 
Args: @@ -16,6 +18,7 @@ def render_task_readme_qmd(task_metadata: dict | str, add_instructions: bool = F """ if isinstance(task_metadata, str): from .read_task_metadata import read_task_metadata + task_metadata = read_task_metadata(task_metadata) proj_conf = task_metadata["proj_conf"] @@ -79,7 +82,7 @@ def _render_authors(task_metadata: dict) -> str: # Collect columns: name, roles, then any info keys all_keys: list[str] = ["name", "roles"] for aut in authors: - for key in (aut.get("info") or {}): + for key in aut.get("info") or {}: if key not in all_keys: all_keys.append(key) @@ -113,10 +116,14 @@ def make_label(node_id: str, label: str, is_comp: bool) -> str: node_order = {name: i for i, name in enumerate(order)} - sorted_nodes = sorted(G.nodes(data=True), key=lambda x: node_order.get(x[0], len(order))) + sorted_nodes = sorted( + G.nodes(data=True), key=lambda x: node_order.get(x[0], len(order)) + ) node_lines = [] for node_id, attrs in sorted_nodes: - label = make_label(node_id, attrs.get("label", node_id), attrs.get("is_comp", False)) + label = make_label( + node_id, attrs.get("label", node_id), attrs.get("is_comp", False) + ) cid = clean_id(node_id) if attrs.get("is_comp", False): node_lines.append(f' {cid}[/"{label}"/]') @@ -125,7 +132,10 @@ def make_label(node_id: str, label: str, is_comp: bool) -> str: sorted_edges = sorted( G.edges(data=True), - key=lambda e: (node_order.get(e[0], len(order)), node_order.get(e[1], len(order))), + key=lambda e: ( + node_order.get(e[0], len(order)), + node_order.get(e[1], len(order)), + ), ) edge_lines = [] for from_node, to_node, attrs in sorted_edges: @@ -137,13 +147,15 @@ def make_label(node_id: str, label: str, is_comp: bool) -> str: edge_type = "-->" if required else ".->" edge_lines.append(f" {clean_id(from_node)}{edge_type}{clean_id(to_node)}") - return "\n".join([ - "```mermaid", - "flowchart TB", - *node_lines, - *edge_lines, - "```", - ]) + return "\n".join( + [ + "```mermaid", + "flowchart TB", + *node_lines, + *edge_lines, + "```", + ] + ) def _render_task_parts(task_metadata: dict) -> list[str]: @@ -161,50 +173,52 @@ def _render_task_parts(task_metadata: dict) -> list[str]: def _render_instructions(task_metadata: dict) -> str: proj_name = task_metadata["proj_conf"].get("name", "") - return "\n".join([ - "### Installation", - "", - "You need to have Docker, Java, and Viash installed. Follow", - "[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)", - "to install the required dependencies.", - "", - "### Add a method", - "", - "To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.", - "", - "### Initial setup", - "", - "To get started, you can run the following commands:", - "", - "```bash", - f"git clone git@github.com:openproblems-bio/{proj_name}.git", - "", - f"cd {proj_name}", - "", - "# initialise submodule", - "scripts/init_submodule.sh", - "", - "# download resources", - "scripts/download_resources.sh", - "```", - "", - "To run the benchmark, you first need to build the components. 
Afterwards, you can run the benchmark:", - "", - "```bash", - "viash ns build --parallel --setup cachedbuild", - "", - "scripts/run_benchmark.sh", - "```", - "", - "After adding a component, it is recommended to run the tests to ensure that the component is working correctly:", - "", - "```bash", - "viash ns test --parallel", - "```", - "", - "Optionally, you can provide the `--query` argument to test only a subset of components:", - "", - "```bash", - "viash ns test --parallel --query 'component_name'", - "```", - ]) + return "\n".join( + [ + "### Installation", + "", + "You need to have Docker, Java, and Viash installed. Follow", + "[these instructions](https://openproblems.bio/documentation/fundamentals/requirements)", + "to install the required dependencies.", + "", + "### Add a method", + "", + "To add a method to the repository, follow the instructions in the `scripts/add_a_method.sh` script.", + "", + "### Initial setup", + "", + "To get started, you can run the following commands:", + "", + "```bash", + f"git clone git@github.com:openproblems-bio/{proj_name}.git", + "", + f"cd {proj_name}", + "", + "# initialise submodule", + "scripts/init_submodule.sh", + "", + "# download resources", + "scripts/download_resources.sh", + "```", + "", + "To run the benchmark, you first need to build the components. Afterwards, you can run the benchmark:", + "", + "```bash", + "viash ns build --parallel --setup cachedbuild", + "", + "scripts/run_benchmark.sh", + "```", + "", + "After adding a component, it is recommended to run the tests to ensure that the component is working correctly:", + "", + "```bash", + "viash ns test --parallel", + "```", + "", + "Optionally, you can provide the `--query` argument to test only a subset of components:", + "", + "```bash", + "viash ns test --parallel --query 'component_name'", + "```", + ] + ) diff --git a/packages/python/openproblems/src/openproblems/project/find_project_root.py b/packages/python/openproblems/src/openproblems/project/find_project_root.py index 700952b..ac29ef9 100644 --- a/packages/python/openproblems/src/openproblems/project/find_project_root.py +++ b/packages/python/openproblems/src/openproblems/project/find_project_root.py @@ -1,5 +1,6 @@ from __future__ import annotations + def find_project_root(path: str = ".") -> str | None: """ Find the root of a Viash project @@ -15,8 +16,8 @@ def find_project_root(path: str = ".") -> str | None: """ import os - - path = os.path.abspath(path) + + path = os.path.abspath(path) while path != "/" and not os.path.exists(os.path.join(path, "_viash.yaml")): path = os.path.dirname(path) @@ -24,4 +25,4 @@ def find_project_root(path: str = ".") -> str | None: if path == "/": return None - return path \ No newline at end of file + return path diff --git a/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py b/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py index b750861..f3f13fd 100644 --- a/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py +++ b/packages/python/openproblems/src/openproblems/project/read_nested_yaml.py @@ -1,5 +1,6 @@ from __future__ import annotations + def read_nested_yaml(path: str, project_path: str | None = None) -> dict: """ Read a nested YAML @@ -29,10 +30,13 @@ def read_nested_yaml(path: str, project_path: str | None = None) -> dict: data = yaml.safe_load(f) except Exception as e: raise ValueError(f"Could not read {path}. 
Error: {e}") - + return process_nested_yaml(data, data, path, project_path) -def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str) -> dict: + +def process_nested_yaml( + data: any, root_data: dict, path: str, project_path: str +) -> dict: """ Process the merge keys in a YAML @@ -53,11 +57,18 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str from ..utils.deep_merge import deep_merge if isinstance(data, dict): - processed_data = {k: process_nested_yaml(v, root_data, path, project_path) for k, v in data.items()} + processed_data = { + k: process_nested_yaml(v, root_data, path, project_path) + for k, v in data.items() + } new_data = {} - if "__merge__" in processed_data and not isinstance(processed_data["__merge__"], dict): - new_data_path = resolve_path(processed_data["__merge__"], project_path, os.path.dirname(path)) + if "__merge__" in processed_data and not isinstance( + processed_data["__merge__"], dict + ): + new_data_path = resolve_path( + processed_data["__merge__"], project_path, os.path.dirname(path) + ) new_data = read_nested_yaml(new_data_path, project_path) elif "$ref" in processed_data and not isinstance(processed_data["$ref"], dict): ref_parts = processed_data["$ref"].split("#") @@ -65,7 +76,9 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str if ref_parts[0] == "": x = root_data else: - new_data_path = resolve_path(ref_parts[0], project_path, os.path.dirname(path)) + new_data_path = resolve_path( + ref_parts[0], project_path, os.path.dirname(path) + ) new_data_path = os.path.normpath(new_data_path) try: @@ -73,7 +86,7 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str x = yaml.safe_load(f) except Exception as e: raise ValueError(f"Could not read {new_data_path}. Error: {e}") - + x_root = x ref_path_parts = ref_parts[1].split("/") @@ -83,8 +96,10 @@ def process_nested_yaml(data: any, root_data: dict, path: str, project_path: str elif part in x: x = x[part] else: - raise ValueError(f"Could not find {processed_data['$ref']} in {path}") - + raise ValueError( + f"Could not find {processed_data['$ref']} in {path}" + ) + if ref_parts[0] == "": new_data = x else: diff --git a/packages/python/openproblems/src/openproblems/project/read_viash_config.py b/packages/python/openproblems/src/openproblems/project/read_viash_config.py index 81c2e23..8e7ae86 100644 --- a/packages/python/openproblems/src/openproblems/project/read_viash_config.py +++ b/packages/python/openproblems/src/openproblems/project/read_viash_config.py @@ -2,6 +2,7 @@ from .read_nested_yaml import read_nested_yaml from .find_project_root import find_project_root + def read_viash_config(target_config_path, project_root_dir=None): """ Process a Viash config file. 
@@ -41,7 +42,9 @@ def read_viash_config(target_config_path, project_root_dir=None): # Fix 'build_info' if present if config["build_info"]: rel_config_dir = os.path.dirname(rel_target_config_path) - abs_build_dir = config["build_info"]["output"].replace(f"/{rel_config_dir}", "") + abs_build_dir = config["build_info"]["output"].replace( + f"/{rel_config_dir}", "" + ) # Get platform types (assuming 'platforms' is a list of dicts) platform_types = [platform["type"] for platform in config["platforms"]] @@ -66,11 +69,7 @@ def read_viash_config(target_config_path, project_root_dir=None): ) config["build_info"]["runner"] = "executable" config["build_info"]["engine"] = "|".join( - [ - pt - for pt in platform_types - if pt in ["docker", "native"] - ] + [pt for pt in platform_types if pt in ["docker", "native"]] ) # Move 'functionality' to top level @@ -87,7 +86,7 @@ def read_viash_config(target_config_path, project_root_dir=None): for i, grp in enumerate(config["argument_groups"]) if grp["name"] == "Arguments" ), - None + None, ) if existing_ix is not None: diff --git a/packages/python/openproblems/src/openproblems/project/resolve_path.py b/packages/python/openproblems/src/openproblems/project/resolve_path.py index 7e2fb50..20b1e12 100644 --- a/packages/python/openproblems/src/openproblems/project/resolve_path.py +++ b/packages/python/openproblems/src/openproblems/project/resolve_path.py @@ -14,9 +14,9 @@ def resolve_path(path: str, project_path: str, parent_path: str) -> str: Returns: str: The resolved path - + Example: - + ```python project_path <- "/path/to/project" parent_path <- "/path/to/project/subdir" @@ -28,9 +28,9 @@ def resolve_path(path: str, project_path: str, parent_path: str) -> str: # "/path/to/project/file.yaml" ``` """ - + import os - + if path.startswith("/"): return os.path.join(project_path, path) else: diff --git a/packages/python/openproblems/src/openproblems/utils/__init__.py b/packages/python/openproblems/src/openproblems/utils/__init__.py index e7d0600..7042aa3 100644 --- a/packages/python/openproblems/src/openproblems/utils/__init__.py +++ b/packages/python/openproblems/src/openproblems/utils/__init__.py @@ -1,7 +1,4 @@ from .strip_margin import strip_margin from .deep_merge import deep_merge -__all__ = [ - "strip_margin", - "deep_merge" -] +__all__ = ["strip_margin", "deep_merge"] diff --git a/packages/python/openproblems/src/openproblems/utils/strip_margin.py b/packages/python/openproblems/src/openproblems/utils/strip_margin.py index 524fa7f..37a96e7 100644 --- a/packages/python/openproblems/src/openproblems/utils/strip_margin.py +++ b/packages/python/openproblems/src/openproblems/utils/strip_margin.py @@ -1,23 +1,24 @@ def strip_margin(text: str, symbol: str = "\\|") -> str: - """ - Strip margin from a string + """ + Strip margin from a string - Args: - text (str): A character vector. - symbol (str): The margin symbol to strip. - - Returns: - str: A character vector with the margin stripped. + Args: + text (str): A character vector. + symbol (str): The margin symbol to strip. - Example: + Returns: + str: A character vector with the margin stripped. 
- ```python - strip_margin(" - |hello_world: - | this_is: "a yaml" - |") - ``` - """ + Example: - import re - return re.sub("(^|\n)[ \t]*" + symbol, "\\1", text) + ```python + strip_margin(" + |hello_world: + | this_is: "a yaml" + |") + ``` + """ + + import re + + return re.sub("(^|\n)[ \t]*" + symbol, "\\1", text) diff --git a/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py index 2ad4fc8..c1ea848 100644 --- a/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py +++ b/packages/python/openproblems/tests/test_docs_render_task_readme_qmd.py @@ -1,20 +1,30 @@ import os import pytest -EXAMPLE_PROJECT = os.path.normpath(os.path.join( - os.path.dirname(__file__), - "data/example_project", -)) +EXAMPLE_PROJECT = os.path.normpath( + os.path.join( + os.path.dirname(__file__), + "data/example_project", + ) +) @pytest.fixture(scope="module") def task_metadata(): from openproblems.project.docs import read_task_metadata + return read_task_metadata(EXAMPLE_PROJECT) def test_read_task_metadata_keys(task_metadata): - for key in ("proj_path", "proj_conf", "files", "comps", "task_graph", "task_graph_order"): + for key in ( + "proj_path", + "proj_conf", + "files", + "comps", + "task_graph", + "task_graph_order", + ): assert key in task_metadata @@ -36,6 +46,7 @@ def test_read_task_metadata_graph_edges(task_metadata): def test_render_task_readme_qmd_structure(task_metadata): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) assert '---\ntitle: "Template"\nformat: gfm\n---' in result @@ -49,6 +60,7 @@ def test_render_task_readme_qmd_structure(task_metadata): def test_render_task_readme_qmd_components(task_metadata): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) assert "## Component type: Method" in result @@ -57,6 +69,7 @@ def test_render_task_readme_qmd_components(task_metadata): def test_render_task_readme_qmd_file_formats(task_metadata): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(task_metadata) assert "## File format: Training data" in result @@ -65,6 +78,7 @@ def test_render_task_readme_qmd_file_formats(task_metadata): def test_render_task_readme_qmd_instructions(task_metadata): from openproblems.project import render_task_readme_qmd + without = render_task_readme_qmd(task_metadata, add_instructions=False) with_inst = render_task_readme_qmd(task_metadata, add_instructions=True) @@ -74,5 +88,6 @@ def test_render_task_readme_qmd_instructions(task_metadata): def test_render_task_readme_qmd_from_path(): from openproblems.project import render_task_readme_qmd + result = render_task_readme_qmd(EXAMPLE_PROJECT) assert "## API" in result diff --git a/packages/python/openproblems/tests/test_project_find_project_root.py b/packages/python/openproblems/tests/test_project_find_project_root.py index 2fdd88f..5841727 100644 --- a/packages/python/openproblems/tests/test_project_find_project_root.py +++ b/packages/python/openproblems/tests/test_project_find_project_root.py @@ -1,22 +1,23 @@ import os from openproblems.project import find_project_root + def test_find_project_root(tmpdir): # Create project directory and subdirectories - proj_dir = os.path.join(tmpdir, 'project') + proj_dir = os.path.join(tmpdir, "project") os.makedirs(proj_dir, exist_ok=True) - src_dir = os.path.join(proj_dir, 'src') + src_dir = os.path.join(proj_dir, "src") 
os.makedirs(src_dir, exist_ok=True) # Create files - proj_config = os.path.join(proj_dir, '_viash.yaml') - open(proj_config, 'w').close() + proj_config = os.path.join(proj_dir, "_viash.yaml") + open(proj_config, "w").close() - comp_config = os.path.join(src_dir, 'config.vsh.yaml') - open(comp_config, 'w').close() + comp_config = os.path.join(src_dir, "config.vsh.yaml") + open(comp_config, "w").close() - comp_script = os.path.join(src_dir, 'script.R') - open(comp_script, 'w').close() + comp_script = os.path.join(src_dir, "script.R") + open(comp_script, "w").close() # Perform assertions assert find_project_root(comp_script) == proj_dir From ff10ba13cabae2d0e563f6806925f120c0493f40 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 20 Apr 2026 14:47:30 +0200 Subject: [PATCH 3/6] simplify functions --- .../openproblems/project/component_tests/check_config.py | 9 ++------- .../project/component_tests/run_and_check_output.py | 9 +++------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py index 181fcfa..6906b3c 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py +++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py @@ -105,20 +105,15 @@ def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None: check_references(references) -def run_check_config(meta: dict) -> None: +def run_check_config(config: dict) -> None: """Validate a viash component config. Checks namespace, info.type, component metadata, preferred_normalization, variants, and Nextflow runner labels. Args: - meta: Viash meta dict with at least a ``"config"`` key pointing to the - ``.config.vsh.yaml`` path. + config: Parsed viash config dict (from ``read_viash_config``). """ - import openproblems - - print("Load config data", flush=True) - config = openproblems.project.read_viash_config(meta["config"]) info = config.get("info", {}) comp_type = info.get("type") diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py index 2c96cb3..1a72635 100644 --- a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py +++ b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py @@ -265,16 +265,13 @@ def generate_cmd_args(argument_set: list) -> list: return cmd_args -def run_and_check_output(meta: dict) -> None: +def run_and_check_output(meta: dict, config: dict) -> None: """Run a viash component with test resources and validate its outputs. Args: - meta: Viash meta dict with keys ``"executable"``, ``"config"``, and - ``"resources_dir"``. + meta: Viash meta dict with keys ``"executable"`` and ``"resources_dir"``. + config: Parsed viash config dict (from ``read_viash_config``). 
""" - import openproblems - - config = openproblems.project.read_viash_config(meta["config"]) argument_sets = get_argument_sets(config, meta["resources_dir"]) for argset_name, argset_args in argument_sets.items(): From 0fcb2258ec0148030ea62c6d660db97fa3ed8f1b Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 21 Apr 2026 11:51:53 +0200 Subject: [PATCH 4/6] add python publish workflow --- .github/workflows/python-publish.yaml | 54 +++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/python-publish.yaml diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml new file mode 100644 index 0000000..e0e13e1 --- /dev/null +++ b/.github/workflows/python-publish.yaml @@ -0,0 +1,54 @@ +name: Publish Python packages to PyPI + +on: + release: + types: [published] + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # required for setuptools_scm to determine version from git tags + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install build + run: python -m pip install build + + - name: Build package + run: | + cd packages/python/openproblems + python -m build + + - name: Upload distribution artifacts + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: packages/python/openproblems/dist/ + + publish-to-pypi: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/openproblems + permissions: + id-token: write # required for OIDC trusted publishing + + steps: + - name: Download distribution artifacts + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From bce4261f72e1f9390c1b055538d6db4fa847fba6 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 21 Apr 2026 11:52:11 +0200 Subject: [PATCH 5/6] migrate more functions --- packages/python/openproblems/CHANGELOG.md | 24 +++++++++++++++++++ packages/python/openproblems/pyproject.toml | 20 ++++++++++++++-- .../project/component_tests/check_config.py | 16 ++++++------- .../component_tests/run_and_check_output.py | 8 +++---- 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/packages/python/openproblems/CHANGELOG.md b/packages/python/openproblems/CHANGELOG.md index 9ceafc5..a9bab40 100644 --- a/packages/python/openproblems/CHANGELOG.md +++ b/packages/python/openproblems/CHANGELOG.md @@ -1,3 +1,27 @@ +# openproblems core Python v0.2.0 + +## NEW FUNCTIONALITY + +* `project`: + - `resolve_path`: Resolve a path relative to a parent path or project root. + +* `project.component_tests`: + - `run_check_config` / `check_config`: Validate a component's Viash config (namespace, type, metadata, normalization, variants, Nextflow runner). + - `run_and_check_output`: Run a component executable and validate its output files against format specifications. + +* `project.docs`: + - `read_task_config`: Read a task-level configuration file. + - `read_task_metadata`: Read and assemble full task metadata by traversing the task's component graph. + - `read_component_spec`: Read a component API specification. + - `read_file_format`: Read a file format specification. + - `render_task_readme_qmd`: Render a Quarto README document for a task. + - `render_component_spec`: Render a component specification as a Markdown section. 
+  - `render_file_format`: Render a file format specification as a Markdown section.
+
+## MINOR CHANGES
+
+* Improve diagnostic print messages in `check_config` and `run_and_check_output` to be more descriptive.
+
 # openproblems core Python v0.1.1
 
 ## NEW FUNCTIONALITY
diff --git a/packages/python/openproblems/pyproject.toml b/packages/python/openproblems/pyproject.toml
index e381e1a..a31f416 100644
--- a/packages/python/openproblems/pyproject.toml
+++ b/packages/python/openproblems/pyproject.toml
@@ -12,6 +12,20 @@ authors = [
 license = { text = "MIT" }
 readme = "README.md"
 requires-python = ">= 3.9"
+keywords = ["openproblems", "benchmarking", "bioinformatics", "viash"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+]
 dependencies = [
     'PyYAML',
     'networkx',
@@ -23,8 +37,10 @@ test = [
 ]
 
 [project.urls]
-homepage = "https://openproblems.bio/documentation"
-repository = "https://github.com/openproblems-bio/core"
+Homepage = "https://openproblems.bio/documentation"
+Repository = "https://github.com/openproblems-bio/core"
+"Bug Tracker" = "https://github.com/openproblems-bio/core/issues"
+Changelog = "https://github.com/openproblems-bio/core/blob/main/packages/python/openproblems/CHANGELOG.md"
 
 [tool.setuptools.packages.find]
 where = ["src"]
diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py
index 6906b3c..14d3cce 100644
--- a/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py
+++ b/packages/python/openproblems/src/openproblems/project/component_tests/check_config.py
@@ -101,7 +101,7 @@ def check_info(this_info: Dict, this_config: Dict, comp_type: str) -> None:
     if comp_type != "metric":
         references = this_config.get("references") or references
         if comp_type != "control_method" or references:
-            print("Check references fields", flush=True)
+            print("Check references fields (doi or bibtex)", flush=True)
             check_references(references)
 
 
@@ -117,16 +117,16 @@ def run_check_config(config: dict) -> None:
     info = config.get("info", {})
     comp_type = info.get("type")
 
-    print("Check .namespace", flush=True)
+    print("Check that .namespace is defined", flush=True)
     assert config.get("namespace"), ".namespace is not defined"
 
-    print("Check .info.type", flush=True)
+    print("Check that .info.type is 'method', 'control_method', or 'metric'", flush=True)
     expected_types = ["method", "control_method", "metric"]
     assert (
         comp_type in expected_types
-    ), ".info.type should be equal to 'method' or 'control_method'"
+    ), f".info.type is '{comp_type}' but should be one of: {', '.join(expected_types)}"
 
-    print("Check component metadata", flush=True)
+    print("Check component metadata fields (name, label, summary, description)", flush=True)
     if comp_type == "metric":
         metric_infos = info.get("metrics", [])
         assert metric_infos, ".info.metrics is not defined"
@@ -136,7 +136,7 @@ def run_check_config(config: dict) -> None:
         check_info(info, config, comp_type=comp_type)
 
     if "preferred_normalization" in info:
-        print("Checking contents of .info.preferred_normalization", flush=True)
+        print("Check that .info.preferred_normalization is a valid normalization method", flush=True)
         norm_methods = [
             "log_cpm",
            "log_cp10k",
@@ -153,7 +153,7 @@ def run_check_config(config: dict) -> None:
         )
 
     if "variants" in info:
-        print("Checking contents of .info.variants", flush=True)
+        print("Check that .info.variants only references valid argument names", flush=True)
         arg_names = [arg["clean_name"] for arg in config["all_arguments"]] + [
             "preferred_normalization"
         ]
@@ -167,7 +167,7 @@ def run_check_config(config: dict) -> None:
 
     runners = config.get("runners", [])
 
-    print("Check Nextflow runner", flush=True)
+    print("Check that a Nextflow runner with time, mem, and cpu labels is defined", flush=True)
     nextflow_runner = next(
         (runner for runner in runners if runner["type"] == "nextflow"),
         None,
diff --git a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py
index 1a72635..75fdfe3 100644
--- a/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py
+++ b/packages/python/openproblems/src/openproblems/project/component_tests/run_and_check_output.py
@@ -7,7 +7,7 @@ def run_component(cmd: list) -> None:
     """Run a component executable and assert it exits successfully."""
     import subprocess
 
-    print(">> Running script as test", flush=True)
+    print(">> Running the component executable", flush=True)
     out = subprocess.run(cmd)
     assert (
         out.returncode == 0
@@ -18,7 +18,7 @@ def check_input_files(arguments: list) -> None:
     """Assert that all required input files exist."""
     from os import path
 
-    print(">> Checking whether input files exist", flush=True)
+    print(">> Checking that all required input files exist", flush=True)
     for arg in arguments:
         if arg["type"] == "file" and arg["direction"] == "input" and arg["required"]:
             assert not arg["must_exist"] or path.exists(
@@ -30,14 +30,14 @@ def check_output_files(arguments: list) -> None:
     """Assert that all required output files exist and match their format spec."""
     from os import path
 
-    print(">> Checking whether output file exists", flush=True)
+    print(">> Checking that all required output files were created", flush=True)
     for arg in arguments:
         if arg["type"] == "file" and arg["direction"] == "output" and arg["required"]:
             assert not arg["must_exist"] or path.exists(
                 arg["value"]
             ), f"Output file '{arg['value']}' does not exist"
 
-    print(">> Reading output files and checking formats", flush=True)
+    print(">> Validating the contents and format of output files", flush=True)
     for arg in arguments:
         if arg["type"] != "file" or arg["direction"] != "output":
             continue

From 073936f3603017957c015083a6a0455b6ad1eefa Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Tue, 21 Apr 2026 11:52:46 +0200
Subject: [PATCH 6/6] add readme

---
 packages/python/openproblems/README.md | 56 ++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 packages/python/openproblems/README.md

diff --git a/packages/python/openproblems/README.md b/packages/python/openproblems/README.md
new file mode 100644
index 0000000..508ee13
--- /dev/null
+++ b/packages/python/openproblems/README.md
@@ -0,0 +1,56 @@
+# openproblems
+
+[![PyPI](https://img.shields.io/pypi/v/openproblems)](https://pypi.org/project/openproblems/)
+[![Python Versions](https://img.shields.io/pypi/pyversions/openproblems)](https://pypi.org/project/openproblems/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+Core Python helper functions for [OpenProblems](https://openproblems.bio) benchmarking tasks.
+
+## Installation
+
+```bash
+pip install openproblems
+```
+
+## Modules
+
+### `openproblems.project`
+
+Utilities for working with Viash projects.
+
+- `find_project_root`: Find the root of a Viash project.
+- `read_nested_yaml`: Read a nested YAML file.
+- `read_viash_config`: Read a Viash configuration file.
+- `resolve_path`: Resolve a path relative to a parent or project path.
+
+#### `openproblems.project.component_tests`
+
+Helpers for writing component tests.
+
+- `run_check_config` / `check_config`: Validate a component's Viash configuration.
+- `run_and_check_output`: Run a component and validate its output files against format specs.
+
+#### `openproblems.project.docs`
+
+Utilities for generating task documentation.
+
+- `read_task_config`: Read a task-level configuration file.
+- `read_task_metadata`: Read and assemble full task metadata.
+- `read_component_spec`: Read a component API specification.
+- `read_file_format`: Read a file format specification.
+- `render_task_readme_qmd`: Render a Quarto README for a task.
+- `render_component_spec`: Render a component specification as Markdown.
+- `render_file_format`: Render a file format specification as Markdown.
+
+### `openproblems.utils`
+
+General-purpose utilities.
+
+- `strip_margin`: Strip leading margin characters from a multiline string.
+- `deep_merge`: Recursively merge two dictionaries.
+
+## Links
+
+- **Documentation**: <https://openproblems.bio/documentation>
+- **Repository**: <https://github.com/openproblems-bio/core>
+- **Issue tracker**: <https://github.com/openproblems-bio/core/issues>
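+
+## Example usage
+
+A minimal sketch of how the component-test helpers fit together in a test script. The `meta` dictionary is normally injected by viash when it runs component tests; the paths below are illustrative placeholders, not real project files.
+
+```python
+from openproblems.project import (
+    check_config,
+    read_viash_config,
+    run_and_check_output,
+)
+
+# In viash component tests, `meta` is provided by the test runner;
+# placeholder values are shown here for illustration.
+meta = {
+    "executable": "target/executable/my_method/my_method",
+    "config": "target/executable/my_method/.config.vsh.yaml",
+    "resources_dir": "resources_test/my_task",
+}
+
+# Validate the component configuration, then run the component against
+# its test resources and check the produced output files.
+config = read_viash_config(meta["config"])
+check_config(config)
+run_and_check_output(meta, config)
+```
+
+Task documentation can be rendered directly from a task directory (again a sketch; the path is a placeholder):
+
+```python
+from openproblems.project import render_task_readme_qmd
+
+# Accepts a task directory or a metadata dict from `read_task_metadata`
+# and returns the rendered Quarto document as a string.
+qmd = render_task_readme_qmd("path/to/task_project", add_instructions=True)
+```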