From ac69f9bf40eeb63c2ac458d9896a6a7d8fbd821d Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 12:28:46 +0000 Subject: [PATCH 1/5] Wien/BadAussee: ET0 mm/d->mm/h fix + per-run thinning to cut RAM ET0 fix: both full workflows now divide the daily ET0 by its interval in hours (period_et = 24) before writing //Kurven/ET0, mirroring the rain conversion. The kernel reads the ET0 curve as a mm/h rate, so unconverted daily values were integrated 24x too high. RAM: run_one() now thins each run to its single optimisation row immediately (get_simulation_results_optim(lean = TRUE) + add_overflow_events_and_waterbalance) and returns it; run_scenarios() collects the one-row tibbles and the analyse chunk just binds them. This removes the get_simulation_results_optim_parallel() pass that loaded every run's full time series into memory at once. New 'lean' arg on get_simulation_results_optim() reads only the fields the optimisation summary needs (element rates + both water balances), nulling states/meta/ connected-area rates; its intro message is gated behind debug. --- NEWS.md | 21 ++++++++++++-- R/get_simulation_results_optim.R | 32 +++++++++++++------- vignettes/workflow_badaussee.Rmd | 50 +++++++++++++++++++++----------- vignettes/workflow_wien.Rmd | 50 +++++++++++++++++++++----------- 4 files changed, 105 insertions(+), 48 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3038ce8..d6aaace 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,15 +2,30 @@ ## Bug fixes -* `vignettes/example_wien_minimal.Rmd` now converts ET0 from mm/day to mm/h +* `vignettes/example_wien_minimal.Rmd`, `vignettes/workflow_wien.Rmd` and + `vignettes/workflow_badaussee.Rmd` now convert ET0 from mm/day to mm/h (`value / period_et`) before writing `//Kurven/ET0`, mirroring the existing rain conversion. The engine reads the ET0 curve as a mm/h rate, so the unconverted daily values were integrated 24× too high — the cause of the - implausibly large modelled ET share. 
The timeseries-info summary now labels - ET0 as mm/h and recovers its total via `value * period_h`. + implausibly large modelled ET share. The minimal vignette's timeseries-info + summary now labels ET0 as mm/h and recovers its total via `value * period_h`. ## New features +* The Wien and Bad Aussee workflows now thin each run to its optimisation row + **inside** `run_one()` (via `get_simulation_results_optim(..., lean = TRUE)` + + `add_overflow_events_and_waterbalance()`) and `run_scenarios()` returns + those one-row tibbles for a final `dplyr::bind_rows()`. This replaces the + previous "run everything, then read every run's full results into memory at + once" pass (`get_simulation_results_optim_parallel()`), drastically cutting + peak RAM for large parameter grids. + +* `get_simulation_results_optim()` gains a `lean` argument. When `TRUE` it + reads only the fields consumed downstream (`element$rates`, + `element$water_balance`, `connected_area$water_balance`) and leaves the + unused `meta`/`states` and `connected_area$rates` as `NULL`, minimising + per-run memory and I/O. Its intro message is now gated behind `debug`. + * `inst/scripts/prepare_eisenstadt_swmm_timeseries.R` extracts the rain (`/Kurven/Regen`) and ET0 (`/Kurven/ET0`) curves from an engine HDF5 and writes SWMM-5 external time-series files. It converts **out** of the diff --git a/R/get_simulation_results_optim.R b/R/get_simulation_results_optim.R index 0540e13..e4d48b0 100644 --- a/R/get_simulation_results_optim.R +++ b/R/get_simulation_results_optim.R @@ -26,6 +26,13 @@ #' @param simulation_names Character vector of simulation run identifiers #' (e.g. \code{c("s00001", "s00002")}). #' @param debug print debug messages (default: TRUE) +#' @param lean Logical. 
If \code{TRUE}, read only the fields consumed by +#' \code{\link{add_overflow_events_and_waterbalance}} -- \code{element$rates}, +#' \code{element$water_balance} and \code{connected_area$water_balance} -- and +#' leave \code{meta}/\code{states} (both sides) and \code{connected_area$rates} +#' as \code{NULL}. This keeps per-run memory and I/O minimal when each run is +#' thinned to its optimisation row immediately instead of collecting every +#' run's full results first. Defaults to \code{FALSE} (read everything). #' @return A named list with one entry per \code{simulation_names}. Each entry is #' either \code{NULL} (element HDF5 missing) or a nested list: #' \describe{ @@ -53,13 +60,16 @@ #' @importFrom stats setNames #' @importFrom hdf5r H5File get_simulation_results_optim <- function(paths, - path_list, + path_list, simulation_names, - debug = TRUE) { - - message(sprintf("Reading results files ('%s') for %d model runs", - paste0(c(paths$file_results_hdf5_element, paths$file_results_hdf5_flaeche), collapse = "|"), - length(simulation_names))) + debug = TRUE, + lean = FALSE) { + + if (isTRUE(debug)) { + message(sprintf("Reading results files ('%s') for %d model runs", + paste0(c(paths$file_results_hdf5_element, paths$file_results_hdf5_flaeche), collapse = "|"), + length(simulation_names))) + } stats::setNames(lapply(simulation_names, function(s_name) { paths <- kwb.utils::resolve(path_list, dir_target = s_name) @@ -109,20 +119,20 @@ get_simulation_results_optim <- function(paths, paths$dir_target_output), expr = { element <- list( - meta = kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Metainfo"]], + meta = if (lean) NULL else kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Metainfo"]], numeric_only = FALSE), rates = kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Raten"]]), water_balance = kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Wasserbilanz"]]), - states = kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Zustandsvariablen"]]) + states 
= if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Zustandsvariablen"]]) ) connected_area <- if (!is.null(res_hdf5_flaeche)) { list( - meta = kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Metainfo"]], + meta = if (lean) NULL else kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Metainfo"]], numeric_only = FALSE), - rates = kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Raten"]]), + rates = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Raten"]]), water_balance = kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Wasserbilanz"]]), - states = kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Zustandsvariablen"]]) + states = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Zustandsvariablen"]]) ) } else { NULL diff --git a/vignettes/workflow_badaussee.Rmd b/vignettes/workflow_badaussee.Rmd index 69b87ef..d1ead79 100644 --- a/vignettes/workflow_badaussee.Rmd +++ b/vignettes/workflow_badaussee.Rmd @@ -260,6 +260,12 @@ message(txt) period <- c(diff(timeseries_rain$time), mean(diff(timeseries_rain$time))) timeseries_rain$value <- timeseries_rain$value / period +### Convert ET0 from mm/day to mm/h (the engine reads //Kurven/ET0 as a mm/h +### rate, exactly like rain; daily values must be divided by their interval in +### hours = 24, otherwise ET0 is integrated 24x too high) +period_et <- c(diff(timeseries_et$time), mean(diff(timeseries_et$time))) +timeseries_et$value <- timeseries_et$value / period_et + #openxlsx::write.xlsx(list(regen = timeseries_rain, et = timeseries_et), "timeseries.xlsx") @@ -332,13 +338,33 @@ run_one <- function(i, path_input = paths$path_target_input, debug = debug) - invisible(NULL) + # Thin immediately: read only this run's results (lean = water balance + + # overflow rates, no states/meta/connected-area rates) and reduce to the + # single optimisation row. 
This way we never hold all scenarios' full + # time series in memory at once; the full result HDF5 stays on disk for + # ad-hoc inspection. + sim_one <- kwb.raindrop::get_simulation_results_optim( + paths = paths, + path_list = path_list, + simulation_names = param_grid_tmp$scenario_name, + debug = debug, + lean = TRUE + ) + + kwb.raindrop::add_overflow_events_and_waterbalance( + simulation_results = sim_one, + event_separation_hours = 4, + canonical_variables = kwb.raindrop::default_canonical_wb_variables() + ) } n_cores <- parallel::detectCores() +# run_one() now returns the thinned per-run optimisation row, so run_scenarios() +# yields a list of one-row tibbles we simply bind below. +scenario_rows <- NULL system.time(expr = { -kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), +scenario_rows <- kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), run_one_scenario = run_one, timestep_hours = 0.1, debug = FALSE, @@ -368,21 +394,11 @@ x$Fehlerbeschreibung ### Analyse Results ```{r analyse_results, eval = data_available && is_windows && !is_ghactions} -system.time( -simulation_results <- kwb.raindrop::get_simulation_results_optim_parallel( - paths = paths, - path_list = path_list, - simulation_names = param_grid$scenario_name, - debug = FALSE) -) - -system.time( -simulation_results_optimisation <- kwb.raindrop::add_overflow_events_and_waterbalance( - simulation_results = simulation_results, - event_separation_hours = 4, - canonical_variables = kwb.raindrop::default_canonical_wb_variables() - ) -) +# Each run was already thinned to its optimisation row inside run_one(), so we +# just bind the per-run rows here instead of re-reading every run's full +# results into memory. (The previous get_simulation_results_optim_parallel() + +# add_overflow_events_and_waterbalance() pass loaded all runs at once.) 
+simulation_results_optimisation <- dplyr::bind_rows(scenario_rows) simulation_results_optimisation <- param_grid %>% dplyr::left_join(simulation_results_optimisation, diff --git a/vignettes/workflow_wien.Rmd b/vignettes/workflow_wien.Rmd index e9971ef..426da47 100644 --- a/vignettes/workflow_wien.Rmd +++ b/vignettes/workflow_wien.Rmd @@ -260,6 +260,12 @@ message(txt) period <- c(diff(timeseries_rain$time), mean(diff(timeseries_rain$time))) timeseries_rain$value <- timeseries_rain$value / period +### Convert ET0 from mm/day to mm/h (the engine reads //Kurven/ET0 as a mm/h +### rate, exactly like rain; daily values must be divided by their interval in +### hours = 24, otherwise ET0 is integrated 24x too high) +period_et <- c(diff(timeseries_et$time), mean(diff(timeseries_et$time))) +timeseries_et$value <- timeseries_et$value / period_et + #openxlsx::write.xlsx(list(regen = timeseries_rain, et = timeseries_et), "timeseries.xlsx") ``` @@ -329,13 +335,33 @@ run_one <- function(i, path_input = paths$path_target_input, debug = debug) - invisible(NULL) + # Thin immediately: read only this run's results (lean = water balance + + # overflow rates, no states/meta/connected-area rates) and reduce to the + # single optimisation row. This way we never hold all scenarios' full + # time series in memory at once; the full result HDF5 stays on disk for + # ad-hoc inspection. + sim_one <- kwb.raindrop::get_simulation_results_optim( + paths = paths, + path_list = path_list, + simulation_names = param_grid_tmp$scenario_name, + debug = debug, + lean = TRUE + ) + + kwb.raindrop::add_overflow_events_and_waterbalance( + simulation_results = sim_one, + event_separation_hours = 4, + canonical_variables = kwb.raindrop::default_canonical_wb_variables() + ) } n_cores <- parallel::detectCores() +# run_one() now returns the thinned per-run optimisation row, so run_scenarios() +# yields a list of one-row tibbles we simply bind below. 
+scenario_rows <- NULL system.time(expr = { -kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), +scenario_rows <- kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), run_one_scenario = run_one, timestep_hours = 0.1, debug = FALSE, @@ -364,21 +390,11 @@ x$Fehlerbeschreibung ### Analyse Results ```{r analyse_results, eval = data_available && is_windows && !is_ghactions} -system.time( -simulation_results <- kwb.raindrop::get_simulation_results_optim_parallel( - paths = paths, - path_list = path_list, - simulation_names = param_grid$scenario_name, - debug = FALSE) -) - -system.time( -simulation_results_optimisation <- kwb.raindrop::add_overflow_events_and_waterbalance( - simulation_results = simulation_results, - event_separation_hours = 4, - canonical_variables = kwb.raindrop::default_canonical_wb_variables() - ) -) +# Each run was already thinned to its optimisation row inside run_one(), so we +# just bind the per-run rows here instead of re-reading every run's full +# results into memory. (The previous get_simulation_results_optim_parallel() + +# add_overflow_events_and_waterbalance() pass loaded all runs at once.) +simulation_results_optimisation <- dplyr::bind_rows(scenario_rows) simulation_results_optimisation <- param_grid %>% dplyr::left_join(simulation_results_optimisation, From 5ddd90e8291369cdde7f4f2c3c6bcc858677d880 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 12:35:01 +0000 Subject: [PATCH 2/5] Eisenstadt 2005 workflows: per-run thinning to cut RAM Apply the same memory fix as Wien/BadAussee: run_one() now thins each run to its optimisation row immediately (get_simulation_results_optim(lean = TRUE) + add_overflow_events_and_waterbalance) and returns it; run_scenarios() collects the rows and the analyse chunk binds them, replacing the get_simulation_results_optim_parallel() pass that held every run's full time series at once. 
No ET0 unit change here: the Eisenstadt workflows use the base.h5 constant ET0 placeholder (0.2 mm/h) and the template rain (already mm/h), so there is no mm/d daily series to convert. --- vignettes/workflow_eisenstadt-2005.Rmd | 44 +++++++++++++--------- vignettes/workflow_eisenstadt-2005_neu.Rmd | 44 +++++++++++++--------- 2 files changed, 54 insertions(+), 34 deletions(-) diff --git a/vignettes/workflow_eisenstadt-2005.Rmd b/vignettes/workflow_eisenstadt-2005.Rmd index 818fc01..474da95 100644 --- a/vignettes/workflow_eisenstadt-2005.Rmd +++ b/vignettes/workflow_eisenstadt-2005.Rmd @@ -228,13 +228,33 @@ if (is.data.frame(vals[["//Kurven/Regen"]])) { path_input = paths$path_target_input, debug = debug) - invisible(NULL) + # Thin immediately: read only this run's results (lean = water balance + + # overflow rates, no states/meta/connected-area rates) and reduce to the + # single optimisation row. This way we never hold all scenarios' full + # time series in memory at once; the full result HDF5 stays on disk for + # ad-hoc inspection. + sim_one <- kwb.raindrop::get_simulation_results_optim( + paths = paths, + path_list = path_list, + simulation_names = param_grid_tmp$scenario_name, + debug = debug, + lean = TRUE + ) + + kwb.raindrop::add_overflow_events_and_waterbalance( + simulation_results = sim_one, + event_separation_hours = 4, + canonical_variables = kwb.raindrop::default_canonical_wb_variables() + ) } n_cores <- parallel::detectCores() +# run_one() now returns the thinned per-run optimisation row, so run_scenarios() +# yields a list of one-row tibbles we simply bind below. 
+scenario_rows <- NULL system.time(expr = { -kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), +scenario_rows <- kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), run_one_scenario = run_one, timestep_hours = 0.1, debug = FALSE, @@ -264,21 +284,11 @@ x$Fehlerbeschreibung ### Analyse Results ```{r analyse_results, eval = data_available && is_windows && !is_ghactions} -system.time( -simulation_results <- kwb.raindrop::get_simulation_results_optim_parallel( - paths = paths, - path_list = path_list, - simulation_names = param_grid$scenario_name, - debug = FALSE) -) - -system.time( -simulation_results_optimisation <- kwb.raindrop::add_overflow_events_and_waterbalance( - simulation_results = simulation_results, - event_separation_hours = 4, - canonical_variables = kwb.raindrop::default_canonical_wb_variables() - ) -) +# Each run was already thinned to its optimisation row inside run_one(), so we +# just bind the per-run rows here instead of re-reading every run's full +# results into memory. (The previous get_simulation_results_optim_parallel() + +# add_overflow_events_and_waterbalance() pass loaded all runs at once.) +simulation_results_optimisation <- dplyr::bind_rows(scenario_rows) simulation_results_optimisation <- param_grid %>% dplyr::left_join(simulation_results_optimisation, diff --git a/vignettes/workflow_eisenstadt-2005_neu.Rmd b/vignettes/workflow_eisenstadt-2005_neu.Rmd index 5c74100..c95f8a8 100644 --- a/vignettes/workflow_eisenstadt-2005_neu.Rmd +++ b/vignettes/workflow_eisenstadt-2005_neu.Rmd @@ -228,13 +228,33 @@ run_one <- function(i, path_input = paths$path_target_input, debug = debug) - invisible(NULL) + # Thin immediately: read only this run's results (lean = water balance + + # overflow rates, no states/meta/connected-area rates) and reduce to the + # single optimisation row. This way we never hold all scenarios' full + # time series in memory at once; the full result HDF5 stays on disk for + # ad-hoc inspection. 
+ sim_one <- kwb.raindrop::get_simulation_results_optim( + paths = paths, + path_list = path_list, + simulation_names = param_grid_tmp$scenario_name, + debug = debug, + lean = TRUE + ) + + kwb.raindrop::add_overflow_events_and_waterbalance( + simulation_results = sim_one, + event_separation_hours = 4, + canonical_variables = kwb.raindrop::default_canonical_wb_variables() + ) } n_cores <- parallel::detectCores() +# run_one() now returns the thinned per-run optimisation row, so run_scenarios() +# yields a list of one-row tibbles we simply bind below. +scenario_rows <- NULL system.time(expr = { -kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), +scenario_rows <- kwb.raindrop::run_scenarios(indices = seq_len(nrow(param_grid)), run_one_scenario = run_one, timestep_hours = 0.1, debug = FALSE, @@ -266,21 +286,11 @@ x$Fehlerbeschreibung ### Analyse Results ```{r analyse_results, eval = data_available && is_windows && !is_ghactions} -system.time( -simulation_results <- kwb.raindrop::get_simulation_results_optim_parallel( - paths = paths, - path_list = path_list, - simulation_names = param_grid$scenario_name, - debug = FALSE) -) - -system.time( -simulation_results_optimisation <- kwb.raindrop::add_overflow_events_and_waterbalance( - simulation_results = simulation_results, - event_separation_hours = 4, - canonical_variables = kwb.raindrop::default_canonical_wb_variables() - ) -) +# Each run was already thinned to its optimisation row inside run_one(), so we +# just bind the per-run rows here instead of re-reading every run's full +# results into memory. (The previous get_simulation_results_optim_parallel() + +# add_overflow_events_and_waterbalance() pass loaded all runs at once.) 
+simulation_results_optimisation <- dplyr::bind_rows(scenario_rows) simulation_results_optimisation <- param_grid %>% dplyr::left_join(simulation_results_optimisation, From 971dcdd75162201e89218b0b8dd3a66da3d0388d Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 12:35:01 +0000 Subject: [PATCH 3/5] Add results landing page vignettes/index.Rmd (excluded from R CMD check) Summary page linking the per-site brute-force outputs (workflow html, result tables, CSVs, interactive plots). Added to .Rbuildignore so R CMD check does not build/execute it; render it manually into the assembled results directory. Result-file links match each workflow's paths$modelname (Wien, BadAussee, Eisenstadt_2005). --- .Rbuildignore | 1 + vignettes/index.Rmd | 150 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 vignettes/index.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 906e453..5ae8bc9 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,4 @@ ^codecov\.yml$ ^index\.md$ ^README\.md$ +^vignettes/index\.Rmd$ diff --git a/vignettes/index.Rmd b/vignettes/index.Rmd new file mode 100644 index 0000000..5439b48 --- /dev/null +++ b/vignettes/index.Rmd @@ -0,0 +1,150 @@ +--- +title: "RainDrop Optimierung – Brute Force" +author: "Michael Rustler" +date: "2026-02-25" +output: + html_document: + toc: true + toc_depth: 3 + number_sections: true +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE) + +sites <- c("Eisenstadt_2005", "Wien", "BadAussee") + +# index.html liegt im gleichen Verzeichnis wie der Ordner "brute-force" +base_dir <- "." + +design_spaces <- paste0( + "mulde-area_vs_", + c("filter_hydraulicconductivity", "mulde_height", "storage_height") +) + +rel <- function(...) 
file.path(..., fsep = "/") + +md_link_line <- function(label, href) sprintf("- [%s](%s)", label, href) + +md_list <- function(lines) knitr::asis_output(paste(lines, collapse = "\n")) +``` + +# Hintergrund + +xxx + +# Methodik + +Die Modellierung erfolgte in R mit dem R Paket [kwb.raindrop](https://github.com/kwb-r/kwb.raindrop). +Das genaue Vorgehen ist für jede Fallstudie im Folgenden im R Markdown reproduzierbar +dokumentiert. + +```{r brute_force_rmarkdown, echo = FALSE, results='asis'} +for (site in sites) { + cat(sprintf( + "- [%s](%s/workflow_%s.html)\n", + site, + base_dir, + site + )) +} +``` + +# Ergebnisse + +Die Ergebnisse für die einjährige Berechnung (Eisenstadt für Jahr 2005) und die +beiden 15-jährigen Zeitreihen (2011-2025) für Wien und Bad Aussee finden sich in +unten stehenden Links: + +## Tabellen + +```{r brute_force_tabelle, echo = FALSE, results='asis'} +for (site in sites) { + cat(sprintf( + "- [%s](%s/simulation_results_optimisation_%s.html)\n", + site, + base_dir, + site + )) +} + +``` + +## CSV + +Die in den [obenstehenden Tabellen](#tabellen) dargestellten Ergebnisse können auch als `.csv` Datei +heruntergeladen werden. + +```{r brute_force_csv, echo = FALSE, results='asis'} +for (site in sites) { + cat(sprintf( + "- [%s](%s/simulation_results_optimisation_%s.csv)\n", + site, + base_dir, + site + )) +} +``` + +## Interaktive Visualisierungen + +### Sensitive Modellparameter + +```{r brute_force_plots_main, echo = FALSE, results='asis'} +for (site in sites) { + cat(sprintf( + "- [%s](%s/simulation_results_optimisation_%s_main-effects.html)\n", + site, + base_dir, + site + )) +} +``` + +### Design Spaces + +In den nachfolgenden Abbildungen wird die **Muldenfläche** (x-Achse) mit +**einem weiteren Parameter** (y-Achse) dargestellt. 
Diese sind im Folgenden: + +- ***Muldenhöhe*** + +- ***Speicherhöhe*** + +- ***hydraulische Leitfähigkeit*** des Bodenfilters + +```{r brute_force_plots_design-spaches, echo = FALSE, results='asis'} +cat("| Design Space |", paste(sites, collapse = " | "), "|\n") +cat("|---|", paste(rep("---", length(sites)), collapse = "|"), "|\n") + +for (ds in design_spaces) { + + ds_label <- sub("^mulde-area_vs_", "", ds) + + row_links <- sapply(sites, function(site) { + sprintf( + "[%s](%s/simulation_results_optimisation_%s_design-space_%s.html)", + ds_label, + base_dir, + site, + ds + ) + }) + + cat("|", ds_label, "|", paste(row_links, collapse = " | "), "|\n") +} +``` + +### Wasserbilanz + +```{r brute_force_plots_water-balance, echo = FALSE, results='asis'} +for (site in sites) { + cat(sprintf( + "- [%s](%s/simulation_results_optimisation_%s_water-balance.html)\n", + site, + base_dir, + site + )) +} +``` + + From 5302a4dd3ada087f196d0fe65f6c78b6ae4ce677 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 13:31:09 +0000 Subject: [PATCH 4/5] Tolerate unreadable result HDF5 in get_simulation_results_optim() Since the workflows now thin per run inside run_one(), reading happens inside the future_lapply batch. A result file that exists but cannot be opened/read (engine crashed mid-write for a scenario, or a transient lock) previously threw H5File.open() 'unable to open file' and aborted the whole render at the run_model chunk. Wrap the open+read in an inner function (own on.exit for handle cleanup) plus tryCatch: such a file is now treated like a missing one -- warn, name the scenario, return NULL -- so add_overflow_events_and_waterbalance() emits an NA row and the batch completes. 
--- NEWS.md | 9 +++ R/get_simulation_results_optim.R | 102 ++++++++++++++++++------------- 2 files changed, 67 insertions(+), 44 deletions(-) diff --git a/NEWS.md b/NEWS.md index d6aaace..d997a05 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,15 @@ ## Bug fixes +* `get_simulation_results_optim()` now treats a result HDF5 that exists but + cannot be opened/read (e.g. the engine crashed mid-write for a scenario, or a + file briefly locked just after the run) like a missing file: it `warning()`s, + names the scenario, and returns `NULL` instead of throwing. Because the Wien / + Bad Aussee / Eisenstadt workflows now read results per run inside `run_one()`, + a single unreadable file used to abort the entire `future_lapply` batch (seen + as an `H5File.open()` "unable to open file" error mid-render); the run now + completes with NA rows for the affected scenarios. + * `vignettes/example_wien_minimal.Rmd`, `vignettes/workflow_wien.Rmd` and `vignettes/workflow_badaussee.Rmd` now convert ET0 from mm/day to mm/h (`value / period_et`) before writing `//Kurven/ET0`, mirroring the existing diff --git a/R/get_simulation_results_optim.R b/R/get_simulation_results_optim.R index e4d48b0..311315e 100644 --- a/R/get_simulation_results_optim.R +++ b/R/get_simulation_results_optim.R @@ -92,56 +92,70 @@ get_simulation_results_optim <- function(paths, return(NULL) } - # Open H5 handles outside catAndRun so on.exit binds to *this* lambda's - # frame, not catAndRun's internal frame. Handles are guaranteed to close - # whichever way the iteration unwinds. - res_hdf5_element <- hdf5r::H5File$new(paths$path_results_hdf5_element, mode = "r") - on.exit(try(res_hdf5_element$close_all(), silent = TRUE), add = TRUE) + # Open + read in an inner function so its on.exit() handle-closing binds to + # *its own* frame and always fires (even on error) before we decide what to + # return. The surrounding tryCatch makes a result file that exists but is + # unreadable -- e.g. 
the engine crashed mid-write for that scenario -- + # behave like a missing file (NULL + warning) instead of aborting the whole + # (possibly parallel) batch. Downstream add_overflow_events_and_waterbalance() + # then emits an NA row for the scenario. + read_result <- function() { + res_hdf5_element <- hdf5r::H5File$new(paths$path_results_hdf5_element, mode = "r") + on.exit(try(res_hdf5_element$close_all(), silent = TRUE), add = TRUE) - res_hdf5_flaeche <- if (has_flaeche) { - h <- hdf5r::H5File$new(paths$path_results_hdf5_flaeche, mode = "r") - on.exit(try(h$close_all(), silent = TRUE), add = TRUE) - h - } else { - if (isTRUE(debug)) { - message(sprintf( - "No connected_area H5 for %s ('%s') -> connected_area = NULL", - s_name, paths$path_results_hdf5_flaeche - )) + res_hdf5_flaeche <- if (has_flaeche) { + h <- hdf5r::H5File$new(paths$path_results_hdf5_flaeche, mode = "r") + on.exit(try(h$close_all(), silent = TRUE), add = TRUE) + h + } else { + if (isTRUE(debug)) { + message(sprintf( + "No connected_area H5 for %s ('%s') -> connected_area = NULL", + s_name, paths$path_results_hdf5_flaeche + )) + } + NULL } - NULL - } - kwb.utils::catAndRun( - messageText = sprintf("(%d/%d)) Reading results files for model run %s", - which(simulation_names == s_name), - length(simulation_names), - paths$dir_target_output), - expr = { - element <- list( - meta = if (lean) NULL else kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Metainfo"]], - numeric_only = FALSE), - rates = kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Raten"]]), - water_balance = kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Wasserbilanz"]]), - states = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Zustandsvariablen"]]) - ) - - connected_area <- if (!is.null(res_hdf5_flaeche)) { - list( - meta = if (lean) NULL else kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Metainfo"]], + kwb.utils::catAndRun( + messageText = sprintf("(%d/%d)) Reading results files for model 
run %s", + which(simulation_names == s_name), + length(simulation_names), + paths$dir_target_output), + expr = { + element <- list( + meta = if (lean) NULL else kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Metainfo"]], numeric_only = FALSE), - rates = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Raten"]]), - water_balance = kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Wasserbilanz"]]), - states = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Zustandsvariablen"]]) + rates = kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Raten"]]), + water_balance = kwb.raindrop::read_hdf5_scalars(res_hdf5_element[["Wasserbilanz"]]), + states = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_element[["Zustandsvariablen"]]) ) - } else { - NULL - } - list(element = element, connected_area = connected_area) - }, - dbg = debug - ) + connected_area <- if (!is.null(res_hdf5_flaeche)) { + list( + meta = if (lean) NULL else kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Metainfo"]], + numeric_only = FALSE), + rates = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Raten"]]), + water_balance = kwb.raindrop::read_hdf5_scalars(res_hdf5_flaeche[["Wasserbilanz"]]), + states = if (lean) NULL else kwb.raindrop::read_hdf5_timeseries(res_hdf5_flaeche[["Zustandsvariablen"]]) + ) + } else { + NULL + } + + list(element = element, connected_area = connected_area) + }, + dbg = debug + ) + } + + tryCatch(read_result(), error = function(e) { + warning(sprintf( + "Scenario '%s': result HDF5 unreadable ('%s'): %s -- treating as missing (NULL).", + s_name, paths$path_results_hdf5_element, conditionMessage(e) + ), call. 
= FALSE) + NULL + }) }), nm = simulation_names) } From a8b4e0d97531e6efac62984531a6b8e0129e1aba Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 3 Jun 2026 14:39:14 +0000 Subject: [PATCH 5/5] Sync get_simulation_results_optim.Rd with new lean argument R CMD check failed with a codoc WARNING because the .Rd still documented the old signature (no 'lean'). Regenerate the usage block and add the \item{lean} documentation to match R/get_simulation_results_optim.R (devtools::document() equivalent), clearing the only WARNING (the 3 NOTEs are pre-existing). --- man/get_simulation_results_optim.Rd | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/man/get_simulation_results_optim.Rd b/man/get_simulation_results_optim.Rd index 1c7d586..1a1c68b 100644 --- a/man/get_simulation_results_optim.Rd +++ b/man/get_simulation_results_optim.Rd @@ -4,7 +4,13 @@ \alias{get_simulation_results_optim} \title{Read Raindrop optimisation simulation results from HDF5} \usage{ -get_simulation_results_optim(paths, path_list, simulation_names, debug = TRUE) +get_simulation_results_optim( + paths, + path_list, + simulation_names, + debug = TRUE, + lean = FALSE +) } \arguments{ \item{paths}{A list of path definitions. Used for messaging and expected to @@ -20,6 +26,14 @@ run-specific paths (must yield \code{path_results_hdf5_element}, (e.g. \code{c("s00001", "s00002")}).} \item{debug}{print debug messages (default: TRUE)} + +\item{lean}{Logical. If \code{TRUE}, read only the fields consumed by +\code{\link{add_overflow_events_and_waterbalance}} -- \code{element$rates}, +\code{element$water_balance} and \code{connected_area$water_balance} -- and +leave \code{meta}/\code{states} (both sides) and \code{connected_area$rates} +as \code{NULL}. This keeps per-run memory and I/O minimal when each run is +thinned to its optimisation row immediately instead of collecting every +run's full results first. 
Defaults to \code{FALSE} (read everything).} } \value{ A named list with one entry per \code{simulation_names}. Each entry is