Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions R/analyse-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,46 @@
}


#' Resolve the response scalar for formula-based analyses
#'
#' Uses the left-hand side of the formula when `scalar` is omitted.
#' If `scalar` is supplied, validates that it exists and matches the formula
#' response when that response can be mapped to a scalar name.
#' @noRd
.resolve_formula_scalar <- function(formula, data, scalar = NULL) {
scalar_names <- names(scalars(data))
lhs_expr <- tryCatch(formula[[2]], error = function(e) NULL)
lhs_vars <- if (is.null(lhs_expr)) character(0) else all.vars(lhs_expr)
lhs_scalar <- intersect(lhs_vars, scalar_names)

if (is.null(scalar)) {
if (length(lhs_scalar) == 1L) {
return(lhs_scalar)
}
stop(
"Could not infer the response scalar from the formula. ",
"Please supply `scalar=` explicitly."
)
}

if (!(scalar %in% scalar_names)) {
stop(
"scalar '", scalar, "' not found in data. Available scalars: ",
paste(scalar_names, collapse = ", ")
)
}

if (length(lhs_scalar) == 1L && !identical(lhs_scalar, scalar)) {
stop(
"The formula response refers to scalar '", lhs_scalar,
"' but `scalar=` was set to '", scalar, "'."
)
}

scalar
}


#' Validate and default element.subset
#' @return Integer vector of element indices
#' @noRd
Expand Down
6 changes: 4 additions & 2 deletions R/analyse.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@
#' @rdname ModelArray.lm
#' @export

ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE,
ModelArray.lm <- function(formula, data, phenotypes, scalar = NULL, element.subset = NULL, full.outputs = FALSE,
var.terms = c("estimate", "statistic", "p.value"),
var.model = c("adj.r.squared", "p.value"),
correct.p.value.terms = c("fdr"), correct.p.value.model = c("fdr"),
Expand All @@ -147,6 +147,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU
return_output = TRUE,
...) {
.validate_modelarray_input(data)
scalar <- .resolve_formula_scalar(formula, data, scalar)
element.subset <- .validate_element_subset(element.subset, data, scalar)
phenotypes <- .align_phenotypes(data, phenotypes, scalar)

Expand Down Expand Up @@ -414,7 +415,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU
#' @rdname ModelArray.gam
#' @export

ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE,
ModelArray.gam <- function(formula, data, phenotypes, scalar = NULL, element.subset = NULL, full.outputs = FALSE,
var.smoothTerms = c("statistic", "p.value"),
var.parametricTerms = c("estimate", "statistic", "p.value"),
var.model = c("dev.expl"),
Expand All @@ -432,6 +433,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N
return_output = TRUE,
...) {
.validate_modelarray_input(data)
scalar <- .resolve_formula_scalar(formula, data, scalar)
element.subset <- .validate_element_subset(element.subset, data, scalar)

# check if the formula is valid in terms of mgcv::gam()
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ library(ModelArray)
modelarray <- ModelArray("data.h5", scalar_types = c("FDC"))
phenotypes <- read.csv("cohort.csv")

results <- ModelArray.lm(FDC ~ Age + sex + motion, modelarray, phenotypes, "FDC",
results <- ModelArray.lm(FDC ~ Age + sex + motion, modelarray, phenotypes,
n_cores = 4
)
```
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ library(ModelArray)
modelarray <- ModelArray("data.h5", scalar_types = c("FDC"))
phenotypes <- read.csv("cohort.csv")

results <- ModelArray.lm(FDC ~ Age + sex + motion, modelarray, phenotypes, "FDC",
results <- ModelArray.lm(FDC ~ Age + sex + motion, modelarray, phenotypes,
n_cores = 4
)
```
Expand Down
18 changes: 18 additions & 0 deletions tests/testthat/test-analyse_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,24 @@ test_that(".validate_modelarray_input rejects non-ModelArray", {
expect_error(.validate_modelarray_input(data.frame()), "not ModelArray")
})

test_that(".resolve_formula_scalar infers and validates scalar names", {
h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray")
ma <- ModelArray(h5_path, scalar_types = c("FD"))

expect_identical(.resolve_formula_scalar(FD ~ age, ma), "FD")
expect_identical(.resolve_formula_scalar(log(FD) ~ age, ma), "FD")
expect_identical(.resolve_formula_scalar(FD ~ age, ma, scalar = "FD"), "FD")

expect_error(
.resolve_formula_scalar(unknown_response ~ age, ma),
"Could not infer the response scalar"
)
expect_error(
.resolve_formula_scalar(FD ~ age, ma, scalar = "FA"),
"not found in data"
)
})

test_that(".validate_element_subset defaults and validates", {
h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray")
ma <- ModelArray(h5_path, scalar_types = c("FD"))
Expand Down
130 changes: 130 additions & 0 deletions tests/testthat/test-merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,133 @@ test_that("mergeModelArrays returns proper structure", {
expect_equal(nInputFiles(merged$data, "FD2"), length(src))
expect_equal(nElements(merged$data, "FD"), nElements(ma_fd, "FD"))
})

test_that("merged ModelArrays work with analyseOneElement helpers", {
h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray")
ma_fd <- ModelArray(h5_path, scalar_types = c("FD"))
src <- sources(ma_fd)[["FD"]]
base <- scalars(ma_fd)[["FD"]]

ma2 <- new("ModelArray",
scalars = list(
FD2 = base * 1.5 + 2,
FD3 = log(base + 1)
),
sources = list(FD2 = src, FD3 = src),
results = list(),
path = c(FD2 = h5_path, FD3 = h5_path)
)

phen1 <- data.frame(
subject_id = paste0("subj_", seq_along(src)),
source_file = src,
age = seq_along(src),
stringsAsFactors = FALSE
)
phen2 <- data.frame(
subject_id = rev(paste0("subj_", seq_along(src))),
source_file = rev(src),
stringsAsFactors = FALSE
)

merged <- mergeModelArrays(
list(ma_fd, ma2),
list(phen1, phen2),
merge_on = "subject_id"
)

lm_init <- suppressWarnings(analyseOneElement.lm(
1L,
FD ~ FD2 + FD3 + age,
merged$data,
merged$phenotypes,
scalar = "FD",
var.terms = c("estimate", "statistic", "p.value"),
var.model = c("adj.r.squared", "p.value"),
num.subj.lthr = 1,
num.stat.output = 12,
flag_initiate = TRUE,
on_error = "stop"
))
expect_true("FD2.estimate" %in% lm_init$column_names)
expect_true("FD3.estimate" %in% lm_init$column_names)

lm_fit <- suppressWarnings(ModelArray.lm(
FD ~ FD2 + FD3 + age,
merged$data,
merged$phenotypes,
element.subset = as.integer(1:2),
verbose = FALSE,
pbar = FALSE,
n_cores = 1,
on_error = "stop"
))
expect_true(all(is.finite(lm_fit$FD2.estimate)))
expect_true(all(is.finite(lm_fit$FD3.estimate)))

expect_error(
ModelArray.lm(
FD ~ FD2 + FD3 + age,
merged$data,
merged$phenotypes,
scalar = "FD2",
element.subset = as.integer(1:2),
verbose = FALSE,
pbar = FALSE,
n_cores = 1,
on_error = "stop"
),
"formula response refers to scalar 'FD'"
)

gam_init <- analyseOneElement.gam(
1L,
FD ~ s(age) + FD2 + FD3,
merged$data,
merged$phenotypes,
scalar = "FD",
var.smoothTerms = c("edf", "statistic", "p.value"),
var.parametricTerms = c("estimate", "statistic", "p.value"),
var.model = c("adj.r.squared", "dev.expl"),
num.subj.lthr = 1,
num.stat.output = 12,
flag_initiate = TRUE,
flag_sse = FALSE,
on_error = "stop"
)
expect_true("FD2.estimate" %in% gam_init$column_names)
expect_true("FD3.estimate" %in% gam_init$column_names)

gam_fit <- ModelArray.gam(
FD ~ s(age) + FD2 + FD3,
merged$data,
merged$phenotypes,
element.subset = as.integer(1:2),
verbose = FALSE,
pbar = FALSE,
n_cores = 1,
on_error = "stop"
)
expect_true("FD2.estimate" %in% colnames(gam_fit))
expect_true("FD3.estimate" %in% colnames(gam_fit))

wrap_out <- analyseOneElement.wrap(
1L,
function(data) {
data.frame(
mean_fd = mean(data$FD, na.rm = TRUE),
cor_fd2 = cor(data$FD, data$FD2, use = "complete.obs"),
cor_fd3 = cor(data$FD, data$FD3, use = "complete.obs")
)
},
merged$data,
merged$phenotypes,
scalar = "FD",
num.subj.lthr = 1,
num.stat.output = 4,
flag_initiate = FALSE,
on_error = "stop"
)
expect_true(is.finite(unname(wrap_out[[3]])))
expect_true(is.finite(unname(wrap_out[[4]])))
})
Loading