Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a
been designed to only share non disclosive summary statistics, with built in automated output
checking based on statistical disclosure control. With data sites setting the threshold values for
the automated output checks. For more details, see 'citation("dsBase")'.
Version: 6.3.5
Version: 6.3.6
Authors@R: c(person(given = "Paul",
family = "Burton",
role = c("aut"),
Expand Down Expand Up @@ -65,6 +65,9 @@ Imports:
stringr,
lme4,
dplyr,
tibble,
purrr,
tidyselect,
reshape2,
polycor (>= 0.8),
splines,
Expand Down
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@ export(dmtC2SDS)
export(elsplineDS)
export(extractQuantilesDS1)
export(extractQuantilesDS2)
export(fixClassDS)
export(fixColsDS)
export(fixLevelsDS)
export(gamlssDS)
export(getAllLevelsDS)
export(getClassAllColsDS)
export(getWGSRDS)
export(glmDS1)
export(glmDS2)
Expand Down Expand Up @@ -139,5 +144,15 @@ import(dplyr)
import(gamlss)
import(gamlss.dist)
import(mice)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(gamlss.dist,pST3)
importFrom(gamlss.dist,qST3)
importFrom(purrr,imap)
importFrom(purrr,map)
importFrom(purrr,set_names)
importFrom(tibble,as_tibble)
importFrom(tidyselect,all_of)
importFrom(tidyselect,peek_vars)
133 changes: 133 additions & 0 deletions R/standardiseDfDS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' Get the Class of All Columns in a Data Frame
#' @param df.name A string representing the name of the data frame.
#' @return A tibble with the class of each column in the data frame.
#' @importFrom dplyr %>%
#' @importFrom tibble as_tibble
#' @importFrom purrr map
#' @export
getClassAllColsDS <- function(df.name){
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df.name <- eval(parse(text = df.name), envir = parent.frame())
all_classes <- map(df.name, class) %>% as_tibble()
return(all_classes)
}

#' Change Class of Target Variables in a Data Frame
#' @param df.name A string representing the name of the data frame.
#' @param target_vars A character vector specifying the columns to be modified.
#' @param target_class A character vector specifying the new classes for each column (1 = factor,
#' 2 = integer, 3 = numeric, 4 = character, 5 = logical).
#' @return A modified data frame with the specified columns converted to the target classes.
#' @importFrom dplyr mutate across
#' @importFrom tidyselect all_of
#' @export
fixClassDS <- function(df.name, target_vars, target_class) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df <- eval(parse(text = df.name), envir = parent.frame())
df_transformed <- df %>%
mutate(
across(all_of(target_vars),
~ .convertClass(.x, target_class[which(target_vars == cur_column())])))
return(df_transformed)
}

#' Convert a Vector to a Specified Class
#' @param x The vector to be converted.
#' @param class_name A string indicating the target class (1 = factor, 2 = integer, 3 = numeric,
#' 4 = character, 5 = logical).
#' @return The converted vector.
#' @noRd
.convertClass <- function(target_var, target_class_code) {
switch(target_class_code,
"1" = as.factor(target_var),
"2" = as.integer(target_var),
"3" = as.numeric(target_var),
"4" = as.character(target_var),
"5" = as.logical(target_var)
)
}

#' Add Missing Columns with NA Values
#' @param .data A string representing the name of the data frame.
#' @param cols A character vector specifying the columns to be added if missing.
#' @return A modified data frame with missing columns added and filled with NA.
#' @importFrom dplyr mutate select
#' @importFrom tidyselect peek_vars
#' @importFrom purrr set_names
#' @export
fixColsDS <- function(.data, cols) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

.data <- eval(parse(text = .data), envir = parent.frame())
missing <- setdiff(cols, colnames(.data))
out <- .data %>%
mutate(!!!set_names(rep(list(NA), length(missing)), missing)) %>%
select(sort(peek_vars()))
return(out)
}

#' Retrieve Factor Levels for Specific Columns
#' @param df.name A string representing the name of the data frame.
#' @param factor_vars A character vector specifying the factor columns.
#' @return A list of factor levels for the specified columns.
#' @importFrom tidyselect all_of
#' @importFrom purrr map imap
#' @export
getAllLevelsDS <- function(df.name, factor_vars) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df <- eval(parse(text = df.name), envir = parent.frame())
factor_vars_split <- strsplit(factor_vars, ",\\s*")[[1]]
levels <- purrr::map(df[factor_vars_split], base::levels)

disclosure_check <- imap(levels, function(lvls, var) {
.checkLevelsDisclosure(df = df, var = var, levels = lvls)
})

failed_vars <- names(disclosure_check)[unlist(disclosure_check)]

if(length(failed_vars) > 0) {
stop("Based on the value of nfilter.levels.density, these factor variables", " {", failed_vars, "} ", "have too many levels compared to the length of the variable. Please reduce the numnber of levels or change the variable type and try again")
} else {
return(levels)
}
}

#' Check variable levels against disclosure thresholds
#'
#' Internal helper function to verify whether the number of levels in a variable
#' exceeds the allowed density threshold defined by `dsBase::listDisclosureSettingsDS()`.
#'
#' @param df A data frame containing the variable.
#' @param var Character string. Name of the variable to check.
#' @param levels Character vector. Levels of the variable.
#'
#' @return Logical. `TRUE` if the check fails (i.e., disclosure threshold is violated),
#' otherwise `FALSE`.
#'
#' @keywords internal
#' @noRd
.checkLevelsDisclosure <- function(df, var, levels) {
thr <- dsBase::listDisclosureSettingsDS()
nfilter.levels.density <- as.numeric(thr$nfilter.levels.density)
n_levels <- length(levels)
length_var <- length(df[[var]])
fail <- (length_var * nfilter.levels.density) < n_levels
return(fail)
}

#' Set Factor Levels for Specific Columns in a Data Frame
#' @param df.name A string representing the name of the data frame to modify.
#' @param vars A character vector specifying the columns to be modified.
#' @param levels A named list where each element contains the levels for the corresponding factor variable.
#' @return A modified data frame with the specified columns converted to factors with the provided levels.
#' @export
fixLevelsDS <- function(df.name, vars, levels) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df.name <- eval(parse(text = df.name), envir = parent.frame())
out <- df.name %>%
mutate(across(all_of(vars), ~factor(., levels = levels[[dplyr::cur_column()]])))
}
9 changes: 7 additions & 2 deletions inst/DATASHIELD
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ AggregateMethods:
is.null=base::is.null,
is.numeric=base::is.numeric,
NROW=base::NROW,
t.test=stats::t.test
t.test=stats::t.test,
getClassAllColsDS,
getAllLevelsDS
AssignMethods:
absDS,
asCharacterDS,
Expand Down Expand Up @@ -161,7 +163,10 @@ AssignMethods:
acos=base::acos,
atan=base::atan,
sum=base::sum,
unlist=base::unlist
unlist=base::unlist,
fixClassDS,
fixColsDS,
fixLevelsDS
Options:
datashield.privacyLevel=5,
default.datashield.privacyControlLevel="banana",
Expand Down
22 changes: 22 additions & 0 deletions man/fixClassDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/fixColsDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/fixLevelsDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/getAllLevelsDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/getClassAllColsDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading