From 4cdabb237fe9b6c14454f4d0076d542292a48f72 Mon Sep 17 00:00:00 2001 From: Matt Pratola Date: Sat, 27 Jun 2026 15:30:10 -0400 Subject: [PATCH] Removed the misc/ and R/ folders (no dependency on items there). Moved the example files in Examples/ into a specific example subdirectory, in this case Examples/BART_BMM_Technometrics_2024/. Future examples should have their own subdirectories within Examples/ to keep things clean, such as Examples/New_Paper_Example/. Moved eft_models.py from Python/ to Examples/BART_BMM_Technometrics_2024/ as it is referenced from the Jupyter notebook in that example but nowhere else; subsequently removed the Python/ directory. --- .../BART-Model-Mixing-Priors.md | 0 .../figure-gfm/Plot Expansion and Delta-1.png | Bin .../figure-gfm/Plot Expansion-1.png | Bin .../figure-gfm/inform pointwise prior-1.png | Bin .../figure-gfm/inform tnode prior-1.png | Bin .../noninform pointwise prior-1.png | Bin .../figure-gfm/tnode prior-1.png | Bin .../figure-gfm/tree1-1.png | Bin .../BART_BMM_Technometrics.ipynb | 0 .../Data/2d_x_test.txt | 0 .../Data/2d_x_train.txt | 0 .../Data/2d_y_train.txt | 0 .../Data/honda_x_test.txt | 0 .../Data/honda_x_train.txt | 0 .../Data/honda_y_train.txt | 0 .../EFT-Model-Mixing-Using-BART.md | 0 .../figure-gfm/Ex1 Predictions-1.png | Bin .../figure-gfm/Ex1 Weights-1.png | Bin .../figure-gfm/Ex2 Predictions-1.png | Bin .../figure-gfm/Ex2 Weights-1.png | Bin .../figure-gfm/Ex3 Predictions-1.png | Bin .../figure-gfm/Ex3 Weights-1.png | Bin .../figure-gfm/Plot Expansion-1.png | Bin .../eft_models.py | 0 R/eft_examples.R | 330 --- R/eft_mixing_helper_functions.R | 499 ---- R/honda_eft.R | 212 -- R/mixing_priors.R | 313 --- R/polynomials.R | 138 - misc/eft_fit.py | 379 --- misc/example.R | 114 - misc/example_model_mixing.R | 495 ---- misc/openbt.R | 2286 ----------------- misc/openbt.py | 903 ------- misc/openbt_test.R | 0 misc/openbt_test.py | 133 - 36 files changed, 5802 deletions(-) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors.md (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion and Delta-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/inform pointwise prior-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/inform tnode prior-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/noninform pointwise prior-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/tnode prior-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART-Model-Mixing-Priors_files/figure-gfm/tree1-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/BART_BMM_Technometrics.ipynb (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/Data/2d_x_test.txt (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/Data/2d_x_train.txt (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/Data/2d_y_train.txt (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/Data/honda_x_test.txt (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/Data/honda_x_train.txt (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/Data/honda_y_train.txt (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART.md (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Predictions-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Weights-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Predictions-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Weights-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Predictions-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Weights-1.png (100%) rename Examples/{ => BART_BMM_Technometrics_2024}/EFT-Model-Mixing-Using-BART_files/figure-gfm/Plot Expansion-1.png (100%) rename {Python => Examples/BART_BMM_Technometrics_2024}/eft_models.py (100%) delete mode 100644 R/eft_examples.R delete mode 100644 R/eft_mixing_helper_functions.R delete mode 100644 R/honda_eft.R delete mode 100644 R/mixing_priors.R delete mode 100644 R/polynomials.R delete mode 100644 misc/eft_fit.py delete mode 100644 misc/example.R delete mode 100644 misc/example_model_mixing.R delete mode 100644 misc/openbt.R delete mode 100644 misc/openbt.py delete mode 100644 misc/openbt_test.R delete mode 100644 misc/openbt_test.py diff --git a/Examples/BART-Model-Mixing-Priors.md b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors.md similarity index 100% rename from Examples/BART-Model-Mixing-Priors.md rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors.md diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion and Delta-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion and Delta-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion and Delta-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion and Delta-1.png diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/Plot Expansion-1.png diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/inform pointwise prior-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/inform pointwise prior-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/inform pointwise prior-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/inform pointwise prior-1.png diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/inform tnode prior-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/inform tnode prior-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/inform tnode prior-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/inform tnode prior-1.png diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/noninform pointwise prior-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/noninform pointwise prior-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/noninform pointwise prior-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/noninform pointwise prior-1.png diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/tnode prior-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/tnode prior-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/tnode prior-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/tnode prior-1.png diff --git a/Examples/BART-Model-Mixing-Priors_files/figure-gfm/tree1-1.png b/Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/tree1-1.png similarity index 100% rename from Examples/BART-Model-Mixing-Priors_files/figure-gfm/tree1-1.png rename to Examples/BART_BMM_Technometrics_2024/BART-Model-Mixing-Priors_files/figure-gfm/tree1-1.png diff --git a/Examples/BART_BMM_Technometrics.ipynb b/Examples/BART_BMM_Technometrics_2024/BART_BMM_Technometrics.ipynb similarity index 100% rename from Examples/BART_BMM_Technometrics.ipynb rename to Examples/BART_BMM_Technometrics_2024/BART_BMM_Technometrics.ipynb diff --git a/Examples/Data/2d_x_test.txt b/Examples/BART_BMM_Technometrics_2024/Data/2d_x_test.txt similarity index 100% rename from Examples/Data/2d_x_test.txt rename to Examples/BART_BMM_Technometrics_2024/Data/2d_x_test.txt diff --git a/Examples/Data/2d_x_train.txt b/Examples/BART_BMM_Technometrics_2024/Data/2d_x_train.txt similarity index 100% rename from Examples/Data/2d_x_train.txt rename to Examples/BART_BMM_Technometrics_2024/Data/2d_x_train.txt diff --git a/Examples/Data/2d_y_train.txt b/Examples/BART_BMM_Technometrics_2024/Data/2d_y_train.txt similarity index 100% rename from Examples/Data/2d_y_train.txt rename to Examples/BART_BMM_Technometrics_2024/Data/2d_y_train.txt diff --git a/Examples/Data/honda_x_test.txt b/Examples/BART_BMM_Technometrics_2024/Data/honda_x_test.txt similarity index 100% rename from Examples/Data/honda_x_test.txt rename to Examples/BART_BMM_Technometrics_2024/Data/honda_x_test.txt diff --git a/Examples/Data/honda_x_train.txt b/Examples/BART_BMM_Technometrics_2024/Data/honda_x_train.txt similarity index 100% rename from Examples/Data/honda_x_train.txt rename to Examples/BART_BMM_Technometrics_2024/Data/honda_x_train.txt diff --git a/Examples/Data/honda_y_train.txt b/Examples/BART_BMM_Technometrics_2024/Data/honda_y_train.txt similarity index 100% rename from Examples/Data/honda_y_train.txt rename to Examples/BART_BMM_Technometrics_2024/Data/honda_y_train.txt diff --git a/Examples/EFT-Model-Mixing-Using-BART.md b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART.md similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART.md rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART.md diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Predictions-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Predictions-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Predictions-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Predictions-1.png diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Weights-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Weights-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Weights-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex1 Weights-1.png diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Predictions-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Predictions-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Predictions-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Predictions-1.png diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Weights-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Weights-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Weights-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex2 Weights-1.png diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Predictions-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Predictions-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Predictions-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Predictions-1.png diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Weights-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Weights-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Weights-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Ex3 Weights-1.png diff --git a/Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Plot Expansion-1.png b/Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Plot Expansion-1.png similarity index 100% rename from Examples/EFT-Model-Mixing-Using-BART_files/figure-gfm/Plot Expansion-1.png rename to Examples/BART_BMM_Technometrics_2024/EFT-Model-Mixing-Using-BART_files/figure-gfm/Plot Expansion-1.png diff --git a/Python/eft_models.py b/Examples/BART_BMM_Technometrics_2024/eft_models.py similarity index 100% rename from Python/eft_models.py rename to Examples/BART_BMM_Technometrics_2024/eft_models.py diff --git a/R/eft_examples.R b/R/eft_examples.R deleted file mode 100644 index 8d36344..0000000 --- a/R/eft_examples.R +++ /dev/null @@ -1,330 +0,0 @@ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Name: eft_examples.R -# Author: John Yannotty (yannotty.1@buckeyemail.osu.edu) -# Desc: R code to perform model mixing using BART on three EFT examples. -# The tuning parameters that are currently set were used to produce the plots, -# which appear in the file "EFT-Model-Mixing-Using-BART.html" -# Version: 1.0 -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -### SETUP TUTORIAL LIKE THINGS -source("https://github.com/jcyannotty/OpenBT/blob/main/src/openbt.R?raw=TRUE") -source("https://github.com/jcyannotty/OpenBT/blob/main/R/honda_eft.R?raw=TRUE") -source("https://github.com/jcyannotty/OpenBT/blob/main/R/eft_mixing_helper_functions.R?raw=TRUE") - -#---------------------------------------------------------- -#Get training data -ex1_data = get_data(20, 300, 0.005, 2, 4, 0.03, 0.5, 321) -ex2_data = get_data(20, 300, 0.005, 4, 4, 0.03, 0.5, 321) -ex3_data = get_data(20, 300, 0.005, c(2,4,6), NULL, 0.03, 0.5, 321) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Plot Expansion -labs1 = c(expression(paste(f[s]^"(2)", '(x)')), expression(paste(f[l]^"(4)", '(x)')), expression(paste(f["\u2020"],'(x)'))) -e1 = plot_exp_gg2(ex1_data, in_labs = labs1, colors = color_list, line_type_list = lty_list, grid_on = TRUE) -e1 = e1 + theme(legend.position = c(0.22,0.16),legend.title = element_blank()) + labs(title = '(a)') -e1 = e1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), plot.title = element_text(size = 15),legend.text.align = 0, legend.text = element_text(size = 13)) - -labs2 = c(expression(paste(f[s]^"(4)", '(x)')), expression(paste(f[l]^"(4)", '(x)')), expression(paste(f["\u2020"],'(x)'))) -e2 = plot_exp_gg2(ex2_data, in_labs = labs2, colors = color_list, line_type_list = lty_list, grid_on = TRUE) -e2 = e2 + theme(legend.position = c(0.22,0.16),legend.title = element_blank()) + labs(title = '(b)') -e2 = e2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), plot.title = element_text(size = 15),legend.text.align = 0, legend.text = element_text(size = 13)) - -labs3 = c(expression(paste(f[s]^"(2)", '(x)')), expression(paste(f[s]^"(4)", '(x)')),expression(paste(f[s]^"(6)", '(x)')), expression(paste(f["\u2020"],'(x)'))) -e3 = plot_exp_gg2(ex3_data, in_labs = labs3, colors = color_list, line_type_list = lty_list, grid_on = TRUE) -e3 = e3 + theme(legend.position = c(0.22,0.19),legend.title = element_blank()) + labs(title = '(c)') -e3 = e3 + theme(axis.text=element_text(size=12),axis.title=element_text(size=13), plot.title = element_text(size = 15),legend.text.align = 0, legend.text = element_text(size = 12.5)) - -e1 = e1 + theme(legend.key.width = unit(0.73,"cm")) + guides(linetype = guide_legend(override.aes = list(size = 0.7))) -e2 = e2 + theme(legend.key.width = unit(0.73,"cm")) + guides(linetype = guide_legend(override.aes = list(size = 0.7))) -e3 = e3 + theme(legend.key.width = unit(0.73,"cm")) + guides(linetype = guide_legend(override.aes = list(size = 0.7))) - -grid.arrange(arrangeGrob(e1,e2,e3,nrow = 1), nrow=1, heights = c(8), - top=textGrob("Prototype Effective Field Theories", gp=gpar(fontsize=20,font=8))) - -#----------------------------------------------------- -# Ex1 Non-Informative Prior -#----------------------------------------------------- -# Attach the data set -attach(ex1_data) - -# Get the initial estimate of sigma^2 -sig2_hat = max(apply(apply(f_train, 2, function(x) (x-y_train)^2),2,min)) - -# Perform model mixing with the Non-informative prior. -# Note, f.sd.train is not specified, hence the non-informative prior is used by default. -fit1=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),model="mixbart", - ntree = 8,k = 2.5, overallsd = sqrt(sig2_hat), overallnu = 5,power = 2.0, base = 0.95, - ntreeh=1,numcut=300,tc=4,minnumbot = 4, - ndpost = 30000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 1000, - summarystats = FALSE,modelname="eft_mixing") - -# Get predictions at the test points specified by x_test with mean predictions stored in f_test. -fitp1=predict.openbt(fit1,x.test = x_test, f.test = f_test, tc=4, q.lower = 0.025, q.upper = 0.975) - -# Get the weight functions from the fit model -fitw1=mixingwts.openbt(fit1, x.test = x_test, numwts = 2, tc = 4, q.lower = 0.025, q.upper = 0.975) - -#----------------------------------------------------- -# Ex1 Informative Prior -#----------------------------------------------------- -# Perform model mixing with the Non-informative prior. -# Note, f.sd.train is not specified, hence the non-informative prior is used by default. -fit2=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),f.sd.train = f_train_dsd,model="mixbart", - ntree = 10,k = 5, overallsd = sqrt(sig2_hat), overallnu = 8, power = 2.0, base = 0.95, - ntreeh=1,numcut=300,tc=4,minnumbot = 3, - ndpost = 30000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 500, - modelname="eft_mixing" - ) - -# Get predictions at the test points specified by x_test with mean predictions stored in f_test. -fitp2=predict.openbt(fit2,x.test = x_test, f.test = f_test, tc=4, q.lower = 0.025, q.upper = 0.975) - -# Get the weight functions from the fit model -fitw2 = mixingwts.openbt(fit2, x.test = x_test, numwts = 2, tc = 4, q.lower = 0.025, q.upper = 0.975) - -detach(ex1_data) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Ex 1 Plot the predictions -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Set the labels for hte legend -g_labs = c(expression(paste(hat(f)[1], '(x)')), expression(paste(hat(f)[2], '(x)')), - expression(paste(f["\u2020"],'(x)')),"Post. Mean") -# Plot the predictions obtained with the non-informative prior -p1 = plot_fit_gg2(ex1_data, fitp1, in_labs = g_labs, colors = color_list, line_type_list = lty_list, - y_lim = c(1.85,2.75), title = "Non-Informative Prior", grid_on = TRUE) -# Plot the predictions obtained with the informative prior -p2 = plot_fit_gg2(ex1_data, fitp2, in_labs = g_labs, colors = color_list, line_type_list = lty_list, - y_lim = c(1.85,2.75), title = "Informative Prior", grid_on = TRUE) -# Resize text elements -p1 = p1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) -p2 = p2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) - -# Create the figure -legend1 = g_legend(p1) -grid.arrange(arrangeGrob(p1 + theme(legend.position = "none"), - p2 + theme(legend.position = "none"), - nrow = 1), nrow=2, heights = c(10,1), legend = legend1, - top = textGrob("Posterior Mean Predictions", gp = gpar(fontsize = 16))) - -#------------------------------------------------ -# Ex 1 Plot the weight functions -#------------------------------------------------ -# Create the plot for weights under the non-informative prior -w1 = plot_wts_gg2(fitw1, ex1_data$x_test, y_lim = c(-0.05, 1.05),title = 'Non-Informative Prior', colors = color_list, - line_type_list = lty_list, gray_scale = FALSE) -# Create the plot for weights under the informative prior -w2 = plot_wts_gg2(fitw2, ex1_data$x_test, y_lim = c(-0.05, 1.05), title = 'Informative Prior',colors = color_list, - line_type_list = lty_list, gray_scale = FALSE) -# Resize text elements -w1 = w1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) -w2 = w2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) - -# Create the figure with both plots -legend_w1 = g_legend(w1) -grid.arrange(arrangeGrob(w1 + theme(legend.position = "none"), - w2 + theme(legend.position = "none"), - nrow = 1),nrow=2, heights = c(10,1), legend = legend_w1, - top = textGrob("Posterior Weight Functions", gp = gpar(fontsize = 16))) - - - -#----------------------------------------------------- -# Ex2 Non-Informative Prior -#----------------------------------------------------- -# Attach example 2 data -attach(ex2_data) - -# Get the initial estimate of sigma^2 -sig2_hat = max(apply(apply(f_train, 2, function(x) (x-y_train)^2),2,min)) - -# Perform model mixing with the Non-informative prior. -# Note, f.sd.train is not specified, hence the non-informative prior is used by default. -fit1=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),model="mixbart", - ntree = 8,k = 3.0, overallsd = sqrt(sig2_hat), overallnu = 5,power = 2.0, base = 0.95, - ntreeh=1,numcut=300,tc=4,minnumbot = 4, - ndpost = 30000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 1000, - summarystats = FALSE,modelname="eft_mixing") - -# Get predictions at the test points specified by x_test with mean predictions stored in f_test. -fitp1=predict.openbt(fit1,x.test = x_test, f.test = f_test, tc=4, q.lower = 0.025, q.upper = 0.975) - -# Get the weight functions from the fit model -fitw1=mixingwts.openbt(fit1, x.test = x_test, numwts = 2, tc = 4, q.lower = 0.025, q.upper = 0.975) - - -#----------------------------------------------------- -# Ex2 - Informative Prior -#----------------------------------------------------- -# Get the initial estimate of sigma^2 -sig2_hat = max(apply(apply(f_train, 2, function(x) (x-y_train)^2),2,min)) - -# Perform model mixing with the Non-informative prior. -# Note, f.sd.train is not specified, hence the non-informative prior is used by default. -fit2=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),f.sd.train = f_train_dsd,model="mixbart", - ntree = 10,k = 5, overallsd = sqrt(sig2_hat), overallnu = 5, power = 2.0, base = 0.95, - ntreeh=1,numcut=300,tc=4,minnumbot = 3, - ndpost = 30000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 500, - modelname="eft_mixing" -) - -# Get predictions at the test points specified by x_test with mean predictions stored in f_test. -fitp2=predict.openbt(fit2,x.test = x_test, f.test = f_test, tc=4, q.lower = 0.025, q.upper = 0.975) - -# Get the weight functions from the fit model -fitw2 = mixingwts.openbt(fit2, x.test = x_test, numwts = 2, tc = 4, q.lower = 0.025, q.upper = 0.975) - -# Attach example 2 data -detach(ex2_data) - - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Ex2 - Plot the predictions -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Set the labels in the legend -g_labs = c(expression(paste(f[1], '(x)')), expression(paste(f[2], '(x)')), expression(paste(f["\u2020"],'(x)')), - "Post. Mean") -# Plot the predictions from the non-informative prior -p1 = plot_fit_gg2(ex2_data, fitp1, in_labs = g_labs, colors = color_list, line_type_list = lty_list, - y_lim = c(1.9,2.8), grid_on = TRUE, title = "Non-Informative Prior") - -# Plot the predictions from the informative prior -p2 = plot_fit_gg2(ex2_data, fitp2, in_labs = g_labs, colors = color_list, line_type_list = lty_list, - y_lim = c(1.9,2.8), title = "Informative Prior", grid_on = TRUE) - -# Resize the plot text -p1 = p1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) -p2 = p2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) - -# Create the figure with both plots -legend1 = g_legend(p1) -grid.arrange(arrangeGrob(p1 + theme(legend.position = "none"), - p2 + theme(legend.position = "none"), - nrow = 1), nrow=2, heights = c(10,1), legend = legend1, - top = textGrob("Posterior Mean Predictions", gp = gpar(fontsize = 16))) - -#------------------------------------------------ -# Ex2 - Plot the weight functions -#------------------------------------------------ -# Create the plot for weights under the non-informative prior -w1 = plot_wts_gg2(fitw1, ex1_data$x_test, y_lim = c(-0.05, 1.05),title = 'Non-Informative Prior', colors = color_list, - line_type_list = lty_list, gray_scale = FALSE) -# Create the plot for weights under the informative prior -w2 = plot_wts_gg2(fitw2, ex1_data$x_test, y_lim = c(-0.05, 1.05), title = 'Informative Prior',colors = color_list, - line_type_list = lty_list, gray_scale = FALSE) -# Resize text elements -w1 = w1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) -w2 = w2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) - -# Create the figure with both plots -legend_w1 = g_legend(w1) -grid.arrange(arrangeGrob(w1 + theme(legend.position = "none"), - w2 + theme(legend.position = "none"), - nrow = 1),nrow=2, heights = c(10,1), legend = legend_w1, - top = textGrob("Posterior Weight Functions", gp = gpar(fontsize = 16))) - - -#----------------------------------------------------- -# Ex3 - Non-Informative Prior -#----------------------------------------------------- -# Attach example 2 data -attach(ex3_data) - -# Get the initial estimate of sigma^2 -sig2_hat = max(apply(apply(f_train, 2, function(x) (x-y_train)^2),2,min)) - -# Perform model mixing with the Non-informative prior. -# Note, f.sd.train is not specified, hence the non-informative prior is used by default. -fit1=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),model="mixbart", - ntree = 10,k = 5.5, overallsd = sqrt(sig2_hat), overallnu = 5,power = 2.0, base = 0.95, - ntreeh=1,numcut=300,tc=4,minnumbot = 4, - ndpost = 30000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 1000, - summarystats = FALSE,modelname="eft_mixing") - -# Get predictions at the test points specified by x_test with mean predictions stored in f_test. -fitp1=predict.openbt(fit1,x.test = x_test, f.test = f_test, tc=4, q.lower = 0.025, q.upper = 0.975) - -# Get the weight functions from the fit model -fitw1=mixingwts.openbt(fit1, x.test = x_test, numwts = 3, tc = 4, q.lower = 0.025, q.upper = 0.975) - - -#----------------------------------------------------- -# Informative Prior -#----------------------------------------------------- -# Get the initial estimate of sigma^2 -sig2_hat = max(apply(apply(f_train, 2, function(x) (x-y_train)^2),2,min)) - -# Perform model mixing with the Non-informative prior. -# Note, f.sd.train is not specified, hence the non-informative prior is used by default. -fit2=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),f.sd.train = f_train_dsd,model="mixbart", - ntree = 8, k = 6.5, overallsd = sqrt(sig2_hat), overallnu = 5, power = 2.0, base = 0.95, - ntreeh=1,numcut=300,tc=4,minnumbot = 3, - ndpost = 30000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 500, - modelname="eft_mixing" -) - -# Get predictions at the test points specified by x_test with mean predictions stored in f_test. -fitp2=predict.openbt(fit2,x.test = x_test, f.test = f_test, tc=4, q.lower = 0.025, q.upper = 0.975) - -# Get the weight functions from the fit model -fitw2 = mixingwts.openbt(fit2, x.test = x_test, numwts = 3, tc = 4, q.lower = 0.025, q.upper = 0.975) - -# Attach example 2 data -detach(ex3_data) - -#------------------------------------------------ -# Ex3 - Plot the predictions -#------------------------------------------------ -# Set the labels in the legend -g_labs = c(expression(paste(f[1], '(x)')), expression(paste(f[2], '(x)')), - expression(paste(f[3], '(x)')), expression(paste(f["\u2020"],'(x)')), - "Post. Mean") -# Generate the first plot -p1 = plot_fit_gg2(ex3_data, fitp1, in_labs = g_labs, colors = color_list, line_type_list = lty_list, - y_lim = c(1.8,2.7), title = "Non-Informative Prior", grid_on = TRUE) -# Generate the first plot -p2 = plot_fit_gg2(ex3_data, fitp2, in_labs = g_labs, colors = color_list, line_type_list = lty_list, - y_lim = c(1.8,2.7), title = "Informative Prior", grid_on = TRUE) -# Resize plot text -p1 = p1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) -p2 = p2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) - -# Create the figure with both plots -legend1 = g_legend(p1) -grid.arrange(arrangeGrob(p1 + theme(legend.position = "none"), - p2 + theme(legend.position = "none"), - nrow = 1), nrow=2, heights = c(10,1), legend = legend1, - top = textGrob("Posterior Mean Predictions", gp = gpar(fontsize = 16))) - -#------------------------------------------------ -# Ex3 - Plot the weight functions -#------------------------------------------------ -# Plot the weights from the non-informative prior -w1 = plot_wts_gg2(fitw1, ex3_data$x_test, y_lim = c(-0.05, 1.05), title = 'Non-Informative Prior', - colors = color_list, line_type_list = lty_list, gray_scale = FALSE) -# Plot the weights from the informative prior -w2 = plot_wts_gg2(fitw2, ex3_data$x_test, y_lim = c(-0.05, 1.05), title = 'Informative Prior', - colors = color_list, line_type_list = lty_list, gray_scale = FALSE) -# Resize text elements -w1 = w1+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) -w2 = w2+theme(axis.text=element_text(size=12),axis.title=element_text(size=13), - plot.title = element_text(size = 15)) - -# Create the figure with both plots -legend_w1 = g_legend(w1) -grid.arrange(arrangeGrob(w1 + theme(legend.position = "none"), - w2 + theme(legend.position = "none"), - nrow = 1),nrow=2, heights = c(10,1), legend = legend_w1, - top = textGrob("Posterior Weight Functions", gp = gpar(fontsize = 16))) diff --git a/R/eft_mixing_helper_functions.R b/R/eft_mixing_helper_functions.R deleted file mode 100644 index c84857c..0000000 --- a/R/eft_mixing_helper_functions.R +++ /dev/null @@ -1,499 +0,0 @@ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# BART Model Mixing Helper Functions -# Author: John Yannotty (yannotty.1@buckeyemail.osu.edu) -# Desc: -# Version: 1.0 -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Global variables -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -library(ggplot2) -library(dplyr) -library(grid) -library(gridExtra) -library(reshape2) -library(DescTools) - -color_list = list(exp_cols = c("red","blue","green4"), mean_cols = c("purple","darkorchid2"), true_cols = "black", - wts_cols = c("red","blue","green4")) -gray_scale_list = list(exp_cols = c("gray60","gray60","gray60"), mean_cols = c("gray30","gray50"), true_cols = "gray60", - wts_cols = c("gray30","gray50","gray70") ) - -lty_list = list(exp_lty = c("dashed","dashed","dashed"), mean_lty = "solid", true_lty = "solid", wts_lty = c('solid', 'dashed', 'dotted')) -gs_lty_list = list(exp_lty = c("dashed","dotted","dotdash"), mean_lty = "solid", true_lty = "solid", wts_lty = c('solid', 'dashed', 'dotted')) - - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Plotting -- GGplot -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Plot the predictions -plot_fit_gg2 = function(data,fitp, title = 'Non-Informative',y_lim = c(1,3),colors = color_list, - line_type_list = lty_list, in_labs = NULL, grid_on = FALSE){ - df = data.frame(x_test = data$x_test, fg_test = data$fg_test, - data.frame(mmean = fitp$mmean,m.lower=fitp$m.lower,m.upper=fitp$m.upper), - data$f_test) - n = nrow(df) - - K = ncol(data$f_test) - exp_cols = (ncol(df) - K+1):ncol(df) - - colnames(df)[exp_cols] = paste0('f',1:K) - exp_df_melt = melt(df, id.vars = "x_test", measure.vars = colnames(df)[exp_cols]) - df_melt = melt(df, id.vars = "x_test", measure.vars = c(colnames(df)[exp_cols],'fg_test','mmean')) - #df_melt$line_group = c(rep('dashed',K*n),rep('solid', 2*n)) - #df_melt$size_group = c(rep('A',(K+1)*n),rep('B', n)) - - line_group = c(line_type_list$exp_lty[1:K],line_type_list$mean_lty,line_type_list$true_lty) - size_group = c(rep(1.1,(K+1)),1.2) - - if(is.null(in_labs)){in_labs = c(colnames(df)[exp_cols], "mean", 'true')} - - p = df_melt %>% ggplot() + - geom_line(aes(x_test, value, color = variable, linetype = variable, size = variable)) + - scale_color_manual(values = c(colors$exp_cols[1:K], colors$true_cols, colors$mean_cols[1]), - name = "", labels = in_labs) + - scale_linetype_manual(values = line_group, name = "", labels = in_labs) + - scale_size_manual(values = size_group) + - geom_ribbon(data = df, aes(x = x_test, - ymin=m.lower, - ymax=m.upper), - fill=colors$mean_cols[2], alpha=0.2) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = "bottom", legend.text = element_text(size = 10)) + - labs(x = "X", y = "F(x)", title = title) + - geom_point(data = data.frame(data$x_train,data$y_train),aes(data$x_train,data$y_train), size= 1.0) + - coord_cartesian(ylim = y_lim) + - guides(size = 'none') - - if(!grid_on){ - p = p + theme(panel.grid = element_blank()) - } - return(p) -} - - -# Plot the weights -plot_wts_gg2 = function(fitw,x_test, title = 'Non-Informative',y_lim = c(0,1),colors = color_list, - line_type_list = lty_list, gray_scale = FALSE, w_labs = NULL){ - K = ncol(fitw$wmean) - colnames(fitw$wmean) = paste0("w",1:K) - colnames(fitw$w.lower) = paste0("w",1:K) - colnames(fitw$w.upper) = paste0("w",1:K) - fitw$wmean = data.frame(x_test,fitw$wmean) - wt_mean = reshape2::melt(fitw$wmean, id.vars = 'x_test', measure.vars = paste0("w",1:K)) - wt_lb = reshape2::melt(fitw$w.lower) - wt_ub = reshape2::melt(fitw$w.upper) - - # Select last column in ub and lb - wt_lb = wt_lb[,ncol(wt_lb)] - wt_ub = wt_ub[,ncol(wt_ub)] - - wts_df = cbind(wt_mean, wt_lb, wt_ub) - colnames(wts_df)[1:3] = c('x_test','wt','wt_mean') - - if(is.null(w_labs)){ - w_labs = c() - for(j in 1:K){ - w_labs = c(w_labs,bquote(w[.(j)] * "(x)")) - } - } - - if(gray_scale){ - p = wts_df %>% ggplot() + - geom_line(aes(x_test, wt_mean, color = wt, linetype = wt), size = 1.1) + - geom_ribbon(aes(x = x_test, ymin=wt_lb, ymax=wt_ub, fill = wt), alpha=0.2) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12), - panel.grid = element_blank()) + - labs(x = "X", y = "W(x)", title = title,color = "Weights" ,fill = "Weights", linetype = 'Weights') + - scale_color_manual(values = colors$exp_cols[1:K], labels = w_labs) + - scale_linetype_manual(values = line_type_list$wts_lty[1:K], labels = w_labs) + - scale_fill_manual(values = colors$exp_cols[1:K], labels = w_labs) + - coord_cartesian(ylim = y_lim) + - guides(fill = 'none') - }else{ - p = wts_df %>% ggplot() + - geom_line(aes(x_test, wt_mean, color = wt), size = 1.1) + - geom_ribbon(aes(x = x_test, ymin=wt_lb, ymax=wt_ub, fill = wt), alpha=0.2) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12)) + - labs(x = "X", y = "W(x)", title = title,color = "Weights" ,fill = "Weights") + - scale_color_manual(values = colors$exp_cols[1:K], labels = w_labs) + - scale_fill_manual(values = colors$exp_cols[1:K], labels = w_labs) + - coord_cartesian(ylim = y_lim) - } - return(p) -} - -# Plot expansion -plot_exp_gg2 = function(ex_data,colors = color_list, line_type_list = lty_list, y_lim = c(0,5),in_labs = NULL, grid_on = FALSE){ - K = ncol(ex_data$f_test) - df = data.frame(ex_data$x_test, ex_data$f_test, ex_data$fg_test) - colnames(df) = c('x_test', paste0('f',1:K),'ftrue') - if(is.null(in_labs)){in_labs = colnames(df)[-1]} - df_melt = melt(df, id.vars = 'x_test') - p = df_melt %>% ggplot() + - geom_line(aes(x_test, value, color = variable, linetype = variable), size = 1.1) + - scale_color_manual(values = c(colors$exp_cols[1:K], colors$true_cols), labels = in_labs, name = "Models") + - scale_linetype_manual(values = c(line_type_list$exp_lty[1:K], line_type_list$true_lty), labels = in_labs, name = "Models") + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12)) + - labs(x = "X", y = "F(x)", color = "Models") + - coord_cartesian(ylim = y_lim) - - if(!grid_on){ - p = p + theme(panel.grid = element_blank()) - } - return(p) -} - -# Plot EFT with discrepancy -plot_eft_gg2 = function(x,eft_mean,eft_sd,color='red', ci=0.95, y_lim = c(0,5), title = "EFT Model"){ - # Get confidence interval - alpha = (1 - ci) - z = abs(qnorm(alpha/2,0,1)) - mean_lb = eft_mean - z*eft_sd - mean_ub = eft_mean + z*eft_sd - - # Get dataframe - df = data.frame(x, eft_mean, eft_sd, mean_lb, mean_ub) - p = df %>% ggplot() + - geom_line(aes(x, eft_mean, color = color), size = 1.1) + - geom_ribbon(aes(x = x, ymin=mean_lb, ymax=mean_ub, fill = color), alpha=0.2) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12)) + - labs(x = "X", y = "F(x)", title = title,color = "Weights" ,fill = "Weights") + - scale_color_manual(values = color) + - scale_fill_manual(values = color) + - coord_cartesian(ylim = y_lim) + - guides(fill = 'none', color = 'none') - return(p) -} - -# Plot mean fit with expansions -plot_mean_gg2 = function(mean_fit, ex_data,colors = color_list, line_type_list = lty_list, y_lim = c(0,5),in_labs = NULL, grid_on = FALSE){ - K = ncol(ex_data$f_test) - df = data.frame(ex_data$x_test, ex_data$f_test, ex_data$fg_test, mean_fit) - colnames(df) = c('x_test', paste0('f',1:K),'ftrue', "mean") - if(is.null(in_labs)){in_labs = colnames(df)[-1]} - df_melt = melt(df, id.vars = 'x_test') - p = df_melt %>% ggplot() + - geom_line(aes(x_test, value, color = variable, linetype = variable), size = 1.1) + - scale_color_manual(values = c(colors$exp_cols[1:K], colors$true_cols,colors$mean_cols), labels = in_labs, name = "") + - scale_linetype_manual(values = c(line_type_list$exp_lty[1:K], line_type_list$true_lty,line_type_list$mean_lty), labels = in_labs, name = "") + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12)) + - labs(x = "X", y = "F(x)", color = "Models") + - coord_cartesian(ylim = y_lim) - - if(!grid_on){ - p = p + theme(panel.grid = element_blank()) - } - return(p) -} - -# Plot prior wts -plot_prior_wts_gg2 = function(x, betax, k = 2, m = 1, ci = 0.95,y_lim = c(0,1),title = "Prior Weight Functions",inform_prior = TRUE,colors = color_list, - line_type_list = lty_list, gray_scale = FALSE , w_labs = NULL){ - # Get the variance of the sum-of-trees under the informative prior - if(inform_prior){ - tau = 1/(2*m*k) # tau for one tree - }else{ - tau = 1/(2*sqrt(m)*k) # tau for one tree - } - tau2x = m*tau^2 # sum of variances from each tree - taux = sqrt(tau2x) # std of the sum-of-trees - - # Get upper and lower confidence bounds - z = abs(qnorm((1-ci)/2,0,1)) - wt_lb = betax - z*taux - wt_ub = betax + z*taux - - # Set column names - K = ncol(betax) - colnames(wt_ub) = paste0("w",1:K) - colnames(wt_lb) = paste0("w",1:K) - colnames(betax) = paste0("w",1:K) - - # Bind with x_test - wt_mean = data.frame(x = x, betax) - - # Reshape dataframes for plotting - wt_mean = reshape2::melt(wt_mean, id.vars = 'x', measure.vars = paste0("w",1:K)) - wt_lb = reshape2::melt(wt_lb) - wt_ub = reshape2::melt(wt_ub) - - # Select last column in ub and lb - wt_lb = wt_lb[,ncol(wt_lb)] - wt_ub = wt_ub[,ncol(wt_ub)] - - wts_df = cbind(wt_mean, wt_lb, wt_ub) - colnames(wts_df)[1:3] = c('x','wt','wt_mean') - - if(is.null(w_labs)){ - w_labs = c() - for(j in 1:K){ - w_labs = c(w_labs,bquote(w[.(j)] * "(x)")) - } - } - - if(gray_scale){ - p = wts_df %>% ggplot() + - geom_line(aes(x, wt_mean, color = wt, linetype = wt), size = 1.0) + - geom_point(aes(x, wt_mean, color = wt), size = 1.5) + - geom_ribbon(aes(x = x, ymin=wt_lb, ymax=wt_ub, fill = wt), alpha=0.2) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12), - panel.grid = element_blank()) + - labs(x = "X", y = "W(x)", title = title,color = "Weights" ,fill = "Weights", linetype = 'Weights') + - scale_color_manual(values = colors$exp_cols[1:K], labels = w_labs) + - scale_linetype_manual(values = line_type_list$wts_lty[1:K], labels = w_labs) + - scale_fill_manual(values = colors$exp_cols[1:K], labels = w_labs) + - coord_cartesian(ylim = y_lim) + - guides(fill = 'none') - }else{ - p = wts_df %>% ggplot() + - geom_line(aes(x, wt_mean, color = wt), size = 1.0) + - geom_point(aes(x, wt_mean, color = wt), size = 1.5) + - geom_ribbon(aes(x = x, ymin=wt_lb, ymax=wt_ub, fill = wt), alpha=0.2) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = 'bottom', legend.text = element_text(size = 12)) + - labs(x = "X", y = "W(x)", title = title,color = "Weights" ,fill = "Weights") + - scale_color_manual(values = colors$exp_cols[1:K], labels = w_labs) + - scale_fill_manual(values = colors$exp_cols[1:K], labels = w_labs) + - coord_cartesian(ylim = y_lim) - } - return(p) -} - - -# Plot Prior Fit -plot_prior_fit_gg2 = function(ex_data, betax, f_true = TRUE,title = "Pointwise Prior Mean Prediction", colors = color_list, line_type_list = lty_list, y_lim = c(0,5), in_labs = NULL, grid_on = TRUE){ - # Get prior mean prediction - prior_mean = rowSums(betax*ex_data$f_train) - - # Bind data and get params - df = data.frame(x_train = ex_data$x_train, mmean = prior_mean, f_train = ex_data$f_train) - n = nrow(df) - K = ncol(ex_data$f_train) - exp_cols = (ncol(df) - K+1):ncol(df) - colnames(df)[exp_cols] = paste0('f',1:K) - - # Add in the true function if f_true is TRUE - if(f_true){ - df$f_true = ex_data$fg_train - msr_vars = c(colnames(df)[exp_cols],'f_true','mmean') - }else{ - msr_vars = c(colnames(df)[exp_cols],'mmean') - } - - # Prepare data - exp_df_melt = melt(df, id.vars = "x_train", measure.vars = colnames(df)[exp_cols]) - df_melt = melt(df, id.vars = "x_train", measure.vars = msr_vars) - - # Set sizes - line_group = c(line_type_list$exp_lty[1:K],line_type_list$true_lty,line_type_list$mean_lty) - size_group = c(rep(1.1,(K+1)),1.2) - color_group = c(colors$exp_cols[1:K],colors$true_cols, colors$mean_cols[1]) - - if(is.null(in_labs)){in_labs = c(colnames(df)[exp_cols], "true", 'mean')} - - p = df_melt %>% ggplot() + - geom_line(aes(x_train, value, color = variable, linetype = variable, size = variable)) + - scale_color_manual(values = color_group, - name = "", labels = in_labs) + - scale_linetype_manual(values = line_group, name = "", labels = in_labs) + - scale_size_manual(values = size_group) + - theme_bw() + - theme(axis.line = element_line(color = "grey70"), - panel.border = element_blank(),plot.title = element_text(hjust = 0.5), - legend.position = "bottom", legend.text = element_text(size = 10)) + - labs(x = "X", y = "F(x)", title = title) + - coord_cartesian(ylim = y_lim) + - guides(size = 'none') - - if(!grid_on){ - p = p + theme(panel.grid = element_blank()) - } - return(p) - -} - -# Plot mu prior -plot_mu_prior = function(k = 2, m = 1, betax = 1/2, data_rng = NULL, wt_nums = c(1,2), inform_prior = FALSE, - x_lim = NULL, y_lim = NULL, color = 'green',title = NULL, nlevels = 4, in_labs = NULL){ - if(!inform_prior){ - prior = "Non-Informative" - tau = 1/(2*k*sqrt(m)) - beta_hat = rep(betax/m,2) - }else{ - # Set parameters - if(is.null(data_rng)){data_rng = 1:nrow(betax)} - prior = "Informative" - tau = 1/(2*k*m) - beta_hat = apply(betax[data_rng,],2,mean)/m - - # Get the desired columns - beta_hat = beta_hat[wt_nums] - } - - #Get number of terminal node parameters - w1 = wt_nums[1] - w2 = wt_nums[2] - - #Set title indicator - if(is.null(title)){title = "Prior for Terminal Node Parameters"} - - #Set ellipse info - ellipse_list = list() - - #Check title - for(l in 1:nlevels){ - #Ellipse info - einfo = DrawEllipse(x = beta_hat[1], beta_hat[2], - radius.x = l*tau,radius.y = l*tau, plot = FALSE) - - ellipse_list[[l]] = cbind(c(einfo$x,einfo$x[1]) , c(einfo$y,einfo$y[1])) - } - - # Color scale - if(length(color) 1){ - h = which(xvec <= knots[k] & xvec > knots[k-1]) - fout[h] = fmatrix[h,k] - } - if(k == length(knots)){ - h = which(xvec > knots[k]) - fout[h] = fmatrix[h,k+1] - } - } - return(fout) -} - -# Softmax transformation -- basis = nxK matrix -softmax = function(basis){ - wts = exp(basis)/rowSums(exp(basis)) - colnames(wts) = paste0("w",1:ncol(basis)) - return(wts) -} - -# Generate weight basis for a model M_k -# a,b,c, and p can be vectors which match ncol of x -get_wt_basis = function(x, a=0,b=0, c=1, p=1){ - # Initialize basis matrix - if(is.matrix(x)){ - px = ncol(x) - n = nrow(x) - }else{ - px = 1 - n = length(x) - x = matrix(x, ncol = 1, nrow = n) - } - wts_basis = rep(0,n) - - # Loop through x cols to generate basis - if(length(a)= 1: - ypred_df.iloc[:,i] = term_df.iloc[:,i] + ypred_df.iloc[:,i-1] - else: - ypred_df.iloc[:,i] = term_df.iloc[:,i] - out = {'coef_df':coef_df, 'term_df':term_df, 'ypred_df':ypred_df, 'qx_df':qx_df,'yref_df':yref_df} - return out - -# Checker -fsg_terms(0.25, 6) -flg_terms(0.03, 4) - -# Set training points for model runs -#x1_train = np.linspace(0.05, 0.5, 5) -#x2_train = np.linspace(0.05, 0.5, 5) - -x1_train = np.array([0.05, 0.15, 0.25, 0.35, 0.45]) -x2_train = np.array([0.1, 0.2,0.30,0.40]) - -x1_test = np.linspace(0.01, 0.48, 20) -x2_test = np.linspace(0.03, 0.5, 20) - -#x1_test = np.linspace(0.03, 0.5, 300) -#x2_test = np.linspace(0.03, 0.5, 300) - -x1_data = np.concatenate((x1_train,x1_test), axis = 0) -x2_data = np.concatenate((x2_train,x2_test), axis = 0) - -#Get coefficients from both models -ns = 4 -c1_cols = [] -c1_list = [] -for i in range(ns+1): - c1_cols.append('order' + str(i)) - c1_data = get_terms(x1_train, i,'sg')['coef_list'] - c1_list.append(c1_data) -c1_array = np.array(c1_list).transpose() - -nl = 4 -c2_cols = [] -c2_list = [] -for i in range(nl+1): - c2_cols.append('order' + str(i)) - c2_data = get_terms(x2_train, i,'lg')['coef_list'] - c2_list.append(c2_data) -c2_array = np.array(c2_list).transpose() - -# Convert the x's to required format -x1_train = x1_train[:,None] -x2_train = x2_train[:,None] - -x1_test = x1_test[:,None] -x2_test = x2_test[:,None] - -x1_data = x1_data[:,None] -x2_data = x2_data[:,None] - -#------------------------- -# Hyperparameters -center0 = 0 -disp0 = 0 -df0 = 3 -scale0 = 5 -kernel1 = RBF(length_scale=0.3, length_scale_bounds=(0.05, 4)) + WhiteKernel(1e-10, noise_level_bounds='fixed') -kernel2 = RBF(length_scale=0.5, length_scale_bounds=(0.05, 1)) + WhiteKernel(1e-10, noise_level_bounds='fixed') - -exp1_res = get_exp(list(chain(*x1_data.tolist())), ns, 'sg') - -ratio1 = np.array(list(chain(*exp1_res['qx_df'].values.tolist()))) # Reshape the array -ref1 = np.array(list(chain(*exp1_res['yref_df'].values.tolist()))) # Reshape the array - -y1_df = exp1_res['ypred_df'] -coeffs1 = gm.coefficients(np.array(y1_df),ratio = ratio1,ref=ref1,orders=np.arange(0,ns+1)) - -gp1 = gm.ConjugateGaussianProcess(kernel1, center=center0, disp=disp0, df=df0, scale=scale0, n_restarts_optimizer=10) -fit1 = gp1.fit(x1_train, coeffs1[:len(x1_train),]) -fit1.predict(x1_test,return_std=True) -fit1.cbar_sq_mean_ -fit1.scale_ -fit1.df_ - -gp_trunc1 = gm.TruncationGP(kernel = fit1.kernel_, center=center0, disp=disp0, df=fit1.df_, scale=fit1.scale_, ref = ref1, ratio = ratio1) - -fhat1 = y1_df.iloc[:,ns] + gp_trunc1.predict(x1_data, order = ns, return_std = False) -std1 = gp_trunc1.predict(x1_data, order = ns, return_std = True, kind = 'trunc')[1] -cov1 = gp_trunc1.predict(x1_data, order = ns, return_cov = True)[1] - -#fit_trunc1 = gp_trunc1.fit(x1_data, y=np.array(exp1_res['ypred_df']), orders=np.arange(0,ns+1)) -#fhat1 = fit_trunc1.predict(x1_data, order = 2, return_std = False) -#std1 = fit_trunc1.predict(x1_data, order = 2, return_std = True, kind = 'trunc')[1] -#cov1 = fit_trunc1.predict(x1_data, order = 2, return_cov = True)[1] - - -fig, ax = plt.subplots(figsize=(3.2, 3.2)) -ax.plot(x1_test, fhat1[len(x1_train):], zorder = 2) -ax.fill_between(x1_test[:,0], fhat1[len(x1_train):] + 2*std1[len(x1_train):], fhat1[len(x1_train):] - 2*std1[len(x1_train):], zorder=1) -plt.show() - -fig, ax = plt.subplots(figsize=(3.2, 3.2)) -ax.plot(x1_test, exp1_res['coef_df'].iloc[len(x1_train):,2], zorder = 2) -ax.plot(x1_test, exp1_res['coef_df'].iloc[len(x1_train):,4], zorder = 2, color = 'red') -#ax.plot(x1_test, exp1_res['coef_df'].iloc[len(x1_train):,6], zorder = 2, color = 'green') -ax.plot(x1_test, exp1_res['coef_df'].iloc[len(x1_train):,0], zorder = 2, color = 'purple') -plt.show() - - -center0 = 0 -disp0 = 0 -df0 = 3 -scale0 = 5 - -exp2_res = get_exp(list(chain(*x2_data.tolist())), 4, 'lg') -y2_df = exp2_res['ypred_df'] - -ratio2 = np.array(list(chain(*exp2_res['qx_df'].values.tolist()))) # Reshape the array -ref2 = np.array(list(chain(*exp2_res['yref_df'].values.tolist()))) # Reshape the array - -coeffs2 = gm.coefficients(np.array(y2_df),ratio = ratio2,ref = ref2, orders=np.arange(0,nl+1)) - -gp2 = gm.ConjugateGaussianProcess(kernel2, center=center0, disp=disp0, df=df0, scale=scale0, n_restarts_optimizer=10) -fit2 = gp2.fit(x2_train, coeffs2[:len(x2_train),]) -fit2.cbar_sq_mean_ - -gp_trunc2 = gm.TruncationGP(kernel = fit2.kernel_, center=center0, disp=disp0, df=fit2.df_, scale=fit2.scale_, ref = ref2, ratio = ratio2) -fhat2 = y2_df.iloc[:,ns] + gp_trunc2.predict(x2_data, order = nl, return_std = False) -std2 = gp_trunc2.predict(x2_data, order = nl, return_std = True, kind = 'trunc')[1] -cov2 = gp_trunc2.predict(x2_data, order = nl, return_cov = True)[1] - -#fit_trunc1 = gp_trunc1.fit(x1_data, y=np.array(exp1_res['ypred_df']), orders=np.arange(0,ns+1)) -#fhat1 = fit_trunc1.predict(x1_data, order = 2, return_std = False) -#std1 = fit_trunc1.predict(x1_data, order = 2, return_std = True, kind = 'trunc')[1] -#cov1 = fit_trunc1.predict(x1_data, order = 2, return_cov = True)[1] - - -fig, ax = plt.subplots(figsize=(3.2, 3.2)) -ax.plot(x2_test, fhat2[len(x2_train):], zorder = 2) -ax.fill_between(x2_test[:,0], fhat2[len(x2_train):] + 2*std2[len(x2_train):], fhat2[len(x2_train):] - 2*std2[len(x2_train):], zorder=1) -plt.show() - -fig, ax = plt.subplots(figsize=(3.2, 3.2)) -ax.plot(x2_test, exp2_res['coef_df'].iloc[len(x2_train):,2], zorder = 2) -ax.plot(x2_test, exp2_res['coef_df'].iloc[len(x2_train):,4], zorder = 2, color = 'red') -#ax.plot(x1_test, exp1_res['coef_df'].iloc[len(x1_train):,6], zorder = 2, color = 'green') -ax.plot(x2_test, exp2_res['coef_df'].iloc[len(x2_train):,0], zorder = 2, color = 'purple') -plt.show() - - - -#----------------------------------------------------- -nn = fhat1.shape[0] -fdata = np.concatenate([fhat1.reshape(nn,1), fhat2.reshape(nn,1)], axis = 1) -sdata = np.concatenate([std1.reshape(nn,1), std2.reshape(nn,1)], axis = 1) - -np.savetxt("/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Overleaf Results/f_train.txt", fdata[4:]) -np.savetxt("/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Overleaf Results/s_train.txt", sdata[4:]) - - -#----------------------------------------------------- -# Model Mixing -n_train = 20 -n_test = 100 -s = 0.03 - -x_train = x1_data[4:] -x_grid = x1_test -#x_test = np.linspace(0.01, 1.0, n_test) - -np.random.seed(1234567) -y_train = np.array([2.508488,2.481616,2.462142,2.436121,2.405677,2.374887,2.343086,2.305914,2.271658,2.232709,2.203263,2.175726,2.136858, - 2.116650, 2.068122, 2.040274, 2.017935, 1.990049, 1.964042, 1.937480]) -nn = fhat1.shape[0] -f_train = np.concatenate([fhat1.reshape(nn,1), fhat2.reshape(nn,1)], axis = 1) -s_train = np.concatenate([std1.reshape(nn,1), std2.reshape(nn,1)], axis = 1) - -from openbt import OPENBT -m = OPENBT(model = "mixbart", tc = 4, modelname = "eft", ntree = 10, k = 1, ndpost = 10000, nskip = 2000, nadapt = 5000, - adaptevery = 500, overallsd = 0.01, minnumbot = 3, nsprior = True, overallnu = 5, numcut = 300) -fit = m.fit(x_train, y_train, F= f_train[4:(n_train+4),:], S= s_train[4:(n_train+4),:]) -f_grid = f_train[(n_train+4):,:] -fitp = m.predict(x_grid, F = f_grid) - -fitp['mmean'] - -fig, ax = plt.subplots(figsize=(5.2, 5.2)) -ax.plot(x_grid, fitp['mmean'], zorder = 2) -ax.plot(x_grid, fitp['mmean'], zorder = 2) -ax.plot(x_grid, fitp['m_lower'], zorder = 2) -ax.plot(x_grid, fitp['m_upper'], zorder = 2) -plt.show() - - - - - - - - - - - - - - -#------------------------- - -fit1.kernel_ -fit1.kernel_.__call__(x1_test) -fit1.kernel_.__call__(x1_train) -fit1.cov(x1_train) -fit1.cov(x1_test) -fit1.cov(x1_test, x1_train) -dir(fit1) - -np.sqrt(fit1.cbar_sq_mean_) -np.sqrt(fit1.df_ * fit1.scale_**2 / (fit1.df_ + 2)) # Posterior mode estimate of cbar - -k1_out = kernel1.get_params() -k1_out['k1'] -kernel1.__call__(x1_test) - - - -# Not what we want -# Get truncation errors for both models -- stop using this chunk -gp_trunc1 = gm.TruncationGP(kernel = kernel1, center=center0, disp=disp0, df=df0, scale=scale0) -gp_trunc2 = gm.TruncationGP(kernel = kernel2, center=center0, disp=disp0, df=df0, scale=scale0) -fit_trunc1 = gp_trunc1.fit(x1_train, y=c1_array, orders=np.arange(0,ns+1)) -fit_trunc2 = gp_trunc2.fit(x2_train, y=c2_array, orders=np.arange(0,nl+1)) - -fit1.predict(x1_train, order=2, return_std=True, kind='trunc') -fit2.predict(x2_train, order=4, return_std=True, kind='trunc') - - - - -#----------------------------------------------------------- -# Paper example -x = np.linspace(0, 1, 100) -X = x[:, None] # make a 2D version of x to match the input data structure from SciKitLearn -n_orders = 4 # Here we examine the case where we have info on four non-trivial orders -orders = np.arange(0, n_orders) - -final_order = 20 # We are going to treat the order-20 result as the final, converged answer -orders_all = np.arange(0, final_order+1) - -# The true values of the hyperparameters for generating the EFT coefficients -ls = 0.2 -sd = 1 -center = 0 -ref = 10 -ratio = 0.5 -nugget = 1e-10 -seed = 3 - -# Get train data -def regular_train_test_split(x, dx_train, dx_test, offset_train=0, offset_test=0, xmin=None, xmax=None): - train_mask = np.array([(i - offset_train) % dx_train == 0 for i in range(len(x))]) - test_mask = np.array([(i - offset_test) % dx_test == 0 for i in range(len(x))]) - if xmin is None: - xmin = np.min(x) - if xmax is None: - xmax = np.max(x) - train_mask = train_mask & (x >= xmin) & (x <= xmax) - test_mask = test_mask & (x >= xmin) & (x <= xmax) & (~ train_mask) - return train_mask, test_mask - -x_train_mask, x_valid_mask = regular_train_test_split( - x, dx_train=24, dx_test=6, offset_train=1, offset_test=1) - -kernel = RBF(length_scale=ls, length_scale_bounds='fixed') + \ - WhiteKernel(noise_level=nugget, noise_level_bounds='fixed') -gp = gm.ConjugateGaussianProcess(kernel=kernel, center=center, df=np.inf, scale=sd, nugget=0) - -# Draw coefficients and then use `partials` to create the sequence of partial sums -# that defines the order-by-order EFT predictions -# Negative sign to make y_n positive, for aesthetics only -coeffs_all = - gp.sample_y(X, n_samples=final_order+1, random_state=seed) -data_all = gm.partials(coeffs_all, ratio, ref=ref, orders=orders_all) -diffs_all = np.array([data_all[:, 0], *np.diff(data_all, axis=1).T]).T - -# Get the "all-orders" curve -data_true = data_all[:, -1] - -# Will only consider "known" lower orders for most of the notebook -coeffs = coeffs_all[:, :n_orders] -data = data_all[:, :n_orders] -diffs = diffs_all[:, :n_orders] - -# Truncation errors -kernel_fit = RBF(length_scale=ls) + WhiteKernel(noise_level=nugget, noise_level_bounds='fixed') -gp_trunc = gm.TruncationGP(kernel=kernel_fit, ref=ref, ratio=ratio, center=center0, disp=disp0, df=df0, scale=scale0) -fit0 = gp_trunc.fit(X, y=data, orders=orders) -fit0.predict(X, order = 3) \ No newline at end of file diff --git a/misc/example.R b/misc/example.R deleted file mode 100644 index 63f5aac..0000000 --- a/misc/example.R +++ /dev/null @@ -1,114 +0,0 @@ -# example.R -# Simple example showing fitting various models to simple test datasets, -# including saving fit to disk and re-loading to run predictions or -# extract variable activity information. - - -#----------------------------------------------------------------------- -# Test Branin function, rescaled -# see: http://www.sfu.ca/~ssurjano/ -#----------------------------------------------------------------------- -braninsc <- function(xx) -{ - x1 <- xx[1] - x2 <- xx[2] - - x1bar <- 15*x1 - 5 - x2bar <- 15 * x2 - - term1 <- x2bar - 5.1*x1bar^2/(4*pi^2) + 5*x1bar/pi - 6 - term2 <- (10 - 10/(8*pi)) * cos(x1bar) - - y <- (term1^2 + term2 - 44.81) / 51.95 - return(y) -} - - -#----------------------------------------------------------------------- -# Simulate branin data for testing -#----------------------------------------------------------------------- -set.seed(99) -n=500 -p=2 -x = matrix(runif(n*p),ncol=p) -y=rep(0,n) -for(i in 1:n) y[i] = braninsc(x[i,]) - -# Load the R wrapper functions to the OpenBT library. -source("Documents/Open BT Project SRC/openbt.R") - -# Homoscedasitc BART model -fit=openbt(x,y,pbd=c(0.7,0.0),ntreeh=1,numcut=100,tc=4,model="bart",modelname="branin") - -# Heteroscedastic HBART model -# fit=openbt(x,y,k=10,numcut=100,tc=4,model="hbart",modelname="branin") - -# Truncated BART model (c/o Dai Feng of Merck) -# tv=sample(1:n,5) -# miny=min(y) -# y[tv]=miny -# fit=openbt(x,y,pbd=c(0.7,0.0),ntreeh=1,numcut=100,tc=4,model="merck_truncated",modelname="branin") - - -# Extract posterior tree samples in vector coding -trees=openbt.scanpost(fit) -trees$mt[[1]][[1]] -trees$st[[1]][[1]] - - -# Save fitted model to local directory -openbt.save(fit,"test") - - -# Load fitted model to a new object. -fit2=openbt.load("test") - -# Note that saved models are saved in compressed format and so -# are not human readable. The loaded models are uncompressed -# into a temporary working directory. These directories -# are automatically deleted upon exiting R. -# Generally the user does not need to care about these -# behind-the-scenes details. -fit$folder -fit2$folder - - -# Predict the underlying response function -fitp=predict.openbt(fit2,x,tc=4) - -# Calculate variable activity information -fitv=vartivity.openbt(fit2) - -# Plot fitted model -plot(y,fitp$mmean,xlab="observed",ylab="fitted") -abline(0,1) - -# 3d plot if you like -library(rgl) -plot3d(x[,1],x[,2],y) -points3d(x[,1],x[,2],fitp$mmean,col="red") -points3d(x[,1],x[,2],fitp$mmean+2*fitp$smean,col="pink") -points3d(x[,1],x[,2],fitp$mmean-2*fitp$smean,col="pink") - -# Plot variable activity -plot(fitv) - -# Calculate Sobol indices -fits=sobol.openbt(fit2) -fits$msi -fits$mtsi -fits$msij - -# Example of MO optimization -fit.b=openbt(x,y,pbd=c(0.7,0.0),ntreeh=1,numcut=100,tc=4,model="bart",modelname="branin-b") - -pf=mopareto.openbt(fit,fit.b,tc=24) # quite slow, use many cores to speed it up - -# plot front -par(mfrow=c(1,2)) -plot(0,0,xlim=c(-2,2),ylim=c(-2,2),type="n",xlab="y1",ylab="y2") -for(i in length(pf)) points(t(pf[[i]]$theta),pch=20) -# plot set -plot(0,0,xlim=c(0,1),ylim=c(0,1),type="n",xlab="x1",ylab="x2") -for(i in 1:length(pf)) rect(pf[[i]]$a[1,],pf[[i]]$a[2,],pf[[i]]$b[1,],pf[[i]]$b[2,],col=gray(0.5,alpha=0.1),border=NA) - diff --git a/misc/example_model_mixing.R b/misc/example_model_mixing.R deleted file mode 100644 index 7bc7146..0000000 --- a/misc/example_model_mixing.R +++ /dev/null @@ -1,495 +0,0 @@ -#example_model_mixing.R -setwd("/home/johnyannotty/Documents/Open BT Project SRC") - -# Load the R wrapper functions to the OpenBT library. -source("/home/johnyannotty/Documents/Open BT Project SRC/openbt.R") -source("/home/johnyannotty/Documents/Model Mixing BART/Model Mixing R Code/Model Mixing BART Testing Priors.R") -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Construct the required functions -#True function -fg = function(g){ - x = 1/(32*g^2) - K = besselK(x = x, nu = 0.25) - b = exp(x)/(2*sqrt(2)*g)*K - return(b) -} - -#Construct the small-g expansion -fsg = function(g, ns){ - #Get the term number - k = 0:ns - - #Get the coefficients -- only even coefficients are non-zero - sk = ifelse(k %% 2 == 0,sqrt(2)*gamma(k + 1/2)*(-4)^(k/2)/factorial(k/2), 0) - - #Get the expansion - f = sum(sk*g^k) - return(f) -} - -#Construct the large-g expansion -flg = function(g, nl){ - #Get the term number - k = 0:nl - - #Get the coefficients - lk = gamma(k/2 + 0.25)*(-0.5)^k/(2*factorial(k)) - - #Get the expansion - f = sum(lk*g^(-k))/sqrt(g) - return(f) -} - -#Construct the small-g discrepancy -dsg = function(g,ns){ - #Get the term number - k = 0:ns - - #Get the coefficients -- only even coefficients are non-zero - sk = ifelse(k %% 2 == 0,sqrt(2)*gamma(k + 1/2)*(-4)^(k/2)/factorial(k/2), 0) - - #Estimate cbar - cbar = sqrt(mean(sk^2)) - - #get the variance estimate - if((ns/2)%%2 == 0){ - v = (cbar^2)*(factorial(ns/2 + 2)^2)*g^(ns + 4) - }else{ - v = (cbar^2)*(factorial(ns/2 + 1)^2)*g^(ns + 2) - } - s = sqrt(v) - return(s) -} - -#Construct the large-g discrepancy -dlg = function(g,nl){ - #Get the term number - k = 0:nl - - #Get the coefficients - lk = gamma(k/2 + 0.25)*(-0.5)^k/(2*factorial(k)) - - #Estimate cbar - if(nl < 2){ - print("Warning, nl < 2, dbar is not estimated as intended") - dbar = 1 - }else{ - #Estimate cbar using coefs of order 2 through nl - dbar = sqrt(mean(lk[-c(1,2)]^2)) - } - - #Get standard deviation - v = (dbar^2)*(1/(factorial(nl+1)^2))*(1/g^(2*nl + 3)) - s = sqrt(v) - return(s) -} - -#Test out the functions and approximations -fg(0.1) -fsg(0.1, 4) -fg(0.3) -flg(0.3, 5) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Graph the expansions and the true function. -#Create a grid of g's -g_grid = seq(0.01, 0.5, length = 300) - -#Get a simple plot using only one large and one small g expansion -plot_exp = function(g_grid, ns, nl){ - #Get labels - fs_name = paste0('Fs(',ns,')') - fl_name = paste0('Fl(',nl,')') - #Evaluate the functions - f = fg(g_grid) - fs = sapply(g_grid, fsg, ns = ns) - fl = sapply(g_grid, flg, nl = nl) - plot(g_grid, f, main = 'F(g) and Exansions vs. g', xlab = 'g', ylab = 'F(g)', - ylim = c(1,4), type = 'l', panel.first = {grid(col = 'lightgrey')}) - lines(g_grid, fs, col = 'red', lty = 'dashed') - lines(g_grid, fl, col = 'blue', lty = 'dashed') - legend('bottomright', legend = c('F(g)', fs_name, fl_name), cex = 0.75, - lty = c(1, 2, 2), col = c('black', 'red', 'blue')) -} - -plot_exp(g_grid, ns = 2, nl = 2) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Generate training data -#Set parameters -n_train = 20 -n_test = 300 -s = 0.005 #s = 0.03 - -set.seed(321) -x_train = seq(0.03, 0.5, length = n_train) -#x_train = runif(n_train,0.03,0.5) -#x_train = x_train[order(x_train)] -y_train = fg(x_train) + rnorm(n_train, 0, s) - -#Set a grid of test points -x_test = seq(0.03, 0.5, length = n_test) -fg_test = fg(x_test) -fs_test = sapply(x_test, fsg, 2) -fl_test = sapply(x_test, flg, 4) - -#Plot the training data -plot(x_train, y_train, pch = 16, cex = 0.8, main = 'Training data') -lines(g_grid, fg(g_grid)) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Define the Model set -- a matrix where each column is an the output evaluated at the train/test pts -#small_g = c(2,4,6) -#large_g = c(3,6,9) -small_g = c(2,4,6) #Which small-g expansions to use -large_g = NULL #Which large-g expansions to use -g_exp = c(small_g, large_g) #Mix both small and large g -K = length(g_exp) -Ks = length(small_g) #Number of small-g models -Kl = length(large_g) #Number of large-g models - -#Define matrices to store the function values -f_train = matrix(0, nrow = n_train, ncol = K) -f_test = matrix(0, nrow = n_test, ncol = K) - -#Define discrepancy information -f_train_dmean = matrix(0, nrow = n_train, ncol = K) -f_train_dsd = matrix(1, nrow = n_train, ncol = K) -f_test_dmean = matrix(0, nrow = n_test, ncol = K) -f_test_dsd = matrix(1, nrow = n_test, ncol = K) - -#Computation -for(i in 1:K){ - #Get the small g expansion output - if(i <= length(small_g)){ - f_train[,i] = sapply(x_train, fsg, ns = g_exp[i]) - f_test[,i] = sapply(x_test, fsg, ns = g_exp[i]) - - f_train_dsd[,i] = sapply(x_train, dsg, ns = g_exp[i]) - f_test_dsd[,i] = sapply(x_test, dsg, ns = g_exp[i]) - }else{ - #Get the large g expansion output - f_train[,i] = sapply(x_train, flg, nl = g_exp[i]) - f_test[,i] = sapply(x_test, flg, nl = g_exp[i]) - - f_train_dsd[,i] = sapply(x_train, dlg, nl = g_exp[i]) - f_test_dsd[,i] = sapply(x_test, dlg, nl = g_exp[i]) - } -} - -#Cast x_train and x_test as matrices -x_train = as.matrix(x_train, ncol = 1) -x_test = as.matrix(x_test, ncol = 1) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Model Mixing with OpenBT -fit=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),ntree = 10,ntreeh=1,numcut=300,tc=4,model="mixbart",modelname="physics_model", - ndpost = 10000, nskip = 1000, nadapt = 5000, adaptevery = 500, printevery = 500, - power = 1.0, minnumbot = 2, overallsd = 0.01, k = 3) - -#Get mixed mean function -fitp=predict.openbt(fit,x.test = x_test, f.test = f_test,tc=4) - -plot(x_test, fg(x_test), pch = 16, cex = 0.8, main = 'Fits', type = 'l', ylim = c(1.8,2.8)) -points(x_train, y_train, pch = 3) -lines(x_test, f_test[,1], col = 'red', lty = 2) -lines(x_test, f_test[,2], col = 'blue', lty = 2) -lines(x_test, fitp$mmean, col = 'green4', lwd = 2) -lines(x_test, fitp$m.lower, col = 'orange', lwd = 2) -lines(x_test, fitp$m.upper, col = 'orange', lwd = 2) - -#Get the model weights -fitw = openbt.mixingwts(fit, x.test = x_test, numwts = K, tc = 4) - -#Plot model weights -plot(x_test, fitw$wmean[,1], lwd = 2, col = 'red', type = 'l', ylim = c(-0.15,1.05)) -lines(x_test, fitw$wmean[,2], col = 'blue', lwd = 2) -lines(x_test, fitw$w.upper[,1], col = 'red', lty = 'dashed') -lines(x_test, fitw$w.lower[,1], col = 'red', lty = 'dashed') -lines(x_test, fitw$w.upper[,2], col = 'blue', lty = 'dashed') -lines(x_test, fitw$w.lower[,2], col = 'blue', lty = 'dashed') -#lines(x_test, fitw$wmean[,3], col = 'green', pch = 16) -#lines(x_test, fitw$w.upper[,3], col = 'green', lty = 'dashed') -#lines(x_test, fitw$w.lower[,3], col = 'green', lty = 'dashed') - -#Trace Plot of the weights -plot(fitw$wdraws[[1]][,55], type = 'l') -plot(fitw$wdraws[[1]][,150], type = 'l') -plot(fitw$wdraws[[1]][,185], type = 'l') -plot(fitw$wdraws[[2]][,20], type = 'l') -plot(fitw$wdraws[[2]][,150], type = 'l') -plot(fitw$wdraws[[2]][,275], type = 'l') -wts_trace = list(wt1 = fitw$wdraws[[1]][,c(25,150,275)], wt2 = fitw$wdraws[[2]][,c(25,150,275)]) - -#Trace plots of thetas -tnp10 = read.table(paste0(fit$folder,"/physics_model.tnp1draws0")) -tnp13 = read.table(paste0(fit$folder,"/physics_model.tnp1draws3")) -tnp20 = read.table(paste0(fit$folder,"/physics_model.tnp2draws0")) -tnp23 = read.table(paste0(fit$folder,"/physics_model.tnp2draws3")) -tnp_list = list(tnp10, tnp13, tnp20, tnp23) - -par(mfrow = c(2,3)) -mu_trace_ind = c(1,7,15) -#mu_trace_ind = 1:15 -for(i in 1:4){ - for(j in mu_trace_ind){ - temp = tnp_list[[i]] - plot(temp[,j], type ='l') - } -} -mu1_matrix = cbind(tnp_list[[1]][,mu_trace_ind],tnp_list[[3]][,mu_trace_ind]) -mu2_matrix = cbind(tnp_list[[2]][,mu_trace_ind],tnp_list[[4]][,mu_trace_ind]) -colnames(mu1_matrix) = colnames(mu2_matrix) = paste0(rep(c('LowX.', 'HighX.'),each = 3), colnames(mu1_matrix)) -tnp_trace = list(mu1_matrix = mu1_matrix, mu2_matrix = mu2_matrix) - -#Save results -fitp_out = list(mmean = fitp$mmean,m.lower = fitp$m.lower, m.upper = fitp$m.upper) -fitw_out = list(wmean = fitw$wmean,w.lower = fitw$w.lower, w.upper = fitw$w.upper) -fittpn_out = tnp_trace -#saveRDS(fitp_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary Prior April 2022/Stationary Prior 04-26/sg246_1p_0426.rds") -#saveRDS(fitw_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary Prior April 2022/Stationary Prior 04-26/sg246_1w_0426.rds") -#saveRDS(fittpn_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary BART priors 03-01/sg2sg2_1tnp.rds") - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Model Mixing with OpenBT - With Discrepancy -f_train[,2] = rev(fg(x_train) - f_train[,1]) + fg(x_train) -f_train_dsd[,2] = rev(f_train_dsd[,1]) -f_test[,2] = rev(fg(x_test) -f_test[,1]) + fg(x_test) -f_test_dsd[,2] = rev(f_test_dsd[,1]) -maxf = max(f_train) -#f_test[,2] = ifelse(f_test[,2]>20,20,f_test[,2]) - -fit=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),ntree = 10, ntreeh=1,numcut=300,tc=4,model="mixbart",modelname="physics_model", - ndpost = 10000, nskip = 2000, nadapt = 5000, adaptevery = 500, printevery = 500, - power = 1.0, minnumbot = 1, overallsd = sd(y_train)/sqrt(300), k = 3, summarystats = FALSE, - f.discrep.mean = f_train_dmean, f.discrep.sd = f_train_dsd) - -#Get mixed mean function -fitp=predict.openbt(fit,x.test = x_test, f.test = f_test, f.discrep.mean = f_test_dmean, f.discrep.sd = f_test_dsd ,tc=4, q.lower = 0.025, q.upper = 0.975) - -plot(x_test, fg(x_test), pch = 16, cex = 0.8, main = 'Fits', type = 'l', ylim = c(1.8,2.8)) -points(x_train, y_train, pch = 3) -lines(x_test, f_test[,1], col = 'red', lty = 2) -lines(x_test, f_test[,2], col = 'blue', lty = 2) -lines(x_test, fitp$mmean, col = 'green4', lwd = 2) -lines(x_test, fitp$m.lower, col = 'orange', lwd = 2) -lines(x_test, fitp$m.upper, col = 'orange', lwd = 2) - -#Get the model weights -fitw = openbt.mixingwts(fit, x.test = x_test, numwts = K, tc = 4) - -#Plot model weights -plot(x_test, fitw$wmean[,1], pch = 16, col = 'red', type = 'l', ylim = c(-0.05,1.05), lwd = 2, - panel.first = {grid(col = 'lightgrey')}) -lines(x_test, fitw$wmean[,2], col = 'blue', pch = 16, lwd = 2) -lines(x_test, fitw$w.upper[,1], col = 'red', lty = 'dashed', lwd = 1) -lines(x_test, fitw$w.lower[,1], col = 'red', lty = 'dashed', lwd = 1) -lines(x_test, fitw$w.upper[,2], col = 'blue', lty = 'dashed', lwd = 1) -lines(x_test, fitw$w.lower[,2], col = 'blue', lty = 'dashed', lwd = 1) -abline(h = 1, col = 'grey', lty = 'dashed') -abline(h = 0, col = 'grey', lty = 'dashed') - -#Get prior weights pointwise -w1 = (1/f_train_dsd[,1]^2)/(1/f_train_dsd[,1]^2 + 1/f_train_dsd[,2]^2) -points(x_train, w1, pch = 2, col = 'red') -points(x_train, 1-w1, pch = 2, col = 'blue') - -#Plot the prior fit -bhat_matrix = cbind(w1,1-w1) -plot_prior_pw_fit(bhat_matrix, f_train, x_train, f_test, x_test, y_lim = c(1.8,2.8)) -plot_prior_pw_wts(bhat_matrix, x_train, y_lim = c(-0.05, 1.05)) - -wts_trace = list(wt1 = fitw$wdraws[[1]][,c(25,150,275)], wt2 = fitw$wdraws[[2]][,c(25,150,275)]) -plot(fitw$wdraws[[1]][,119],type='l') - -#Look at variances -apply(fitw$wdraws[[2]],2,var) -cbind(x_test[20:50],apply(fitw$wdraws[[2]],2,var)[20:50], f_test[20:50,2]) -v = 20 -x_test[v] -sqrt(t(f_test[v,])%*%cov(data.frame(fitw$wdraws[[1]][,v], fitw$wdraws[[2]][,v]))%*%(f_test[v,])) - -#Trace plots of thetas -tnp10 = read.table(paste0(fit$folder,"/physics_model.tnp1draws0")) -tnp13 = read.table(paste0(fit$folder,"/physics_model.tnp1draws3")) -tnp20 = read.table(paste0(fit$folder,"/physics_model.tnp2draws0")) -tnp23 = read.table(paste0(fit$folder,"/physics_model.tnp2draws3")) -tnp_list = list(tnp10, tnp13, tnp20, tnp23) - -par(mfrow = c(2,3)) -mu_trace_ind = c(1,7,15) -#mu_trace_ind = 1:15 -for(i in 1:4){ - for(j in mu_trace_ind){ - temp = tnp_list[[i]] - plot(temp[,j], type ='l') - } -} -mu1_matrix = cbind(tnp_list[[1]][,mu_trace_ind],tnp_list[[3]][,mu_trace_ind]) -mu2_matrix = cbind(tnp_list[[2]][,mu_trace_ind],tnp_list[[4]][,mu_trace_ind]) -colnames(mu1_matrix) = colnames(mu2_matrix) = paste0(rep(c('LowX.', 'HighX.'),each = 3), colnames(mu1_matrix)) -tnp_trace = list(mu1_matrix = mu1_matrix, mu2_matrix = mu2_matrix) - -#Save the results -#openbt.save(fit,"/home/johnyannotty/Documents/Model Mixing BART/smallg2largeg4") -fitp_out = list(mmean = fitp$mmean,m.lower = fitp$m.lower, m.upper = fitp$m.upper) -fitw_out = list(wmean = fitw$wmean,w.lower = fitw$w.lower, w.upper = fitw$w.upper, wts_trace = wts_trace) -fittpn_out = tnp_trace -#saveRDS(fitp_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Non-Stationary Priors 03-28/sg4lg4_4p.rds") -#saveRDS(fitw_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Non-Stationary Priors 03-28/sg4lg4_4w.rds") -#saveRDS(fittpn_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary priors 03-01/sg2lg4_1tnp.rds") - -fitp_out = list(mmean = fitp$mmean,m.lower = fitp$m.lower, m.upper = fitp$m.upper) -fitw_out = list(wmean = fitw$wmean,w.lower = fitw$w.lower, w.upper = fitw$w.upper) -#saveRDS(fitp_out, "/home/johnyannotty/Documents/Model Mixing BART/BAND Presentation Data/JAM 04-07/sg2lg4_10tree_p.rds") -#saveRDS(fitw_out, "/home/johnyannotty/Documents/Model Mixing BART/BAND Presentation Data/JAM 04-07/sg2lg4_10tree_w.rds") - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Model Mixing with OpenBT -- Read in priors -#prior_mean = c(0.05,0.001) #rep(0,2) -#prior_sd = c(0.045,0.02) #rep((max(y_train)-min(y_train))/(2*1.5*sqrt(20)), 2) -#prior_info = cbind( -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~prior_mean, prior_sd) -m = 20 -k = 2.25 -#prior_out = prior1(f_train, y_train, k,m) -prior_out = prior2(f_train, y_train, k,m) -prior_info = cbind(prior_out$beta_list, prior_out$tau_list) - -fit=openbt(x_train,y_train,f_train,pbd=c(0.7,0.0),ntree = m, ntreeh=1,numcut=300,tc=4,model="mixbart",modelname="physics_model", - ndpost = 10, nskip = 1000, nadapt = 5000, adaptevery = 500, printevery = 500, - power = 1.0, minnumbot = 5, overallsd = sd(y_train)/sqrt(16), k = k, f.prior.info = prior_info) - -#Get mixed mean function -fitp=predict.openbt(fit,x.test = x_test, f.test = f_test,tc=4) - -plot(g_grid, fg(g_grid), pch = 16, cex = 0.8, main = 'Fits', type = 'l', ylim = c(1.8,2.8)) -points(x_train, y_train, pch = 3) -lines(g_grid, sapply(g_grid,fsg, small_g), col = 'red', lty = 2) -lines(g_grid, sapply(g_grid,flg, large_g), col = 'blue', lty = 2) -points(x_test, fitp$mmean, col = 'green4') -points(x_test, fitp$m.lower, col = 'orange') -points(x_test, fitp$m.upper, col = 'orange') - -#Get the model weights -fitw = openbt.mixingwts(fit, x.test = x_test, numwts = 2, tc = 4) - -#Plot model weights -plot(x_test, fitw$wmean[,1], pch = 16, col = 'red', type = 'o', ylim = c(-1,1)) -points(x_test, fitw$wmean[,2], col = 'blue', pch = 16) -lines(x_test, fitw$w.upper[,1], col = 'red', lty = 'dashed') -lines(x_test, fitw$w.lower[,1], col = 'red', lty = 'dashed') -lines(x_test, fitw$w.upper[,2], col = 'blue', lty = 'dashed') -lines(x_test, fitw$w.lower[,2], col = 'blue', lty = 'dashed') - -#Trace Plot of the weights -plot(fitw$wdraws[[1]][,25], type = 'l') -plot(fitw$wdraws[[1]][,185], type = 'l') -plot(fitw$wdraws[[2]][,25], type = 'l') -plot(fitw$wdraws[[2]][,185], type = 'l') - -wts_trace = list(wt1 = fitw$wdraws[[1]][,c(25,150,275)], wt2 = fitw$wdraws[[2]][,c(25,150,275)]) - -#Trace plots of thetas -tnp10 = read.table(paste0(fit$folder,"/physics_model.tnp1draws0")) -tnp13 = read.table(paste0(fit$folder,"/physics_model.tnp1draws1")) -tnp20 = read.table(paste0(fit$folder,"/physics_model.tnp2draws0")) -tnp23 = read.table(paste0(fit$folder,"/physics_model.tnp2draws1")) -tnp_list = list(tnp10, tnp13, tnp20, tnp23) - -mu_trace_ind = c(1,7,15) -#mu_trace_ind = 1:15 -for(i in 1:4){ - for(j in mu_trace_ind){ - temp = tnp_list[[i]] - plot(temp[,j], type ='l') - } -} - -mu1_matrix = cbind(tnp_list[[1]][,mu_trace_ind],tnp_list[[3]][,mu_trace_ind]) -mu2_matrix = cbind(tnp_list[[2]][,mu_trace_ind],tnp_list[[4]][,mu_trace_ind]) -colnames(mu1_matrix) = colnames(mu2_matrix) = paste0(rep(c('LowX.', 'HighX.'),each = 3), colnames(mu1_matrix)) -tnp_trace = list(mu1_matrix = mu1_matrix, mu2_matrix = mu2_matrix) - -#Save results -fitp_out = list(mmean = fitp$mmean,m.lower = fitp$m.lower, m.upper = fitp$m.upper) -fitw_out = list(wmean = fitw$wmean,w.lower = fitw$w.lower, w.upper = fitw$w.upper, wts_trace = wts_trace) -fittpn_out = tnp_trace -#saveRDS(fitp_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary BART priors 03-01/sg2sg2_4p.rds") -#saveRDS(fitw_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary BART priors 03-01/sg2sg2_4w.rds") -#saveRDS(fittpn_out, "/home/johnyannotty/Documents/Model Mixing BART/Open BT Examples/Stationary BART priors 03-01/sg2sg2_4tnp.rds") - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Check calculations -inv_prior_var = diag(c(30,60)) -beta0 = c(0,0) -data_rng = c(1,5) -sig2 = 0.3 -solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)%*%(t(f_train[data_rng,])%*%y_train[data_rng]/sig2 + inv_prior_var%*%beta0) -solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var) - -sqrt(solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)[1,1]) -sqrt(solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)[2,2]) - -b = solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)%*%(t(f_train[data_rng,])%*%y_train[data_rng]/sig2 + inv_prior_var%*%beta0) -f_train[data_rng,]%*%b - -bhat = solve(t(f_train[data_rng,])%*%f_train[data_rng,])%*%t(f_train[data_rng,])%*%y_train[data_rng] -f_train[data_rng,]%*%bhat - -#G prior -g = 10 -q = g/(g+1) -bhat = solve(t(f_train[data_rng,])%*%f_train[data_rng,])%*%t(f_train[data_rng,])%*%y_train[data_rng] -q*bhat - -#use discrepancies for the prior -data_rng = c(8:13) -#tau2 = (0.6/2)^2 -tau2 = 1/(2^2) -inv_prior_var = diag(c(sum(f_train_dsd[data_rng,1]^2),sum(f_train_dsd[data_rng,2]^2)))/tau2 -beta0 = c(0.5,0.5) -sig2 = 0.56 -solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)%*%(t(f_train[data_rng,])%*%y_train[data_rng]/sig2 + inv_prior_var%*%beta0) -solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var) - -sqrt(solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)[1,1]) -sqrt(solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)[2,2]) - -b = solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)%*%(t(f_train[data_rng,])%*%y_train[data_rng]/sig2 + inv_prior_var%*%beta0) -f_train[data_rng,]%*%b -bhat = solve(t(f_train[data_rng,])%*%f_train[data_rng,])%*%t(f_train[data_rng,])%*%y_train[data_rng] -f_train[data_rng,]%*%bhat - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#Priors -data_rng = c(2:3) -pr = 1 -bartp = bart_prior(y_train[data_rng], c=2,m=1,K=2) -nsp1 = ns_prior1(dsd.train = f_train_dsd[data_rng,], 2,1) -nsp2 = ns_prior2(dsd.train = f_train_dsd[data_rng,], 5,1) - -sig2 = 0.01 -if(pr == 1){ - inv_prior_var = diag(1/diag(nsp1$tau_matrix),2)^2 - beta0 = nsp1$beta -}else{ - inv_prior_var = diag(1/diag(nsp2$tau_matrix),2)^2 - beta0 = nsp2$beta -} - -b = solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)%*%(t(f_train[data_rng,])%*%y_train[data_rng]/sig2 + inv_prior_var%*%beta0) -f_train[data_rng,]%*%b - -bhat = solve(t(f_train[data_rng,])%*%f_train[data_rng,])%*%t(f_train[data_rng,])%*%y_train[data_rng] -f_train[data_rng,]%*%bhat - -sqrt(solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)[1,1]) -sqrt(solve(t(f_train[data_rng,])%*%f_train[data_rng,]/sig2 + inv_prior_var)[2,2]) diff --git a/misc/openbt.R b/misc/openbt.R deleted file mode 100644 index 70aedce..0000000 --- a/misc/openbt.R +++ /dev/null @@ -1,2286 +0,0 @@ -## openbt.R: R script wrapper functions for OpenBT. -## Copyright (C) 2012-2019 Matthew T. Pratola -## -## This file is part of OpenBT. -## -## OpenBT is free software: you can redistribute it and/or modify -## it under the terms of the GNU Affero General Public License as published by -## the Free Software Foundation, either version 3 of the License, or -## (at your option) any later version. -## -## OpenBT is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU Affero General Public License for more details. -## -## You should have received a copy of the GNU Affero General Public License -## along with this program. If not, see . -## -## Author contact information -## Matthew T. Pratola: mpratola@gmail.com - - -# Load/Install required packages -required <- c("zip","data.table") -tbi <- required[!(required %in% installed.packages()[,"Package"])] -if(length(tbi)) { - cat("***Installing OpenBT package dependencies***\n") - install.packages(tbi,repos="https://cloud.r-project.org",quiet=TRUE) -} -library(zip,quietly=TRUE,warn.conflicts=FALSE) -library(data.table,quietly=TRUE,warn.conflicts=FALSE) - - -openbt = function( -x.train, -y.train, -f.train = matrix(1,nrow = length(y.train), ncol = 2), -ntree=NULL, -ntreeh=NULL, -ndpost=1000, nskip=100, -k=NULL, -power=2.0, base=.95, -tc=2, -sigmav=rep(1,length(y.train)), -f.sd.train = NULL, # rename -wts.prior.info = NULL, # keep/rename -fmean=mean(y.train), -overallsd = NULL, -overallnu= NULL, -chv = cor(x.train,method="spearman"), -pbd=.7, -pb=.5, -stepwpert=.1, -probchv=.1, -minnumbot=5, -printevery=100, -numcut=100, -xicuts=NULL, -nadapt=1000, -adaptevery=100, -summarystats=FALSE, -truncateds=NULL, -model=NULL, -modelname="model" -) -{ -#-------------------------------------------------- -# model type definitions -modeltype=0 # undefined -MODEL_BT=1 -MODEL_BINOMIAL=2 -MODEL_POISSON=3 -MODEL_BART=4 -MODEL_HBART=5 -MODEL_PROBIT=6 -MODEL_MODIFIEDPROBIT=7 -MODEL_MERCK_TRUNCATED=8 -MODEL_MIXBART=9 -if(is.null(model)) -{ - cat("Model type not specified.\n") - cat("Available options are:\n") - cat("model='bt'\n") - cat("model='binomial'\n") - cat("model='poisson'\n") - cat("model='bart'\n") - cat("model='hbart'\n") - cat("model='probit'\n") - cat("model='modifiedprobit'\n") - cat("model='merck_truncated'\n") - cat("model='mixbart'\n") - - stop("missing model type.\n") -} -if(model=="bart") -{ - modeltype=MODEL_BART - if(is.null(ntree)) ntree=200 - if(is.null(ntreeh)) ntreeh=1 - if(is.null(k)) k=2 - if(is.null(overallsd)) overallsd=sd(y.train) - if(is.null(overallnu)) overallnu=10 - pbd=c(pbd,0.0) -} -if(model=="hbart") -{ - modeltype=MODEL_HBART - if(is.null(ntree)) ntree=200 - if(is.null(ntreeh)) ntreeh=40 - if(is.null(k)) k=5 - if(is.null(overallsd)) overallsd=sd(y.train) - if(is.null(overallnu)) overallnu=10 -} - -if(model=="probit") -{ - modeltype=MODEL_PROBIT - if(is.null(ntree)) ntree=200 - if(is.null(ntreeh)) ntreeh=1 - if(is.null(k)) k=1 - if(is.null(overallsd)) overallsd=1 - if(is.null(overallnu)) overallnu=-1 - if(length(pbd)==1) pbd=c(pbd,0.0) -} -if(model=="modified-probit") -{ - modeltype=MODEL_MODIFIEDPROBIT - if(is.null(ntree)) ntree=200 - if(is.null(ntreeh)) ntreeh=40 - if(is.null(k)) k=1 - if(is.null(overallsd)) overallsd=1 - if(is.null(overallnu)) overallnu=-1 -} -if(model=="merck_truncated") -{ - modeltype=MODEL_MERCK_TRUNCATED - if(is.null(ntree)) ntree=200 - if(is.null(ntreeh)) ntreeh=1 - if(is.null(k)) k=2 - if(is.null(overallsd)) overallsd=sd(y.train) - if(is.null(overallnu)) overallnu=10 - if(is.null(truncateds)) { - miny=min(y.train)[1] - truncateds=(y.train==miny) - } -} -if(model=="mixbart") -{ - modeltype=MODEL_MIXBART - if(is.null(ntree)) ntree=20 - if(is.null(ntreeh)) ntreeh=1 - if(is.null(k)) k=1 - if(is.null(overallsd)) overallsd=sd(y.train) - if(is.null(overallnu)) overallnu=10 - pbd=c(pbd,0.0) -} -#-------------------------------------------------- -nd = ndpost -burn = nskip -m = ntree -mh = ntreeh -#-------------------------------------------------- -#data -if(!is.matrix(x.train)){x.train = matrix(x.train, ncol = 1)} -n = length(y.train) -p = ncol(x.train) -#np = nrow(x.test) -x = t(x.train) -#xp = t(x.test) -if(modeltype==MODEL_BART || modeltype==MODEL_HBART || modeltype==MODEL_MERCK_TRUNCATED) -{ - y.train=y.train-fmean - fmean.out=paste(0.0) -} -if(modeltype==MODEL_PROBIT || modeltype==MODEL_MODIFIEDPROBIT) -{ - fmean.out=paste(qnorm(fmean)) - uniqy=sort(unique(y.train)) - if(length(uniqy)>2) stop("Invalid y.train: Probit requires dichotomous response coded 0/1") - if(uniqy[1]!=0 || uniqy[2]!=1) stop("Invalid y.train: Probit requires dichotomous response coded 0/1") -} - -#Set mix discrepancy to FALSE if we use a different model -nsprior = FALSE -wtsprior = FALSE -if(modeltype==MODEL_MIXBART) -{ - #Center the response - fmean.out=paste(0.0) - - #Check to see if any discrepancy data has been passed into the function -- if so, we will use the discrepancy model mixing - if(!is.null(f.sd.train)){ - nsprior = TRUE - } - if(!is.null(wts.prior.info)){ - wtsprior = TRUE - if(ncol(wts.prior.info) != 2 | nrow(wts.prior.info)!=ncol(f.train)) stop("Invalid wts.prior.info: Required dimensions of num.models x 2") - } -} -#-------------------------------------------------- -#cutpoints -if(!is.null(xicuts)) # use xicuts -{ - xi=xicuts -}else # default to equal numcut per dimension -{ - xi=vector("list",p) - minx=floor(apply(x,1,min)) - maxx=ceiling(apply(x,1,max)) - for(i in 1:p) - { - xinc=(maxx[i]-minx[i])/(numcut+1) - xi[[i]]=(1:numcut)*xinc+minx[i] - } -} - -if(is.null(xicuts) & modeltype==MODEL_MIXBART){ - xi=vector("list",p) - minx_temp=apply(x,1,min) - maxx_temp=apply(x,1,max) - - maxx = round(maxx_temp,1) + ifelse((round(maxx_temp,1)-maxx_temp)>0,0,0.1) - minx = round(minx_temp,1) - ifelse((minx_temp - round(minx_temp,1))>0,0,0.1) - for(i in 1:p) - { - xinc=(maxx[i]-minx[i])/(numcut+1) - xi[[i]]=(1:numcut)*xinc+minx[i] - } -} -#-------------------------------------------------- -if(modeltype==MODEL_BART || modeltype==MODEL_HBART || modeltype==MODEL_MERCK_TRUNCATED || modeltype==MODEL_MIXBART) -{ - rgy = range(y.train) -} -if(modeltype==MODEL_PROBIT || modeltype==MODEL_MODIFIEDPROBIT) -{ - rgy = c(-2,2) -} - -if(modeltype==MODEL_MIXBART){ - tau = (1)/(2*sqrt(m)*k) - beta0 = 1/(2*m) - overallsd = sqrt((overallnu+2)*overallsd^2/overallnu) -}else{ - tau = (rgy[2]-rgy[1])/(2*sqrt(m)*k) - beta0=0 -} - -if(modeltype==MODEL_MIXBART & nsprior){ - #tau = 1/(sqrt(m)*k) - #tau = 1/(2*sqrt(m)*k) - tau = 1/(2*(m)*k) - beta0 = 1/m -} - -#-------------------------------------------------- -overalllambda = overallsd^2 -#-------------------------------------------------- -powerh=power -baseh=base -if(length(power)>1) { - powerh=power[2] - power=power[1] -} -if(length(base)>1) { - baseh=base[2] - base=base[1] -} -#-------------------------------------------------- -pbdh=pbd -pbh=pb -if(length(pbd)>1) { - pbdh=pbdh[2] - pbd=pbd[1] -} -if(length(pb)>1) { - pbh=pb[2] - pb=pb[1] -} -#-------------------------------------------------- -if(modeltype==MODEL_BART) -{ - cat("Model: Bayesian Additive Regression Trees model (BART)\n") -} -#-------------------------------------------------- -if(modeltype==MODEL_HBART) -{ - cat("Model: Heteroscedastic Bayesian Additive Regression Trees model (HBART)\n") -} -#-------------------------------------------------- -if(modeltype==MODEL_PROBIT) -{ - cat("Model: Dichotomous outcome model: Albert & Chib Probit (fixed)\n") -# overallnu=-1 - if(ntreeh>1) - stop("method probit requires ntreeh=1") - if(pbdh>0.0) - stop("method probit requires pbd[2]=0.0") -} -#-------------------------------------------------- -if(modeltype==MODEL_MODIFIEDPROBIT) -{ - cat("Model: Dichotomous outcome model: Modified Albert & Chib Probit\n") -} -#-------------------------------------------------- -if(modeltype==MODEL_MERCK_TRUNCATED) -{ - cat("Model: Truncated BART model\n") -} -#-------------------------------------------------- -if(modeltype==MODEL_MIXBART) -{ - cat("Model: Model Mixing with Bayesian Additive Regression Trees\n") -} -#-------------------------------------------------- -stepwperth=stepwpert -if(length(stepwpert)>1) { - stepwperth=stepwpert[2] - stepwpert=stepwpert[1] -} -#-------------------------------------------------- -probchvh=probchv -if(length(probchv)>1) { - probchvh=probchv[2] - probchv=probchv[1] -} -#-------------------------------------------------- -minnumboth=minnumbot -if(length(minnumbot)>1) { - minnumboth=minnumbot[2] - minnumbot=minnumbot[1] -} - -#-------------------------------------------------- -#write out config file -xroot="x" -yroot="y" -sroot="s" -chgvroot="chgv" -froot="f" -fsdroot="fsd" -wproot="wpr" -xiroot="xi" -folder=tempdir(check=TRUE) -if(!dir.exists(folder)) dir.create(folder) -tmpsubfolder=tempfile(tmpdir="") -tmpsubfolder=substr(tmpsubfolder,5,nchar(tmpsubfolder)) -tmpsubfolder=paste("openbt",tmpsubfolder,sep="") -folder=paste(folder,"/",tmpsubfolder,sep="") -if(!dir.exists(folder)) dir.create(folder) -fout=file(paste(folder,"/config",sep=""),"w") -writeLines(c(paste(modeltype),xroot,yroot,fmean.out,paste(m),paste(mh),paste(nd),paste(burn), - paste(nadapt),paste(adaptevery),paste(tau),paste(beta0),paste(overalllambda), - paste(overallnu),paste(base),paste(power),paste(baseh),paste(powerh), - paste(tc),paste(sroot),paste(chgvroot),paste(froot),paste(fsdroot),paste(nsprior),paste(wproot),paste(wtsprior), - paste(pbd),paste(pb),paste(pbdh),paste(pbh),paste(stepwpert),paste(stepwperth), - paste(probchv),paste(probchvh),paste(minnumbot),paste(minnumboth), - paste(printevery),paste(xiroot),paste(modelname),paste(summarystats)),fout) -close(fout) - -# folder=paste(".",modelname,"/",sep="") -# system(paste("rm -rf ",folder,sep="")) -# system(paste("mkdir ",folder,sep="")) -# system(paste("cp config ",folder,"config",sep="")) - - -#-------------------------------------------------- -#write out data subsets -nslv=tc-1 -ylist=split(y.train,(seq(n)-1) %/% (n/nslv)) -for(i in 1:nslv) write(ylist[[i]],file=paste(folder,"/",yroot,i,sep="")) -xlist=split(as.data.frame(x.train),(seq(n)-1) %/% (n/nslv)) -for(i in 1:nslv) write(t(xlist[[i]]),file=paste(folder,"/",xroot,i,sep="")) -slist=split(sigmav,(seq(n)-1) %/% (n/nslv)) -for(i in 1:nslv) write(slist[[i]],file=paste(folder,"/",sroot,i,sep="")) -chv[is.na(chv)]=0 # if a var as 0 levels it will have a cor of NA so we'll just set those to 0. -write(chv,file=paste(folder,"/",chgvroot,sep="")) -for(i in 1:p) write(xi[[i]],file=paste(folder,"/",xiroot,i,sep="")) -rm(chv) - -#Write the function output if using model mixing -if(modeltype == MODEL_MIXBART){ - flist=split(as.data.frame(f.train),(seq(n)-1) %/% (n/nslv)) - for(i in 1:nslv) write(t(flist[[i]]),file=paste(folder,"/",froot,i,sep="")) - - if(nsprior){ - # -- delete these two lines - #fdmlist=split(as.data.frame(f.discrep.mean),(seq(n)-1) %/% (n/nslv)) - #for(i in 1:nslv) write(t(fdmlist[[i]]),file=paste(folder,"/",fdmroot,i,sep="")) - - fdslist=split(as.data.frame(f.sd.train),(seq(n)-1) %/% (n/nslv)) - for(i in 1:nslv) write(t(fdslist[[i]]),file=paste(folder,"/",fsdroot,i,sep="")) - } - - if(wtsprior){ - write(wts.prior.info,file=paste(folder,"/",wproot,sep="")) - } -} - -if(modeltype==MODEL_MERCK_TRUNCATED) -{ - tlist=split(truncateds,(seq(n)-1) %/% (n/nslv)) - for(i in 1:nslv) { - truncs=which(tlist[[i]]==TRUE)-1 #-1 for correct indexing in c/c++ - ftrun=file(paste(folder,"/","truncs",i,sep=""),"w") - write(truncs,ftrun) - close(ftrun) - } -} -#-------------------------------------------------- -#run program -cmdopt=100 #default to serial/OpenMP -runlocal=FALSE -cmd="openbtcli --conf" -if(Sys.which("openbtcli")[[1]]=="") # not installed in a global location, so assume current directory - runlocal=TRUE - -if(runlocal) cmd="./openbtcli --conf" - -cmdopt=system(cmd) - -if(cmdopt==101) # MPI -{ - cmd=paste("mpirun -np ",tc," openbtcli ",folder,sep="") -} - -if(cmdopt==100) # serial/OpenMP -{ - if(runlocal) - cmd=paste("./openbtcli ",folder,sep="") - else - cmd=paste("openbtcli ",folder,sep="") -} - -#cat(cmd) -system(cmd) -#system(paste("rm -f ",folder,"/config",sep="")) -#system(paste("mv ",folder,"fit ",folder,modelname,".fit",sep="")) - -res=list() -res$modeltype=modeltype -res$model=model -res$xroot=xroot; res$yroot=yroot;res$m=m; res$mh=mh; res$nd=nd; res$burn=burn -res$nadapt=nadapt; res$adaptevery=adaptevery; res$tau=tau;res$beta0=beta0;res$overalllambda=overalllambda -res$overallnu=overallnu; res$k=k; res$base=base; res$power=power; res$baseh=baseh; res$powerh=powerh -res$tc=tc; res$sroot=sroot; res$chgvroot=chgvroot;res$froot=froot;res$fsdroot=fsdroot; -res$nsprior = nsprior; res$pbd=pbd; res$pb=pb -res$pbdh=pbdh; res$pbh=pbh; res$stepwpert=stepwpert; res$stepwperth=stepwperth -res$probchv=probchv; res$probchvh=probchvh; res$minnumbot=minnumbot; res$minnumboth=minnumboth -res$printevery=printevery; res$xiroot=xiroot; res$minx=minx; res$maxx=maxx; -res$summarystats=summarystats; res$modelname=modelname -class(xi)="OpenBT_cutinfo" -res$xicuts=xi -res$fmean=fmean -res$folder=folder -class(res)="OpenBT_posterior" - -return(res) -} - - - - -#-------------------------------------------------- -#Get Predictions -#-------------------------------------------------- - -predict.openbt = function( -fit=NULL, -x.test=NULL, -f.test=matrix(1,nrow = 1, ncol = 2), -# f.discrep.mean = NULL, # delete -# f.sd.test = NULL, # delete -tc=2, -fmean=fit$fmean, -q.lower=0.025, -q.upper=0.975 -) -{ - -# model type definitions -MODEL_BT=1 -MODEL_BINOMIAL=2 -MODEL_POISSON=3 -MODEL_BART=4 -MODEL_HBART=5 -MODEL_PROBIT=6 -MODEL_MODIFIEDPROBIT=7 -MODEL_MERCK_TRUNCATED=8 -MODEL_MIXBART=9 - -#-------------------------------------------------- -# params -if(is.null(fit)) stop("No fitted model specified!\n") -if(is.null(x.test)) stop("No prediction points specified!\n") - -nslv=tc -x.test=as.matrix(x.test) -p=ncol(x.test) -n=nrow(x.test) -k=2 #Default number of models for model mixing -xproot="xp" -fproot="fp" -#fpdmroot="fpdm" -#fpdsroot="fpds" - -if(fit$modeltype==MODEL_MIXBART){ - if(is.null(f.test)){stop("No function output specified for model mixing!\n")} - k=ncol(f.test) #Number of models - #if(fit$nsprior){stop("No function discrepancy mean and/or standard deviation was provided, but model was trained with functional discrepancy.") } -} - -if(fit$modeltype==MODEL_BART || fit$modeltype==MODEL_HBART || fit$modeltype==MODEL_MERCK_TRUNCATED) -{ - fmean.out=paste(fmean) -} -if(fit$modeltype==MODEL_PROBIT || fit$modeltype==MODEL_MODIFIEDPROBIT) -{ - fmean.out=paste(qnorm(fmean)) -} -if(fit$modeltype==MODEL_MIXBART){ - fmean.out=paste(0.0) -} - -#-------------------------------------------------- -#write out config file -fout=file(paste(fit$folder,"/config.pred",sep=""),"w") -writeLines(c(fit$modelname,fit$modeltype,fit$xiroot,xproot,fproot, - paste(fit$nd),paste(fit$m), - paste(fit$mh),paste(p),paste(k),paste(tc), - fmean.out), fout) -close(fout) - -#-------------------------------------------------- -#write out data subsets -#folder=paste(".",fit$modelname,"/",sep="") -xlist=split(as.data.frame(x.test),(seq(n)-1) %/% (n/nslv)) -for(i in 1:nslv) write(t(xlist[[i]]),file=paste(fit$folder,"/",xproot,i-1,sep="")) -for(i in 1:p) write(fit$xicuts[[i]],file=paste(fit$folder,"/",fit$xiroot,i,sep="")) - -if(fit$modeltype==MODEL_MIXBART){ - #for(i in 1:k) write(f.test[,i],file=paste(fit$folder,"/",fproot,i,sep="")) - flist=split(as.data.frame(f.test),(seq(n)-1) %/% (n/nslv)) - for(i in 1:nslv) write(t(flist[[i]]),file=paste(fit$folder,"/",fproot,i-1,sep="")) - - if(fit$nsprior){ - #fdmlist=split(as.data.frame(f.discrep.mean),(seq(n)-1) %/% (n/nslv)) - #for(i in 1:nslv) write(t(fdmlist[[i]]),file=paste(fit$folder,"/",fpdmroot,i-1,sep="")) - - #fdslist=split(as.data.frame(f.sd.train),(seq(n)-1) %/% (n/nslv)) - #for(i in 1:nslv) write(t(fdslist[[i]]),file=paste(fit$folder,"/",fpdsroot,i-1,sep="")) - } -} - -#-------------------------------------------------- -#run prediction program -cmdopt=100 #default to serial/OpenMP -runlocal=FALSE -cmd="openbtcli --conf" -if(Sys.which("openbtcli")[[1]]=="") # not installed in a global location, so assume current directory - runlocal=TRUE - -if(runlocal) cmd="./openbtcli --conf" - -cmdopt=system(cmd) - -if(cmdopt==101) # MPI -{ - cmd=paste("mpirun -np ",tc," openbtpred ",fit$folder,sep="") -} - -if(cmdopt==100) # serial/OpenMP -{ - if(runlocal) - cmd=paste("./openbtpred ",fit$folder,sep="") - else - cmd=paste("openbtpred ",fit$folder,sep="") -} - -#cmd=paste("mpirun -np ",tc," openbtpred",sep="") -#cat(cmd) -system(cmd) -system(paste("rm -f ",fit$folder,"/config.pred",sep="")) - - -#-------------------------------------------------- -#format and return -res=list() - -# Old, original code for reading in the posterior predictive draws. -# res$mdraws=read.table(paste(fit$folder,"/",fit$modelname,".mdraws",0,sep="")) -# res$sdraws=read.table(paste(fit$folder,"/",fit$modelname,".sdraws",0,sep="")) -# for(i in 2:nslv) -# { -# res$mdraws=cbind(res$mdraws,read.table(paste(fit$folder,"/",fit$modelname,".mdraws",i-1,sep=""))) -# res$sdraws=cbind(res$sdraws,read.table(paste(fit$folder,"/",fit$modelname,".sdraws",i-1,sep=""))) -# } -# res$mdraws=as.matrix(res$mdraws) -# res$sdraws=as.matrix(res$sdraws) - -# Faster using data.table's fread than the built-in read.table. -# However, it does strangely introduce some small rounding error on the order of 8.9e-16. -fnames=list.files(fit$folder,pattern=paste(fit$modelname,".mdraws*",sep=""),full.names=TRUE) -res$mdraws=do.call(cbind,sapply(fnames,data.table::fread)) -fnames=list.files(fit$folder,pattern=paste(fit$modelname,".sdraws*",sep=""),full.names=TRUE) -res$sdraws=do.call(cbind,sapply(fnames,data.table::fread)) - -#Get Results for models that are not model mixing -if(fit$modeltype!=MODEL_MIXBART){ - res$mmean=apply(res$mdraws,2,mean) - res$smean=apply(res$sdraws,2,mean) - res$msd=apply(res$mdraws,2,sd) - res$ssd=apply(res$sdraws,2,sd) - res$m.5=apply(res$mdraws,2,quantile,0.5) - res$m.lower=apply(res$mdraws,2,quantile,q.lower) - res$m.upper=apply(res$mdraws,2,quantile,q.upper) - res$s.5=apply(res$sdraws,2,quantile,0.5) - res$s.lower=apply(res$sdraws,2,quantile,q.lower) - res$s.upper=apply(res$sdraws,2,quantile,q.upper) - res$pdraws=NULL - res$pmean=NULL - res$psd=NULL - res$p.5=NULL - res$p.lower=NULL - res$p.upper=NULL -} - -#Get probabilities for the probit models -if(fit$modeltype==MODEL_PROBIT || fit$modeltype==MODEL_MODIFIEDPROBIT) -{ - res$pdraws=read.table(paste(fit$folder,"/",fit$modelname,".pdraws",0,sep="")) - for(i in 2:nslv) - { - res$pdraws=cbind(res$pdraws,read.table(paste(fit$folder,"/",fit$modelname,".pdraws",i-1,sep=""))) - } - - res$pdraws=as.matrix(res$pdraws) - res$pmean=apply(res$pdraws,2,mean) - res$psd=apply(res$pdraws,2,sd) - res$p.5=apply(res$pdraws,2,quantile,0.5) - res$p.lower=apply(res$pdraws,2,quantile,q.lower) - res$p.upper=apply(res$pdraws,2,quantile,q.upper) -} - -#Get model mixing results -- using only a constant variance -- change later to match with HBART -if(fit$modeltype==MODEL_MIXBART){ - res$mmean=apply(res$mdraws,2,mean) - res$msd=apply(res$mdraws,2,sd) - res$m.5=apply(res$mdraws,2,quantile,0.5) - res$m.lower=apply(res$mdraws,2,quantile,q.lower) - res$m.upper=apply(res$mdraws,2,quantile,q.upper) - res$smean=apply(res$sdraws,2,mean) - res$ssd=apply(res$sdraws,2,sd) - res$s.5=apply(res$sdraws,2,median) - res$s.lower=apply(res$sdraws,2,quantile,q.lower) - res$s.upper=apply(res$sdraws,2,quantile,q.lower) - res$pdraws=NULL - res$pmean=NULL - res$psd=NULL - res$p.5=NULL - res$p.lower=NULL - res$p.upper=NULL -} - -res$q.lower=q.lower -res$q.upper=q.upper -res$modeltype=fit$modeltype - -class(res)="OpenBT_predict" - -return(res) -} - - -#-------------------------------------------------- -#Get Variable activity -#-------------------------------------------------- - -vartivity.openbt = function( -fit=NULL, -q.lower=0.025, -q.upper=0.975 -) -{ - -#-------------------------------------------------- -# params -if(is.null(fit)) stop("No fitted model specified!\n") -p=length(fit$xicuts) -m=fit$m -mh=fit$mh -nd=fit$nd -modelname=fit$modelname - - -#-------------------------------------------------- -#write out config file -fout=file(paste(fit$folder,"/config.vartivity",sep=""),"w") -writeLines(c(fit$modelname, - paste(nd),paste(m), - paste(mh),paste(p)) ,fout) -close(fout) - - -#-------------------------------------------------- -#run vartivity program -- it's not actually parallel so no call to mpirun. -runlocal=FALSE -if(Sys.which("openbtcli")[[1]]=="") # not installed in a global locaiton, so assume current directory - runlocal=TRUE - -if(runlocal) - cmd=paste("./openbtvartivity ",fit$folder,sep="") -else - cmd=paste("openbtvartivity ",fit$folder,sep="") - -#cmd=paste("./openbtvartivity",sep="") -system(cmd) -system(paste("rm -f ",fit$folder,"/config.vartivity",sep="")) - - -#-------------------------------------------------- -#read in result -res=list() -res$vdraws=read.table(paste(fit$folder,"/",fit$modelname,".vdraws",sep="")) -res$vdrawsh=read.table(paste(fit$folder,"/",fit$modelname,".vdrawsh",sep="")) -res$vdraws=as.matrix(res$vdraws) -res$vdrawsh=as.matrix(res$vdrawsh) - -# normalize counts -colnorm=apply(res$vdraws,1,sum) -ix=which(colnorm>0) -res$vdraws[ix,]=res$vdraws[ix,]/colnorm[ix] -colnorm=apply(res$vdrawsh,1,sum) -ix=which(colnorm>0) -res$vdrawsh[ix,]=res$vdrawsh[ix,]/colnorm[ix] - -res$mvdraws=apply(res$vdraws,2,mean) -res$mvdrawsh=apply(res$vdrawsh,2,mean) -res$vdraws.sd=apply(res$vdraws,2,sd) -res$vdrawsh.sd=apply(res$vdrawsh,2,sd) -res$vdraws.5=apply(res$vdraws,2,quantile,0.5) -res$vdrawsh.5=apply(res$vdrawsh,2,quantile,0.5) -res$vdraws.lower=apply(res$vdraws,2,quantile,q.lower) -res$vdraws.upper=apply(res$vdraws,2,quantile,q.upper) -res$vdrawsh.lower=apply(res$vdrawsh,2,quantile,q.lower) -res$vdrawsh.upper=apply(res$vdrawsh,2,quantile,q.upper) -res$q.lower=q.lower -res$q.upper=q.upper -res$modeltype=fit$modeltype - -class(res)="OpenBT_vartivity" - -return(res) -} - -#-------------------------------------------------- -#Sobol function -#-------------------------------------------------- - -sobol.openbt = function( -fit=NULL, -q.lower=0.025, -q.upper=0.975, -tc=2 -) -{ - -#-------------------------------------------------- -# params -if(is.null(fit)) stop("No fitted model specified!\n") -p=length(fit$xicuts) -m=fit$m -mh=fit$mh -nd=fit$nd -modelname=fit$modelname - - -#-------------------------------------------------- -#write out config file -fout=file(paste(fit$folder,"/config.sobol",sep=""),"w") -writeLines(c(fit$modelname,fit$xiroot, - paste(nd),paste(m), - paste(mh),paste(p),paste(fit$minx), - paste(fit$maxx),paste(tc)) ,fout) -close(fout) - - -#-------------------------------------------------- -#run Sobol program -cmdopt=100 #default to serial/OpenMP -runlocal=FALSE -cmd="openbtcli --conf" -if(Sys.which("openbtcli")[[1]]=="") # not installed in a global locaiton, so assume current directory - runlocal=TRUE - -if(runlocal) cmd="./openbtcli --conf" - -cmdopt=system(cmd) - -if(cmdopt==101) # MPI -{ - cmd=paste("mpirun -np ",tc," openbtsobol ",fit$folder,sep="") -} - -if(cmdopt==100) # serial/OpenMP -{ - if(runlocal) - cmd=paste("./openbtsobol ",fit$folder,sep="") - else - cmd=paste("openbtsobol ",fit$folder,sep="") -} - -system(cmd) -system(paste("rm -f ",fit$folder,"/config.sobol",sep="")) - - -#-------------------------------------------------- -#read in result -res=list() -draws=read.table(paste(fit$folder,"/",fit$modelname,".sobol",0,sep="")) -for(i in 2:tc) - draws=rbind(draws,read.table(paste(fit$folder,"/",fit$modelname,".sobol",i-1,sep=""))) -draws=as.matrix(draws) - - -labs=gsub("\\s+",",",apply(combn(1:p,2),2,function(zz) Reduce(paste,zz))) -res$vidraws=draws[,1:p] -res$vijdraws=draws[,(p+1):(p+p*(p-1)/2)] -res$tvidraws=draws[,(ncol(draws)-p):(ncol(draws)-1)] -res$vdraws=draws[,ncol(draws)] -res$sidraws=res$vidraws/res$vdraws -res$sijdraws=res$vijdraws/res$vdraws -res$tsidraws=res$tvidraws/res$vdraws -res$vidraws=as.matrix(res$vidraws) -colnames(res$vidraws)=paste("V",1:p,sep="") -res$vijdraws=as.matrix(res$vijdraws) -colnames(res$vijdraws)=paste("V",labs,sep="") -res$tvidraws=as.matrix(res$tvidraws) -colnames(res$tvidraws)=paste("TV",1:p,sep="") -res$vdraws=as.matrix(res$vdraws) -colnames(res$vdraws)="V" -res$sidraws=as.matrix(res$sidraws) -colnames(res$sidraws)=paste("S",1:p,sep="") -res$sijdraws=as.matrix(res$sijdraws) -colnames(res$sijdraws)=paste("S",labs,sep="") -res$tsidraws=as.matrix(res$tsidraws) -colnames(res$tsidraws)=paste("TS",1:p,sep="") -rm(draws) - -# summaries -res$msi=apply(res$sidraws,2,mean) -res$msi.sd=apply(res$sidraws,2,sd) -res$si.5=apply(res$sidraws,2,quantile,0.5) -res$si.lower=apply(res$sidraws,2,quantile,q.lower) -res$si.upper=apply(res$sidraws,2,quantile,q.upper) -names(res$msi)=paste("S",1:p,sep="") -names(res$msi.sd)=paste("S",1:p,sep="") -names(res$si.5)=paste("S",1:p,sep="") -names(res$si.lower)=paste("S",1:p,sep="") -names(res$si.upper)=paste("S",1:p,sep="") - -res$msij=apply(res$sijdraws,2,mean) -res$sij.sd=apply(res$sijdraws,2,sd) -res$sij.5=apply(res$sijdraws,2,quantile,0.5) -res$sij.lower=apply(res$sijdraws,2,quantile,q.lower) -res$sij.upper=apply(res$sijdraws,2,quantile,q.upper) -names(res$msij)=paste("S",labs,sep="") -names(res$sij.sd)=paste("S",labs,sep="") -names(res$sij.5)=paste("S",labs,sep="") -names(res$sij.lower)=paste("S",labs,sep="") -names(res$sij.upper)=paste("S",labs,sep="") - -res$mtsi=apply(res$tsidraws,2,mean) -res$tsi.sd=apply(res$tsidraws,2,sd) -res$tsi.5=apply(res$tsidraws,2,quantile,0.5) -res$tsi.lower=apply(res$tsidraws,2,quantile,q.lower) -res$tsi.upper=apply(res$tsidraws,2,quantile,q.upper) -names(res$mtsi)=paste("TS",1:p,sep="") -names(res$tsi.sd)=paste("TS",1:p,sep="") -names(res$tsi.5)=paste("TS",1:p,sep="") -names(res$tsi.lower)=paste("TS",1:p,sep="") -names(res$tsi.upper)=paste("TS",1:p,sep="") - - -res$q.lower=q.lower -res$q.upper=q.upper -res$modeltype=fit$modeltype - -class(res)="OpenBT_sobol" - -return(res) -} - -#-------------------------------------------------- -#Pareto function -#-------------------------------------------------- -# Pareto Front Multiobjective Optimization using 2 fitted BART models -mopareto.openbt = function( -fit1=NULL, -fit2=NULL, -q.lower=0.025, -q.upper=0.975, -tc=2 -) -{ - -#-------------------------------------------------- -# params -if(is.null(fit1) || is.null(fit2)) stop("No fitted models specified!\n") -#if(fit1$xicuts != fit2$xicuts) stop("Models not compatible\n") -if(fit1$nd != fit2$nd) stop("Models have different number of posterior samples\n") -p=length(fit1$xicuts) -m1=fit1$m -m2=fit2$m -mh1=fit1$mh -mh2=fit2$mh -nd=fit1$nd -modelname=fit1$modelname - - -#-------------------------------------------------- -#write out config file -fout=file(paste(fit1$folder,"/config.mopareto",sep=""),"w") -writeLines(c(fit1$modelname,fit2$modelname,fit1$xiroot, - fit2$folder, - paste(nd),paste(m1), - paste(mh1),paste(m2),paste(mh2),paste(p),paste(fit1$minx), - paste(fit1$maxx),paste(fit1$fmean),paste(fit2$fmean),paste(tc)) ,fout) -close(fout) - - -#-------------------------------------------------- -#run Pareto Front program -cmdopt=100 #default to serial/OpenMP -runlocal=FALSE -cmd="openbtcli --conf" -if(Sys.which("openbtcli")[[1]]=="") # not installed in a global locaiton, so assume current directory - runlocal=TRUE - -if(runlocal) cmd="./openbtcli --conf" - -cmdopt=system(cmd) - -if(cmdopt==101) # MPI -{ - cmd=paste("mpirun -np ",tc," openbtmopareto ",fit1$folder,sep="") -} - -if(cmdopt==100) # serial/OpenMP -{ - if(runlocal) - cmd=paste("./openbtmopareto ",fit1$folder,sep="") - else - cmd=paste("openbtmopareto ",fit1$folder,sep="") -} - -system(cmd) -system(paste("rm -f ",fit1$folder,"/config.mopareto",sep="")) - - -#-------------------------------------------------- -#read in result -res=list() -ii=1 -for(i in 1:tc) -{ - con=file(paste(fit1$folder,"/",fit1$modelname,".mopareto",i-1,sep="")) - open(con) - s=readLines(con,1) - while(length(s)>0) { - temp=as.numeric(unlist(strsplit(s," "))) - k=as.integer(temp[1]) - theta=matrix(0,ncol=k,nrow=2) - theta[1,]=temp[2:(2+k-1)] - theta[2,]=temp[(2+k):(2+2*k-1)] - a=matrix(0,nrow=p,ncol=k) - for(i in 1:p) a[i,]=temp[(2+(2+i-1)*k):(2+(2+i)*k-1)] - b=matrix(0,nrow=p,ncol=k) - for(i in 1:p) b[i,]=temp[(2+(2+p+i-1)*k):(2+(2+p+i)*k-1)] - entry=list() - entry[["theta"]]=theta - entry[["a"]]=a - entry[["b"]]=b - res[[ii]]=entry - ii=ii+1 - s=readLines(con,1) - } - close(con) -} - -# res=list() -# draws=read.table(paste(fit$folder,"/",fit$modelname,".sobol",0,sep="")) -# for(i in 2:tc) -# draws=rbind(draws,read.table(paste(fit$folder,"/",fit$modelname,".sobol",i-1,sep=""))) -# draws=as.matrix(draws) - - -# labs=gsub("\\s+",",",apply(combn(1:p,2),2,function(zz) Reduce(paste,zz))) -# res$vidraws=draws[,1:p] -# res$vijdraws=draws[,(p+1):(p+p*(p-1)/2)] -# res$tvidraws=draws[,(ncol(draws)-p):(ncol(draws)-1)] -# res$vdraws=draws[,ncol(draws)] -# res$sidraws=res$vidraws/res$vdraws -# res$sijdraws=res$vijdraws/res$vdraws -# res$tsidraws=res$tvidraws/res$vdraws -# res$vidraws=as.matrix(res$vidraws) -# colnames(res$vidraws)=paste("V",1:p,sep="") -# res$vijdraws=as.matrix(res$vijdraws) -# colnames(res$vijdraws)=paste("V",labs,sep="") -# res$tvidraws=as.matrix(res$tvidraws) -# colnames(res$tvidraws)=paste("TV",1:p,sep="") -# res$vdraws=as.matrix(res$vdraws) -# colnames(res$vdraws)="V" -# res$sidraws=as.matrix(res$sidraws) -# colnames(res$sidraws)=paste("S",1:p,sep="") -# res$sijdraws=as.matrix(res$sijdraws) -# colnames(res$sijdraws)=paste("S",labs,sep="") -# res$tsidraws=as.matrix(res$tsidraws) -# colnames(res$tsidraws)=paste("TS",1:p,sep="") -# rm(draws) - -# # summaries -# res$msi=apply(res$sidraws,2,mean) -# res$msi.sd=apply(res$sidraws,2,sd) -# res$si.5=apply(res$sidraws,2,quantile,0.5) -# res$si.lower=apply(res$sidraws,2,quantile,q.lower) -# res$si.upper=apply(res$sidraws,2,quantile,q.upper) -# names(res$msi)=paste("S",1:p,sep="") -# names(res$msi.sd)=paste("S",1:p,sep="") -# names(res$si.5)=paste("S",1:p,sep="") -# names(res$si.lower)=paste("S",1:p,sep="") -# names(res$si.upper)=paste("S",1:p,sep="") - -# res$msij=apply(res$sijdraws,2,mean) -# res$sij.sd=apply(res$sijdraws,2,sd) -# res$sij.5=apply(res$sijdraws,2,quantile,0.5) -# res$sij.lower=apply(res$sijdraws,2,quantile,q.lower) -# res$sij.upper=apply(res$sijdraws,2,quantile,q.upper) -# names(res$msij)=paste("S",labs,sep="") -# names(res$sij.sd)=paste("S",labs,sep="") -# names(res$sij.5)=paste("S",labs,sep="") -# names(res$sij.lower)=paste("S",labs,sep="") -# names(res$sij.upper)=paste("S",labs,sep="") - -# res$mtsi=apply(res$tsidraws,2,mean) -# res$tsi.sd=apply(res$tsidraws,2,sd) -# res$tsi.5=apply(res$tsidraws,2,quantile,0.5) -# res$tsi.lower=apply(res$tsidraws,2,quantile,q.lower) -# res$tsi.upper=apply(res$tsidraws,2,quantile,q.upper) -# names(res$mtsi)=paste("TS",1:p,sep="") -# names(res$tsi.sd)=paste("TS",1:p,sep="") -# names(res$tsi.5)=paste("TS",1:p,sep="") -# names(res$tsi.lower)=paste("TS",1:p,sep="") -# names(res$tsi.upper)=paste("TS",1:p,sep="") - - -# res$q.lower=q.lower -# res$q.upper=q.upper -# res$modeltype=fit$modeltype - -class(res)="OpenBT_mopareto" - -return(res) -} - - - -# Reweight predictions using output of influence.openbt(). -repredict.openbt<-function(dropid=NULL,pred=NULL,infl=NULL,idx=NULL) -{ - if(is.null(pred)) stop("Model prediction object required.\n") - if(is.null(infl)) stop("Model influence object required.\n") - if(class(pred)!="OpenBT_predict") stop("Model prediction object not recognized.\n") - if(class(infl)!="OpenBT_influence") stop("Model influence object not recognized.\n") - if(infl$method!="IS_GLOBAL") stop("Cannot reweight predictions with influence method ",infl$method,".\n") - if(is.null(dropid)) stop("No hold-out specified for reweighting.\n") - if(dropid<1) stop("Invalid dropid.\n") - if(dropid>infl$np) stop("Invalid dropid.\n") - - - #re-weight and return the predictions - res=list() - res$mdraws=pred$mdraws - if(is.null(idx)) { - for(i in 1:nrow(res$mdraws)) { - res$mdraws[i,]=res$mdraws[i,]*infl$w[i,dropid] - } - } - if(!is.null(idx) && length(idx>0)) { - for(i in 1:nrow(res$mdraws)) { - res$mdraws[i,idx]=res$mdraws[i,idx]*infl$w[i,dropid] - res$mdraws[i,-idx]=res$mdraws[i,-idx]*(1/nrow(res$mdraws)) - } - } - res$mdraws=res$mdraws*nrow(res$mdraws) - res$sdraws=pred$sdraws - res$mmean=apply(res$mdraws,2,mean) - res$smean=pred$smean - res$msd=apply(res$mdraws,2,sd) - res$ssd=pred$ssd - res$m.5=apply(res$mdraws,2,quantile,0.5) - res$m.lower=apply(res$mdraws,2,quantile,pred$q.lower) - res$m.upper=apply(res$mdraws,2,quantile,pred$q.upper) - res$s.5=pred$s.5 - res$s.lower=pred$s.lower - res$s.upper=pred$s.upper - res$pdraws=pred$pdraws - res$pmean=pred$pmean - res$psd=pred$psd - res$p.5=pred$p.5 - res$p.lower=pred$p.lower - res$p.upper=pred$p.upper - res$q.lower=pred$q.lower - res$q.upper=pred$q.upper - res$modeltype=pred$modeltype - - class(res)="OpenBT_predict" - return(res) -} - - -# Calculate various metrics of influence for a regression tree model. -influence.openbt<-function(x.infl,y.infl,fit=NULL,tc=2,method="IS_GLOBAL") -{ - if(is.null(fit)) stop("Model fit object required.\n") - if(class(fit)!="OpenBT_posterior") stop("Model fit object not recognized.\n") - - nd=fit$nd - np=nrow(x.infl) - pp=predict.openbt(fit=fit,x.test=x.infl,tc=tc) - - if(method=="IS_GLOBAL") { - # w is an nd by np matrix where the j_th column contains the weights for each - # nd posterior samples if (x_j,y_j) were removed from the dataset. - w=matrix(0,nrow=nd,ncol=np) - for(i in 1:nd) w[i,]=(2*pi)^(1/2)*pp$sdraws[i,]*exp(1/(2*pp$sdraws[i,])*(y.infl-pp$mdraws[i,])^2) - w=t(t(w)/apply(w,2,sum)) - - # Perhaps a simple indicator of which observation has a lot of influence: - infl=apply(w,2,max) - infl.ids=rep(0,nd) - for(i in 1:nd) infl.ids[i]=which(w[i,]==max(w[i,])) - } - - res=list() - res$nd=nd - res$np=np - res$x.infl=x.infl - res$y.infl=y.infl - res$w=w - res$infl=infl - res$infl.ids=infl.ids - res$method=method - - class(res)="OpenBT_influence" - - return(res) -} - - -summary.OpenBT_influence<-function(infl) -{ - cat("No summary method for object.\n") -} - -print.OpenBT_influence<-function(infl) -{ - cat("OpenBT Influence\n") - cat("metric: ",infl$method,"\n") - cat(infl$nd, " posterior samples.\n") - cat(infl$np, " observations.\n") - - if(infl$method=="IS_GLOBAL") { - idx=sort(infl$infl,decreasing=TRUE,index.return=TRUE)$ix - cat("\nTop 5 maximum weights: ", infl$infl[idx][1:5],"\n") - cat("\nTop 5 influential observations by weights: \n") - for(i in 1:5) { - cat("Input ",idx[i]," max.influence=",infl$infl[idx[i]], - " y=",infl$y.infl[idx[i]], " x=",infl$x.infl[idx[i],][1],"\n") - } - - tab=sort(table(infl$infl.ids),decreasing=TRUE) - cat("\nTop 5 influential observations by frequency: \n") - print(tab[1:5]) - } -} - -plot.OpenBT_influence<-function(infl) -{ - tab.ids=table(infl$infl.ids) - - par(mfrow=c(1,2)) - plot(1:infl$np,infl$infl,xlab="Observation Index",ylab="max.influence",type="h",lwd=2) - title(paste("metric: ",infl$method,sep="")) - plot(tab.ids,xlab="Observation Index",ylab="Frequency") - title(paste("metric: ",infl$method,sep="")) -} - - - -# Scan the trees in the posterior to extract tree properties -# Returns the mean trees as a list of lists in object mt -# and the variance trees as a list of lists in object st. -# The format is mt[[i]][[j]] is the jth posterior tree from the ith posterior -# sum-of-trees (ensemble) sample. -# The tree is encoded in 4 vectors - the node ids, the node variables, -# the node cutpoints and the node thetas. -openbt.scanpost<-function(post) -{ - fp=file(paste(post$folder,"/",post$modelname,".fit",sep=""),open="r") - if(scan(fp,what=integer(),nmax=1,quiet=TRUE) != post$nd) stop("Error scanning posterior\n") - if(scan(fp,what=integer(),nmax=1,quiet=TRUE) != post$m) stop("Error scanning posterior\n") - if(scan(fp,what=integer(),nmax=1,quiet=TRUE) != post$mh) stop("Error scanning posterior\n") - if(scan(fp,what=integer(),nmax=1,quiet=TRUE) != post$nd*post$m) stop("Error scanning posterior\n") - - # scan mean trees - numnodes=scan(fp,what=integer(),nmax=post$nd*post$m,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - ids=scan(fp,what=integer(),nmax=lenvec,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - vars=scan(fp,what=integer(),nmax=lenvec,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - cuts=scan(fp,what=integer(),nmax=lenvec,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - thetas=scan(fp,what=double(),nmax=lenvec,quiet=TRUE) - - # scan var trees - if(scan(fp,what=integer(),nmax=1,quiet=TRUE) != post$nd*post$mh) stop("Error scanning posterior\n") - snumnodes=scan(fp,what=integer(),nmax=post$nd*post$mh,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - sids=scan(fp,what=integer(),nmax=lenvec,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - svars=scan(fp,what=integer(),nmax=lenvec,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - scuts=scan(fp,what=integer(),nmax=lenvec,quiet=TRUE) - lenvec=scan(fp,what=integer(),nmax=1,quiet=TRUE) - sthetas=scan(fp,what=double(),nmax=lenvec,quiet=TRUE) - - close(fp) - - # Now rearrange things into lists of lists so its easier to manipulate - mt=list() - ndx=2 - cs.numnodes=c(0,cumsum(numnodes)) - for(i in 1:post$nd) { - ens=list() - for(j in 1:post$m) - { - tree=list(id=ids[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]], - var=vars[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]], - cut=cuts[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]], - theta=thetas[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]]) - ens[[j]]=tree - ndx=ndx+1 - } - mt[[i]]=ens - } - - - st=list() - ndx=2 - cs.numnodes=c(0,cumsum(snumnodes)) - for(i in 1:post$nd) { - ens=list() - for(j in 1:post$mh) - { - tree=list(id=sids[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]], - var=svars[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]], - cut=scuts[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]], - theta=sthetas[(cs.numnodes[ndx-1]+1):cs.numnodes[ndx]]) - ens[[j]]=tree - ndx=ndx+1 - } - st[[i]]=ens - } - - return(list(mt=mt,st=st)) -} - - - -# Save a posterior tree fit from the tmp working directory -# into a local zip file given by [file].zip -# If not file option specified, uses [model name].zip as the file. -openbt.save<-function(post,fname=NULL) -{ - if(class(post)!="OpenBT_posterior") stop("Invalid object.\n") - - if(is.null(fname)) fname=post$modelname - if(substr(fname,nchar(fname)-3,nchar(fname))!=".obt") fname=paste(fname,".obt",sep="") - - save(post,file=paste(post$folder,"/post.RData",sep="")) - - zipr(fname,paste(post$folder,"/",list.files(post$folder),sep="")) - cat("Saved posterior to ",fname,"\n") -} - - -# Load a posterior tree fit from a zip file into a tmp working directory. -openbt.load<-function(fname) -{ - if(substr(fname,nchar(fname)-3,nchar(fname))!=".obt") fname=paste(fname,".obt",sep="") - - folder=tempdir(check=TRUE) - if(!dir.exists(folder)) dir.create(folder) - tmpsubfolder=tempfile(tmpdir="") - tmpsubfolder=substr(tmpsubfolder,5,nchar(tmpsubfolder)) - tmpsubfolder=paste("openbt",tmpsubfolder,sep="") - folder=paste(folder,"/",tmpsubfolder,sep="") - if(!dir.exists(folder)) dir.create(folder) - - unzip(fname,exdir=folder) - post=loadRData(paste(folder,"/post.RData",sep="")) - post$folder=folder - - return(post) -} - - -# This is so lame. Seriously. -loadRData <- function(fname) -{ - load(fname) - get(ls()[ls() != "fname"]) -} - - -print.OpenBT_posterior<-function(post) -{ - MODEL_BT=1 - MODEL_BINOMIAL=2 - MODEL_POISSON=3 - MODEL_BART=4 - MODEL_HBART=5 - MODEL_PROBIT=6 - MODEL_MODIFIEDPROBIT=7 - MODEL_MERCK_TRUNCATED=8 - - cat("OpenBT Posterior\n") - cat("Model type: ") - if(post$modeltype==MODEL_BART) - { - cat("Bayesian Additive Regression Trees model (BART)\n") - cat("k=",post$k,"\n") - cat("tau=",post$tau,"\n") - cat("lambda=",post$overalllambda,"\n") - cat("nu=",post$overallnu,"\n") - } - if(post$modeltype==MODEL_HBART) - { - cat("Heteroscedastic Bayesian Additive Regression Trees model (HBART)\n") - } - if(post$modeltype==MODEL_PROBIT) - { - cat("Dichotomous outcome model: Albert & Chib Probit (fixed)\n") - } - if(post$modeltype==MODEL_MODIFIEDPROBIT) - { - cat("Dichotomous outcome model: Modified Albert & Chib Probit\n") - } - if(post$modeltype==MODEL_MERCK_TRUNCATED) - { - cat("Truncated BART model\n") - } - - cat("ntree=", post$m, " \n",sep="") - cat("ntreeh=",post$mh," \n",sep="") - cat(post$nd," posterior draws.\n") - summary(post$xicuts) -} - -summary.OpenBT_posterior<-function(post) -{ - cat("No summary method for object.\n") -} - -print.OpenBT_predict<-function(pred) -{ - MODEL_BT=1 - MODEL_BINOMIAL=2 - MODEL_POISSON=3 - MODEL_BART=4 - MODEL_HBART=5 - MODEL_PROBIT=6 - MODEL_MODIFIEDPROBIT=7 - MODEL_MERCK_TRUNCATED=8 - - cat("OpenBT Prediction\n") - cat(ncol(pred$mdraws), " prediction locations.\n") - cat(nrow(pred$mdraws), " realizations.\n") - if(pred$modeltype==MODEL_PROBIT || pred$modeltype==MODEL_MODIFIEDPROBIT) - { - cat("Probability quantiles: ",pred$q.lower,",",pred$q.upper,"\n") - } - cat("Mean quantiles: ",pred$q.lower,",",pred$q.upper,"\n") - cat("Variance quantiles: ",pred$q.lower,",",pred$q.upper,"\n\n") -} - -summary.OpenBT_predict<-function(pred) -{ - cat("No summary method for object.\n") -} - -print.OpenBT_sobol<-function(sobol) -{ - cat("OpenBT Sobol Indices\n") - cat(ncol(sobol$vidraws), " variables.\n") - cat(nrow(sobol$vidraws), " realizations.\n") -} - -summary.OpenBT_sobol<-function(sobol) -{ - cat("Summary of Posterior Sobol Sensitivity Indices\n") - - cat("Expected Sobol Indices (Mean)\n") - print(sobol$msi) - print(sobol$mtsi) - print(sobol$msij) - - cat("\nStd. Dev. of Sobol Indices (Mean)\n") - print(sobol$msi.sd) - print(sobol$tsi.sd) - print(sobol$sij.sd) -} - -plot.OpenBT_sobol<-function(sobol) -{ - par(mfrow=c(3,1)) - boxplot(sobol$sidraws,ylab="Sobol Sensitivity",main="First Order Sobol Indices",xlab="Variables") - boxplot(sobol$sijdraws,ylab="Sobol Sensitivity",main="Two-way Sobol Indices",xlab="Variables") - boxplot(sobol$tsidraws,ylab="Sobol Sensitivity",main="Total Sobol Indices",xlab="Variables") -} - -print.OpenBT_vartivity<-function(vartivity) -{ - cat("OpenBT Variable Activity\n") - cat(ncol(vartivity$vdraws), " variables.\n") - cat(nrow(vartivity$vdraws), " realizations.\n") -} - -summary.OpenBT_vartivity<-function(vartivity) -{ - cat("Summary of Posterior Variable Activity\n") - - p=length(vartivity$mvdraws) - if(p<11) - { - cat("Expected Variable Activity (Mean)\n") - mean.vartivity=vartivity$mvdraws - ix=sort(mean.vartivity,index.return=TRUE,decreasing=TRUE)$ix - mean.vartivity=round(mean.vartivity[ix],2) - names(mean.vartivity)=ix - print(mean.vartivity) - cat("Expected Variable Activity (Variance)\n") - sd.vartivity=vartivity$mvdrawsh - ix=sort(sd.vartivity,index.return=TRUE,decreasing=TRUE)$ix - sd.vartivity=round(sd.vartivity[ix],2) - names(sd.vartivity)=ix - print(sd.vartivity) - } - else - { - cat("Expected Variable Activity (Mean)\n") - mean.vartivity=vartivity$mvdraws - ix=sort(mean.vartivity,index.return=TRUE,decreasing=TRUE)$ix - mean.vartivity=round(mean.vartivity[ix],2) - rest=sum(mean.vartivity[11:p]) - mean.vartivity=mean.vartivity[1:11] - mean.vartivity[11]=rest - names(mean.vartivity)=c(ix[1:10],"...") - print(mean.vartivity) - cat("Expected Variable Activity (Variance)\n") - sd.vartivity=vartivity$mvdrawsh - ix=sort(sd.vartivity,index.return=TRUE,decreasing=TRUE)$ix - sd.vartivity=round(sd.vartivity[ix],2) - rest=sum(sd.vartivity[11:p]) - sd.vartivity=sd.vartivity[1:11] - sd.vartivity[11]=rest - names(sd.vartivity)=c(ix[1:10],"...") - print(sd.vartivity) - } -} - -plot.OpenBT_vartivity<-function(vartivity) -{ - par(mfrow=c(1,2)) - yrange=c(0,max(max(vartivity$vdraws),max(vartivity$vdrawsh))) - boxplot(vartivity$vdraws,ylab="% node splits",main="Mean Trees",xlab="Variables",ylim=yrange) - boxplot(vartivity$vdrawsh,ylab="% node splits",main="Variance Trees",xlab="Variables",ylim=yrange) -} - -summary.OpenBT_cutinfo<-function(xi) -{ - p=length(xi) - cat("Number of variables: ",p,"\n") - cat("Number of cutpoints per variable\n") - for(i in 1:p) - { - cat("Variable ",i,": ",length(xi[[i]])," cutpoints\n") - } -} - -print.OpenBT_cutinfo<-function(xi) -{ - summary.OpenBT_cutinfo(xi) -} - -# Takes the n x p design matrix and a scalar or vector of number of cutpoints -# per variable, returns a BARTcutinfo object with variables/cutpoints initalized. -makecuts<-function(x,numcuts) -{ - p=ncol(x) - if(length(numcuts)==1) - { - numcuts=rep(numcuts,p) - } - else if(ncol(x) != length(numcuts)) - { - cat("Number of variables does not equal length of numcuts vector!\n") - return(0) - } - - xi=vector("list",p) - minx=apply(x,2,min) - maxx=apply(x,2,max) - for(i in 1:p) - { - xinc=(maxx[i]-minx[i])/(numcuts[i]+1) - xi[[i]]=(1:numcuts[i])*xinc+minx[i] - } - - class(xi)="OpenBT_cutinfo" - return(xi) -} - -# Takes an existing OpenBT_cutinfo object xi and the particular variable to update, id, -# and the vector of cutpoints to manually assign to that variable, updates and returns -# the modified OpenBT_cutinfo object. -setvarcuts<-function(xi,id,cutvec) -{ - p=length(xi) - if(id>p || id<1) - { - cat("Invalid variable specified\n") - return(0) - } - xi[[id]]=cutvec - return(xi) -} - - -#-------------------------------------------------- -#Get model mixing weights -#-------------------------------------------------- -mixingwts.openbt = function( - fit=NULL, - x.test=NULL, - tc=2, - numwts=NULL, - q.lower=0.025, - q.upper=0.975 -) -{ - # model type definitions - MODEL_BT=1 - MODEL_BINOMIAL=2 - MODEL_POISSON=3 - MODEL_BART=4 - MODEL_HBART=5 - MODEL_PROBIT=6 - MODEL_MODIFIEDPROBIT=7 - MODEL_MERCK_TRUNCATED=8 - MODEL_MIXBART=9 - - model_types = c("mixbart", "mix_emulate") - #-------------------------------------------------- - # params - if(is.null(fit)) stop("No fitted model specified!\n") - if(is.null(x.test)) stop("No prediction points specified!\n") - if(is.null(numwts)){stop("Missing number of model weights parameter numwts.\n")} - if(!(fit$model %in% model_types)){stop("Wrong model type! This function is for mixbart.\n")} - #if(fit$modeltype!=MODEL_MIXBART){stop("Wrong model type! This function is for mixbart.\n")} - - nslv=tc - x.test=as.matrix(x.test) - p=ncol(x.test) - n=nrow(x.test) - xwroot="xw" - fitroot=".fit" - if(fit$model == "mixbart"){m = fit$m; mh = fit$mh}else{m = fit$mix_model_args$ntree; mh = fit$mix_model_args$ntreeh} - if(fit$model != "mixbart"){fitroot = ".fitmix"} - #-------------------------------------------------- - #write out config file - fout=file(paste(fit$folder,"/config.mxwts",sep=""),"w") - writeLines(c(fit$modelname,fit$modeltype,fit$xiroot,xwroot, - fitroot,paste(fit$nd),paste(m), - paste(mh),paste(p),paste(numwts),paste(tc)), fout) - close(fout) - - #-------------------------------------------------- - #write out data subsets - #folder=paste(".",fit$modelname,"/",sep="") - xlist=split(as.data.frame(x.test),(seq(n)-1) %/% (n/nslv)) - for(i in 1:nslv) write(t(xlist[[i]]),file=paste(fit$folder,"/",xwroot,i-1,sep="")) - for(i in 1:p) write(fit$xicuts[[i]],file=paste(fit$folder,"/",fit$xiroot,i,sep="")) - - #-------------------------------------------------- - #run prediction program - cmdopt=100 #default to serial/OpenMP - runlocal=FALSE - cmd="openbtcli --conf" - if(Sys.which("openbtcli")[[1]]=="") # not installed in a global location, so assume current directory - runlocal=TRUE - - if(runlocal) cmd="./openbtcli --conf" - - cmdopt=system(cmd) - - if(cmdopt==101) # MPI - { - cmd=paste("mpirun -np ",tc," openbtmixingwts ",fit$folder,sep="") - } - - if(cmdopt==100) # serial/OpenMP - { - if(runlocal) - cmd=paste("./openbtmixingwts ",fit$folder,sep="") - else - cmd=paste("openbtmixingwts ",fit$folder,sep="") - } - - system(cmd) - system(paste("rm -f ",fit$folder,"/config.mxwts",sep="")) - - - #-------------------------------------------------- - #format and return - res=list() - wt_list = list() - mean_matrix = sd_matrix = ub_matrix = lb_matrix = med_matrix = matrix(0, nrow = n, ncol = numwts) - - #Get the file names for the model weights - #--file name for model weight j using processor p is ".wjdrawsp" - for(j in 1:numwts){ - #Get the files for weight j - tagname = paste0(".w", j,"draws*") - fnames=list.files(fit$folder,pattern=paste(fit$modelname,tagname,sep=""),full.names=TRUE) - - #Bind the posteriors for weight j across all x points -- npost X n data - wt_list[[j]] = do.call(cbind,sapply(fnames,data.table::fread)) - - #Now populate the summary stat matrices -- n X k matrices - mean_matrix[,j] = apply(wt_list[[j]], 2, mean) - sd_matrix[,j] = apply(wt_list[[j]], 2, sd) - med_matrix[,j] = apply(wt_list[[j]], 2, median) - lb_matrix[,j] = apply(wt_list[[j]], 2, quantile,q.lower) - ub_matrix[,j] = apply(wt_list[[j]], 2, quantile,q.upper) - } - - #Save the list of posterior draws -- each list element is an npost X n dataframe - res$wdraws = wt_list - - #Get model mixing results - res$wmean=mean_matrix - res$wsd=sd_matrix - res$w.5=med_matrix - res$w.lower=lb_matrix - res$w.upper=ub_matrix - - res$q.lower=q.lower - res$q.upper=q.upper - res$modeltype=fit$modeltype - - class(res)="OpenBT_mixingwts" - return(res) -} - - -#---------------------------------------------- -# Model mixing and emulation interface -#---------------------------------------------- -openbt.mix_emulate = function( -mix_model_data = list(y_train = NULL, x_train = NULL), -emu_model_data = list('model1'=list(z_train = NULL, x_train = NULL),'model2'=list(z_train = NULL, x_train = NULL)), -mix_model_args = list('ntree'=10,'ntreeh'=1,'k'=2,'overallnu'=10,'overallsd'=NA,'power'=2.0,'base'=0.95,'powerh'=NA,'baseh'=NA), -emu_model_args = matrix(c(100,1,2,10,NA,2.0,0.95,NA,NA), nrow = 2, ncol = 9,byrow = TRUE, - dimnames = list(paste0('model',1:2),c('ntree','ntreeh','k','overallnu','overallsd','power','base','powerh','baseh'))), -discrep_model_args = list('k' = 10, 'beta0' = NA), -ndpost=1000, nskip=100,nadapt=1000,adaptevery=100, -pbd=.7, -pb=.5, -stepwpert=.1, -probchv=.1, -minnumbot=5, -tc=2, -printevery=100, -numcut=100, -xicuts=NULL, -sigmav_list=NULL, -chv_list = NULL, -summarystats = FALSE, -model=NULL, -modelname="model" -#sigmav=rep(1,length(y.train)), -#chv = cor(x.train,method="spearman"), -) -{ -#-------------------------------------------------- -# model type definitions and check default arguments -modeltype=0 -MODEL_MIX_EMULATE=1 #Full problem -MODEL_MIX_ORTHOGONAL=2 # Full problem with orthogonal discrepancy - -modeltype_list = c('mix_emulate','mix_orthogonal') - -if(is.null(model)){ - stop(cat("Enter a model type. Valid types include:\n", paste(modeltype_list,collapse = ', '))) -} - -if(!(model %in% modeltype_list)){ - stop(cat("Invalid model type. Valid types include:\n", paste(modeltype_list,collapse = ', '))) -} - -if(model == 'mix_emulate'){ - modeltype = MODEL_MIX_EMULATE - if(is.null(mix_model_args$overallsd)) ntreeh=1 -} - -#-------------------------------------------------- -# Define terms -nd = ndpost -burn = nskip -nummodels = length(emu_model_data) - -#-------------------------------------------------- -# Process the data -# Model mixing data -if(is.null(mix_model_data$x_train) || is.null(mix_model_data$y_train)){ - stop('Invalid model mixing inputs. Either x_train or y_train is NULL.') -} - -# Cast x_train data to matrix -if(!is.matrix(mix_model_data$x_train)){ - mix_model_data$x_train = matrix(is.null(mix_model_data$x_train), ncol = 1) -} - -# Define terms -n = nrow(mix_model_data$x_train) -p = ncol(mix_model_data$x_train) -ymean = mean(mix_model_data$y_train) - -# Emulation data -nc_vec = 0 -pc_vec = 0 -for(l in 1:nummodels){ - - if(is.null(emu_model_data[[l]]$x_train) || is.null(emu_model_data[[l]]$z_train)){ - stop(paste0('Invalid emulation inputs. Either x_train or y_train is NULL in model ', l,'.')) - } - - # Cast the x_train data to matrix - if(!is.matrix(mix_model_data$x_train)){ - emu_model_data[[l]]$x_train = matrix(is.null(emu_model_data[[l]]$x_train), ncol = 1) - } - - # Define terms - nc_vec[l] = nrow(emu_model_data[[l]]$x_train) - pc_vec[l] = ncol(emu_model_data[[l]]$x_train) - -} - -# Get means of the model runs and mean center the data -zmean_vec = 0 -for(l in 1:nummodels){ - zmean_vec[l] = mean(emu_model_data[[l]]$z_train) - emu_model_data[[l]]$z_train = emu_model_data[[l]]$z_train - zmean_vec[l] -} - -#-------------------------------------------------- -# cutpoints -# get joint input matrix -col_list1 = colnames(mix_model_data$x_train) -col_list2 = unique(unlist(lapply(emu_model_data, function(x) colnames(x$x_train)))) - -if(sum(sort(col_list1) != sort(col_list2))>0){ - stop(cat("The input variables in model mixing differ from the input variables in emulation.", - "\nModel Mixing Inputs: \n", col_list1, - "\nAll Emulation Inputs: \n", col_list2)) -} - -# Create a master list of inputs -- includes all from field obs and computer models -x_matrix = mix_model_data$x_train -xc_col_list = list() -for(l in 1:nummodels){ - # get matrix and Fill columns that are not present with NA - x_new = matrix(0, nrow = nc_vec[l], ncol = p, dimnames = list(NULL, col_list1)) - xc_cols = 0 - ind = 1 - for(j in 1:length(col_list1)){ - cname = col_list1[j] - if(cname %in% colnames(emu_model_data[[l]]$x_train)){ - # Add the corresponding data to the x_matrix - x_new[,cname] = emu_model_data[[l]]$x_train[,cname] - # Keep track of which columns are in the inputs for the lth computer model - xc_cols[ind] = j - ind = ind + 1 - }else{ - # If the predictor is not in the input space of lth computer model, then just use NA in the x_matrix - x_new[,cname] = rep(NA, nc_vec[l]) - } - } - xc_col_list = append(xc_col_list, list(xc_cols)) - x_matrix = rbind(x_matrix, x_new) -} -names(xc_col_list) = paste0('model',1:nummodels) - -# Use x_matrix to create cutpoints -if(!is.null(xicuts)){ - xi=xicuts -}else{ - xi=vector("list",p) - minx_temp=apply(t(x_matrix),1,function(x) min(x, na.rm = TRUE)) - maxx_temp=apply(t(x_matrix),1,function(x) max(x, na.rm = TRUE)) - - maxx = round(maxx_temp,1) + ifelse((round(maxx_temp,1)-maxx_temp)>0,0,0.1) - minx = round(minx_temp,1) - ifelse((minx_temp - round(minx_temp,1))>0,0,0.1) - for(i in 1:p){ - xinc=(maxx[i]-minx[i])/(numcut+1) - xi[[i]]=(1:numcut)*xinc+minx[i] - } -} - -#------------------------------------------------ -# Priors -#------------------------------------------------ -# Model Mixing -# -- Terminal node parameters -rgy = range(mix_model_data$y_train) -m = mix_model_args$ntree -k = mix_model_args$k - -tau_disc = (rgy[2]-rgy[1])/(2*sqrt(m)*discrep_model_args$k) -tau_wts = (1)/(2*sqrt(m)*k) -beta_wts = 1/(2*m) -if(is.na(discrep_model_args$beta0)){ - beta_disc = mean(mix_model_data$y_train)/m -}else{ - beta_disc = discrep_model_args$beta0/m -} - -# -- Error variance -if(is.na(mix_model_args$overallsd)){ - mix_model_args$overallsd = sd(mix_model_data$y_train) -} -overalllambda_mix = mix_model_args$overallsd^2 -overallnu_mix = mix_model_args$overallnu - -# -- Tree prior (mean and variance trees) -power_mix = mix_model_args$power -base_mix = mix_model_args$base -powerh_mix = mix_model_args$powerh -baseh_mix = mix_model_args$baseh - -if(is.na(powerh_mix)){ - powerh_mix=power_mix -} -if(is.na(baseh_mix)){ - baseh_mix=base_mix -} - -# Emulation with BART -tau_emu = 0 -base_emu = baseh_emu = 0 -power_emu = powerh_emu = 0 -overalllambda_emu = overallnu_emu = 0 -for(l in 1:nummodels){ - # -- Terminal node parameters - m = emu_model_args[l,'ntree'] - k = emu_model_args[l,'k'] - rgz = range(emu_model_data[[l]]$z_train) - tau_emu[l] = (rgz[2]-rgz[1])/(2*sqrt(m)*k) - - # -- Error variance - if(is.na(emu_model_args[l,'overallsd'])){ - emu_model_args[l,'overallsd'] = sd(emu_model_data[[l]]$z_train) - } - overalllambda_emu[l] = emu_model_args[l,'overallsd']^2 - overallnu_emu[l] = emu_model_args[l,'overallnu'] - - # -- Tree prior (mean and variance trees) - power_emu[l] = emu_model_args[l,'power'] - base_emu[l] = emu_model_args[l,'base'] - powerh_emu[l] = emu_model_args[l,'powerh'] - baseh_emu[l] = emu_model_args[l,'baseh'] - - if(is.na(powerh_emu[l])) { - powerh_emu[l] = power_emu[l] - } - if(is.na(baseh_emu[l])) { - baseh_emu[l]=base_emu[l] - } -} - -#-------------------------------------------------- -# Birth and death probability -pbdh=pbd -pbh=pb -if(length(pbd)>1) { - pbdh=pbd[2] - pbd=pbd[1] -} -if(length(pb)>1) { - pbh=pb[2] - pb=pb[1] -} - - -#-------------------------------------------------- -# Process other arguments -#-------------------------------------------------- -stepwperth=stepwpert -if(length(stepwpert)>1) { - stepwperth=stepwpert[2] - stepwpert=stepwpert[1] -} - -probchvh=probchv -if(length(probchv)>1) { - probchvh=probchv[2] - probchv=probchv[1] -} - -minnumboth=minnumbot -if(length(minnumbot)>1) { - minnumboth=minnumbot[2] - minnumbot=minnumbot[1] -} - -# Set default argument for the sigmavs -if(is.null(sigmav_list)){ - for(l in 0:nummodels){ - sigmav_n = ifelse(l == 0, n, nc_vec[l]+n) - sigmav_list[[l+1]] = rep(1,sigmav_n) - } -}else{ - if(!is.list(sigmav_list)){ - stop("Invalid data type: sigmav_list must be a list object with length K+1.") - } - for(l in 0:nummodels){ - sigmav_n = ifelse(l == 0, n, nc_vec[l]+n) - if(length(sigmav_list[[l+1]]) != sigmav_n){ - stop(cat("Invalid sigmav_list entry at item",l+1,"\n Length of sigmav_list[[l]] is incorrect. Required lengths are listed below: \n", - "\t Mixing (l=1): ", n, " (number of field obs) \n", - "\t Emulation (l>1): ", n + nc_vec[l+1], " (number of field obs + number of model runs))")) - } - - } -} - -# Set the default argument for the chvs -if(is.null(chv_list)){ - for(l in 0:nummodels){ - if(l == 0){ - chv_data = mix_model_data$x_train - }else{ - xmixtemp = mix_model_data$x_train[,unlist(xc_col_list[l])] - if(length(unlist(xc_col_list[l])) == 1){xmixtemp = matrix(xmixtemp,ncol = 1)} - chv_data = rbind(emu_model_data[[l]]$x_train, xmixtemp) - } - chv_list[[l+1]] = cor(chv_data,method="spearman") - } -}else{ - if(!is.list(chv_list)){ - stop("Invalid data type: chv_list must be a list object with length K+1.") - } - for(l in 0:nummodels){ - chv_p = ifelse(l == 0, p, pc_vec[l]) - if(chv_list[[l+1]] != chv_p){ - stop(paste0("Invalid chv_list entry at item k = ",l+1,". Number of rows & columns in chv_list[[k]] must match", - " the number of columns in the design matrix for emulator k-1. If k = 1, then the number of rows & columns in", - " chv_list[[1]] must match the number of columns in the design matrix for the field observations.")) - } - } -} - -#-------------------------------------------------- -# Print statements -#-------------------------------------------------- -if(modeltype==MODEL_MIX_EMULATE){ - cat("Model: KOH Bayesian Additive Model Mixing Trees\n") -} -if(modeltype==MODEL_MIX_ORTHOGONAL){ - cat("Model: Orthogonal Bayesian Additive Model Mixing Trees\n") -} - -#-------------------------------------------------- -#write out config file -#-------------------------------------------------- -# Field data roots -yroots="yf" -xroots=c("xf",paste0("xc",1:nummodels)) -zroots=paste0("zc",1:nummodels) -sroots=c("sf",paste0("sc",1:nummodels)) -chgvroots=c("chgvf",paste0("chgvc",1:nummodels)) -idroots="id" - -# cut points root -xiroot="xi" - -# Design column numbers for computer models -xc_design_cols = 0 -h = 1 -# Format -- number of cols for model 1, col numbers for model 1,...,number of cols for model K, col numbers for model K -for(l in 1:nummodels){ - xc_design_cols[h] = paste(pc_vec[l]) - xc_design_cols[(h+1):(h+pc_vec[l])] = paste(xc_col_list[[l]]) - h = h+pc_vec[l] + 1 -} - -# Flatten the model arguments -m_vec = paste(c(mix_model_args$ntree, emu_model_args[,'ntree'])) -mh_vec = paste(c(mix_model_args$ntreeh, emu_model_args[,'ntreeh'])) -tau_vec = paste(c(tau_disc, tau_wts, tau_emu)) #Two mixing parameters, K emu parameters -beta_vec = paste(c(beta_disc, beta_wts)) #Only mixing parameters -base_vec = paste(c(base_mix, base_emu)) -baseh_vec = paste(c(baseh_mix, baseh_emu)) -power_vec = paste(c(power_mix, power_emu)) -powerh_vec = paste(c(powerh_mix, powerh_emu)) -lambda_vec = paste(c(overalllambda_mix, overalllambda_emu)) -nu_vec = paste(c(overallnu_mix, overallnu_emu)) -means_vec = c(ymean,zmean_vec) - -# Bind all of the K+1 dimensional vectors together into one matrix -# Then flatten by row -- this makes things easier when reading in the data in c++ -# This way, we read in all data for mixing, then read in the data for each emulator one at a time -info_matrix = data.frame(xroots, yzroots = c(yroots,zroots),sroots,chgvroots,means = paste(means_vec), - m_vec, mh_vec, base_vec, baseh_vec, power_vec, powerh_vec, lambda_vec, nu_vec) - -info_vec = unlist(as.vector(t(info_matrix))) - -# Creating temp directory and config file -folder=tempdir(check=TRUE) -if(!dir.exists(folder)) dir.create(folder) -tmpsubfolder=tempfile(tmpdir="") -tmpsubfolder=substr(tmpsubfolder,5,nchar(tmpsubfolder)) -tmpsubfolder=paste("openbt",tmpsubfolder,sep="") -folder=paste(folder,"/",tmpsubfolder,sep="") -if(!dir.exists(folder)) dir.create(folder) -fout=file(paste(folder,"/config",sep=""),"w") - -# Write lines to the config file fout -# writeLines(c(paste(modeltype),paste(nummodels), -# xroots,yroot,zroots,sroots,chgvroots,paste(zmean_vec),xc_design_cols, -# paste(nd),paste(burn),paste(nadapt),paste(adaptevery), -# m_vec,mh_vec,tau_vec,beta_vec,lambda_vec,nu_vec,base_vec,power_vec,baseh_vec,powerh_vec, -# paste(pbd),paste(pb),paste(pbdh),paste(pbh),paste(stepwpert),paste(stepwperth), -# paste(probchv),paste(probchvh),paste(minnumbot),paste(minnumboth), -# paste(printevery),paste(xiroot),paste(tc),paste(modelname),paste(summarystats)),fout) - -writeLines(c(paste(modeltype),paste(nummodels),info_vec,xc_design_cols, - paste(nd),paste(burn),paste(nadapt),paste(adaptevery), - tau_vec,beta_vec, - paste(pbd),paste(pb),paste(pbdh),paste(pbh),paste(stepwpert),paste(stepwperth), - paste(probchv),paste(probchvh),paste(minnumbot),paste(minnumboth), - paste(printevery),paste(xiroot),paste(tc),paste(modelname),paste(summarystats)),fout) -close(fout) - -#-------------------------------------------------- -#write out data subsets -#-------------------------------------------------- -nslv=tc-1 -# Model Mixing data -ylist=split(mix_model_data$y_train,(seq(n)-1) %/% (n/nslv)) -for(i in 1:nslv) write(ylist[[i]],file=paste(folder,"/",yroots,i,sep="")) - -xlist=split(as.data.frame(mix_model_data$x_train),(seq(n)-1) %/% (n/nslv)) -for(i in 1:nslv) write(t(xlist[[i]]),file=paste(folder,"/",xroots[1],i,sep="")) - -# Emulation Data -#idlist = vector(mode='list', length = nslv) -for(l in 1:nummodels){ - zlist=split(emu_model_data[[l]]$z_train,(seq(nc_vec[l])-1) %/% (nc_vec[l]/nslv)) - for(i in 1:nslv) write(zlist[[i]],file=paste(folder,"/",zroots[l],i,sep="")) - - xlist=split(as.data.frame(emu_model_data[[l]]$x_train),(seq(nc_vec[l])-1) %/% (nc_vec[l]/nslv)) - for(i in 1:nslv) write(t(xlist[[i]]),file=paste(folder,"/",xroots[l+1],i,sep="")) -} - -# Write the ids per mpi -#for(i in 1:nslv) write(t(idlist[[i]]),file=paste(folder,"/",idroots,i,sep="")) - -# Variance and rotation data -for(l in 0:nummodels){ - ntemp = ifelse(l==0, n, nc_vec[l]+n) - slist=split(sigmav_list[[l+1]],(seq(ntemp)-1) %/% (ntemp/nslv)) - for(i in 1:nslv) write(slist[[i]],file=paste(folder,"/",sroots[l+1],i,sep="")) - - chv_list[[l+1]][is.na(chv_list[[l+1]])]=0 # if a var as 0 levels it will have a cor of NA so we'll just set those to 0. - write(chv_list[[l+1]],file=paste(folder,"/",chgvroots[l+1],sep="")) -} - -# Cutpoints -for(i in 1:p) write(xi[[i]],file=paste(folder,"/",xiroot,i,sep="")) -rm(chv_list) - -#-------------------------------------------------- -#run program -#-------------------------------------------------- -cmdopt=100 #default to serial/OpenMP -runlocal=FALSE -cmd="openbtmixing --conf" -if(Sys.which("openbtmixing")[[1]]=="") # not installed in a global location, so assume current directory - runlocal=TRUE - -if(runlocal) cmd="./openbtmixing --conf" - -cmdopt=system(cmd) - -if(cmdopt==101) # MPI -{ - cmd=paste("mpirun -np ",tc," openbtmixing ",folder,sep="") -} - -if(cmdopt==100) # serial/OpenMP -{ - if(runlocal) - cmd=paste("./openbtmixing ",folder,sep="") - else - cmd=paste("openbtmixing ",folder,sep="") -} - -system(cmd) -#system(paste("rm -f ",folder,"/config",sep="")) - -# Collect results -res=list() -res$modeltype=modeltype; res$model=model -res$xroot=xroots; res$yroot=yroots; res$zroots=zroots;res$sroot=sroots; res$chgvroot=chgvroots; res$xc_col_list=xc_col_list -res$means = means_vec; res$nummodels = nummodels; -res$nd=nd; res$burn=burn; res$nadapt=nadapt; res$adaptevery=adaptevery; -res$mix_model_args = mix_model_args; res$emu_model_args = emu_model_args; -res$pbd=pbd; res$pb=pb; res$pbdh=pbdh; res$pbh=pbh; res$stepwpert=stepwpert; res$stepwperth=stepwperth -res$probchv=probchv; res$probchvh=probchvh; res$minnumbot=minnumbot; res$minnumboth=minnumboth -res$printevery=printevery; res$xiroot=xiroot; res$minx=minx; res$maxx=maxx; -res$summarystats=summarystats; res$tc=tc; res$modelname=modelname -class(xi)="OpenBT_cutinfo" -res$xicuts=xi -res$folder=folder -class(res)="OpenBT_posterior" - -#res$k=k_vec; res$m=m_vec; res$mh=mh_vec;res$tau=tau_vec; res$beta_vec=beta_vec; res$overalllambda=lambda_vec; res$overallnu=nu_vec; -#res$base=base_vec; res$power=power_vec; res$baseh=baseh_vec; res$powerh=powerh_vec - -return(res) - -} - -# Predict function for mixing and emulation -openbt.predict_mix_emulate = function(fit=NULL, x_test=NULL,tc=2,q.lower=0.025,q.upper=0.975){ - # model type definitions - MODEL_MIX_EMULATE=1 #Full problem - MODEL_MIX_ORTHOGONAL=2 # Full problem with orthogonal discrepancy - - #-------------------------------------------------- - # Define objects - if(is.null(fit)) stop("No fitted model specified!\n") - if(is.null(x_test)) stop("No prediction points specified for !\n") - nslv=tc - means_vec=fit$means - p=ncol(x_test) - n=nrow(x_test) - nummodels = fit$nummodels - xproot = 'xp' - - x_test=as.matrix(x_test) - x_col_list = fit$xc_col_list - - #-------------------------------------------------- - # Flatten any vector arguments that need to be passed to pred - # Design column numbers for computer models -- flattens the x_col_list - xc_design_cols = 0 - h = 1 - # Format -- number of cols for model 1, col numbers for model 1,...,number of cols for model K, col numbers for model K - for(l in 1:nummodels){ - pc = length(x_col_list[[l]]) - xc_design_cols[h] = paste(pc) - xc_design_cols[(h+1):(h+pc)] = paste(fit$xc_col_list[[l]]) - h = h + pc + 1 - } - - # Flatten the m and mh vectors - mvec = fit$mix_model_args$ntree - mvec[2:(nummodels+1)] = fit$emu_model_args[,'ntree'] - mhvec = fit$mix_model_args$ntreeh - mhvec[2:(nummodels+1)] = fit$emu_model_args[,'ntreeh'] - - # Create an info matrix and vector of arguments that are required for the individual models - info_matrix = data.frame(means = paste(means_vec),mvec, mhvec) - info_vec = unlist(as.vector(t(info_matrix))) - - #-------------------------------------------------- - #write out config file - fout=file(paste(fit$folder,"/config.pred",sep=""),"w") - writeLines(c(fit$modelname,fit$modeltype,fit$xiroot,xproot, - paste(fit$nd),paste(nummodels),paste(p),paste(tc), - info_vec,xc_design_cols), fout) - close(fout) - - #-------------------------------------------------- - #write out data subsets - #folder=paste(".",fit$modelname,"/",sep="") - xlist=split(as.data.frame(x_test),(seq(n)-1) %/% (n/nslv)) - for(i in 1:nslv) write(t(xlist[[i]]),file=paste(fit$folder,"/",xproot,i-1,sep="")) - for(i in 1:p) write(fit$xicuts[[i]],file=paste(fit$folder,"/",fit$xiroot,i,sep="")) - - #-------------------------------------------------- - #run prediction program - cmdopt=100 #default to serial/OpenMP - runlocal=FALSE - cmd="openbtcli --conf" - if(Sys.which("openbtcli")[[1]]=="") # not installed in a global location, so assume current directory - runlocal=TRUE - - if(runlocal) cmd="./openbtcli --conf" - - cmdopt=system(cmd) - - if(cmdopt==101) # MPI - { - cmd=paste("mpirun -np ",tc," openbtmixingpred ",fit$folder,sep="") - } - - if(cmdopt==100) # serial/OpenMP - { - if(runlocal) - cmd=paste("./openbtmixingpred ",fit$folder,sep="") - else - cmd=paste("openbtmixingpred ",fit$folder,sep="") - } - - #cmd=paste("mpirun -np ",tc," openbtpred",sep="") - #cat(cmd) - system(cmd) - system(paste("rm -f ",fit$folder,"/config.pred",sep="")) - - #-------------------------------------------------- - #format and return - res_model=vector(mode = 'list', length = nummodels+2) - names(res_model) = c("mixmodel", paste0("emulator",1:nummodels),"Information") - res = vector(mode = 'list', length = 2) - names(res) = c('mdraws', 'sdraws') - - fnames=list.files(fit$folder,pattern=paste(fit$modelname,".mdraws*",sep=""),full.names=TRUE) - res$mdraws=do.call(cbind,sapply(fnames,data.table::fread)) - fnames=list.files(fit$folder,pattern=paste(fit$modelname,".sdraws*",sep=""),full.names=TRUE) - res$sdraws=do.call(cbind,sapply(fnames,data.table::fread)) - - #Separate results into mixing vs emulation - for(i in 0:nummodels){ - # Initialize the list elements - res_model[[i+1]] = vector(mode = 'list', length = 12) - names(res_model[[i+1]]) = paste0(rep(c("m","s"),6), - rep(c('draws','mean','sd','.5','.lower','.upper'),each = 2)) - ind = seq(i*fit$nd+1, (i+1)*fit$nd, 1) - - # Summarize the data for the selected model - res_model[[i+1]]$mdraws = res$mdraws[ind,] - res_model[[i+1]]$sdraws = res$sdraws[ind,] - res_model[[i+1]]$mmean=apply(res_model[[i+1]]$mdraws,2,mean) - res_model[[i+1]]$smean=apply(res_model[[i+1]]$sdraws,2,mean) - res_model[[i+1]]$msd=apply(res_model[[i+1]]$mdraws,2,sd) - res_model[[i+1]]$ssd=apply(res_model[[i+1]]$sdraws,2,sd) - res_model[[i+1]]$m.5=apply(res_model[[i+1]]$mdraws,2,quantile,0.5) - res_model[[i+1]]$s.5=apply(res_model[[i+1]]$sdraws,2,quantile,0.5) - res_model[[i+1]]$m.lower=apply(res_model[[i+1]]$mdraws,2,quantile,q.lower) - res_model[[i+1]]$s.lower=apply(res_model[[i+1]]$sdraws,2,quantile,q.lower) - res_model[[i+1]]$m.upper=apply(res_model[[i+1]]$mdraws,2,quantile,q.upper) - res_model[[i+1]]$s.upper=apply(res_model[[i+1]]$sdraws,2,quantile,q.upper) - } - - res_model[[nummodels+2]] = list(q.lower=q.lower,q.upper=q.upper,modeltype=fit$modeltype) - rm(res) - class(res_model)="OpenBT_predict" - - return(res_model) -} diff --git a/misc/openbt.py b/misc/openbt.py deleted file mode 100644 index 0cbad00..0000000 --- a/misc/openbt.py +++ /dev/null @@ -1,903 +0,0 @@ -from re import M -from types import ModuleType -import invoke # A task execution tool; unused -from sklearn.base import ClassifierMixin, RegressorMixin, BaseEstimator -# ^ Two of these aren't used yet; B.E. is the parent class of OPENBT -import tempfile # Generate temporary directories/files -from pathlib import Path # To write filepaths -from collections import defaultdict # For setting dictionaries with default values; unused -from scipy.stats import spearmanr # For calculating the spearman coeff -import pickle # For writing to compressed (pickled) test files; unused -import numpy as np # For manipulating arrays, doing math, etc. -import subprocess # For running a process in the command line -from scipy.stats import norm # Similar to pnorm, rnorm, qnorm, etc. in R -import sys; import os # For exit() and checking the config files -import itertools # To makes labels for sobol variable pairs -import pandas as pd # Sobol has column names, so each returned array has to be a pandas df - -class OPENBT(BaseEstimator): - """Class to run openbtcli by using sklearn-like calls""" - def __init__(self, **kwargs): - self.ndpost = 1000 # All of these are defaults; overwriting comes later - self.nskip = 100 - self.nadapt = 1000 - self.power = 2.0 - self.base = 0.95 - self.tc = 2 - self.pbd = 0.7 - self.pb = 0.5 - self.stepwpert = 0.1 - self.probchv = 0.1 - self.minnumbot = 5 - self.printevery = 100 - self.numcut = 100 - self.adaptevery = 100 - # Here are the extra parameters that I added, since I wanted to customize them: - self.overallsd = None; self.overallnu = None - self.k = None - self.ntree = None; self.ntreeh = None - self.truncateds = None - self.nummodels = 2 - self.nsprior = False - self.wtsprior = False - # hyperthread = False # Supposed to let you run processes on all hyperthreads, not just each core - # I added a few more if statements in _define_params() to make these go smoothly - self.modelname = "model" - self.summarystats = "FALSE" - self.__dict__.update((key, value) for key, value in kwargs.items()) - # print(self.__dict__) - self._set_model_type() - - - def fit(self, X, y, F = None, S = None): - """ - Writes out data and fits model. X and y should be numpy arrays, - but I added some checks to allow 1-D lists to be accepted. No promises on - attempting to pass in a grid of lists within a list as an array. - """ - # Cast X to np.array if the input is a list - if (type(X) == list): - X = np.array(X) - print("Completed list-to-numpy_array conversion for X. Be careful about row/column mixups!") - - # Cast y to np.array if the input is a list - if (type(y) == list): - y = np.array(y) - print("Completed list-to-numpy_array conversion for y. Be careful about row/column mixups!") - - # Reshape X_train to be pxn --- keeping this to remain in sync with remainder of code - self.X_train = np.transpose(X) - - if (len(self.X_train.shape) == 1): # If shape is (n, ), change it to (n, 1): - self.X_train = self.X_train.reshape(1, len(X)) - - if (len(y.shape) == 1): # Same thing for y - y = y.reshape(len(y), 1) - - # Set the y and fmean information - self.y_orig = y - self.fmean = np.mean(y) - - if self.model == 'mixbart': - # Check if F is specified - if F is None: - raise ValueError("Please input the F matrix for model mixing.") - # Set prior type (stationary vs. nonstationary) - if S is None: - self.nsprior = False - else: - self.nsprior = True - # Check the dimensions of F and S - if (not F.shape == S.shape): - raise ValueError("Dimensions of F and S do not match.") - - - # Cast F and S to arrays (only needed for S when using nsprior) - if isinstance(F, list): - F = np.array(F) - - if isinstance(S, list) and self.nsprior: - S = np.array(F) - - # Set the data as Class objects - self.nummodels = F.shape[1] - self.F_train = F - self.S_train = S - - # Set other default arguments - self.wtsprior = False # T/F did user pass in list of hyperparameters for each model weight (rather than using the standard ones) - - # Overwrite any default parameters - self._define_params() - print("Writing config file and data") - self._write_config_file() - self._write_data() - print("Running model...") - self._run_model() - - # New: Return attributes to be saved as a separate fit object: - res = {} # Missing the influence attribute from the R code (skip for now) - self.maxx = np.ceil(np.max(self.X_train, axis=1)) - self.minx = np.floor(np.min(self.X_train, axis=1)) - keys = list(self.__dict__.keys()) # Ones we want to save into the object - for later_key in ['p_test', 'n_test', 'q_lower', 'q_upper', 'xproot', - 'mdraws', 'sdraws', 'mmean', 'smean', 'msd', 'ssd', 'm_5', 's_5', - 'm_lower', 's_lower', 'm_upper', 's_upper']: - if later_key in keys: - keys.remove(later_key) # Taking away possible residual keys from fitp, fits, or fitv - for key in keys: - res[key] = self.__dict__[key] - res['minx'] = self.minx; res['maxx'] = self.maxx - return res - - - def _set_model_type(self): - models = {"bt": 1, - "binomial": 2, - "poisson": 3, - "bart": 4, - "hbart": 5, - "probit": 6, - "modifiedprobit": 7, - "merck_truncated": 8, - "mixbart":9 - } - if self.model not in models: - raise KeyError("Not supported model type. Please specify a model.") - self.modeltype = models[self.model] - k_map = {1: 2, 2: 2, 3: 2, 4: 2, 5: 5, 6: 1, 7: 1, 8: 2, 9:2} - onu_map = {1: 1, 2: 1, 3: 1, 4: 10, 5: 10, 6: -1, 7: -1, 8: 10, 9:10} - ntree_map = {1: 1, 2: 1, 3: 1, 4: 200, 5: 200, 6: 200, 7: 200, 8: 200, 9:25} - ntreeh_map = {1: 1, 2: 1, 3: 1, 4: 1, 5: 40, 6: 1, 7: 40, 8: 1, 9:1} - if (self.k is None): - print("Overwriting k to agree with the model's default") - self.k = k_map[self.modeltype] - if (self.overallnu is None): - print("Overwriting overallnu to agree with the model's default") - self.overallnu = onu_map[self.modeltype] - if (self.ntree is None): - print("Overwriting ntree to agree with the model's default") - self.ntree = ntree_map[self.modeltype] - if (self.ntreeh is None): - print("Overwriting ntreeh to agree with the model's default") - self.ntreeh = ntreeh_map[self.modeltype] - # overallsd will be done in the define_params function. - - - # --- the try case is checking to see if a list of length 2 is the item for each dictionary key + "h" - def _update_h_args(self, arg): - try: - self.__dict__[arg + "h"] = self.__dict__[arg][1] - self.__dict__[arg] = self.__dict__[arg][0] - except: - self.__dict__[arg + "h"] = self.__dict__[arg] - # ^ Right now it seems to do the 'except' step for all args it's used with, FYI - - - def _define_params(self): - """Set up parameters for the openbtcli - """ - if (self.modeltype in [4, 5, 8]): - self.y_train = self.y_orig - self.fmean - self.fmean_out = 0 - self.rgy = [np.min(self.y_train), np.max(self.y_train)] - elif (self.modeltype in [6, 7]): - self.fmean_out = norm.ppf(self.fmean) - self.y_train = self.y_orig - self.rgy = [-2, 2] - self.uniqy = np.unique(self.y_train) # Already sorted, btw - if(len(self.uniqy) > 2 or self.uniqy[1] != 0 or self.uniqy[2] != 1): - sys.exit("Invalid y.train: Probit requires dichotomous response coded 0/1") - elif self.modeltype in [9]: - self.fmean_out = 0 - self.fmean = 0 - self.y_train = self.y_orig - self.rgy = [np.min(self.y_train), np.max(self.y_train)] - else: # Unused modeltypes for now, but still set their properties just in case - self.y_train = self.y_orig - self.fmean_out = None - self.rgy = [-2, 2] # These proprties are ambiguous for these modeltypes by the way... - - self.n = self.y_train.shape[0] - self.p = self.X_train.shape[0] - # Cutpoints - if "xicuts" not in self.__dict__: - self.xi = {} - maxx = np.ceil(np.max(self.X_train, axis=1)) - minx = np.floor(np.min(self.X_train, axis=1)) - for feat in range(self.p): - xinc = (maxx[feat] - minx[feat])/(self.numcut+1) - self.xi[feat] = [ - np.arange(1, (self.numcut)+1)*xinc + minx[feat]] - - # Set the terminal node hyperparameters - self.tau = (self.rgy[1] - self.rgy[0])/(2*np.sqrt(self.ntree)*self.k) - - # Overwrite the hyperparameter settings when model mixing - if self.modeltype == 9: - if self.nsprior: - self.tau = 1/(2*self.ntree*self.k) - self.beta = 1/self.ntree - else: - self.tau = 1/(2*np.sqrt(self.ntree)*self.k) - self.beta = 1/(2*self.ntree) - else: - self.beta = 0 - - # Map for the overall sd default values - osd = np.std(self.y_train, ddof = 1) - osd_map = {1: 1, 2: 1, 3: 1, 4: osd, 5: osd, 6: 1, 7: 1, 8: osd, 9:osd} # ****** Think about this for model mixing - - # Overall sd update - if (self.overallsd is None): - print("Overwriting overallsd to agree with the model's default") - self.overallsd = osd_map[self.modeltype] - - # overall lambda calibration - self.overalllambda = self.overallsd**2 - - # Birth and Death probability -- set product tree pbd to 0 for selected models - if (self.modeltype == 6) & (isinstance(self.pbd, float)): - self.pbd = [self.pbd, 0] - if self.modeltype in [4,9] and (isinstance(self.pbd, float)): - self.pbd = [self.pbd, 0] - - [self._update_h_args(arg) for arg in ["power", "base", - "pbd", "pb", "stepwpert", - "probchv", "minnumbot"]] - - # define the roots for the output files - self.xroot = "x" - self.yroot = "y" - self.sroot = "s" - self.chgvroot = "chgv" - self.froot = "f" - self.fsdroot = "fsd" - self.wproot = "wpr" - self.xiroot = "xi" - # Check probit: - if self.modeltype == 6: - if self.ntreeh > 1: - raise ValueError("ntreeh should be 1") - if self.pbdh > 0: - raise ValueError("pbdh should be 1") - # Special quantity for merck_truncated: - if (self.truncateds is None) & (self.modeltype == 8): - miny = np.min(self.y_train) - self.truncateds = (self.y_train == miny) - if self.tc <= 1: - print("Setting tc to 2"); self.tc = 2 - # print((self.k, self.overallsd, self.overallnu, self.ntree, self.ntreeh)) - - - # Need to generalize -- this is only used in fit - def _write_config_file(self): - """Create temp directory to write config and data files - """ - f = tempfile.mkdtemp(prefix="openbtpy_") - self.fpath = Path(f) - run_params = [self.modeltype, - self.xroot, self.yroot, self.fmean_out, - self.ntree, self.ntreeh, - self.ndpost, self.nskip, - self.nadapt, self.adaptevery, - self.tau, self.beta, - self.overalllambda, - self.overallnu, self.base, - self.power, self.baseh, self.powerh, - self.tc, self.sroot, self.chgvroot, - self.froot, self.fsdroot, self.nsprior, - self.wproot, self.wtsprior, - self.pbd, self.pb, self.pbdh, self.pbh, self.stepwpert, - self.stepwperth, - self.probchv, self.probchvh, self.minnumbot, - self.minnumboth, self.printevery, self.xiroot, self.modelname, - self.summarystats] - # print(run_params) - self.configfile = Path(self.fpath / "config") - with self.configfile.open("w") as tfile: - for param in run_params: - tfile.write(str(param)+"\n") - # print(os.path.abspath(self.configfile)) - # sys.exit('Examining tmp file(s)') # The config file was correct when I looked at it manually. - - - def __write_chunks(self, data, no_chunks, var, *args): - if no_chunks == 0: - print("Writing all data to one 'chunk'"); no_chunks = 1 - if (self.tc - int(self.tc) == 0): - splitted_data = np.array_split(data, no_chunks) - else: - sys.exit('Fit: Invalid tc input - exiting process') - int_added = 0 if var in ["xp","fp","xw"] else 1 - - # print(splitted_data) - for i, ch in enumerate(splitted_data): - # print(i); print(ch) - np.savetxt(str(self.fpath / Path(self.__dict__[var+"root"] + str(i+int_added))), - ch, fmt=args[0]) - - - def _write_data(self): - splits = (self.n - 1) // (self.n/(self.tc)) # Should = tc - 1 as long as n >= tc - # print("splits =", splits) - self.__write_chunks(self.y_train, splits, "y", '%.7f') - self.__write_chunks(np.transpose(self.X_train), splits, "x", '%.7f') - self.__write_chunks(np.ones((self.n), dtype="int"), - splits, "s", '%.0f') - print(self.fpath) - if self.X_train.shape[0] == 1: - print("1 x variable, so correlation = 1") - np.savetxt(str(self.fpath / Path(self.chgvroot)), [1], fmt='%.7f') - elif self.X_train.shape[0] == 2: - print("2 x variables") - np.savetxt(str(self.fpath / Path(self.chgvroot)), - [spearmanr(self.X_train, axis=1)[0]], fmt='%.7f') - else: - print("3+ x variables") - np.savetxt(str(self.fpath / Path(self.chgvroot)), - spearmanr(self.X_train, axis=1)[0], fmt='%.7f') - - for k, v in self.xi.items(): - np.savetxt( - str(self.fpath / Path(self.xiroot + str(k+1))), v, fmt='%.7f') - - # Write model mixing files - if self.modeltype == 9: - # F-hat matrix - self.__write_chunks(self.F_train, splits, "f", '%.7f') - # S-hat matrix when using nsprior - if self.nsprior: - self.__write_chunks(self.S_train, splits, "fsd", '%.7f') - # Wts prior when passed in - if self.wtsprior: - np.savetxt(str(self.fpath / Path(self.wproot)), np.concatenate(self.betavec, self.tauvec),fmt='%.7f') - - # print(os.path.abspath(self.fpath)) - # sys.exit('Examining tmp file(s)') # The data files were correct: - # For tc = 4: 1 chgv, 1 config, 3 s's, 3 x's, 3 y's, 1 xi (xi had the most data). - - - def _run_model(self, train=True): - cmd = "openbtcli" if train else "openbtpred" - sp = subprocess.run(["mpirun", "-np", str(self.tc), cmd, str(self.fpath)], - stdin=subprocess.DEVNULL, capture_output=True) - # print(sp) - # A check: - # if not(train): - # print(os.path.abspath(self.fpath)); sys.exit('Examining tmp file(s)') - - - - def predict(self, X, q_lower=0.025, q_upper=0.975, F = None, **kwargs): - """ - Like with fit() X (the preds matrix) should be a numpy array, - but I added a couple of checks to allow a list to be accepted. - """ - # Casting lists to arrays when needed - if (type(X) == list): - X = np.array(X) - print("Completed list-to-numpy_array conversion for the preds. Be careful about row/column mixups!") - if (len(X.shape) == 1): # If shape is (n, ), change it to (n, 1): - X = X.reshape(len(X), 1) - # ^ Reshaping in case of weird 1-variable input (e.g. a 1D list) - - # Model mixing checks - if self.modeltype == 9: - if isinstance(F, list): - F = np.array(F) - if F is None: - raise TypeError("No F matrix was provided for model mixing predictions.") - if not F.shape[1] == self.nummodels: - raise ValueError("Number of columns in F does not equal the number of models which were used in the training phase.") - if not F.shape[0] == X.shape[0]: - raise ValueError("Number of rows in F does not match the number of rows in X.") - - # Set control values - self.p_test = X.shape[1] - self.n_test = X.shape[0] - self.q_lower = q_lower; self.q_upper = q_upper - self.xproot = "xp" - self.fproot = "fp" - self.__write_chunks(X, (self.n_test) // (self.n_test/(self.tc)), - self.xproot, - '%.7f') - - # Write chunks for f in model mixing - if self.modeltype == 9: - self.__write_chunks(F, (self.n_test) // (self.n_test/(self.tc)), - self.fproot, - '%.7f') - - # Set and write config file - self.configfile = Path(self.fpath / "config.pred") - pred_params = [self.modelname, self.modeltype, - self.xiroot, self.xproot, self.fproot, - self.ndpost, self.ntree, self.ntreeh, - self.p_test, self.nummodels ,self.tc, self.fmean] - # print(self.ntree); print(self.ntreeh) - with self.configfile.open("w") as pfile: - for param in pred_params: - pfile.write(str(param)+"\n") - self._run_model(train=False) - self._read_in_preds() - # New: make things a bit more like R, and save attributes to a fit object: - res = {} - res['mdraws'] = self.mdraws; res['sdraws'] = self.sdraws; - res['mmean'] = self.mmean; res['smean'] = self.smean; - res['msd'] = self.msd; res['ssd'] = self.ssd; - res['m_5'] = self.m_5; res['s_5'] = self.s_5; - res['m_lower'] = self.m_lower; res['s_lower'] = self.s_lower; - res['m_upper'] = self.m_upper; res['s_upper'] = self.s_upper; - res['q_lower'] = self.q_lower; res['q_upper'] = self.q_upper; - res['x_test'] = X; res['modeltype'] = self.modeltype - return res - - - def _read_in_preds(self): - mdraw_files = sorted(list(self.fpath.glob(self.modelname+".mdraws*"))) - sdraw_files = sorted(list(self.fpath.glob(self.modelname+".sdraws*"))) - mdraws = [] - for f in mdraw_files: - read = open(f, "r"); lines = read.readlines() - if lines[0] != '\n' and lines[1] != '\n': # If it's nonempty - mdraws.append(np.loadtxt(f)) - # print(mdraws[0].shape); print(len(mdraws)) - self.mdraws = np.concatenate(mdraws, axis=1) # Got rid of the transpose - sdraws = [] - for f in sdraw_files: - read = open(f, "r"); lines = read.readlines() - if lines[0] != '\n' and lines[1] != '\n': # If it's nonempty - sdraws.append(np.loadtxt(f)) - # print(sdraws[0]); print(sdraws[0][0]) - # print(len(sdraws)); print(len(sdraws[0])); print(len(sdraws[0][0])) - self.sdraws = np.concatenate(sdraws, axis=1) # Got rid of the transpose - - # New (added by me), since R returns arrays like these by default: - # Calculate mmean and smean arrays, and related statistics - self.mmean = np.empty(len(self.mdraws[0])) - self.smean = np.empty(len(self.sdraws[0])) - self.msd = np.empty(len(self.mdraws[0])) - self.ssd = np.empty(len(self.mdraws[0])) - self.m_5 = np.empty(len(self.mdraws[0])) - self.s_5 = np.empty(len(self.mdraws[0])) - self.m_lower = np.empty(len(self.mdraws[0])) - self.s_lower = np.empty(len(self.sdraws[0])) - self.m_upper = np.empty(len(self.mdraws[0])) - self.s_upper = np.empty(len(self.sdraws[0])) - for j in range(len(self.mdraws[0])): - self.mmean[j] = np.mean(self.mdraws[:, j]) - self.smean[j] = np.mean(self.sdraws[:, j]) - self.msd[j] = np.std(self.mdraws[:, j], ddof = 1) - self.ssd[j] = np.std(self.sdraws[:, j], ddof = 1) - self.m_5[j] = np.quantile(self.mdraws[:, j], 0.50) - self.s_5[j] = np.quantile(self.sdraws[:, j], 0.50) - self.m_lower[j] = np.quantile(self.mdraws[:, j], self.q_lower) - self.s_lower[j] = np.quantile(self.sdraws[:, j], self.q_lower) - self.m_upper[j] = np.quantile(self.mdraws[:, j], self.q_upper) - self.s_upper[j] = np.quantile(self.sdraws[:, j], self.q_upper) - - - def clean_model(self): - subprocess.run(f"rm -rf {str(self.fpath)}", shell=True) - -#------------------------------------------------------------------------------------------ -# Clark's functions (made from scratch, not edited from Zoltan's version): - def _read_in_vartivity(self, q_lower, q_upper): - vdraws_files = sorted(list(self.fpath.glob(self.modelname+".vdraws"))) - self.vdraws = np.array([]) - for f in vdraws_files: - read = open(f, "r"); lines = read.readlines() - if lines[0] != '\n' and lines[1] != '\n': # If it's nonempty - self.vdraws = np.append(self.vdraws, np.loadtxt(f)) - # self.vdraws[3] = 0.5 # to test transposing/ normalizing counts - self.vdraws = self.vdraws.reshape(self.ndpost, self.p) - # print(self.vdraws.shape); print(self.vdraws) - # Normalize counts: - colnorm = np.array([]) - for i in range(len(self.vdraws)): # should = ndpost in most cases - colnorm = np.append(colnorm, self.vdraws[i].sum()) - idx = np.where(colnorm > 0)[0] # print(idx) - # print(colnorm); print(idx) - - colnorm = colnorm.reshape(self.ndpost, 1) # Will always have 1 column b/c we summed - # print(colnorm.shape); print(idx.shape) - self.vdraws[idx] = self.vdraws[idx] / colnorm[idx] - # print(self.vdraws[0:60]); print(self.vdraws.shape) - - self.mvdraws = np.empty(self.p) - self.vdraws_sd = np.empty(self.p) - self.vdraws_5 = np.empty(self.p) - self.q_lower = q_lower; self.q_upper = q_upper - self.vdraws_lower = np.empty(self.p) - self.vdraws_upper = np.empty(self.p) - - for j in range(len(self.vdraws[0])): # (should = self.p) - self.mvdraws[j] = np.mean(self.vdraws[:, j]) - self.vdraws_sd[j] = np.std(self.vdraws[:, j], ddof = 1) - self.vdraws_5[j] = np.quantile(self.vdraws[:, j], 0.50) - self.vdraws_lower[j] = np.quantile(self.vdraws[:, j], self.q_lower) - self.vdraws_upper[j] = np.quantile(self.vdraws[:, j], self.q_upper) - if (len(self.vdraws[0]) == 1): # Make the output just a double, not a 2D array - self.mvdraws = self.mvdraws[0] - self.vdraws_sd = self.vdraws_sd[0] - self.vdraws_5 = self.vdraws_5[0] - self.vdraws_lower = self.vdraws_lower[0] - self.vdraws_upper = self.vdraws_upper[0] - - # Now do everything again for the "h" version of all these quantities: - vdrawsh_files = sorted(list(self.fpath.glob(self.modelname+".vdrawsh"))) - self.vdrawsh = np.array([]) - for f in vdrawsh_files: - self.vdrawsh = np.append(self.vdrawsh, np.loadtxt(f)) - self.vdrawsh = self.vdrawsh.reshape(self.ndpost, self.p) - # Normalize counts: - colnormh = np.array([]) - for i in range(len(self.vdrawsh)): # should = ndpost in most cases - colnormh = np.append(colnormh, self.vdrawsh[i].sum()) - idxh = np.where(colnormh > 0)[0] - colnormh = colnormh.reshape(self.ndpost, 1) # Will always have 1 column since we summed - self.vdrawsh[idxh] = self.vdrawsh[idxh] / colnormh[idxh] - - self.mvdrawsh = np.empty(self.p) - self.vdrawsh_sd = np.empty(self.p) - self.vdrawsh_5 = np.empty(self.p) - self.vdrawsh_lower = np.empty(self.p) - self.vdrawsh_upper = np.empty(self.p) - - for j in range(len(self.vdrawsh[0])): # (should = self.p) - self.mvdrawsh[j] = np.mean(self.vdrawsh[:, j]) - self.vdrawsh_sd[j] = np.std(self.vdrawsh[:, j], ddof = 1) - self.vdrawsh_5[j] = np.percentile(self.vdrawsh[:, j], 0.50) - self.vdrawsh_lower[j] = np.percentile(self.vdrawsh[:, j], self.q_lower) - self.vdrawsh_upper[j] = np.percentile(self.vdrawsh[:, j], self.q_upper) - - if (len(self.vdrawsh[0]) == 1): # Make the output just a double, not a 2D array - self.mvdrawsh = self.mvdrawsh[0] - self.vdrawsh_sd = self.vdrawsh_sd[0] - self.vdrawsh_5 = self.vdrawsh_5[0] - self.vdrawsh_lower = self.vdrawsh_lower[0] - self.vdrawsh_upper = self.vdrawsh_upper[0] - - - def vartivity(self, q_lower=0.025, q_upper=0.975): - """Calculate and return variable activity information - """ - # params (all are already set, actually) - # self.p = len(self.xi[0][0])? # This definition is bad, but p is already defined in define_params() - # Write to config file: - vartivity_params = [self.modelname, self.ndpost, self.ntree, - self.ntreeh, self.p] - self.configfile = Path(self.fpath / "config.vartivity") - # print(vartivity_params) - with self.configfile.open("w") as tfile: - for param in vartivity_params: - tfile.write(str(param)+"\n") - # Run vartivity program -- it's not actually parallel so no call to mpirun. - # run_local = os.path.exists("openbtvartivity") # Doesn't matter, since the command is the same either way - cmd = "openbtvartivity" - sp = subprocess.run([cmd, str(self.fpath)], - stdin=subprocess.DEVNULL, capture_output=True) - # print(sp) - # Read in result (and set extra attributes like .5, .lower, .upper, etc.): - self._read_in_vartivity(q_lower, q_upper) - - # Compile all the new attributes into something that will be saved as "fitv" when the function is called: - res = {} - res['vdraws'] = self.vdraws; res['vdrawsh'] = self.vdrawsh; - res['mvdraws'] = self.mvdraws; res['mvdrawsh'] = self.mvdrawsh; - res['vdraws_sd'] = self.vdraws_sd; res['vdrawsh_sd'] = self.vdrawsh_sd; - res['vdraws_5'] = self.vdraws_5; res['vdrawsh_5'] = self.vdrawsh_5; - res['vdraws_lower'] = self.vdraws_lower; res['vdrawsh_lower'] = self.vdrawsh_lower; - res['vdraws_upper'] = self.vdraws_upper; res['vdrawsh_upper'] = self.vdrawsh_upper; - res['q_lower'] = self.q_lower; res['q_upper'] = self.q_upper; - res['modeltype'] = self.modeltype - return res - - - - - def _read_in_sobol(self, q_lower, q_upper): - sobol_draws_files = sorted(list(self.fpath.glob(self.modelname+".sobol*"))) - # print(sobol_draws_files) - self.so_draws = np.loadtxt(sobol_draws_files[0]) - for i in range(1, self.tc): - read = open(sobol_draws_files[i], "r"); lines = read.readlines() - if lines[0] != '\n' and lines[1] != '\n': # If it's nonempty - self.so_draws = np.vstack((self.so_draws, - np.loadtxt(sobol_draws_files[i]))) - # print(self.so_draws.shape); print(self.so_draws[0:10]) - labs_temp = list(itertools.combinations(range(1, self.p + 1), 2)) - labs = np.empty(len(labs_temp), dtype = '