From 46f367fc16259fdb436bc0bd3fbd1ea6f9f644a8 Mon Sep 17 00:00:00 2001
From: "Sow, Haby" <habysow@rti.org>
Date: Fri, 29 May 2026 10:10:40 -0400
Subject: [PATCH 1/4] n environment fix

---
 .Rbuildignore                |  2 ++
 .gitignore                   |  1 +
 R/allocate.R                 | 11 +++++++----
 R/srs.R                      | 18 ++++++++++++------
 R/sys.R                      | 23 ++++++++++++-----------
 R/sys_pps.R                  |  9 +++++----
 R/util.R                     |  5 +++++
 man/SampleSelectR-package.Rd |  1 +
 8 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 6022675..a799dca 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -7,3 +7,5 @@
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
+^\.positai$
+^\.claude$
diff --git a/.gitignore b/.gitignore
index 667204b..51d5219 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@
 .DS_Store
 .quarto
 docs
+.positai
diff --git a/R/allocate.R b/R/allocate.R
index e21684b..1821f70 100644
--- a/R/allocate.R
+++ b/R/allocate.R
@@ -279,7 +279,10 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
     }
   }
   # Calculate the total (raw) sample size
-  n <- max(ceiling(sum(allocations)), length(N.h) * lbound)
+
+
+  final_n <- max(ceiling(sum(allocations)), length(N.h) * lbound)
+
 
   sizes <- allocations
 
@@ -292,7 +295,7 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
   total_allocated <- sum(adjusted_allocations)
 
   # Calculate the difference from the total n
-  difference <- n - total_allocated
+  difference <- final_n - total_allocated
 
   # If difference is positive, distribute it proportionally
   if (difference != 0) {
@@ -314,7 +317,7 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
 
   # Step 5: Adjust the rounded allocations to ensure the sum equals n
   total_allocated <- sum(rounded_allocations)
-  difference <- n - total_allocated
+  difference <- final_n - total_allocated
   # Adjust the allocations by adding/subtracting the difference
   while (difference != 0) {
     i <- sample(1:num_groups, 1) # Randomly select an index
@@ -345,7 +348,7 @@ allocate <- function(allocation, N.h, n.samp = NULL, S.h = NULL, c.h = NULL, cos
   }
   outputs <- as.integer(rounded_allocations)
   if (allocation == "optimal") {
-    n.print <- n
+    n.print <- final_n
   } else {
     n.print <- n.samp
   }
diff --git a/R/srs.R b/R/srs.R
index 677c5e7..d3467ac 100644
--- a/R/srs.R
+++ b/R/srs.R
@@ -28,6 +28,7 @@
 
 #' @export
 
+
 srs <- function(frame, n, outall = FALSE, curstrat = NULL) {
   check_frame_type(frame)
   check_n(n, frame, curstrat, n_le_N = TRUE)
@@ -35,18 +36,22 @@ srs <- function(frame, n, outall = FALSE, curstrat = NULL) {
 
   N <- nrow(frame)
 
-  # Take the srs and create sampling-related columns
+  # Take the SRS and create sampling-related columns
   selectedVector <- sample(x = N, size = n, replace = FALSE)
+
   frame <- frame |>
     tidytable::mutate(
       rowNum = tidytable::row_number(),
-      SelectionProbability = n / N,
-      SamplingWeight = ifelse(.data$rowNum %in% selectedVector, N / n, NA),
-      SelectionIndicator = ifelse(.data$rowNum %in% selectedVector, TRUE, FALSE)
+      SelectionProbability = .env$n / .env$N,
+      SamplingWeight = tidytable::if_else(
+        .data$rowNum %in% .env$selectedVector,
+        .env$N / .env$n,
+        NA_real_
+      ),
+      SelectionIndicator = .data$rowNum %in% .env$selectedVector
     ) |>
     tidytable::select(-tidytable::all_of("rowNum"))
 
-
   # Output to screen
   Sampling_Output(n, N, curstrat = curstrat)
 
@@ -55,8 +60,9 @@ srs <- function(frame, n, outall = FALSE, curstrat = NULL) {
     sample <- frame |>
       tidytable::filter(.data$SelectionIndicator) |>
       tidytable::select(-tidytable::all_of("SelectionIndicator"))
+
     return(sample)
-  } else if (outall) {
+  } else {
     return(frame)
   }
 }
diff --git a/R/sys.R b/R/sys.R
index 5fb4083..ec50f95 100644
--- a/R/sys.R
+++ b/R/sys.R
@@ -34,6 +34,7 @@
 #'
 #' @export
 
+
 sys <- function(frame, n, curstrat = NULL, outall = FALSE) {
   check_frame_type(frame)
   check_n(n, frame, curstrat, n_le_N = TRUE)
@@ -42,32 +43,32 @@ sys <- function(frame, n, curstrat = NULL, outall = FALSE) {
   N <- nrow(frame)
 
   # Sampling method
-
   k <- N / n # Sampling interval
 
-  r <- runif(1, 1, k) # We use a random start between 1 and k
+  r <- runif(1, 1, k) # Random start between 1 and k
 
   selectedVector <- floor(r + k * (0:(n - 1))) # Selected row indices
 
-  # We make sure that selected indices are within frame range
-
+  # Make sure selected indices are within frame range
   selectedVector <- selectedVector[selectedVector <= N]
 
-  # Creating variables accordingly
+  # Create variables
   frame <- frame |>
     tidytable::mutate(
       numrow = tidytable::row_number(),
-      SelectionIndicator = .data$numrow %in% selectedVector,
-      SelectionProbability = n / N,
-      SamplingWeight = ifelse(.data$SelectionIndicator, N / n, NA)
+      SelectionIndicator = .data$numrow %in% .env$selectedVector,
+      SelectionProbability = .env$n / .env$N,
+      SamplingWeight = tidytable::if_else(
+        .data$SelectionIndicator,
+        .env$N / .env$n,
+        NA_real_
+      )
     ) |>
     tidytable::select(-tidytable::all_of("numrow"))
 
   # Output to screen
   Sampling_Output(n, N, k = k, r = r, curstrat = curstrat)
-
-  # Return only selected rows and make sure the selected sample is a data.frame, tibble, or data.table
-
+  # Return only the sample or the frame with selection indicator based on value of outall
   if (outall) {
     return(frame)
   } else {
diff --git a/R/sys_pps.R b/R/sys_pps.R
index ca0ff0c..4e5d3df 100644
--- a/R/sys_pps.R
+++ b/R/sys_pps.R
@@ -47,7 +47,7 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   tbd_data_1 <- frame |>
     tidytable::mutate(
       rowNum = tidytable::row_number(),
-      ExpectedHits = n * (!!(symbol_mos) / totalSize),
+      ExpectedHits = .env$n * (!!(symbol_mos) / totalSize),
       SamplingWeight = .data$ExpectedHits^-1
     )
 
@@ -64,11 +64,12 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   selectedVector <- findInterval(selectedSizePoints, sizeIntervals)
 
   # Using selectedVector, get the total counts of each index
+
   selectedVector_counts <- selectedVector |>
     as.data.frame() |>
-    tidytable::count(selectedVector) |>
-    # Rename to NumberHits
-    tidytable::rename(NumberHits = n)
+    tidytable::count(selectedVector, name = "NumberHits")
+
+
 
   tbd_data_2 <- tbd_data_1 |>
     tidytable::left_join(
diff --git a/R/util.R b/R/util.R
index 6b1f9e8..606e7fb 100644
--- a/R/util.R
+++ b/R/util.R
@@ -1,3 +1,8 @@
+
+
+# Declare tidy-evaluation pronouns used in package code
+utils::globalVariables(c(".env"))
+
 #' Check if the frame is a valid data structure
 #'
 #' Ensures that the input frame is a data.frame, data.table, or tibble.
diff --git a/man/SampleSelectR-package.Rd b/man/SampleSelectR-package.Rd
index ec80e39..de3ab40 100644
--- a/man/SampleSelectR-package.Rd
+++ b/man/SampleSelectR-package.Rd
@@ -12,6 +12,7 @@ Randomly select samples with SRS, systematic, and various PPS methods. Also incl
 Useful links:
 \itemize{
   \item \url{https://github.com/RTIInternational/SampleSelectR}
+  \item \url{https://rtiinternational.github.io/SampleSelectR/}
   \item Report bugs at \url{https://github.com/RTIInternational/SampleSelectR/issues}
 }
 

From dc39467b07613f17039e6b9ac084f593d62bdf95 Mon Sep 17 00:00:00 2001
From: "Sow, Haby" <habysow@rti.org>
Date: Fri, 29 May 2026 10:59:59 -0400
Subject: [PATCH 2/4] Rlang fix

---
 NAMESPACE                 | 1 +
 R/SampleSelectR-package.R | 3 ++-
 R/util.R                  | 4 ----
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 7fffe22..37e108c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,4 +17,5 @@ importFrom(data.table,.NGRP)
 importFrom(data.table,.SD)
 importFrom(data.table,data.table)
 importFrom(rlang,.data)
+importFrom(rlang,.env)
 importFrom(stats,runif)
diff --git a/R/SampleSelectR-package.R b/R/SampleSelectR-package.R
index 50f5a29..1a615a1 100644
--- a/R/SampleSelectR-package.R
+++ b/R/SampleSelectR-package.R
@@ -11,7 +11,8 @@
 #' @importFrom data.table .SD
 #' @importFrom data.table :=
 #' @importFrom data.table data.table
-#' @importFrom stats runif
 #' @importFrom rlang .data
+#' @importFrom rlang .env
+#' @importFrom stats runif
 ## usethis namespace: end
 NULL
diff --git a/R/util.R b/R/util.R
index 606e7fb..5c34668 100644
--- a/R/util.R
+++ b/R/util.R
@@ -1,8 +1,4 @@
 
-
-# Declare tidy-evaluation pronouns used in package code
-utils::globalVariables(c(".env"))
-
 #' Check if the frame is a valid data structure
 #'
 #' Ensures that the input frame is a data.frame, data.table, or tibble.

From c9ac4a11b436d7fa8f88dbc63320ba3a7f21f850 Mon Sep 17 00:00:00 2001
From: "Zimmer, Stephanie" <sazimmer+rtiintl@rti.org>
Date: Fri, 29 May 2026 14:46:23 -0400
Subject: [PATCH 3/4] Add a few more .env pronouns for clarity

---
 R/chromy_pps.R | 4 ++--
 R/sys_pps.R    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/chromy_pps.R b/R/chromy_pps.R
index 0692e76..e70dc6a 100644
--- a/R/chromy_pps.R
+++ b/R/chromy_pps.R
@@ -53,8 +53,8 @@ chromy_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   frame_hits <-
     frame |>
     tidytable::mutate(
-      ExpectedHits = exphits,
-      NumberHits = chromy_inner(exphits),
+      ExpectedHits = .env$exphits,
+      NumberHits = chromy_inner(.env$exphits),
       SelectionIndicator = .data$NumberHits > 0,
       SamplingWeight = ifelse(.data$SelectionIndicator, 1 / .data$ExpectedHits, NA),
     )
diff --git a/R/sys_pps.R b/R/sys_pps.R
index 4e5d3df..53619fd 100644
--- a/R/sys_pps.R
+++ b/R/sys_pps.R
@@ -47,7 +47,7 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
   tbd_data_1 <- frame |>
     tidytable::mutate(
       rowNum = tidytable::row_number(),
-      ExpectedHits = .env$n * (!!(symbol_mos) / totalSize),
+      ExpectedHits = .env$n * (!!(symbol_mos) / .env$totalSize),
       SamplingWeight = .data$ExpectedHits^-1
     )
 
@@ -79,7 +79,7 @@ sys_pps <- function(frame, n, mos, outall = FALSE, curstrat = NULL) {
     # Need to zero filled NumberHits
     tidytable::mutate(
       NumberHits = tidytable::replace_na(.data$NumberHits, replace = 0),
-      SelectionIndicator = .data$rowNum %in% selectedVector,
+      SelectionIndicator = .data$rowNum %in% .env$selectedVector,
       # Make SamplingWeight to be NA if not selected
       SamplingWeight = tidytable::case_when(
         SelectionIndicator == TRUE ~ .data$SamplingWeight,

From c64fde0feaf68aab4d516f03cd964dfe9ffb81dd Mon Sep 17 00:00:00 2001
From: "Zimmer, Stephanie" <sazimmer+rtiintl@rti.org>
Date: Fri, 29 May 2026 15:08:20 -0400
Subject: [PATCH 4/4] Add test for sample frame with  column

---
 tests/testthat/test-select_sample.R | 147 ++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 tests/testthat/test-select_sample.R

diff --git a/tests/testthat/test-select_sample.R b/tests/testthat/test-select_sample.R
new file mode 100644
index 0000000..a593c31
--- /dev/null
+++ b/tests/testthat/test-select_sample.R
@@ -0,0 +1,147 @@
+test_that("n on frame does not cause issue - chromy_pps", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "chromy_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "chromy_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
+
+test_that("n on frame does not cause issue - srs", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "srs",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "srs",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
+
+test_that("n on frame does not cause issue - sys_pps", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "sys_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "sys_pps",
+      n = sampsizes,
+      strata = "Region",
+      mos = "Pop_Tot",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})
+
+test_that("n on frame does not cause issue - sys", {
+  county_2023_slim_n <- county_2023 |>
+    tidytable::select(GEOID, Region, Pop_Tot) |>
+    tidytable::mutate(
+      n = 50,
+      ExpHits_man = 10 * Pop_Tot / sum(Pop_Tot),
+      .by = "Region"
+    )
+
+  sampsizes <- county_2023_slim_n |>
+    tidytable::distinct(Region) |>
+    tidytable::mutate(sample_size = 10)
+
+  set.seed(12345)
+  samp1 <- county_2023_slim_n |>
+    select_sample(
+      "sys",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  set.seed(12345)
+  samp2 <- county_2023_slim_n |>
+    tidytable::select(-n) |>
+    select_sample(
+      "sys",
+      n = sampsizes,
+      strata = "Region",
+      outall = TRUE
+    )
+  expect_equal(
+    samp1 |> tidytable::select(-n),
+    samp2
+  )
+})