From 117b8b1625c644c531ef87bc10515674f93e83b5 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:17:09 -0400 Subject: [PATCH 01/10] fix: replace fixed-offset event parsing with level-aware sub-block parser Add extractGedcomLevel, extractEventSubBlock, extractInfoFromLines, and extractCoordFromSubBlock helpers so processEventLine looks up fields by tag name rather than fixed line offsets, correctly handling missing or reordered GEDCOM event sub-fields. Co-Authored-By: Claude Sonnet 4.6 --- R/readGedcom.R | 68 +++++++++++++++++++++++++------- tests/testthat/test-readGedcom.R | 60 ++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/R/readGedcom.R b/R/readGedcom.R index a9976f76..2e108d6a 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -363,31 +363,71 @@ parseNameLine <- function(line, record) { record } +extractGedcomLevel <- function(line) { + as.integer(stringr::str_extract(line, "^\\d+")) +} + +extractEventSubBlock <- function(block, start_idx) { + event_level <- extractGedcomLevel(block[start_idx]) + n <- length(block) + if (start_idx >= n) return(character(0)) + end_idx <- start_idx + for (j in (start_idx + 1L):n) { + if (extractGedcomLevel(block[j]) <= event_level) break + end_idx <- j + } + if (end_idx == start_idx) return(character(0)) + block[(start_idx + 1L):end_idx] +} + +extractInfoFromLines <- function(lines, tag) { + pattern <- paste0("\\b", tag, "\\b") + matches <- lines[grepl(pattern, lines)] + if (length(matches) == 0L) return(NA_character_) + extractInfo(matches[1L], tag) +} + +extractCoordFromSubBlock <- function(sub_block, tag) { + # Searches all levels of the sub-block so it handles: + # GEDCOM 5.5.x: LATI/LONG as direct children of the event + # GEDCOM 5.5.x standard: LATI/LONG under PLAC (level+2) + # GEDCOM 7.x: LATI/LONG under MAP under PLAC (level+3) + pattern <- paste0("\\b", tag, "\\b") + matches <- sub_block[grepl(pattern, sub_block)] + if (length(matches) == 0L) return(NA_character_) + extractInfo(matches[1L], tag) +} + #' Process Event Lines (Birth or Death) #' #' @description Extracts event details (e.g., date, place, cause, latitude, longitude) from a block of GEDCOM lines. -#' For "birth": expect DATE on line i+1, PLAC on i+2, LATI on i+4, LONG on i+5. -#' For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. +#' Uses level-aware sub-block parsing so fields are looked up by tag name rather than fixed offsets. #' @param event A character string indicating the event type ("birth" or "death"). #' @param block A character vector of GEDCOM lines. #' @param i The current line index where the event tag is found. #' @param record A named list representing the individual's record. #' @param pattern_rows A list with counts of GEDCOM tag occurrences. -#' @return The updated record with parsed event information.# -# For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. +#' @return The updated record with parsed event information. processEventLine <- function(event, block, i, record, pattern_rows) { - n_lines <- length(block) + sub_block <- extractEventSubBlock(block, i) + if (length(sub_block) == 0L) return(record) + + event_level <- extractGedcomLevel(block[i]) + direct_children <- sub_block[ + vapply(sub_block, extractGedcomLevel, integer(1L)) == event_level + 1L + ] + if (event == "birth") { - if (i + 1 <= n_lines) record$birth_date <- extractInfo(block[i + 1], "DATE") - if (i + 2 <= n_lines) record$birth_place <- extractInfo(block[i + 2], "PLAC") - if (i + 4 <= n_lines) record$birth_lat <- extractInfo(block[i + 4], "LATI") - if (i + 5 <= n_lines) record$birth_long <- extractInfo(block[i + 5], "LONG") + record$birth_date <- extractInfoFromLines(direct_children, "DATE") + record$birth_place <- extractInfoFromLines(direct_children, "PLAC") + record$birth_lat <- extractCoordFromSubBlock(sub_block, "LATI") + record$birth_long <- extractCoordFromSubBlock(sub_block, "LONG") } else if (event == "death") { - if (i + 1 <= n_lines) record$death_date <- extractInfo(block[i + 1], "DATE") - if (i + 2 <= n_lines) record$death_place <- extractInfo(block[i + 2], "PLAC") - if (i + 3 <= n_lines) record$death_caus <- extractInfo(block[i + 3], "CAUS") - if (i + 4 <= n_lines) record$death_lat <- extractInfo(block[i + 4], "LATI") - if (i + 5 <= n_lines) record$death_long <- extractInfo(block[i + 5], "LONG") + record$death_date <- extractInfoFromLines(direct_children, "DATE") + record$death_place <- extractInfoFromLines(direct_children, "PLAC") + record$death_caus <- extractInfoFromLines(direct_children, "CAUS") + record$death_lat <- extractCoordFromSubBlock(sub_block, "LATI") + record$death_long <- extractCoordFromSubBlock(sub_block, "LONG") } record } diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index ab11e915..f909321f 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -575,6 +575,66 @@ test_that("mapFAMS2parents warns and returns NULL when required columns are miss expect_null(out) }) +birt_no_date_content <- c( + "0 @I1@ INDI", + "1 NAME Alice /Jones/", + "1 SEX F", + "1 BIRT", + "2 PLAC Springfield", + "2 LATI N39.7817", + "2 LONG W89.6501" +) + +birt_no_plac_content <- c( + "0 @I1@ INDI", + "1 NAME Bob /Smith/", + "1 SEX M", + "1 BIRT", + "2 DATE 15 MAR 1955" +) + +birt_reordered_content <- c( + "0 @I1@ INDI", + "1 NAME Carol /Lee/", + "1 SEX F", + "1 BIRT", + "2 LATI N40.7128", + "2 LONG W74.0060", + "2 PLAC New York", + "2 DATE 4 JUL 1976" +) + +test_that("processEventLine handles missing DATE gracefully", { + temp_file <- tempfile(fileext = ".ged") + writeLines(birt_no_date_content, temp_file) + df <- readGedcom(temp_file, remove_empty_cols = FALSE) + expect_true(is.na(df$birth_date[1])) + expect_equal(df$birth_place[1], "Springfield") + expect_equal(df$birth_lat[1], "N39.7817") + expect_equal(df$birth_long[1], "W89.6501") + unlink(temp_file) +}) + +test_that("processEventLine handles missing PLAC gracefully", { + temp_file <- tempfile(fileext = ".ged") + writeLines(birt_no_plac_content, temp_file) + df <- readGedcom(temp_file, remove_empty_cols = FALSE) + expect_equal(df$birth_date[1], "15 MAR 1955") + expect_true(is.na(df$birth_place[1])) + unlink(temp_file) +}) + +test_that("processEventLine handles reordered subfields correctly", { + temp_file <- tempfile(fileext = ".ged") + writeLines(birt_reordered_content, temp_file) + df <- readGedcom(temp_file) + expect_equal(df$birth_date[1], "4 JUL 1976") + expect_equal(df$birth_place[1], "New York") + expect_equal(df$birth_lat[1], "N40.7128") + expect_equal(df$birth_long[1], "W74.0060") + unlink(temp_file) +}) + test_that("readGed and readgedcom aliases return the same output as readGedcom", { gedcom_content <- c( "0 @I1@ INDI", From 1763ec8c4944ab6c0bcbb9088fa2e1d4a1b481f6 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:24:05 -0400 Subject: [PATCH 02/10] docs: update stale fixed-offset description in readGedcom roxygen; clarify extractEventSubBlock guard assumption --- R/readGedcom.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/readGedcom.R b/R/readGedcom.R index 2e108d6a..433eb64c 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -20,12 +20,14 @@ #' including name prefix, name suffix, nickname, and married surname. #' #' Birth and death events are recognized from `BIRT` and `DEAT` tags. Event -#' details are currently parsed using fixed offsets within the individual block. -#' For birth events, the parser expects `DATE` at `i + 1`, `PLAC` at `i + 2`, -#' `LATI` at `i + 4`, and `LONG` at `i + 5`. For death events, the parser -#' expects `DATE` at `i + 1`, `PLAC` at `i + 2`, `CAUS` at `i + 3`, `LATI` at -#' `i + 4`, and `LONG` at `i + 5`. Missing elements leave the corresponding -#' output fields as `NA`. +#' details are parsed by collecting all child lines whose GEDCOM level equals +#' the event level plus one (direct children), then looking up sub-fields by +#' tag name. `DATE`, `PLAC`, and `CAUS` are matched as direct children of the +#' event. Coordinates (`LATI` and `LONG`) are searched across all descendant +#' lines, which allows them to be located whether they appear as direct children +#' (common in some GEDCOM 5.5.x exporters), under `PLAC` (standard GEDCOM +#' 5.5.1), or under a `MAP` substructure under `PLAC` (GEDCOM 7.x). Missing +#' sub-fields leave the corresponding output columns as `NA`. #' #' Attribute tags such as `OCCU`, `EDUC`, `RELI`, `CAST`, `NCHI`, `NMR`, `NATI`, #' `RESI`, `PROP`, `SSN`, `TITL`, `DSCR`, and `IDNO` are parsed directly into @@ -370,6 +372,8 @@ extractGedcomLevel <- function(line) { extractEventSubBlock <- function(block, start_idx) { event_level <- extractGedcomLevel(block[start_idx]) n <- length(block) + # start_idx is always within bounds because it comes from a bounded loop in the caller, + # but guard defensively to avoid the descending-sequence pitfall of R's : operator. if (start_idx >= n) return(character(0)) end_idx <- start_idx for (j in (start_idx + 1L):n) { From db59b9875f4e7d7f88d6db24c05d85e793518ce3 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:32:32 -0400 Subject: [PATCH 03/10] feat: bundle FamilySearch spec TSVs and add GEDCOM version detection Co-Authored-By: Claude Sonnet 4.6 --- R/helpReadGedcom.R | 25 +++++++++++++++++ R/readGedcom.R | 3 ++ R/sysdata.rda | Bin 0 -> 3326 bytes data-raw/gedcom_spec.R | 25 +++++++++++++++++ tests/testthat/test-readGedcom.R | 37 +++++++++++++++++++++++++ tests/testthat/test-readGedcomlegacy.R | 2 ++ 6 files changed, 92 insertions(+) create mode 100644 R/sysdata.rda create mode 100644 data-raw/gedcom_spec.R diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index ace867a1..094e35f1 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -34,6 +34,31 @@ collapseNames <- function(verbose, df_temp) { df_temp } +#' Detect GEDCOM Version from File Lines +#' +#' @param lines Character vector of lines from a GEDCOM file. +#' @return A string such as `"5.5.1"`, `"7.0"`, or `"unknown"`. +#' @keywords internal +detectGedcomVersion <- function(lines) { + head_idx <- which(grepl("^0 HEAD\\b", lines))[1L] + if (is.na(head_idx)) return("unknown") + + # End of HEAD is the next level-0 record + next_l0 <- which(grepl("^0 ", lines[(head_idx + 1L):length(lines)]))[1L] + head_end <- if (is.na(next_l0)) length(lines) else head_idx + next_l0 - 1L + head_block <- lines[head_idx:head_end] + + gedc_idx <- which(grepl("^1 GEDC\\b", head_block))[1L] + if (is.na(gedc_idx)) return("unknown") + + # Look ahead within HEAD block for the VERS line under GEDC + lookahead <- head_block[seq(gedc_idx + 1L, min(gedc_idx + 5L, length(head_block)))] + vers_line <- lookahead[grepl("^2 VERS\\b", lookahead)][1L] + if (is.na(vers_line)) return("unknown") + + stringr::str_trim(stringr::str_extract(vers_line, "(?<=VERS ).*")) +} + #' Combine Columns #' #' This function combines two columns, handling conflicts and merging non-conflicting data. diff --git a/R/readGedcom.R b/R/readGedcom.R index 433eb64c..57ef2bfa 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -123,6 +123,8 @@ readGedcom <- function(file_path, } if (verbose == TRUE) message("Reading file: ", file_path) lines <- readLines(file_path) + gedcom_version <- detectGedcomVersion(lines) + if (verbose) message("Detected GEDCOM version: ", gedcom_version) total_lines <- length(lines) if (verbose == TRUE) message("File is ", total_lines, " lines long") @@ -190,6 +192,7 @@ readGedcom <- function(file_path, ) } + attr(df_temp, "gedcom_version") <- gedcom_version df_temp } diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 0000000000000000000000000000000000000000..3101c76d87cb486ce7bbf2ce3f8198703440f2ee GIT binary patch literal 3326 zcmV^|KPvp-{e351ONyC1QFm9 ze_vbc5gQHz($PUw0AeJbAP7^`^#e@O00000$Y>0K zkO8KL28|j9gFs}&21bTJ0BNQ`00RU7I5cR`G#UdYATl&E00T`j000;u0EJ42s$fL@ zP-d9{4I4=H7)AgA0gz~Bh+_~6QbY+oBT1m1k?I;WZA~;9XwcI@(?9?X4Kf-4i4h5) znKWrM6od6gr~^P~00001rho%GC>4jB8W1m`g6!tw9Iyh2nTRFE-tcx{6uhgyV)H~0 zNB{#NyZxhkbl3?7kd9v4N=s*NU*XN{Z~K2g2RTJt*eZA|AOMhd_dAM4F3ElnXm_0E zt6#_%f(FNmy?{=zL~BAoWB~pi2HZU&p}v^0epkS_p(I%569m*zulmLyH1fuGlE7+U z!xd4XVqv8^m3P1He(ei{L-{oAd-YIx>-_cA?bmIzP%uyxM%j!Oy`G(!#@UIH8+NdD zT!Nu6bX-i34DNlbC66`tSRlgS^FuI!su--H)NREDXjlSqWmi!tC`vC*6wlZpW))X} z5LI7gH|<>=oNqD+7XoLFi(W__W3fEucdf_*^PHh^5!FF}de=KRb*B#%2CIaKj40Zv z84({l?{)?z3=RXls^K%kHLfW^u7?cv0DumAl=9UPV{5zE;N-=UfCx0R3wnaVOc^0t zUb3jF@o^5B98|#of0zg4PC32}i25}w^Z-Rpj)H@s~ z>yMW}NKMo+Rpy}y;X|!e3@4Bz%qTj^!pLrO+Hz3$s?W5op44roYi(3D(t(@jfWKb~ z8dY@=l5+yK;e6{}RpsB82`h*shd@Y8!67>WLUmA*a;|fR=JE3D(87d1N)*p6F#P=S zvLV!5`RApFXd2I6_ntW~jd5cO;1*VAaH0VR3NjLN z#ob;=7_?|WXj-T~eMfPo#*xx#D6==D!C!Y4Okx&CNa+b3z#bmIGSaH2bViSSUHJB( zM+08>@3kJ;zhj1KLycdG_nS^S@sRf737$2$B%7#aRBz+V5@LZOgc`te)rO0puChur zs9V~IWQ41txdx!%ngfQrYuQ3@u8I<1LwQiynXUp)rEH-hhL8t+DhNW?6;9>U;k9d7 zxn<5BC0~``_I3)9v5M&_2?lsquKyKf%=5BZs|23#~(; zQj@Zw#iIUc6;0Yfb4aRSH%O{)5$p)}gm~-`muB@1Un{Cd#Hgx&PKgyyHY5In)DNZ( zo=-P1vVbl7pFDH?9DWtXfQhIwDq@DppWk5Y1FwF`;ykfdV9G+h##X zpuk6UN;x8tZB125r;vJ5rNK-!M}+cJyA+WR6;O?tBIVSCmydGQQ%(v5UPGmT1fDu~i+bfW#@Kt>@nm72k5+*D;1 zP*KcOY>?9zj8+jORn%Q;8fQtIZKWFFP`z}JGc&1XGEA_Ej8JU=SQ^y=Fbk`^-U|Q= zn?kIH#i%-wkOjdZwl4$>qY||SoWwCO#*mR9;&A}@2pyF;j)u^I89R0q)B|^Bkw@lI z-lfA7?3cF#ZKE=$p;XaR1G1+pApuZ~RM0%tA85(hEDiwfxKqaPQYv0dBA(Fz(FJhY zW|?Cq4Aza5vZbt3&_h%QumuKekcGZx@Q@)1X6~_kK%$u%WL^qtiPNOvX{%_Kw5I9a zUC7^fKL$3bccm<`;ED~RUZ|K* zk*t;pl^KoEnQo<8JEzZlbeOO)i88>(WbN%!+L75)&Ad>$JE2QP14`10XN>o{#djC7kSY=!IwWY`Q&Eay|3 zi8kMcwMM|@P<4*A+apwcHH>2k8?%C^gildb^bHj}fC>$DIzC__#0}5job$?Z=f=$m z4FM@aER-T_g)&6JWrHfJzyS*Ygh&iU95B#D9xkYBN|dQek}624l^-OGWJth*$Sszf z`YLLvfWmwLLsd*8=&7>vgJgascbHIpVeTGeo_kV*WV_PTF7v5EWRB2Iys4#>zCJ_Qa2x_R2lEH^XRMS^0puO;SoTR)rp36yD7N#|-wP%8-NUoHw zGS-yXdNn-xOC>w89+*BfDL}MPmYV7*=na9p!~heL0-n!V#vN=WG>Kypi)d1^Gi@E6 zk7*UH9o-!)vAUM^FC;KSaV38okrl_=~-k^Nx>#A`OdtgZr(NfQZ1Y(2;$u4w_(m7Kz zaIQ2PQ*26%2q^+YlvA`6)d6+uDH2gx!J$hPfki?K2+>b~5XST4(~bA4uNPzXiCSx_ zrJ`09Drjb`gQ9Awa8ki(>A6Ce&5}YP1Oh-nk|;5h87L}9mJkZ00FfA!8ES$g2yIc% zSd%tti{Qwh7>SU|K!;fg6g@?Ev((CD5?RUF2Uh})atOjf(@2R0C;BI|&{wTa@>e=n zm@B~wc)s&{*$J}%PobnH}BG0)qVrn|V?ql+{;xB?&HxPRTu3MLqR2qdXfDrHj9_-#tQ6^UR* zETyX}7^4RkEItOsTS`o+xsV!`RWzr-J{xMId5I?kVhv0YbBg0?jZ!irXv<|WV4M;p zN{ph$aS9kzeP*WaC_mQSW3~@;+zgmmm1hcOp_v5d>7Z_~@S{TekGEh{6;G3FevEx? zzWMFjQ|m%iK5&mt;f?Y0l@(1(uTD=*?9-yUDk`5#tR+*ywxmuzTpA~0M_&R*(e4J7 z6;FQa-=MlhPVFRYsH%UyI`y;5>+9D(p}P_)nn3)Oe2Mil&ay zK)8D1fp$6Z2L5jsek|y4NUC^$l19lGk()$yU3i1^gY{qg;C|*GiS9Tzo8SCh$rRy2 IK^}n|z(LmPssI20 literal 0 HcmV?d00001 diff --git a/data-raw/gedcom_spec.R b/data-raw/gedcom_spec.R new file mode 100644 index 00000000..1276903f --- /dev/null +++ b/data-raw/gedcom_spec.R @@ -0,0 +1,25 @@ +# Downloads the FamilySearch GEDCOM 7 spec TSVs and saves them as sysdata. +# Run once with: source("data-raw/gedcom_spec.R") + +base_url <- "https://raw.githubusercontent.com/FamilySearch/GEDCOM/main/extracted-files/" + +gedcom_substructures <- utils::read.table( + paste0(base_url, "substructures.tsv"), + sep = "\t", header = TRUE, stringsAsFactors = FALSE, quote = "" +) +gedcom_payloads <- utils::read.table( + paste0(base_url, "payloads.tsv"), + sep = "\t", header = TRUE, stringsAsFactors = FALSE, quote = "" +) +gedcom_enumerations <- utils::read.table( + paste0(base_url, "enumerations.tsv"), + sep = "\t", header = TRUE, stringsAsFactors = FALSE, quote = "" +) + +usethis::use_data( + gedcom_substructures, + gedcom_payloads, + gedcom_enumerations, + internal = TRUE, + overwrite = TRUE +) diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index f909321f..644749ee 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -243,6 +243,8 @@ test_that("readGedcom parses death event correctly", { row.names(df) <- NULL row.names(df_leg) <- NULL df_leg <- dplyr::rename(df_leg, personID = id) + # Strip the gedcom_version attribute added by readGedcom before comparing + attr(df, "gedcom_version") <- NULL expect_equal(df_leg, df) unlink(temp_file) @@ -635,6 +637,41 @@ test_that("processEventLine handles reordered subfields correctly", { unlink(temp_file) }) +gedcom55_header <- c( + "0 HEAD", + "1 GEDC", + "2 VERS 5.5.1", + "2 FORM LINEAGE-LINKED", + "1 CHAR UTF-8", + "0 @I1@ INDI", + "1 NAME Test /Person/", + "1 SEX M" +) + +gedcom7_header <- c( + "0 HEAD", + "1 GEDC", + "2 VERS 7.0", + "1 CHAR UTF-8", + "0 @I1@ INDI", + "1 NAME Test /Person/", + "1 SEX M" +) + +test_that("detectGedcomVersion returns correct version string", { + expect_equal(detectGedcomVersion(gedcom55_header), "5.5.1") + expect_equal(detectGedcomVersion(gedcom7_header), "7.0") + expect_equal(detectGedcomVersion(c("0 @I1@ INDI", "1 NAME No /Head/")), "unknown") +}) + +test_that("readGedcom attaches gedcom_version attribute", { + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom55_header, temp_file) + df <- readGedcom(temp_file) + expect_equal(attr(df, "gedcom_version"), "5.5.1") + unlink(temp_file) +}) + test_that("readGed and readgedcom aliases return the same output as readGedcom", { gedcom_content <- c( "0 @I1@ INDI", diff --git a/tests/testthat/test-readGedcomlegacy.R b/tests/testthat/test-readGedcomlegacy.R index ea7a505c..70fe4251 100644 --- a/tests/testthat/test-readGedcomlegacy.R +++ b/tests/testthat/test-readGedcomlegacy.R @@ -32,6 +32,8 @@ test_that("readGedcom parses death event correctly for legacy", { row.names(df) <- NULL row.names(df_leg) <- NULL df_leg <- dplyr::rename(df_leg, personID = id) + # Strip the gedcom_version attribute added by readGedcom before comparing + attr(df, "gedcom_version") <- NULL expect_equal(df_leg, df) unlink(temp_file) From 475827f706d3a4f6e9c926dff1c37bc2167a1402 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:45:34 -0400 Subject: [PATCH 04/10] fix: guard detectGedcomVersion against NA return and descending seq edge case; add tests Co-Authored-By: Claude Sonnet 4.6 --- R/helpReadGedcom.R | 7 ++++++- R/readGedcom.R | 1 + tests/testthat/test-readGedcom.R | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index 094e35f1..8d6fb3ab 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -51,12 +51,17 @@ detectGedcomVersion <- function(lines) { gedc_idx <- which(grepl("^1 GEDC\\b", head_block))[1L] if (is.na(gedc_idx)) return("unknown") + # Guard: if GEDC is the last line of HEAD, there is no VERS to look ahead to + if (gedc_idx >= length(head_block)) return("unknown") + # Look ahead within HEAD block for the VERS line under GEDC lookahead <- head_block[seq(gedc_idx + 1L, min(gedc_idx + 5L, length(head_block)))] vers_line <- lookahead[grepl("^2 VERS\\b", lookahead)][1L] if (is.na(vers_line)) return("unknown") - stringr::str_trim(stringr::str_extract(vers_line, "(?<=VERS ).*")) + val <- extractInfo(vers_line, "VERS") + if (is.na(val) || !nzchar(val)) return("unknown") + val } #' Combine Columns diff --git a/R/readGedcom.R b/R/readGedcom.R index 57ef2bfa..e3896410 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -167,6 +167,7 @@ readGedcom <- function(file_path, records <- Filter(Negate(is.null), records) if (length(records) == 0) { + # Returns NULL without a gedcom_version attribute; callers should check is.null() first. warning("No people found in file") return(NULL) } diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index 644749ee..a25a1848 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -672,6 +672,26 @@ test_that("readGedcom attaches gedcom_version attribute", { unlink(temp_file) }) +test_that("detectGedcomVersion returns unknown when GEDC is present but VERS is missing", { + lines <- c( + "0 HEAD", + "1 GEDC", + "1 CHAR UTF-8", + "0 @I1@ INDI", + "1 NAME Test /Person/", + "1 SEX M" + ) + expect_equal(detectGedcomVersion(lines), "unknown") +}) + +test_that("readGedcom attaches gedcom_version attribute with post_process = FALSE", { + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom55_header, temp_file) + df <- readGedcom(temp_file, post_process = FALSE) + expect_equal(attr(df, "gedcom_version"), "5.5.1") + unlink(temp_file) +}) + test_that("readGed and readgedcom aliases return the same output as readGedcom", { gedcom_content <- c( "0 @I1@ INDI", From 9b729db8b33582cd3bb07b35db395aef62ed3d99 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:54:45 -0400 Subject: [PATCH 05/10] feat: add GEDCOM 7.x MAP coordinate support and calendar-escape date stripping --- NAMESPACE | 2 + R/helpReadGedcom.R | 30 +++++++++++++++ R/readGedcom.R | 24 ++++++++---- _pkgdown.yml | 9 +++++ man/detectGedcomVersion.Rd | 18 +++++++++ man/gedcomLatToNumeric.Rd | 18 +++++++++ man/gedcomLonToNumeric.Rd | 18 +++++++++ man/processEventLine.Rd | 5 +-- man/readGedcom.Rd | 14 ++++--- tests/testthat/test-readGedcom.R | 65 ++++++++++++++++++++++++++++++++ 10 files changed, 186 insertions(+), 17 deletions(-) create mode 100644 man/detectGedcomVersion.Rd create mode 100644 man/gedcomLatToNumeric.Rd create mode 100644 man/gedcomLonToNumeric.Rd diff --git a/NAMESPACE b/NAMESPACE index 86ddec85..b543f584 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,8 @@ export(dropLink) export(findLeaves) export(fitComponentModel) export(fitPedigreeModel) +export(gedcomLatToNumeric) +export(gedcomLonToNumeric) export(getGenDist) export(getWikiTreeSummary) export(identifyComponentModel) diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index 8d6fb3ab..2017921e 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -64,6 +64,36 @@ detectGedcomVersion <- function(lines) { val } +#' Convert GEDCOM Latitude String to Numeric +#' +#' Converts GEDCOM-style latitude strings like `"N51.5074"` or `"S33.8688"` to +#' signed decimal degrees. Returns `NA` for `NA` input. +#' +#' @param x Character vector of GEDCOM latitude values. +#' @return Numeric vector of decimal degrees (positive = N, negative = S). +#' @export +gedcomLatToNumeric <- function(x) { + ifelse(is.na(x), NA_real_, { + sign <- ifelse(startsWith(x, "N"), 1, -1) + sign * as.numeric(substring(x, 2)) + }) +} + +#' Convert GEDCOM Longitude String to Numeric +#' +#' Converts GEDCOM-style longitude strings like `"E151.2093"` or `"W0.1278"` to +#' signed decimal degrees. Returns `NA` for `NA` input. +#' +#' @param x Character vector of GEDCOM longitude values. +#' @return Numeric vector of decimal degrees (positive = E, negative = W). +#' @export +gedcomLonToNumeric <- function(x) { + ifelse(is.na(x), NA_real_, { + sign <- ifelse(startsWith(x, "E"), 1, -1) + sign * as.numeric(substring(x, 2)) + }) +} + #' Combine Columns #' #' This function combines two columns, handling conflicts and merging non-conflicting data. diff --git a/R/readGedcom.R b/R/readGedcom.R index e3896410..7eccfe8c 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -616,21 +616,29 @@ postProcessGedcom <- function(df_temp, } if (parse_dates == TRUE) { date_cols <- c("birth_date", "death_date") - if (verbose == TRUE) message("Parsing date columns: ", paste(date_cols[date_cols %in% colnames(df_temp)], collapse = ", ")) - # GEDCOM date qualifiers like "ABT", "BEF", "AFT" can be present in date strings. We can remove them before parsing. + calendar_escape_regex <- "@#D[A-Z]+@\\s*" date_qualifier_regex <- "\\b(?:[aA][bBfF][tT]|[bB][eE][tTfF])\\.?\\b\\s*" - if (verbose == TRUE && any(sapply(df_temp[date_cols], function(col) any(grepl(date_qualifier_regex, col, perl = TRUE)))) - ) { - message("Found date qualifiers in date columns. They will be removed before parsing.") + if (verbose == TRUE) message("Parsing date columns: ", paste(date_cols[date_cols %in% colnames(df_temp)], collapse = ", ")) + + if (verbose == TRUE && any(date_cols %in% colnames(df_temp))) { + has_qualifiers <- any(sapply( + df_temp[date_cols[date_cols %in% colnames(df_temp)]], + function(col) any(grepl(date_qualifier_regex, col, perl = TRUE)) + )) + if (has_qualifiers) { + message("Found date qualifiers in date columns. They will be removed before parsing.") + } } # only parse date columns that are present in the data frame - if (any(date_cols %in% colnames(df_temp))) { - df_temp[date_cols] <- lapply(df_temp[date_cols], function(x) { + present_date_cols <- date_cols[date_cols %in% colnames(df_temp)] + if (length(present_date_cols) > 0) { + df_temp[present_date_cols] <- lapply(df_temp[present_date_cols], function(x) { if (is.character(x)) { + x <- stringr::str_replace_all(x, calendar_escape_regex, "") x <- stringr::str_replace_all(x, date_qualifier_regex, "") - as.Date(x, format = "%d %b %Y") + as.Date(stringr::str_trim(x), format = "%d %b %Y") } else { x } diff --git a/_pkgdown.yml b/_pkgdown.yml index fbd085d6..7f59e059 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,12 @@ url: https://r-computing-lab.github.io/BGmisc template: bootstrap: 5 + +reference: + - title: GEDCOM I/O + contents: + - readGedcom + - readGed + - readgedcom + - gedcomLatToNumeric + - gedcomLonToNumeric diff --git a/man/detectGedcomVersion.Rd b/man/detectGedcomVersion.Rd new file mode 100644 index 00000000..d218f287 --- /dev/null +++ b/man/detectGedcomVersion.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpReadGedcom.R +\name{detectGedcomVersion} +\alias{detectGedcomVersion} +\title{Detect GEDCOM Version from File Lines} +\usage{ +detectGedcomVersion(lines) +} +\arguments{ +\item{lines}{Character vector of lines from a GEDCOM file.} +} +\value{ +A string such as `"5.5.1"`, `"7.0"`, or `"unknown"`. +} +\description{ +Detect GEDCOM Version from File Lines +} +\keyword{internal} diff --git a/man/gedcomLatToNumeric.Rd b/man/gedcomLatToNumeric.Rd new file mode 100644 index 00000000..a95df188 --- /dev/null +++ b/man/gedcomLatToNumeric.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpReadGedcom.R +\name{gedcomLatToNumeric} +\alias{gedcomLatToNumeric} +\title{Convert GEDCOM Latitude String to Numeric} +\usage{ +gedcomLatToNumeric(x) +} +\arguments{ +\item{x}{Character vector of GEDCOM latitude values.} +} +\value{ +Numeric vector of decimal degrees (positive = N, negative = S). +} +\description{ +Converts GEDCOM-style latitude strings like `"N51.5074"` or `"S33.8688"` to +signed decimal degrees. Returns `NA` for `NA` input. +} diff --git a/man/gedcomLonToNumeric.Rd b/man/gedcomLonToNumeric.Rd new file mode 100644 index 00000000..4bad72f7 --- /dev/null +++ b/man/gedcomLonToNumeric.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpReadGedcom.R +\name{gedcomLonToNumeric} +\alias{gedcomLonToNumeric} +\title{Convert GEDCOM Longitude String to Numeric} +\usage{ +gedcomLonToNumeric(x) +} +\arguments{ +\item{x}{Character vector of GEDCOM longitude values.} +} +\value{ +Numeric vector of decimal degrees (positive = E, negative = W). +} +\description{ +Converts GEDCOM-style longitude strings like `"E151.2093"` or `"W0.1278"` to +signed decimal degrees. Returns `NA` for `NA` input. +} diff --git a/man/processEventLine.Rd b/man/processEventLine.Rd index d4cff3d3..40bb9417 100644 --- a/man/processEventLine.Rd +++ b/man/processEventLine.Rd @@ -18,10 +18,9 @@ processEventLine(event, block, i, record, pattern_rows) \item{pattern_rows}{A list with counts of GEDCOM tag occurrences.} } \value{ -The updated record with parsed event information.# +The updated record with parsed event information. } \description{ Extracts event details (e.g., date, place, cause, latitude, longitude) from a block of GEDCOM lines. -For "birth": expect DATE on line i+1, PLAC on i+2, LATI on i+4, LONG on i+5. -For "death": expect DATE on line i+1, PLAC on i+2, CAUS on i+3, LATI on i+4, LONG on i+5. +Uses level-aware sub-block parsing so fields are looked up by tag name rather than fixed offsets. } diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index 2350716b..8005f773 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -139,12 +139,14 @@ cleaned full name. Additional name components are parsed when present, including name prefix, name suffix, nickname, and married surname. Birth and death events are recognized from `BIRT` and `DEAT` tags. Event -details are currently parsed using fixed offsets within the individual block. -For birth events, the parser expects `DATE` at `i + 1`, `PLAC` at `i + 2`, -`LATI` at `i + 4`, and `LONG` at `i + 5`. For death events, the parser -expects `DATE` at `i + 1`, `PLAC` at `i + 2`, `CAUS` at `i + 3`, `LATI` at -`i + 4`, and `LONG` at `i + 5`. Missing elements leave the corresponding -output fields as `NA`. +details are parsed by collecting all child lines whose GEDCOM level equals +the event level plus one (direct children), then looking up sub-fields by +tag name. `DATE`, `PLAC`, and `CAUS` are matched as direct children of the +event. Coordinates (`LATI` and `LONG`) are searched across all descendant +lines, which allows them to be located whether they appear as direct children +(common in some GEDCOM 5.5.x exporters), under `PLAC` (standard GEDCOM +5.5.1), or under a `MAP` substructure under `PLAC` (GEDCOM 7.x). Missing +sub-fields leave the corresponding output columns as `NA`. Attribute tags such as `OCCU`, `EDUC`, `RELI`, `CAST`, `NCHI`, `NMR`, `NATI`, `RESI`, `PROP`, `SSN`, `TITL`, `DSCR`, and `IDNO` are parsed directly into diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index a25a1848..5f7aa269 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -715,3 +715,68 @@ test_that("readGed and readgedcom aliases return the same output as readGedcom", unlink(temp_file) }) + +gedcom7_map_content <- c( + "0 HEAD", + "1 GEDC", + "2 VERS 7.0", + "0 @I1@ INDI", + "1 NAME Test /Person/", + "1 SEX F", + "1 BIRT", + "2 DATE 1 JAN 2000", + "2 PLAC London, England", + "3 MAP", + "4 LATI N51.5074", + "4 LONG W0.1278", + "1 DEAT", + "2 DATE 31 DEC 2080", + "2 PLAC Edinburgh, Scotland", + "3 MAP", + "4 LATI N55.9533", + "4 LONG W3.1883" +) + +gedcom_calendar_escape_content <- c( + "0 HEAD", + "1 GEDC", + "2 VERS 5.5", + "0 @I1@ INDI", + "1 NAME Old /Style/", + "1 SEX M", + "1 BIRT", + "2 DATE @#DGREGORIAN@ 15 JUL 1823" +) + +test_that("readGedcom parses GEDCOM 7.x MAP coordinate structure", { + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom7_map_content, temp_file) + df <- readGedcom(temp_file) + expect_equal(attr(df, "gedcom_version"), "7.0") + expect_equal(df$birth_lat[1], "N51.5074") + expect_equal(df$birth_long[1], "W0.1278") + expect_equal(df$death_lat[1], "N55.9533") + expect_equal(df$death_long[1], "W3.1883") + unlink(temp_file) +}) + +test_that("parse_dates strips GEDCOM 5.5 calendar escape before parsing", { + temp_file <- tempfile(fileext = ".ged") + writeLines(gedcom_calendar_escape_content, temp_file) + df <- readGedcom(temp_file, parse_dates = TRUE) + expect_false(is.na(df$birth_date[1])) + expect_equal(format(df$birth_date[1], "%d %b %Y"), "15 Jul 1823") + unlink(temp_file) +}) + +test_that("gedcomLatToNumeric converts N/S notation correctly", { + expect_equal(gedcomLatToNumeric("N51.5074"), 51.5074) + expect_equal(gedcomLatToNumeric("S33.8688"), -33.8688) + expect_true(is.na(gedcomLatToNumeric(NA_character_))) +}) + +test_that("gedcomLonToNumeric converts E/W notation correctly", { + expect_equal(gedcomLonToNumeric("E151.2093"), 151.2093) + expect_equal(gedcomLonToNumeric("W0.1278"), -0.1278) + expect_true(is.na(gedcomLonToNumeric(NA_character_))) +}) From 1f994f9688185e6de225f0296cd2ab2c20cb95a8 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:02:38 -0400 Subject: [PATCH 06/10] fix: guard coordinate helpers against invalid prefix; fix French Republican calendar escape; fix pkgdown catch-all --- R/helpReadGedcom.R | 24 ++++++++++++++---------- R/readGedcom.R | 2 +- _pkgdown.yml | 4 ++++ data-raw/df_royal92.R | 4 ++-- man/gedcomLatToNumeric.Rd | 5 ++++- man/gedcomLonToNumeric.Rd | 5 ++++- tests/testthat/test-readGedcom.R | 10 ++++++++++ 7 files changed, 39 insertions(+), 15 deletions(-) diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index 2017921e..470183cf 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -67,31 +67,35 @@ detectGedcomVersion <- function(lines) { #' Convert GEDCOM Latitude String to Numeric #' #' Converts GEDCOM-style latitude strings like `"N51.5074"` or `"S33.8688"` to -#' signed decimal degrees. Returns `NA` for `NA` input. +#' signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. #' #' @param x Character vector of GEDCOM latitude values. #' @return Numeric vector of decimal degrees (positive = N, negative = S). +#' @examples +#' gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) #' @export gedcomLatToNumeric <- function(x) { - ifelse(is.na(x), NA_real_, { - sign <- ifelse(startsWith(x, "N"), 1, -1) - sign * as.numeric(substring(x, 2)) - }) + is_north <- startsWith(x, "N") + is_valid <- is_north | startsWith(x, "S") + result <- as.numeric(substring(x, 2)) * ifelse(is_north, 1, -1) + ifelse(is.na(x) | !is_valid, NA_real_, result) } #' Convert GEDCOM Longitude String to Numeric #' #' Converts GEDCOM-style longitude strings like `"E151.2093"` or `"W0.1278"` to -#' signed decimal degrees. Returns `NA` for `NA` input. +#' signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. #' #' @param x Character vector of GEDCOM longitude values. #' @return Numeric vector of decimal degrees (positive = E, negative = W). +#' @examples +#' gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) #' @export gedcomLonToNumeric <- function(x) { - ifelse(is.na(x), NA_real_, { - sign <- ifelse(startsWith(x, "E"), 1, -1) - sign * as.numeric(substring(x, 2)) - }) + is_east <- startsWith(x, "E") + is_valid <- is_east | startsWith(x, "W") + result <- as.numeric(substring(x, 2)) * ifelse(is_east, 1, -1) + ifelse(is.na(x) | !is_valid, NA_real_, result) } #' Combine Columns diff --git a/R/readGedcom.R b/R/readGedcom.R index 7eccfe8c..7a61fc51 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -616,7 +616,7 @@ postProcessGedcom <- function(df_temp, } if (parse_dates == TRUE) { date_cols <- c("birth_date", "death_date") - calendar_escape_regex <- "@#D[A-Z]+@\\s*" + calendar_escape_regex <- "@#D[A-Z ]+@\\s*" date_qualifier_regex <- "\\b(?:[aA][bBfF][tT]|[bB][eE][tTfF])\\.?\\b\\s*" if (verbose == TRUE) message("Parsing date columns: ", paste(date_cols[date_cols %in% colnames(df_temp)], collapse = ", ")) diff --git a/_pkgdown.yml b/_pkgdown.yml index 7f59e059..2ba475c0 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -10,3 +10,7 @@ reference: - readgedcom - gedcomLatToNumeric - gedcomLonToNumeric + +- title: Other functions + contents: + - matches(".*") diff --git a/data-raw/df_royal92.R b/data-raw/df_royal92.R index f0f55a4f..008f65a5 100644 --- a/data-raw/df_royal92.R +++ b/data-raw/df_royal92.R @@ -3485,8 +3485,8 @@ if (FALSE) { code_male = "M", code_female = "F", add_phantoms = TRUE, - ped_packed=T, - ped_align = T + ped_packed=TRUE, + ped_align = TRUE ), tooltip_columns = c("personID", "name", "title", "birth_date", "death_date") diff --git a/man/gedcomLatToNumeric.Rd b/man/gedcomLatToNumeric.Rd index a95df188..a677b133 100644 --- a/man/gedcomLatToNumeric.Rd +++ b/man/gedcomLatToNumeric.Rd @@ -14,5 +14,8 @@ Numeric vector of decimal degrees (positive = N, negative = S). } \description{ Converts GEDCOM-style latitude strings like `"N51.5074"` or `"S33.8688"` to -signed decimal degrees. Returns `NA` for `NA` input. +signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. +} +\examples{ +gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) } diff --git a/man/gedcomLonToNumeric.Rd b/man/gedcomLonToNumeric.Rd index 4bad72f7..d524d083 100644 --- a/man/gedcomLonToNumeric.Rd +++ b/man/gedcomLonToNumeric.Rd @@ -14,5 +14,8 @@ Numeric vector of decimal degrees (positive = E, negative = W). } \description{ Converts GEDCOM-style longitude strings like `"E151.2093"` or `"W0.1278"` to -signed decimal degrees. Returns `NA` for `NA` input. +signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. +} +\examples{ +gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) } diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index 5f7aa269..4d8124ed 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -780,3 +780,13 @@ test_that("gedcomLonToNumeric converts E/W notation correctly", { expect_equal(gedcomLonToNumeric("W0.1278"), -0.1278) expect_true(is.na(gedcomLonToNumeric(NA_character_))) }) + +test_that("gedcomLatToNumeric returns NA for unrecognised prefix", { + expect_true(is.na(gedcomLatToNumeric("12.34"))) + expect_true(is.na(gedcomLatToNumeric(""))) +}) + +test_that("gedcomLonToNumeric returns NA for unrecognised prefix", { + expect_true(is.na(gedcomLonToNumeric("12.34"))) + expect_true(is.na(gedcomLonToNumeric(""))) +}) From 002c78dbbe0cd7cf0930cd9028df28fd753a7480 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:14:07 -0400 Subject: [PATCH 07/10] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index d86e48ff..d1b910c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * Optimized gedcom reader, com2links for speed and memory usage, with a focus on large pedigrees * Fixed bug in gedcom reader that resulted in document records being added to the final person in the pedigree * Added more unit tests for gedcom reader and data parser +* several improvements to the GEDCOM parsing functionality, focusing on more robust and flexible event parsing, better support for different GEDCOM versions, and enhanced usability. * Optimized sliceFamilies to be more abstract, and no longer require mtdna * Created `.require_openmx()` to make it easier to use OpenMx functions without making OpenMx a dependency * Smarter string ID handling for ped2id From 7da356d7ef9cdc5771634bc73f5d0ec57cc55729 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sat, 6 Jun 2026 15:21:27 -0400 Subject: [PATCH 08/10] Potential fix for pull request finding Potential fix for pull request finding Potential fix for pull request finding Potential fix for pull request finding Potential fix for pull request finding Potential fix for pull request finding Potential fix for pull request finding Co-Authored-By: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- R/helpReadGedcom.R | 17 +++++++++-------- R/readGedcom.R | 4 +++- _pkgdown.yml | 6 +++--- tests/testthat/test-readGedcom.R | 8 ++++---- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index 470183cf..2207ae22 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -44,6 +44,7 @@ detectGedcomVersion <- function(lines) { if (is.na(head_idx)) return("unknown") # End of HEAD is the next level-0 record + if (head_idx >= length(lines)) return("unknown") next_l0 <- which(grepl("^0 ", lines[(head_idx + 1L):length(lines)]))[1L] head_end <- if (is.na(next_l0)) length(lines) else head_idx + next_l0 - 1L head_block <- lines[head_idx:head_end] @@ -75,10 +76,10 @@ detectGedcomVersion <- function(lines) { #' gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) #' @export gedcomLatToNumeric <- function(x) { - is_north <- startsWith(x, "N") - is_valid <- is_north | startsWith(x, "S") - result <- as.numeric(substring(x, 2)) * ifelse(is_north, 1, -1) - ifelse(is.na(x) | !is_valid, NA_real_, result) + out <- rep(NA_real_, length(x)) + ok <- !is.na(x) & (startsWith(x, "N") | startsWith(x, "S")) + out[ok] <- as.numeric(substring(x[ok], 2)) * ifelse(startsWith(x[ok], "N"), 1, -1) + out } #' Convert GEDCOM Longitude String to Numeric @@ -92,10 +93,10 @@ gedcomLatToNumeric <- function(x) { #' gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) #' @export gedcomLonToNumeric <- function(x) { - is_east <- startsWith(x, "E") - is_valid <- is_east | startsWith(x, "W") - result <- as.numeric(substring(x, 2)) * ifelse(is_east, 1, -1) - ifelse(is.na(x) | !is_valid, NA_real_, result) + out <- rep(NA_real_, length(x)) + ok <- !is.na(x) & (startsWith(x, "E") | startsWith(x, "W")) + out[ok] <- as.numeric(substring(x[ok], 2)) * ifelse(startsWith(x[ok], "E"), 1, -1) + out } #' Combine Columns diff --git a/R/readGedcom.R b/R/readGedcom.R index 7a61fc51..5359cfa6 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -381,7 +381,9 @@ extractEventSubBlock <- function(block, start_idx) { if (start_idx >= n) return(character(0)) end_idx <- start_idx for (j in (start_idx + 1L):n) { - if (extractGedcomLevel(block[j]) <= event_level) break + lvl <- extractGedcomLevel(block[j]) + if (is.na(lvl)) next + if (lvl <= event_level) break end_idx <- j } if (end_idx == start_idx) return(character(0)) diff --git a/_pkgdown.yml b/_pkgdown.yml index 2ba475c0..5c962523 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -11,6 +11,6 @@ reference: - gedcomLatToNumeric - gedcomLonToNumeric -- title: Other functions - contents: - - matches(".*") + - title: Other functions + contents: + - matches(".*") diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index 4d8124ed..1eb3cf48 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -659,9 +659,9 @@ gedcom7_header <- c( ) test_that("detectGedcomVersion returns correct version string", { - expect_equal(detectGedcomVersion(gedcom55_header), "5.5.1") - expect_equal(detectGedcomVersion(gedcom7_header), "7.0") - expect_equal(detectGedcomVersion(c("0 @I1@ INDI", "1 NAME No /Head/")), "unknown") + expect_equal(BGmisc:::detectGedcomVersion(gedcom55_header), "5.5.1") + expect_equal(BGmisc:::detectGedcomVersion(gedcom7_header), "7.0") + expect_equal(BGmisc:::detectGedcomVersion(c("0 @I1@ INDI", "1 NAME No /Head/")), "unknown") }) test_that("readGedcom attaches gedcom_version attribute", { @@ -681,7 +681,7 @@ test_that("detectGedcomVersion returns unknown when GEDC is present but VERS is "1 NAME Test /Person/", "1 SEX M" ) - expect_equal(detectGedcomVersion(lines), "unknown") + expect_equal(BGmisc:::detectGedcomVersion(lines), "unknown") }) test_that("readGedcom attaches gedcom_version attribute with post_process = FALSE", { From 872530982edcc7ff809113f76dfc12d9dccef0ae Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:17:38 -0400 Subject: [PATCH 09/10] cleanup --- NAMESPACE | 2 - R/helpReadGedcom.R | 6 +-- R/readGedcom.R | 57 ++++++++++++++++++-------- man/gedcomLatToNumeric.Rd | 1 + man/gedcomLonToNumeric.Rd | 1 + man/postProcessGedcom.Rd | 17 +++++--- man/readGedcom.Rd | 20 +++++---- tests/testthat/test-readGedcom.R | 8 ++-- tests/testthat/test-readGedcomlegacy.R | 4 +- 9 files changed, 75 insertions(+), 41 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b543f584..86ddec85 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,8 +25,6 @@ export(dropLink) export(findLeaves) export(fitComponentModel) export(fitPedigreeModel) -export(gedcomLatToNumeric) -export(gedcomLonToNumeric) export(getGenDist) export(getWikiTreeSummary) export(identifyComponentModel) diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index 2207ae22..0a128f33 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -18,7 +18,7 @@ initializeRecord <- function(all_var_names) { #' @param df_temp A data frame containing the columns to be combined. #' @return A data frame with the combined columns. collapseNames <- function(verbose, df_temp) { - if (verbose == TRUE) message("Combining Duplicate Columns") + if (verbose == TRUE) message("Combining Duplicate Name Columns...") if (!all(is.na(df_temp$name_given_pieces)) || !all(is.na(df_temp$name_given))) { result <- combineColumns(df_temp$name_given, df_temp$name_given_pieces) @@ -74,7 +74,7 @@ detectGedcomVersion <- function(lines) { #' @return Numeric vector of decimal degrees (positive = N, negative = S). #' @examples #' gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) -#' @export +#' @keywords internal gedcomLatToNumeric <- function(x) { out <- rep(NA_real_, length(x)) ok <- !is.na(x) & (startsWith(x, "N") | startsWith(x, "S")) @@ -91,7 +91,7 @@ gedcomLatToNumeric <- function(x) { #' @return Numeric vector of decimal degrees (positive = E, negative = W). #' @examples #' gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) -#' @export +#' @keywords internal gedcomLonToNumeric <- function(x) { out <- rep(NA_real_, length(x)) ok <- !is.na(x) & (startsWith(x, "E") | startsWith(x, "W")) diff --git a/R/readGedcom.R b/R/readGedcom.R index 5359cfa6..6f4a8c0f 100644 --- a/R/readGedcom.R +++ b/R/readGedcom.R @@ -47,20 +47,22 @@ #' #' @param file_path Character string. Path to the GEDCOM file. #' @param verbose Logical. If `TRUE`, print progress messages. -#' @param add_parents Logical. If `TRUE`, infer `momID` and `dadID` from `FAMC` -#' and `FAMS` mappings during post-processing. #' @param remove_empty_cols Logical. If `TRUE`, drop columns that are entirely #' `NA` during post-processing. -#' @param combine_cols Logical. If `TRUE`, combine redundant name columns, such -#' as `name_given` with `name_given_pieces` and `name_surn` with -#' `name_surn_pieces`, when their values do not conflict. -#' @param parse_dates Logical. If `TRUE`, attempt to parse date columns (e.g., `birth_date`, `death_date`) into Date objects, after removing common GEDCOM date qualifiers like "ABT", "BEF", and "AFT". #' @param skinny Logical. If `TRUE`, return a slimmer data frame by dropping #' `FAMC`, `FAMS`, and columns that are entirely `NA` during post-processing. #' @param update_rate Numeric. Intended rate at which progress messages should #' be printed. Currently unused. #' @param post_process Logical. If `TRUE`, apply post-processing steps controlled #' by `add_parents`, `combine_cols`, `remove_empty_cols`, `skinny`, and `parse_dates`. +#' @param remove_empty_cols Logical indicating whether to remove columns that are entirely missing. +#' @param combine_cols Logical. If `TRUE`, combine redundant name columns, such +#' as `name_given` with `name_given_pieces` and `name_surn` with +#' `name_surn_pieces`, when their values do not conflict. +#' @param add_parents Logical. If `TRUE`, infer `momID` and `dadID` from `FAMC` +#' and `FAMS` mappings during post-processing. +#' @param parse_dates Logical. If `TRUE`, attempt to parse date columns (e.g., `birth_date`, `death_date`) into Date objects, after removing common GEDCOM date qualifiers like "ABT", "BEF", and "AFT". +#' @param clean_names Logical indicating whether to clean name columns by removing trailing slashes and squishing whitespace. #' @param ... Additional arguments. Currently unused. #' @return A data frame containing information about individuals, with the following potential columns: #' \describe{ @@ -109,13 +111,15 @@ #' readGedcom <- function(file_path, verbose = FALSE, + post_process = TRUE, add_parents = TRUE, remove_empty_cols = TRUE, combine_cols = TRUE, skinny = FALSE, parse_dates = FALSE, + clean_names = TRUE, update_rate = 1000, - post_process = TRUE, + ...) { # Ensure the file exists and read all lines. if (!file.exists(file_path)) { @@ -188,6 +192,7 @@ readGedcom <- function(file_path, combine_cols = combine_cols, parse_dates = parse_dates, add_parents = add_parents, + clean_names = clean_names, skinny = skinny, verbose = verbose ) @@ -460,8 +465,11 @@ processEventLine <- function(event, block, i, record, pattern_rows) { applyTagMappings <- function(line, record, pattern_rows, tag_mappings) { for (mapping in tag_mappings) { extractor <- if (is.null(mapping$extractor)) NULL else mapping$extractor - result <- processTag(mapping$tag, mapping$field, pattern_rows, line, record, - extractor = extractor, mode = mapping$mode + result <- processTag(mapping$tag, + mapping$field, + pattern_rows, line, record, + extractor = extractor, + mode = mapping$mode ) record <- result$vars if (result$matched) { @@ -589,20 +597,16 @@ processTag <- function(tag, #' #' @description This function optionally adds parent information, combines duplicate columns, #' and removes empty columns from the GEDCOM data frame. It is called by \code{readGedcom()} if \code{post_process = TRUE}. -#' +#' @inheritParams readGedcom #' @param df_temp A data frame produced by \code{readGedcom()}. -#' @param remove_empty_cols Logical indicating whether to remove columns that are entirely missing. -#' @param combine_cols Logical indicating whether to combine columns with duplicate values. -#' @param add_parents Logical indicating whether to add parent information. -#' @param parse_dates Logical indicating whether to parse date columns into Date objects. -#' @param skinny Logical indicating whether to slim down the data frame. #' @param verbose Logical indicating whether to print progress messages. #' @return The post-processed data frame. postProcessGedcom <- function(df_temp, remove_empty_cols = TRUE, combine_cols = TRUE, - parse_dates = FALSE, add_parents = TRUE, + parse_dates = FALSE, + clean_names = TRUE, skinny = TRUE, verbose = FALSE) { if (add_parents == TRUE) { @@ -621,14 +625,16 @@ postProcessGedcom <- function(df_temp, calendar_escape_regex <- "@#D[A-Z ]+@\\s*" date_qualifier_regex <- "\\b(?:[aA][bBfF][tT]|[bB][eE][tTfF])\\.?\\b\\s*" - if (verbose == TRUE) message("Parsing date columns: ", paste(date_cols[date_cols %in% colnames(df_temp)], collapse = ", ")) + if (verbose == TRUE) { + message("Parsing date columns: ", paste(date_cols[date_cols %in% colnames(df_temp)], collapse = ", ")) + } if (verbose == TRUE && any(date_cols %in% colnames(df_temp))) { has_qualifiers <- any(sapply( df_temp[date_cols[date_cols %in% colnames(df_temp)]], function(col) any(grepl(date_qualifier_regex, col, perl = TRUE)) )) - if (has_qualifiers) { + if (has_qualifiers==TRUE) { message("Found date qualifiers in date columns. They will be removed before parsing.") } } @@ -647,6 +653,21 @@ postProcessGedcom <- function(df_temp, }) } } + if (clean_names == TRUE) { + if (verbose == TRUE) message("Cleaning column names") + name_cols <- grep("^name", colnames(df_temp), value = TRUE) + if (verbose == TRUE && any(name_cols %in% colnames(df_temp))) { + message("Cleaning name columns: ", paste(name_cols, collapse = ", ")) + } + df_temp[name_cols] <- lapply(df_temp[name_cols], function(x) { + if (is.character(x)) { # remove / at end of names if present, and squish whitespace + stringr::str_squish(stringr::str_replace(x,"/+$", "")) + } else { + x + } + } + ) + } if (skinny == TRUE) { if (verbose == TRUE) message("Slimming down the data frame") # Remove raw family relationship columns diff --git a/man/gedcomLatToNumeric.Rd b/man/gedcomLatToNumeric.Rd index a677b133..07252bf2 100644 --- a/man/gedcomLatToNumeric.Rd +++ b/man/gedcomLatToNumeric.Rd @@ -19,3 +19,4 @@ signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. \examples{ gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) } +\keyword{internal} diff --git a/man/gedcomLonToNumeric.Rd b/man/gedcomLonToNumeric.Rd index d524d083..37a92d4a 100644 --- a/man/gedcomLonToNumeric.Rd +++ b/man/gedcomLonToNumeric.Rd @@ -19,3 +19,4 @@ signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. \examples{ gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) } +\keyword{internal} diff --git a/man/postProcessGedcom.Rd b/man/postProcessGedcom.Rd index f1449735..c91179e8 100644 --- a/man/postProcessGedcom.Rd +++ b/man/postProcessGedcom.Rd @@ -8,8 +8,9 @@ postProcessGedcom( df_temp, remove_empty_cols = TRUE, combine_cols = TRUE, - parse_dates = FALSE, add_parents = TRUE, + parse_dates = FALSE, + clean_names = TRUE, skinny = TRUE, verbose = FALSE ) @@ -19,13 +20,19 @@ postProcessGedcom( \item{remove_empty_cols}{Logical indicating whether to remove columns that are entirely missing.} -\item{combine_cols}{Logical indicating whether to combine columns with duplicate values.} +\item{combine_cols}{Logical. If `TRUE`, combine redundant name columns, such +as `name_given` with `name_given_pieces` and `name_surn` with +`name_surn_pieces`, when their values do not conflict.} + +\item{add_parents}{Logical. If `TRUE`, infer `momID` and `dadID` from `FAMC` +and `FAMS` mappings during post-processing.} -\item{parse_dates}{Logical indicating whether to parse date columns into Date objects.} +\item{parse_dates}{Logical. If `TRUE`, attempt to parse date columns (e.g., `birth_date`, `death_date`) into Date objects, after removing common GEDCOM date qualifiers like "ABT", "BEF", and "AFT".} -\item{add_parents}{Logical indicating whether to add parent information.} +\item{clean_names}{Logical indicating whether to clean name columns by removing trailing slashes and squishing whitespace.} -\item{skinny}{Logical indicating whether to slim down the data frame.} +\item{skinny}{Logical. If `TRUE`, return a slimmer data frame by dropping +`FAMC`, `FAMS`, and columns that are entirely `NA` during post-processing.} \item{verbose}{Logical indicating whether to print progress messages.} } diff --git a/man/readGedcom.Rd b/man/readGedcom.Rd index 8005f773..63cd9079 100644 --- a/man/readGedcom.Rd +++ b/man/readGedcom.Rd @@ -9,39 +9,42 @@ readGedcom( file_path, verbose = FALSE, + post_process = TRUE, add_parents = TRUE, remove_empty_cols = TRUE, combine_cols = TRUE, skinny = FALSE, parse_dates = FALSE, + clean_names = TRUE, update_rate = 1000, - post_process = TRUE, ... ) readGed( file_path, verbose = FALSE, + post_process = TRUE, add_parents = TRUE, remove_empty_cols = TRUE, combine_cols = TRUE, skinny = FALSE, parse_dates = FALSE, + clean_names = TRUE, update_rate = 1000, - post_process = TRUE, ... ) readgedcom( file_path, verbose = FALSE, + post_process = TRUE, add_parents = TRUE, remove_empty_cols = TRUE, combine_cols = TRUE, skinny = FALSE, parse_dates = FALSE, + clean_names = TRUE, update_rate = 1000, - post_process = TRUE, ... ) } @@ -50,11 +53,13 @@ readgedcom( \item{verbose}{Logical. If `TRUE`, print progress messages.} +\item{post_process}{Logical. If `TRUE`, apply post-processing steps controlled +by `add_parents`, `combine_cols`, `remove_empty_cols`, `skinny`, and `parse_dates`.} + \item{add_parents}{Logical. If `TRUE`, infer `momID` and `dadID` from `FAMC` and `FAMS` mappings during post-processing.} -\item{remove_empty_cols}{Logical. If `TRUE`, drop columns that are entirely -`NA` during post-processing.} +\item{remove_empty_cols}{Logical indicating whether to remove columns that are entirely missing.} \item{combine_cols}{Logical. If `TRUE`, combine redundant name columns, such as `name_given` with `name_given_pieces` and `name_surn` with @@ -65,12 +70,11 @@ as `name_given` with `name_given_pieces` and `name_surn` with \item{parse_dates}{Logical. If `TRUE`, attempt to parse date columns (e.g., `birth_date`, `death_date`) into Date objects, after removing common GEDCOM date qualifiers like "ABT", "BEF", and "AFT".} +\item{clean_names}{Logical indicating whether to clean name columns by removing trailing slashes and squishing whitespace.} + \item{update_rate}{Numeric. Intended rate at which progress messages should be printed. Currently unused.} -\item{post_process}{Logical. If `TRUE`, apply post-processing steps controlled -by `add_parents`, `combine_cols`, `remove_empty_cols`, `skinny`, and `parse_dates`.} - \item{...}{Additional arguments. Currently unused.} } \value{ diff --git a/tests/testthat/test-readGedcom.R b/tests/testthat/test-readGedcom.R index 1eb3cf48..a8102611 100644 --- a/tests/testthat/test-readGedcom.R +++ b/tests/testthat/test-readGedcom.R @@ -80,14 +80,14 @@ test_that("readGedcom reads and parses a GEDCOM file correctly", { expect_equal(nrow(df), 2) expect_equal(df$name_given[1], "John") expect_equal(df$name_surn[1], "Doe") - expect_equal(df$name[1], "John Doe/") + expect_equal(df$name[1], "John Doe") expect_equal(df$sex[1], "M") expect_equal(df$birth_date[1], "1 JAN 1900") expect_equal(df$birth_place[1], "Someplace") expect_equal(df$attribute_children[1], NA_character_) expect_equal(df$name_given[2], "Jane") expect_equal(df$name_surn[2], "Smith") - expect_equal(df$name[2], "Jane Smith/") + expect_equal(df$name[2], "Jane Smith") expect_equal(df$sex[2], "F") expect_equal(df$birth_date[2], "2 FEB 1910") expect_equal(df$birth_place[2], "Anotherplace") @@ -213,7 +213,7 @@ test_that("readGedcom parses death event correctly", { temp_file <- tempfile(fileext = ".ged") writeLines(gedcom_content, temp_file) - df <- readGedcom(temp_file, verbose = TRUE) + df <- readGedcom(temp_file, verbose = TRUE, clean_names = FALSE) expect_true("death_date" %in% colnames(df)) expect_true("death_place" %in% colnames(df)) @@ -354,7 +354,7 @@ test_that("readGedcom parses all supported name component tags", { expect_equal(df$name_nick[1], "Jack") expect_equal(df$name_surn_pieces[1], "Doe") expect_equal(df$name_nsfx[1], "Jr.") - expect_equal(df$name_marriedsurn[1], "John Quincy /MarriedDoe/") + expect_equal(df$name_marriedsurn[1], "John Quincy /MarriedDoe") unlink(temp_file) }) diff --git a/tests/testthat/test-readGedcomlegacy.R b/tests/testthat/test-readGedcomlegacy.R index 70fe4251..24ee0fe1 100644 --- a/tests/testthat/test-readGedcomlegacy.R +++ b/tests/testthat/test-readGedcomlegacy.R @@ -14,7 +14,9 @@ test_that("readGedcom parses death event correctly for legacy", { temp_file <- tempfile(fileext = ".ged") writeLines(gedcom_content, temp_file) - df <- readGedcom(temp_file, verbose = TRUE) + df <- readGedcom(temp_file, verbose = TRUE, + clean_names = FALSE + ) df_leg <- .readGedcom.legacy(temp_file, verbose = TRUE) expect_true("death_date" %in% colnames(df_leg)) From 704417a492b2681739594e995bdae37bbe62a7c3 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:54:55 -0400 Subject: [PATCH 10/10] Update helpReadGedcom.R --- R/helpReadGedcom.R | 4 ++-- man/gedcomLatToNumeric.Rd | 2 +- man/gedcomLonToNumeric.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/helpReadGedcom.R b/R/helpReadGedcom.R index 0a128f33..ac6efd49 100644 --- a/R/helpReadGedcom.R +++ b/R/helpReadGedcom.R @@ -73,7 +73,7 @@ detectGedcomVersion <- function(lines) { #' @param x Character vector of GEDCOM latitude values. #' @return Numeric vector of decimal degrees (positive = N, negative = S). #' @examples -#' gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) +#' BGmisc:::gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) #' @keywords internal gedcomLatToNumeric <- function(x) { out <- rep(NA_real_, length(x)) @@ -90,7 +90,7 @@ gedcomLatToNumeric <- function(x) { #' @param x Character vector of GEDCOM longitude values. #' @return Numeric vector of decimal degrees (positive = E, negative = W). #' @examples -#' gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) +#' BGmisc:::gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) #' @keywords internal gedcomLonToNumeric <- function(x) { out <- rep(NA_real_, length(x)) diff --git a/man/gedcomLatToNumeric.Rd b/man/gedcomLatToNumeric.Rd index 07252bf2..b81ebced 100644 --- a/man/gedcomLatToNumeric.Rd +++ b/man/gedcomLatToNumeric.Rd @@ -17,6 +17,6 @@ Converts GEDCOM-style latitude strings like `"N51.5074"` or `"S33.8688"` to signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. } \examples{ -gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) +BGmisc:::gedcomLatToNumeric(c("N51.5074", "S33.8688", NA)) } \keyword{internal} diff --git a/man/gedcomLonToNumeric.Rd b/man/gedcomLonToNumeric.Rd index 37a92d4a..7914d7c0 100644 --- a/man/gedcomLonToNumeric.Rd +++ b/man/gedcomLonToNumeric.Rd @@ -17,6 +17,6 @@ Converts GEDCOM-style longitude strings like `"E151.2093"` or `"W0.1278"` to signed decimal degrees. Returns `NA` for `NA` or unrecognised-prefix input. } \examples{ -gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) +BGmisc:::gedcomLonToNumeric(c("E151.2093", "W0.1278", NA)) } \keyword{internal}