Here is a base R option, using lapply
+ cbind
cbind(
dat[1],
do.call(
cbind,
lapply(
seq_along(dat)[-1],
\(k) {
d <- dat[k]
nms <- paste0(gsub("\\s+\\(.*$", "", names(d)), c("_val", "_unit"))
setNames(cbind(d, gsub(".*\\((.*)\\)", "\\1", names(d))), nms)
}
)
)
)
which gives
SUBJID Dose_val Dose_unit Tmax_val Tmax_unit
1 2001 200 mg 5 h
2 2002 200 mg 5 h
benchmark
Given list of solution
stefan <- function() {
dat |>
pivot_longer(-SUBJID,
names_to = "name_unit"
) |>
separate_wider_regex(name_unit,
patterns = c(name = "^.*?", " \\(", unit = ".*", "\\)")
) |>
pivot_wider(
names_from = name,
values_from = c(unit, value),
names_glue = "{name}_{.value}",
names_vary = "slowest"
)
}
issac <- function() {
dat |>
mutate(
Dose_value = sub("\\D", "", `Dose (mg)`),
Dose_unit = "mg",
Tmax_value = sub("\\D", "", `Tmax (h)`),
Tmax_unit = "h"
) |>
select(SUBJID, Dose_value, Dose_unit, Tmax_value, Tmax_unit)
}
mael <- function() {
dat |>
mutate(across(-SUBJID, \(x) str_extract(cur_column(), "(?<=\\().+?(?=\\))"),
.names = "{gsub(' .*', '', .col)}_unit"
)) |>
rename_with(.fn = \(x) word(x, 1), .col = matches("\\(")) %>%
relocate(SUBJID, order(colnames(.)))
}
tic <- function() {
cbind(
dat[1],
do.call(
cbind,
lapply(
seq_along(dat)[-1],
\(k) {
d <- dat[k]
nms <- paste0(gsub("\\s+\\(.*$", "", names(d)), c("_val", "_unit"))
setNames(cbind(d, gsub(".*\\((.*)\\)", "\\1", names(d))), nms)
}
)
)
)
}
and the benchmarking template
dat <- data.frame(
SUBJID = c(2001L, 2002L),
`Dose (mg)` = c(200L, 200L),
`Tmax (h)` = c(5L, 5L),
check.names = FALSE
)
microbenchmark(
stefan = stefan(),
issac = issac(),
mael = mael(),
tic = tic(),
unit = "relative"
)
we see that
Unit: relative
expr min lq mean median uq max neval
stefan 40.50826 31.763595 28.900405 32.131738 32.396251 4.452021 100
issac 8.80123 7.150539 6.919996 7.108832 7.719263 1.362840 100
mael 12.34542 10.347826 10.030679 10.533903 11.225731 2.257146 100
tic 1.00000 1.000000 1.000000 1.000000 1.000000 1.000000 100