I can never resist a timing test, so...
Benchmark
> foo = paste0(sample(c('M', 'G','R','Y','S','G','T','K'),1e3,replace=TRUE),collapse='')
> library(microbenchmark)
> microbenchmark(tomcod(foo),friede(foo),samr(foo),tmfnk(foo), lotus(foo))
Unit: microseconds
expr min lq mean median uq max neval
tomcod(foo) 39.442 44.3620 49.82894 48.380 53.3000 78.515 100
friede(foo) 98.031 114.1030 119.77289 118.203 123.2665 193.807 100
samr(foo) 6497.598 6819.5710 7866.11609 7001.836 8198.3190 44381.229 100
tmfnk(foo) 3461.958 3582.1700 3749.58981 3651.604 3716.8755 5675.999 100
lotus(foo) 235.176 253.2365 276.51466 270.600 290.1365 458.011 100
EDIT TWO friede submitted modded code.
microbenchmark(tomcod(foo),friede(foo),friede2(foo),lotus(foo),samr(foo),tmfnk(foo))
Unit: microseconds
expr min lq mean median uq max neval cld
tomcod(foo) 65.941 73.6810 82.22718 80.0645 88.2710 122.788 100 a
friede(foo) 178.945 184.1595 192.02320 189.6940 196.6435 224.228 100 a
friede2(foo) 178.467 184.9555 193.34795 190.8725 200.9290 222.221 100 a
lotus(foo) 461.368 480.9220 535.46494 501.1280 523.5945 3717.288 100 a
samr(foo) 14625.145 14917.4965 19662.97214 15052.5340 15470.6325 161749.196 100 b
tmfnk(foo) 5746.362 5845.9695 7358.02076 5883.5455 5949.1420 150083.881 100 c
code used
tomcod <- function(s, keep = "G") {
sint <- utf8ToInt(s)
idx <- sint != utf8ToInt(keep)
intToUtf8(replace(sint, which(idx), sample(sint[idx])))
}
friede <- function(s, keep = 'G'){
x = strsplit(s, "")[[1L]]
x[i] = sample(x[i <- x != "G"])
paste(x, collapse = "")
}
friede2 = \(sequence) {
x = strsplit(sequence, "")[[1L]]
x[i] = sample(x[i <- x != "G"])
paste(x, collapse = "")
}
lotus <- function(s){
g <- gregexpr("[^G]", s)
m <- regmatches(s, g)
regmatches(s, g) <- lapply(m, sample)
return(s)
}
samr <- function(str, hold_letter = "G") {
x <- unlist(strsplit(str, ""))
swap <- iter(sample(x[x != hold_letter]))
lapply(
x,
\(
letter,
s = swap,
hl = hold_letter
) if (letter == hl) letter else nextElem(s)
) |>
paste(collapse = "")
}
tmfnk <- function(input, target_letter ='G') {
non_target_positions <- grepRaw(paste(setdiff(LETTERS, target_letter), collapse = "|"), input, all = TRUE)
non_target_letters <- substring(input, non_target_positions, non_target_positions)
sampled_non_target_letters <- sample(non_target_letters, length(non_target_letters))
for(i in seq_along(non_target_positions)) {
substring(input, non_target_positions[i], non_target_positions[i]) <- sampled_non_target_letters[i]
}
return(input)
}