I have the following situation:
A variable moves +1 with prob=0.5 and -1 with prob=-0.5 ... if this Markov Chain starts at position=5
- Situation 1: What is the expected time at which the variable will first reach position = 0?
- Situation 2: What is the expected time at which the variable will first reach position= 0 AFTER having gone to position=10 at least once?
I tried to do this as follows (I created a tracking variable to see where the simulation gets stuck):
# the simulations can take a long time to run, I interrupted them
library(ggplot2)
library(gridExtra)
n_sims <- 100
times_to_end_0 <- numeric(n_sims)
times_to_end_0_after_10 <- numeric(n_sims)
paths_0 <- vector("list", n_sims)
paths_0_after_10 <- vector("list", n_sims)
for (i in 1:n_sims) {
print(paste("Running simulation", i, "for situation 1..."))
y <- 5
time <- 0
path_0 <- c(y)
while(y > 0) {
step <- sample(c(-1, 1), 1, prob = c(0.5, 0.5))
y <- y + step
path_0 <- c(path_0, y)
time <- time + 1
if (y == 0) {
times_to_end_0[i] <- time
paths_0[[i]] <- data.frame(time = 1:length(path_0), y = path_0, sim = i)
break
}
}
print(paste("Running simulation", i, "for situation 2..."))
y <- 5
time <- 0
reached_10 <- FALSE
path_0_after_10 <- c(y)
while(y > 0 || !reached_10) {
step <- sample(c(-1, 1), 1, prob = c(0.5, 0.5))
y <- y + step
path_0_after_10 <- c(path_0_after_10, y)
time <- time + 1
if (y == 10) {
reached_10 <- TRUE
}
if (y == 0 && reached_10) {
times_to_end_0_after_10[i] <- time
paths_0_after_10[[i]] <- data.frame(time = 1:length(path_0_after_10), y = path_0_after_10, sim = i)
break
}
}
}
df1 <- data.frame(time = times_to_end_0)
df2 <- data.frame(time = times_to_end_0_after_10[times_to_end_0_after_10 > 0])
mean1 <- mean(log(df1$time))
mean2 <- mean(log(df2$time))
p1 <- ggplot(df1, aes(x = log(time))) +
geom_density() +
geom_vline(aes(xintercept = exp(mean1)), color = "red", linetype = "dotted") +
labs(title = paste("Density of Times to Reach 0 - Mean Time:", round(exp(mean1), 2)), x = "Time", y = "Density") + theme_bw()
p2 <- ggplot(df2, aes(x = log(time))) +
geom_density() +
geom_vline(aes(xintercept = exp(mean2)), color = "red", linetype = "dotted") +
labs(title = paste("Density of Times to Reach 0 After Reaching 10 - Mean Time:", round(exp(mean2), 2)), x = "Time", y = "Density") + theme_bw()
plot_df_0 <- do.call(rbind, paths_0)
p3 <- ggplot(plot_df_0, aes(x = log(time), y = y, group = sim)) +
geom_line() +
labs(title = "Paths of First Simulation", x = "Time", y = "Y") +
theme_bw()
plot_df_0_after_10 <- do.call(rbind, paths_0_after_10)
p4 <- ggplot(plot_df_0_after_10, aes(x = log(time), y = y, group = sim)) +
geom_line() +
labs(title = "Paths of Second Simulation", x = "Time", y = "Y") +
theme_bw()
grid.arrange(p1, p2, p3, p4, ncol = 2)
My Question: Is there anything I can do to improve the efficiency of this simulation?
Thanks!