Simulation of Ecological Data: The 4 parameter logistic

I want to make a simple and simulated version of Will’s logistic model.

thinking about treating L_o on the log scale – ie the log of a number between 0 and 1

curve(log(x))

\[ \begin{align} y &= L + \frac{1 - L}{1 + e^{-\beta}} \\ &= L + (1 - L) \times \frac{1}{1 + e^{-\beta}}\\ \text{log}(y) &= \text{log}\left(L + (1-L)\times \frac{1}{1 + e^{-\beta}}\right) \end{align} \]

Return the log mixture of the log densities lp1 and lp2 with mixing proportion theta, defined by \[\begin{eqnarray*} \mathrm{log\_mix}(\theta, \lambda_1, \lambda_2) & = & \log \!\left( \theta \exp(\lambda_1) + \left( 1 - \theta \right) \exp(\lambda_2) \right) \\[3pt] & = & \mathrm{log\_sum\_exp}\!\left(\log(\theta) + \lambda_1, \ \log(1 - \theta) + \lambda_2\right). \end{eqnarray*}\] Available since 2.6

log_mix_logistic <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/log_mix_logistic.stan"))

datlist <- list(n = 50,
                x = runif(50, min = -40, max = 40))

demo_samples <- log_mix_logistic$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.0 seconds.
Chain 2 finished in 0.1 seconds.
Chain 3 finished in 0.1 seconds.
Chain 4 finished in 0.1 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.8 seconds.

Very VERY interesting note – if you do this in tranformed parameters it doesn’t work at all!

demo_draws <- gather_draws(demo_samples, y[i], ndraws = 50)

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y = exp(.value), group = .draw)) + geom_line()

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y = .value, group = .draw)) + geom_line()

log_sum_exp_logistic <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/log_sum_exp_logistic.stan"))

datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50))

demo_samples <- log_sum_exp_logistic$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.0 seconds.
Chain 2 finished in 0.1 seconds.
Chain 3 finished in 0.1 seconds.
Chain 4 finished in 0.2 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.7 seconds.

demo_draws <- gather_draws(demo_samples, y[i], ndraws = 50)

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y = exp(.value), group = .draw)) + geom_line()

could the expression be vectorized easily? does that matter?

vec_logistic <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/vec_logistic.stan"))

datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50))

demo_samples <- vec_logistic$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.0 seconds.
Chain 2 finished in 0.1 seconds.
Chain 3 finished in 0.0 seconds.
Chain 4 finished in 0.1 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.6 seconds.

demo_draws <- gather_draws(demo_samples, y[i], ndraws = 50)

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y =.value, group = .draw)) + geom_line()

prior simulations in R

x <-  seq(from = -40, to = 40, length.out = 50)
L <- rnorm(1, -1, 1)
alpha <- exp(rnorm(1, -.8, 1))
y <- plogis(L) + (1 - plogis(L))*plogis(alpha*x)
plot(x, y, ylim = c(0, 1))

over_vectorized <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/over_vectorized.stan"))

datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50))

demo_samples <- over_vectorized$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.2 seconds.
Chain 2 finished in 0.3 seconds.
Chain 3 finished in 0.3 seconds.
Chain 4 finished in 0.2 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.2 seconds.
Total execution time: 1.2 seconds.

demo_draws <- gather_draws(demo_samples, y[i], ndraws = 50)

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y =exp(.value), group = .draw)) + geom_line()

adding a maximum

Here I’m adding another parameter to control the (log) of the maximum:

vec_logistic_max <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/vec_logistic_max.stan"))

datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50))

demo_samples <- vec_logistic_max$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.1 seconds.
Chain 2 finished in 0.1 seconds.
Chain 3 finished in 0.1 seconds.
Chain 4 finished in 0.1 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.6 seconds.

demo_draws <- gather_draws(demo_samples, y[i], ndraws = 50)

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y =.value, group = .draw)) + geom_line()

That looks about right! We can take the same approach with the approach using log_sum_exp

log_sum_exp_logistic_max <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/log_sum_exp_logistic_max.stan"))

datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50))

demo_samples <- log_sum_exp_logistic_max$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.1 seconds.
Chain 2 finished in 0.1 seconds.
Chain 3 finished in 0.1 seconds.
Chain 4 finished in 0.1 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.5 seconds.

demo_draws <- gather_draws(demo_samples, y[i], ndraws = 50)

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y =exp(.value), group = .draw)) + geom_line()

simulate lognormal data

I’d like to try to tack on a logistic simulation to each of these, just to see!

log_sum_exp_ln_rng <- cmdstanr::cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/log_sum_exp_ln_rng.stan"))

datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50))

demo_samples <- log_sum_exp_ln_rng$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.1 seconds.
Chain 2 finished in 0.1 seconds.
Chain 3 finished in 0.1 seconds.
Chain 4 finished in 0.1 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.1 seconds.
Total execution time: 0.8 seconds.

get_variables(demo_samples)

  [1] "lp__"            "L"               "alpha"          
  [4] "U"               "log_sigma"       "log_mu[1]"      
  [7] "log_mu[2]"       "log_mu[3]"       "log_mu[4]"      
 [10] "log_mu[5]"       "log_mu[6]"       "log_mu[7]"      
 [13] "log_mu[8]"       "log_mu[9]"       "log_mu[10]"     
 [16] "log_mu[11]"      "log_mu[12]"      "log_mu[13]"     
 [19] "log_mu[14]"      "log_mu[15]"      "log_mu[16]"     
 [22] "log_mu[17]"      "log_mu[18]"      "log_mu[19]"     
 [25] "log_mu[20]"      "log_mu[21]"      "log_mu[22]"     
 [28] "log_mu[23]"      "log_mu[24]"      "log_mu[25]"     
 [31] "log_mu[26]"      "log_mu[27]"      "log_mu[28]"     
 [34] "log_mu[29]"      "log_mu[30]"      "log_mu[31]"     
 [37] "log_mu[32]"      "log_mu[33]"      "log_mu[34]"     
 [40] "log_mu[35]"      "log_mu[36]"      "log_mu[37]"     
 [43] "log_mu[38]"      "log_mu[39]"      "log_mu[40]"     
 [46] "log_mu[41]"      "log_mu[42]"      "log_mu[43]"     
 [49] "log_mu[44]"      "log_mu[45]"      "log_mu[46]"     
 [52] "log_mu[47]"      "log_mu[48]"      "log_mu[49]"     
 [55] "log_mu[50]"      "common_term[1]"  "common_term[2]" 
 [58] "common_term[3]"  "common_term[4]"  "common_term[5]" 
 [61] "common_term[6]"  "common_term[7]"  "common_term[8]" 
 [64] "common_term[9]"  "common_term[10]" "common_term[11]"
 [67] "common_term[12]" "common_term[13]" "common_term[14]"
 [70] "common_term[15]" "common_term[16]" "common_term[17]"
 [73] "common_term[18]" "common_term[19]" "common_term[20]"
 [76] "common_term[21]" "common_term[22]" "common_term[23]"
 [79] "common_term[24]" "common_term[25]" "common_term[26]"
 [82] "common_term[27]" "common_term[28]" "common_term[29]"
 [85] "common_term[30]" "common_term[31]" "common_term[32]"
 [88] "common_term[33]" "common_term[34]" "common_term[35]"
 [91] "common_term[36]" "common_term[37]" "common_term[38]"
 [94] "common_term[39]" "common_term[40]" "common_term[41]"
 [97] "common_term[42]" "common_term[43]" "common_term[44]"
[100] "common_term[45]" "common_term[46]" "common_term[47]"
[103] "common_term[48]" "common_term[49]" "common_term[50]"
[106] "y_obs[1]"        "y_obs[2]"        "y_obs[3]"       
[109] "y_obs[4]"        "y_obs[5]"        "y_obs[6]"       
[112] "y_obs[7]"        "y_obs[8]"        "y_obs[9]"       
[115] "y_obs[10]"       "y_obs[11]"       "y_obs[12]"      
[118] "y_obs[13]"       "y_obs[14]"       "y_obs[15]"      
[121] "y_obs[16]"       "y_obs[17]"       "y_obs[18]"      
[124] "y_obs[19]"       "y_obs[20]"       "y_obs[21]"      
[127] "y_obs[22]"       "y_obs[23]"       "y_obs[24]"      
[130] "y_obs[25]"       "y_obs[26]"       "y_obs[27]"      
[133] "y_obs[28]"       "y_obs[29]"       "y_obs[30]"      
[136] "y_obs[31]"       "y_obs[32]"       "y_obs[33]"      
[139] "y_obs[34]"       "y_obs[35]"       "y_obs[36]"      
[142] "y_obs[37]"       "y_obs[38]"       "y_obs[39]"      
[145] "y_obs[40]"       "y_obs[41]"       "y_obs[42]"      
[148] "y_obs[43]"       "y_obs[44]"       "y_obs[45]"      
[151] "y_obs[46]"       "y_obs[47]"       "y_obs[48]"      
[154] "y_obs[49]"       "y_obs[50]"       "treedepth__"    
[157] "divergent__"     "energy__"        "accept_stat__"  
[160] "stepsize__"      "n_leapfrog__"

demo_dots <- gather_draws(demo_samples, y_obs[i], ndraws = 5)

demo_dots |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, y = .value, group = .draw)) + geom_point() + 
  facet_wrap(~.draw)

could also draw the line ribbon

demo_draws <- gather_rvars(demo_samples, y_obs[i])

demo_draws |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, dist = .value)) + stat_dist_lineribbon()

Now try to fit it, why not!?

log_sum_exp_ln_model <- cmdstan_model(stan_file = here::here("_posts/2022-06-28-nonlinear-functions-for-growth/log_sum_exp_ln_model.stan"))


datlist <- list(n = 50,
                x = seq(from = -40, to = 40, length.out = 50),
                yy = demo_dots |> filter(.draw == max(.draw)) |> pull(.value)
                )
model_samples <- log_sum_exp_ln_model$sample(data = datlist, refresh = 0)

Running MCMC with 4 sequential chains...

Chain 1 finished in 0.8 seconds.
Chain 2 finished in 0.7 seconds.
Chain 3 finished in 0.7 seconds.
Chain 4 finished in 0.8 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.8 seconds.
Total execution time: 3.4 seconds.

model_samples

  variable  mean median   sd  mad    q5   q95 rhat ess_bulk ess_tail
 lp__       8.19   8.47 1.53 1.45  5.32 10.06 1.00     1289     2187
 L         -1.50  -1.44 0.41 0.34 -2.26 -0.98 1.01     1290      948
 alpha     -1.69  -1.86 0.72 0.49 -2.50 -0.08 1.00     1216     1340
 U          0.11   0.11 0.07 0.07 -0.01  0.23 1.00     1536     1916
 log_sigma -1.05  -1.05 0.12 0.12 -1.24 -0.83 1.00     2045     2177
 log_mu[1] -1.55  -1.53 0.22 0.21 -1.94 -1.23 1.00     1536     1542
 log_mu[2] -1.54  -1.52 0.21 0.21 -1.92 -1.23 1.00     1571     1623
 log_mu[3] -1.53  -1.52 0.20 0.20 -1.89 -1.23 1.00     1615     1798
 log_mu[4] -1.52  -1.51 0.19 0.19 -1.86 -1.23 1.00     1663     1930
 log_mu[5] -1.51  -1.50 0.19 0.19 -1.83 -1.23 1.00     1728     2100

 # showing 10 of 155 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

model_mean <- gather_rvars(model_samples, log_mu[i])

model_mean |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, dist = exp(.value))) + 
  stat_dist_lineribbon() + 
  geom_point(aes(x = x, y = yy), inherit.aes = FALSE,
             data = as.data.frame(datlist))

model_pred <- gather_rvars(model_samples, y_obs[i])

model_pred |> 
  ungroup() |> 
  mutate(x = datlist$x[i]) |> 
  ggplot(aes(x = x, dist = .value)) + 
  stat_dist_lineribbon() + 
  geom_point(aes(x = x, y = yy), inherit.aes = FALSE,
             data = as.data.frame(datlist), pch = 21, fill = "orange") + 
  scale_fill_brewer(palette = "Greens")