how to fit and visualize small multiples
substrate <- c("soil", "onion", "radish", "broccoli", "soy", "corn")
genotypes <- c("H", "N")
library(tidyverse)
treatment <- expand_grid(substrate, genotypes)
fly_ids <- paste0("fly", 1:(30*12))
# assign flies to treatments
trt_id <- rep(1:12, times = 30)
design_matrix <- tibble(fly_ids, trt_id) |>
rowwise() |>
mutate(trt = list(treatment[trt_id,])) |>
unnest("trt")
obs_flies <- design_matrix |>
rowwise() |>
mutate(time = list(c("t1", "t2", "t3"))) |>
unnest(time) |>
mutate(obs = rnorm(length(fly_ids), mean = 40, sd = sqrt(40)))
# faster simpler
expand_grid(genotype = c("H", "N"),
substrate = c("control", "onion", "radish", "broco", "soy", "corn"),
flies = 1:30,
time = c("4", "24", "48")) %>%
group_by(genotype, substrate) %>%
mutate(fly_id = cur_group_id()) %>%
ungroup()
# A tibble: 1,080 x 5
genotype substrate flies time fly_id
<chr> <chr> <int> <chr> <int>
1 H control 1 4 2
2 H control 1 24 2
3 H control 1 48 2
4 H control 2 4 2
5 H control 2 24 2
6 H control 2 48 2
7 H control 3 4 2
8 H control 3 24 2
9 H control 3 48 2
10 H control 4 4 2
# ... with 1,070 more rows
Fit many small models to each one!
fly_model_df <- obs_flies |>
mutate(time = factor(time, levels = c("t3", "t1", "t2"))) |>
nest_by(substrate, genotypes, .key = "flydata")
fly_model_coefs <- fly_model_df |>
# fit models
mutate(model = list(lm(obs ~ time, data = flydata))) |>
# mutate(coef = list(coef(model))) # there's a cooler way!
mutate(tb = list(broom::tidy(model))) |>
unnest(cols = c("tb"))
fly_model_coefs |>
filter(term == "(Intercept)") |>
ggplot(aes(y = estimate, x = substrate, col = genotypes, ymin = estimate - std.error, ymax = estimate + std.error)) +
geom_pointrange(position = position_dodge(width = .1))
could do it again for another kind of preference: the contrast between t1 and t3
fly_model_coefs |>
filter(term == "timet1") |>
ggplot(aes(y = estimate, x = substrate, col = genotypes,
ymin = estimate - std.error, ymax = estimate + std.error)) +
geom_pointrange(position = position_dodge(width = .1))
what about for a zero-inflated model?
library(glmmTMB)
obs_flies_zi <- obs_flies |>
mutate(time = factor(time, levels = c("t3", "t1", "t2")),
obs = if_else(runif(1080)<.1, true = 0, false = obs))
obs_flies_zi |>
nest_by(substrate, genotypes, .key = "flydata") |>
mutate(zi_mod = list(glmmTMB(obs ~ time, ziformula = ~time, data = flydata)))
# A tibble: 12 x 4
# Rowwise: substrate, genotypes
substrate genotypes flydata zi_mod
<chr> <chr> <list<tibble[,4]>> <list>
1 broccoli H [90 x 4] <glmmTMB>
2 broccoli N [90 x 4] <glmmTMB>
3 corn H [90 x 4] <glmmTMB>
4 corn N [90 x 4] <glmmTMB>
5 onion H [90 x 4] <glmmTMB>
6 onion N [90 x 4] <glmmTMB>
7 radish H [90 x 4] <glmmTMB>
8 radish N [90 x 4] <glmmTMB>
9 soil H [90 x 4] <glmmTMB>
10 soil N [90 x 4] <glmmTMB>
11 soy H [90 x 4] <glmmTMB>
12 soy N [90 x 4] <glmmTMB>
From easystats documentation
library(parameters)
model <- glmmTMB(
count ~ spp + mined + (1 | site),
ziformula = ~mined,
family = poisson(),
data = Salamanders
)
simulate_parameters(model, centrality = "mean")
# Fixed Effects
Parameter | Coefficient | 95% CI | p
---------------------------------------------------
(Intercept) | -0.36 | [-0.89, 0.21] | 0.222
sppPR | -1.27 | [-1.72, -0.80] | < .001
sppDM | 0.27 | [ 0.00, 0.53] | 0.054
sppEC-A | -0.56 | [-0.98, -0.17] | 0.002
sppEC-L | 0.67 | [ 0.42, 0.91] | < .001
sppDES-L | 0.63 | [ 0.38, 0.86] | < .001
sppDF | 0.12 | [-0.16, 0.39] | 0.438
minedno | 1.26 | [ 0.75, 1.79] | < .001
# Zero-Inflated
Parameter | Coefficient | 95% CI | p
------------------------------------------------------
(Intercept)_zi | 0.79 | [ 0.28, 1.30] | 0.008
minedno_zi | -1.85 | [-2.43, -1.27] | < .001
simulate_parameters(model, ci = c(.8, .95), component = "zero_inflated")
# Fixed Effects
Parameter | Coefficient | 80% CI | 95% CI | p
--------------------------------------------------------------------
(Intercept) | 0.80 | [ 0.44, 1.14] | [ 0.24, 1.33] | 0.006
minedno | -1.85 | [-2.25, -1.45] | [-2.51, -1.20] | < .001
one_sample <- obs_flies_zi |>
nest(timeobs = c(time, obs)) |>
# group by experimental factors
group_by(substrate, genotypes) |>
sample_frac(size = 1, replace = TRUE)
one_sample |>
filter(trt_id == 1) |>
arrange(fly_ids)
# A tibble: 30 x 5
# Groups: substrate, genotypes [1]
fly_ids trt_id substrate genotypes timeobs
<chr> <int> <chr> <chr> <list>
1 fly109 1 soil H <tibble [3 x 2]>
2 fly109 1 soil H <tibble [3 x 2]>
3 fly121 1 soil H <tibble [3 x 2]>
4 fly13 1 soil H <tibble [3 x 2]>
5 fly133 1 soil H <tibble [3 x 2]>
6 fly145 1 soil H <tibble [3 x 2]>
7 fly145 1 soil H <tibble [3 x 2]>
8 fly169 1 soil H <tibble [3 x 2]>
9 fly193 1 soil H <tibble [3 x 2]>
10 fly205 1 soil H <tibble [3 x 2]>
# ... with 20 more rows
# A tibble: 1,080 x 6
fly_ids trt_id substrate genotypes time obs
<chr> <int> <chr> <chr> <fct> <dbl>
1 fly271 7 broccoli H t1 39.9
2 fly271 7 broccoli H t2 33.2
3 fly271 7 broccoli H t3 27.4
4 fly283 7 broccoli H t1 41.8
5 fly283 7 broccoli H t2 41.3
6 fly283 7 broccoli H t3 50.9
7 fly7 7 broccoli H t1 42.8
8 fly7 7 broccoli H t2 34.6
9 fly7 7 broccoli H t3 31.2
10 fly187 7 broccoli H t1 41.0
# ... with 1,070 more rows
that’s one rep, let’s do a lot!
now just use map
to apply a function to each of
these
model_list <- boot_list |>
map(~ glmmTMB(obs ~ time, ziformula = ~time, data = .x))
# use bootstrapped models to predict on original dataset
model_predictions <- map(model_list,
predict,
type = "response",
newdata = obs_flies_zi)
# model_predictions |> map(head)
# rowMeans(do.call(cbind, model_predictions))
# get more information though!
boot_pred_df <- model_predictions |>
map_dfr(enframe, name = "rownum", value = "pred", .id = "boot_id")
summarize each of these
summarized_data <- boot_pred_df |>
nest_by(rownum) |>
mutate(med = median(data$pred),
quant_lo = quantile(data$pred, 0.025),
quant_hi = quantile(data$pred, 0.975)) |>
select(-data)
bind_cols(obs_flies_zi, summarized_data) |>
ggplot(aes(x = substrate, y = obs)) +
geom_count() +
facet_grid(genotypes ~ time) +
geom_count(aes(y = med, x = substrate), col = "orange") +
geom_pointrange(aes(y = med, ymin = quant_lo, ymax = quant_hi), col = "green")