Bootstrap how do

A short description of the post.

Andrew and Salix true
2022-07-20

sampling distribution of the median

ll <- rpois(15, 4) # lnorm(7, log(6), 1)

hist(ll)

whats the uncertainty around the median

median(ll)
[1] 4
plot(density(replicate(5000, median(sample(ll, 7, replace = TRUE)))))

abline(v = median(ll), col = "pink")

What about a variance.

boot_my_fun <- function(fn, ll){
  
  plot(density(replicate(5000, fn(sample(ll, 7, replace = TRUE)))))
  
  abline(v = fn(ll), col = "pink")
}


boot_my_fun(var, ll)

how about around an R2

means <- c(7,11)

df <- data.frame(fac = gl(2, 4))

df$y <- rnorm(nrow(df), mean = means[df$fac], sd = 2)

with(df, plot(fac, y))

get that R2

lil_sum <- lm(y ~ fac, data = df)
summary(lil_sum) |> str()
List of 11
 $ call         : language lm(formula = y ~ fac, data = df)
 $ terms        :Classes 'terms', 'formula'  language y ~ fac
  .. ..- attr(*, "variables")= language list(y, fac)
  .. ..- attr(*, "factors")= int [1:2, 1] 0 1
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:2] "y" "fac"
  .. .. .. ..$ : chr "fac"
  .. ..- attr(*, "term.labels")= chr "fac"
  .. ..- attr(*, "order")= int 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. ..- attr(*, "predvars")= language list(y, fac)
  .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "factor"
  .. .. ..- attr(*, "names")= chr [1:2] "y" "fac"
 $ residuals    : Named num [1:8] -1.07 -2.18 1.529 1.721 0.751 ...
  ..- attr(*, "names")= chr [1:8] "1" "2" "3" "4" ...
 $ coefficients : num [1:2, 1:4] 8.556 1.684 0.731 1.033 11.71 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:2] "(Intercept)" "fac2"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
 $ aliased      : Named logi [1:2] FALSE FALSE
  ..- attr(*, "names")= chr [1:2] "(Intercept)" "fac2"
 $ sigma        : num 1.46
 $ df           : int [1:3] 2 6 2
 $ r.squared    : num 0.307
 $ adj.r.squared: num 0.191
 $ fstatistic   : Named num [1:3] 2.66 1 6
  ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"
 $ cov.unscaled : num [1:2, 1:2] 0.25 -0.25 -0.25 0.5
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:2] "(Intercept)" "fac2"
  .. ..$ : chr [1:2] "(Intercept)" "fac2"
 - attr(*, "class")= chr "summary.lm"
summary(lil_sum)$r.squared
[1] 0.3068527
r2_fr_data <- function(df){
  
  lil_sum <- lm(y ~ fac, data = df)
  summary(lil_sum)$r.squared

}

df[c(sample(1:4, replace = TRUE), 
     sample(5:8, replace = TRUE)),]
    fac         y
2     1  6.376472
2.1   1  6.376472
1     1  7.485775
2.2   1  6.376472
6     2 10.646681
8     2  9.333165
8.1   2  9.333165
5     2 10.991705

replcate it

many_repped_models <- replicate(500, {
  df[c(sample(1:4, replace = TRUE), 
       sample(5:8, replace = TRUE)),] |> 
    r2_fr_data()
})  

hist(many_repped_models)

abline(v = summary(lil_sum)$r.squared, lty = 2, col = "darkblue")
mean(many_repped_models)
[1] 0.3492681
median(many_repped_models)
[1] 0.3247549
summary(lil_sum)$r.squared
[1] 0.3068527
library(vegan)
# ?adonis
data(dune)
data(dune.env)
## default test by terms
adon_demo <- adonis2(dune ~ Management, data = dune.env)

sample_r2 <- adon_demo$R2[1]

bootstrap the factor levels

ids <- with(dune.env, split(seq_along(Management), Management)) |> 
  sapply(sample, replace = TRUE) |> 
  c(recursive = TRUE, use.names = FALSE)
ids
 [1] 10  2 11  6  8  8  9  6 18 17 20 19 14 18 16 13 13 12  3  1
adon_demo <- adonis2(dune[ids,] ~ Management, data = dune.env[ids,])
adon_demo$R2[1]
[1] 0.3194172

put these together into a loop

boot_adonis <- replicate(500, {
  ids <- with(dune.env, split(seq_along(Management), Management)) |> 
    sapply(sample, replace = TRUE) |> 
    c(recursive = TRUE, use.names = FALSE)
  
  adon_demo <- adonis2(dune[ids,] ~ Management, data = dune.env[ids,])
  adon_demo$R2[1]
})

hist(boot_adonis)

abline(v = sample_r2)

hmmm! they are mostly higher! how many are not

sum(boot_adonis < sample_r2)
[1] 16

how about a “rarefied” sample, where only 1 row per group is found

boot_adonis <- replicate(500, {
  ids <- with(dune.env, split(seq_along(Management), Management)) |> 
    sapply(sample, size  = 1, replace = TRUE) |> 
    c(recursive = TRUE, use.names = FALSE)
  
  adon_demo <- adonis2(dune[ids,] ~ Management, data = dune.env[ids,])
  adon_demo$R2[1]
})

hist(boot_adonis)

abline(v = sample_r2)