A short description of the post.
sampling distribution of the median
whats the uncertainty around the median
median(ll)
[1] 4
What about a variance.
how about around an R2
get that R2
List of 11
$ call : language lm(formula = y ~ fac, data = df)
$ terms :Classes 'terms', 'formula' language y ~ fac
.. ..- attr(*, "variables")= language list(y, fac)
.. ..- attr(*, "factors")= int [1:2, 1] 0 1
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:2] "y" "fac"
.. .. .. ..$ : chr "fac"
.. ..- attr(*, "term.labels")= chr "fac"
.. ..- attr(*, "order")= int 1
.. ..- attr(*, "intercept")= int 1
.. ..- attr(*, "response")= int 1
.. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. ..- attr(*, "predvars")= language list(y, fac)
.. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "factor"
.. .. ..- attr(*, "names")= chr [1:2] "y" "fac"
$ residuals : Named num [1:8] -1.07 -2.18 1.529 1.721 0.751 ...
..- attr(*, "names")= chr [1:8] "1" "2" "3" "4" ...
$ coefficients : num [1:2, 1:4] 8.556 1.684 0.731 1.033 11.71 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "(Intercept)" "fac2"
.. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
$ aliased : Named logi [1:2] FALSE FALSE
..- attr(*, "names")= chr [1:2] "(Intercept)" "fac2"
$ sigma : num 1.46
$ df : int [1:3] 2 6 2
$ r.squared : num 0.307
$ adj.r.squared: num 0.191
$ fstatistic : Named num [1:3] 2.66 1 6
..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"
$ cov.unscaled : num [1:2, 1:2] 0.25 -0.25 -0.25 0.5
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:2] "(Intercept)" "fac2"
.. ..$ : chr [1:2] "(Intercept)" "fac2"
- attr(*, "class")= chr "summary.lm"
summary(lil_sum)$r.squared
[1] 0.3068527
r2_fr_data <- function(df){
lil_sum <- lm(y ~ fac, data = df)
summary(lil_sum)$r.squared
}
df[c(sample(1:4, replace = TRUE),
sample(5:8, replace = TRUE)),]
fac y
2 1 6.376472
2.1 1 6.376472
1 1 7.485775
2.2 1 6.376472
6 2 10.646681
8 2 9.333165
8.1 2 9.333165
5 2 10.991705
replcate it
many_repped_models <- replicate(500, {
df[c(sample(1:4, replace = TRUE),
sample(5:8, replace = TRUE)),] |>
r2_fr_data()
})
hist(many_repped_models)
abline(v = summary(lil_sum)$r.squared, lty = 2, col = "darkblue")
mean(many_repped_models)
[1] 0.3492681
median(many_repped_models)
[1] 0.3247549
summary(lil_sum)$r.squared
[1] 0.3068527
bootstrap the factor levels
ids <- with(dune.env, split(seq_along(Management), Management)) |>
sapply(sample, replace = TRUE) |>
c(recursive = TRUE, use.names = FALSE)
ids
[1] 10 2 11 6 8 8 9 6 18 17 20 19 14 18 16 13 13 12 3 1
adon_demo <- adonis2(dune[ids,] ~ Management, data = dune.env[ids,])
adon_demo$R2[1]
[1] 0.3194172
put these together into a loop
hmmm! they are mostly higher! how many are not
sum(boot_adonis < sample_r2)
[1] 16
how about a “rarefied” sample, where only 1 row per group is found
boot_adonis <- replicate(500, {
ids <- with(dune.env, split(seq_along(Management), Management)) |>
sapply(sample, size = 1, replace = TRUE) |>
c(recursive = TRUE, use.names = FALSE)
adon_demo <- adonis2(dune[ids,] ~ Management, data = dune.env[ids,])
adon_demo$R2[1]
})
hist(boot_adonis)
abline(v = sample_r2)