library(ggplot2)
library(tidyverse)
set.seed(0.0)
<- 2000
nbInd = 14
deltaYear = 5
obsError
# Generate individual trees with random size in mm
<- tibble(
sizeGrowth_dt tree_id = 1:nbInd,
size_real0 = rgamma(nbInd, 190^2/1e4, 190/1e4)
|>
) # each indv increment every year with a N(2.3, 2.2) (values from dataset);
# here from size_t to size_t+1 is the sum of X years growth
mutate(
size_real1 = size_real0 + replicate(n(), sum(rnorm(deltaYear, 2.3, 2.2))),
size_real2 = size_real1 + replicate(n(), sum(rnorm(deltaYear, 2.3, 2.2))),
size_real3 = size_real2 + replicate(n(), sum(rnorm(deltaYear, 2.3, 2.2))),
size_real4 = size_real3 + replicate(n(), sum(rnorm(deltaYear, 2.3, 2.2)))
|>
) pivot_longer(
cols = contains('size'),
names_to = 'timeStep',
values_to = 'size_real'
|>
) # each observation has an error of measurement
mutate(
size_random = rnorm(n(), mean = size_real, sd = obsError)
|>
) # calculate growth from real and random (observed) values
group_by(tree_id) |>
mutate(
growth_real = (size_real - lag(size_real, 1))/deltaYear,
growth_random = (size_random - lag(size_random, 1))/deltaYear
|>
) ungroup() |>
pivot_longer(
cols = contains(c('real', 'random')),
names_to = c('var', 'type'),
names_sep = '_'
|>
) # remove NA for first time step
filter(!is.na(value))
<- sizeGrowth_dt |>
p1 pivot_wider(
names_from = type,
values_from = value
|>
) ggplot(aes(x = real, y = random)) +
geom_point(size = .5) +
facet_wrap(~var, scales = 'free') +
geom_abline(intercept = 0, slope = 1)
<- sizeGrowth_dt |>
p2 ggplot(aes(value, color = type)) +
geom_density() +
facet_wrap(~var, scales = 'free')
library(patchwork)
+ p2 p1
The following is a simulation by Will Vieira which explores how process uncertainty and measurement error combine when we measure trees.
The model imagines a simple linear growth scenario: every year, individuals grow by a random amount \(g_i\). This growth increment is random and varies each year according to a normal distribution
\[ \begin{align} L_{\text{year}[i]} \sim N \\ \end{align} \]
\[ L = lo \]
Here is Will’s very clean and concise simulation code.
|>
sizeGrowth_dt filter(var == "growth") |>
ggplot(aes(x = value, colour = type)) +
geom_density() +
stat_function(fun = function(x) dnorm(x, mean = 2.3, sd = 2.2),
colour = "black") +
stat_function(fun = function(x) dnorm(x, mean = 2.3, sd = sqrt(deltaYear)/2.2),
colour = "black", lty = 2) +
stat_function(fun = function(x) dnorm(x, mean = 2.3,
sd = sqrt(deltaYear)/sqrt(2.2^2 + obsError^2)),
colour = "black", lty = 3)
<- 2.2
sd_g
|>
sizeGrowth_dt filter(var == "growth") |>
ggplot(aes(x = value, colour = type)) +
geom_density(lwd = 2) +
stat_function(fun = function(x) dnorm(x, mean = 2.3, sd = 2.2),
colour = "black", lty = 3) +
stat_function(fun = function(x) dnorm(x, mean = 2.3, sd = 2.2/sqrt(deltaYear)),
colour = "black", lty = 2) +
stat_function(fun = function(x) dnorm(x, mean = 2.3,
sd = sqrt(
^2 * deltaYear + 2*obsError^2)/deltaYear^2
(sd_g
)
),colour = "black", lty = 1) +
theme_bw()
but why are these correlated?
Distribution of a difference
<- 8
u1 <- 15
u2 <- 1.5
sdx <- 1e5
n <- rnorm(n, mean = u1, sd = sdx)
x1 <- rnorm(n, mean = u2, sd = sdx)
x2 tibble(diff = x2 - x1) |>
ggplot(aes(x = diff)) +
stat_function(fun = \(x) dnorm(x, mean = u2 - u1, sd = sqrt(2) * sdx),
size = 3, col = "darkgreen") +
geom_density(size = 1, col = "orange")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This is what happens when you have two constant values which are measured with error and then contrasted.
if another process adds to the variation, then the two variances add – THEN get scaled but sqrt(2)