biglmm.Rmd
library(biglmmz)
#> Loading required package: bigstatsr
The linear mixed model (LMM) is expected to recover the true heritability of 0.8 given the sample size of 1,500 individuals and 200 causal genetic variants.
N <- 1500; M <- 200; h2 <- 0.8
Zg <- sapply(1:M, function(i) rbinom(N, 2, 0.5)) # allele freq. = 0.5
col_means <- colMeans(Zg, na.rm = TRUE)
col_freq <- col_means / 2 # col_means = 2 * col_freq
col_sd <- sqrt(2 * col_freq * (1 - col_freq))
Z <- sweep(Zg, 2, col_means, "-")
Z <- sweep(Z, 2, col_sd , "/")
b <- rnorm(M, 0, sqrt(h2/M))
y <- Z %*% b + rnorm(N, 0, sqrt(1 - h2))
# biglmmg accepts genotypes in the FBM format
G <- as_FBM(Zg)
mod <- biglmmg(y, G = G)
mod$gamma
#> [1] 0.808142
# varianace components = s2 * c(h2, 1 - h2)
h2 <- mod$gamma
s2 <- mod$s2
ess(G, h2 = h2, s2 = s2)
#> N M h2 s2 mult ESS
#> 1 1500 200 0.808142 1.041866 4.359118 6538.677