finemapr
finemapr
finemapr is an R package that provides an interface to fine-mapping tools such as FINEMAP, CAVIAR and PAINTOR.
By using finemapr, your input files are automatically prepared for each tool; the analysis workflow is tool-independent; and exploration of fine-mapping results is powered by R for printing, plotting and data export.
# set up
options(finemapr_<tool> = "<path to fine-mapping tool>")
# read input files
my_zscores <- read_zscore("<my_scores.tab>")
my_ld <- read_ld("<my_ld.tab>")
# run analysis
out <- run_<tool>(my_zscores, my_ld, args = "<custom arguments>")
# explore results
print(out)
head(out$snp) # main table of results
plot(out)
# export results
write.table(out$snp, "<my_results.tab>")
The user needs to download and install a fine-mapping tool before the analysis. An example of the installation commands used in finemapr by default is given here.
After installing, for example, the FINEMAP tool, the user specifies for finemapr where the tool is located:
options(finemapr_finemap = "~/apps/finemap/finemap")
We load packages for the analysis conducted in this document.
library(devtools)
load_all("~/git/variani/finemapr")
#> Loading finemapr
library(magrittr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
theme_set(theme_linedraw())
We load example data copied from the FINEMAP website (http://www.christianbenner.com/). This simulated dataset has two causal variants, rs15 and rs47.
file1_z <- system.file("extdata/region1.z", package = "finemapr")
file1_ld <- system.file("extdata/region1.ld", package = "finemapr")
z1 <- read_zscore(file1_z)
ld1 <- read_ld(file1_ld, snps = z1$snp)
n1 <- 5363
Top 5 z-scores:
z1 %>% arrange(-abs(zscore)) %>% head(5) %>% kable(digits = 1)
snp | zscore |
---|---|
rs15 | 10.9 |
rs47 | 8.8 |
rs23 | 5.8 |
rs17 | -5.6 |
rs42 | -5.0 |
ggplot(z1, aes(zscore)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mutate(z1, pval = pchisq(zscore^2, df = 1, lower.tail = FALSE)) %>%
ggplot(aes(pval)) + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
options(finemapr_finemap = "~/apps/finemap/finemap")
out_finemap <- run_finemap(z1, ld1, n1, args = "--n-causal-max 3")
#> Parsed with column specification:
#> cols(
#> index = col_integer(),
#> snp = col_character(),
#> snp_prob = col_double(),
#> snp_log10bf = col_double()
#> )
#> Parsed with column specification:
#> cols(
#> rank = col_integer(),
#> config = col_character(),
#> config_prob = col_double(),
#> config_log10bf = col_double()
#> )
print(out_finemap)
#> - command: ~/apps/finemap/finemap --sss --log --n-causal-max 3 --in-files region.master
#> - see log output in `log`
#> - tables of results: `config`, `snp`, `ncausal`
#> - config:
#> # A tibble: 9,007 x 4
#> rank config config_prob config_log10bf
#> <int> <chr> <dbl> <dbl>
#> 1 1 rs15,rs47 0.607 42.7
#> 2 2 rs15,rs42,rs47 0.0326 43.1
#> 3 3 rs15,rs34,rs47 0.0222 42.9
#> # ... with 9,004 more rows
plot(out_finemap, label_size = 3, grid_ncol = 1)
options(finemapr_caviar = "~/apps/caviar/CAVIAR")
out_caviar <- run_caviar(z1, ld1, args = "-c 3")
#> Parsed with column specification:
#> cols(
#> SNP_ID = col_character(),
#> Prob_in_pCausalSet = col_double(),
#> Causal_Post._Prob. = col_double()
#> )
print(out_caviar)
#> - command: ~/apps/caviar/CAVIAR -c 3 -z region.z -l region.ld -o log
#> - tables of results: `snp`
#> - snp:
#> # A tibble: 50 x 4
#> rank snp snp_prob_set snp_prob
#> <int> <chr> <dbl> <dbl>
#> 1 1 rs15 0.439 1.00
#> 2 2 rs47 0.439 1.00
#> 3 3 rs42 0.0120 0.0274
#> # ... with 47 more rows
#> - 95%-causal set (ordered): rs15, rs47, rs42, rs34, rs20, rs5, rs27, rs45, rs38, rs17, rs25, rs18, rs11, rs19, rs44, rs40, rs8, rs24
plot(out_caviar, label_size = 3)
options(finemapr_paintor = "~/apps/paintor/PAINTOR")
out_paintor <- run_paintor(z1, ld1, n1, args = "-enumerate 3")
#> Parsed with column specification:
#> cols(
#> snp = col_character(),
#> zscore = col_double(),
#> Posterior_Prob = col_double()
#> )
print(out_paintor)
#> - command: ~/apps/paintor/PAINTOR -input region.master -Zhead zscore -LDname ld -annotations dummy_ones -enumerate 3 -num_samples 5363
#> - tables of results: `snp`
#> - snp:
#> # A tibble: 50 x 4
#> rank snp zscore snp_prob
#> <int> <chr> <dbl> <dbl>
#> 1 1 rs15 10.9 1.00
#> 2 2 rs47 8.80 1.00
#> 3 3 rs25 - 0.133 0.0200
#> # ... with 47 more rows
#> - annotations: dummy_ones
#> - logBF (proportional to the model likelihood): 100.7274
plot(out_paintor, label_size = 3)
All three fine-mapping tools estimated the posterior causal probabilities of the two variants, rs15 and rs47, to be very close to 1.