Browse

Scripts of Guo, et al, Mendelian randomization analyses support causal relationships between brain imaging-derived phenotypes and risk of psychiatric disorders.

The pipeline has four components, including:
1.Data sets preparation
2.MR Analysis
   a)Selection of Significantly independent SNPs;
   b)Removing SNPs associated with confounders;
   c)Removing SNPs directly or in LD indirectly associated with outcome;
   d)Quality control of instrument variants;
   e)Two-sample MR Analyses
3.Sensitivity Analysis
   f)MR-PRESSO
   g)MR-Egger regression
   h)LOO
4. Statistical power calculation
   i)Power statistic in a binary outcome
   j)Power statistic in a continuous outcome

The following detailed information:

1.Data sets preparation
Require two GWAS summary-level data with variants minor allele frequency (MAF) > 0.01; Removing the palindromic SNPs; Removing the long-range LD in the genome. (i.e. exposure_dat_0; outcome_dat_0)

2.MR Analysis
a)Selection of conditional independent SNPs
Selection of significantly independent SNPs (SNPs are present within the outcome or highly correlated proxy SNP (r2>0.8))
Code:
plink --bfile ref_data --clump exposure_dat_0 --clump-field P --clump-p1 5e-8 --clump-p2 0.01 --clump-kb 1000 --clump-r2 0.001 --out exposure_clumped

b)Removing SNPs associated with confounders
Suggested use: PhenoScanner database, GWAS catalog, the latest GWAS statistics of confounding factors.

c)Removing SNPs directly or in LD indirectly associated with outcome
(Note: After removing the unsatisfactory SNPs, get the data: exposure_dat_1 outcome_dat_1)
Code (for LD calculation):
plink --bfile ref_data --r2 --ld-snp-list outcome_dat_0.p5e8.snp --ld-window-r2 0.8 --ld-window-kb 1000 --ld-window 99999 --out outcome_dat.plink

d)Quality control of instrument variants
Heterogeneity test: Cochran’s Q test and Rucker’s Q’ test;
F-statistics: greater than 10。
(Note: After removing the unsatisfactory SNPs, get the data: exposure_dat_2 outcome_dat_2)
Code:
Rscript RadialMR_v1.0_.R Input Output
Input: exposure_dat_1 outcome_dat_1
Output: exp-out.ivw.RadialMR exp-out.egger.RadialMR exp-out.outliers

Code (RadialMR_v1.0_.R):
Args<-commandArgs(TRUE)
input1<-Args[1] #exposure data
input2<-Args[2] #outcome data

output1<-Args[3] #output data IVW
output2<-Args[4] #output data MR-egger
output3<-Args[5] #output data outlier snp

library(RadialMR)

data1<-read.table(input1,header=T)
data2<-read.table(input2,header=T)

data3<-format_radial(data1$B,data2$B,data1$SE,data2$SE,data1$SNP)

res1<-ivw_radial(data3,0.05,1,0.0001)
res2<-egger_radial(data3,0.05,1)
d1<-res1$data
d2<-res2$data
d1<-as.data.frame(d1)
d2<-as.data.frame(d2)
write.table(d1,output1,quote=F,row.names=F,col.names=T,sep="\t")
write.table(d2,output2,quote=F,row.names=F,col.names=T,sep="\t")
a1<-cbind(res1$outliers[1])
a2<-cbind(res2$outliers[1])
a1<-as.matrix(a1)
a2<-as.matrix(a2)
a3<-rbind(a1,a2)
a3<-as.data.frame(a3)
a3<-unique(a3)
write.table(a3,output3,quote=F,row.names=F,col.names=F,sep="\t")

e)Two-sample MR Analyses
Code:
Rscript MR_twmr_v1.0_.R Input Output
Input: exposure_dat_2 outcome_dat_2
Output:
exp-out.weight_median exp-out.ivw exp-out.egger_slope exp-out.weight_mode exp-out.egger_intercept exp-out.egger_raps exp-out.wr

Code (MR_twmr_v1.0_.R):
Args <- commandArgs(TRUE)
input1 <- Args[1] #exposure data
input2 <- Args[2] #outcome data

output1 <- Args[3] #weight median
output2 <- Args[4] #ivw
output3 <- Args[5] #egger slope
output4 <- Args[6] #weight mode
output5 <- Args[7] #Robust adjusted profile score
output6 <- Args[8] #Wald Ratio

library(TwoSampleMR)

exp_dat <- read_exposure_data( filename = input1,sep = "\t",snp_col = "SNP",beta_col ="B",se_col = "SE",effect_allele_col = "A1",other_allele_col = "A2",eaf_col = "MAF",pval_col = "P",samplesize_col = "N")

outcome_dat <- read_outcome_data(snps = exp_dat$SNP,filename = input2,sep = "\t",snp_col = "SNP",beta_col = "B",se_col = "SE",effect_allele_col = "A1",other_allele_col = "A2",eaf_col = "MAF",pval_col = "P",samplesize_col = "N")

dat <- harmonise_data(exposure_dat = exp_dat,outcome_dat = outcome_dat,action=1)

tsmr1<-mr(dat, method_list=c("mr_weighted_median"))
tsmr2<-mr(dat, method_list=c("mr_ivw"))
tsmr5<-mr(dat, method_list=c("mr_raps"))
tsmr6<-mr(dat, method_list=c("mr_wald_ratio"))
tsmr3<-mr(dat, method_list=c("mr_egger_regression"))
tsmr4<-mr(dat, method_list=c("mr_weighted_mode"))
a1<-cbind(tsmr1$nsnp,tsmr1$b,(tsmr1$b-1.96*tsmr1$se),(tsmr1$b+1.96*tsmr1$se),tsmr1$pval) #weight median
a2<-cbind(tsmr2$nsnp,tsmr2$b,(tsmr2$b-1.96*tsmr2$se),(tsmr2$b+1.96*tsmr2$se),tsmr2$pval) #ivw
a5<-cbind(tsmr5$nsnp,tsmr5$b,(tsmr5$b-1.96*tsmr5$se),(tsmr5$b+1.96*tsmr5$se),tsmr5$pval) #raps
a6<-cbind(tsmr6$nsnp,tsmr6$b,(tsmr6$b-1.96*tsmr6$se),(tsmr6$b+1.96*tsmr6$se),tsmr6$pval) #WR
CI <- 0.95
lowerCI <- function(beta,df,SE){
return(beta - (qt((1-CI)/2, df, lower.tail = FALSE) * SE))
}
upperCI <- function(beta,df,SE){
return(beta + (qt((1-CI)/2, df, lower.tail = FALSE) * SE))
}
a3 <- cbind(tsmr3$nsnp,tsmr3$b, mapply(lowerCI, tsmr3$b, tsmr3$nsnp - 2, tsmr3$se), mapply(upperCI, tsmr3$b, tsmr3$nsnp - 2, tsmr3$se), tsmr3$pval) #egger slope
a4 <- cbind(tsmr4$nsnp,tsmr4$b, mapply(lowerCI, tsmr4$b, tsmr4$nsnp - 1, tsmr4$se), mapply(upperCI, tsmr4$b, tsmr4$nsnp - 1, tsmr4$se), tsmr4$pval) #weight mode
write.table(a1,output1,quote=F,row.names=F,col.names=F,sep="\t") #weight median
write.table(a2,output2,quote=F,row.names=F,col.names=F,sep="\t") #ivw
write.table(a5,output5,quote=F,row.names=F,col.names=F,sep="\t") #raps
write.table(a6,output6,quote=F,row.names=F,col.names=F,sep="\t") #wr
write.table(a3,output3,quote=F,row.names=F,col.names=F,sep="\t") #egger slope
write.table(a4,output4,quote=F,row.names=F,col.names=F,sep="\t") #weight mode

3.Sensitivity Analysis
f)MR-PRESSO
Code:
Rscript MR_PRESSO_v1.0_.R Input Output
Input: exposure_dat_2 outcome_dat_2
Output: exp-out.distortion_test exp-out.global_test exp-out.main_MR_results exp-out.eachsnp_outlier_test

Code (MR_PRESSO_v1.0_.R):
Args<-commandArgs(TRUE)
input1<-Args[1] #exposure data
input2<-Args[2] #outcome data
output1<-Args[3] #distortion_test
output2<-Args[4] #global_test
output3<-Args[5] #main_MR_results
output4<-Args[6] #eachsnp.outlier_test

library(MRPRESSO)
data1<-read.table(input1,header=T)
data2<-read.table(input2,header=T)

bzx<-data1$B
sebzx<-data1$SE
bzy<-data2$B
sebzy<-data2$SE
data3<-cbind(bzx,sebzx,bzy,sebzy)
data3<-as.data.frame(data3)

results<-mr_presso(BetaOutcome = "bzy", BetaExposure = "bzx", SdOutcome= "sebzy", SdExposure = "sebzx", OUTLIERtest = TRUE, DISTORTIONtest= TRUE, data = data3, NbDistribution = 10000, SignifThreshold = 0.05)

a1<-results$`MR-PRESSO results`$`Distortion Test` #testing of significant distortion in the causal estimates before and after outlier removal
a2<-results$`Main MR results`
a3<-results$`MR-PRESSO results`$`Global Test`$Pvalue #detection of horizontal pleiotropy
a4<-results$`MR-PRESSO results`$`Global Test`$RSSobs
a5<-results$`MR-PRESSO results`$`Outlier Test` #correction of horizontal pleiotropy via outlier removal
b2<-cbind(a4,a3)
a2A<-cbind(a2[2],a2[3],a2[4],a2[5],a2[6])
write.table(a1,output1,quote=F,row.names=F,col.names=F,sep="\t")
write.table(b2,output2,quote=F,row.names=F,col.names=F,sep="\t")
write.table(a2A,output3,quote=F,row.names=T,col.names=T,sep="\t")
write.table(a5,output4,quote=F,row.names=F,col.names=T,sep="\t")

g)MR-Egger regression
Code:
Rscript MR_egger-intercept_v1.0_.R Input Output
Input: exposure_dat_2 outcome_dat_2
Output: exp-out.egger_intercept

Code (MR_egger-intercept_v1.0_.R):
Args <- commandArgs(TRUE)
input1 <- Args[1] #exposure data
input2 <- Args[2] #outcome data

output1 <- Args[3] #egger intercept

library(TwoSampleMR)

exp_dat <- read_exposure_data( filename = input1,sep = "\t",snp_col = "SNP",beta_col = "B",se_col = "SE",effect_allele_col = "A1",other_allele_col = "A2",eaf_col = "MAF",pval_col = "P",samplesize_col = "N")
outcome_dat <- read_outcome_data(snps = exp_dat$SNP,filename = input2,sep = "\t",snp_col = "SNP",beta_col = "B",se_col = "SE",effect_allele_col = "A1",other_allele_col = "A2",eaf_col = "MAF",pval_col = "P",samplesize_col = "N")
dat <- harmonise_data(exposure_dat = exp_dat, outcome_dat = outcome_dat, action=1)

Inter <- mr_egger_regression(dat$beta.exposure, dat$beta.outcome, dat$se.exposure, dat$se.outcome)
out <- cbind(Inter$ b_i, Inter$ se_i, Inter$ pval_i) #egger intercept
write.table(out,output1,quote=F,row.names=F,col.names=F,sep="\t") #egger intercept

h)LOO
Code:
Rscript MR_egger-intercept_v1.0_.R Input Output
Input: exposure_dat_2 outcome_dat_2
Output: exp-out.egger_intercept

Code (MR_egger-intercept_v1.0_.R):
Args <- commandArgs(TRUE)
input1 <- Args[1] #exposure data
input2 <- Args[2] #outcome data

output1 <- Args[3] #LOO

library(TwoSampleMR)

exp_dat <- read_exposure_data( filename = input1,sep = "\t",snp_col = "SNP",beta_col = "B",se_col = "SE",effect_allele _col = "A1",other_allele_col = "A2",eaf_col = "MAF",pval_col = "P",samplesize_col = "N")
outcome_dat <- read_outcome_data(snps = exp_dat$SNP,filename = input2,sep = "\t",snp_col = "SNP",beta_col = "B",se_col = "SE",effect_allele_col = "A1",other_allele_col = "A2",eaf_col = "MAF",pval_col = "P",samplesize_col = "N")
dat <- harmonise_data(exposure_dat = exp_dat,outcome_dat = outcome_dat)

loo <- mr_leaveoneout(dat)
b <- data.frame(loo$SNP,loo$b,loo$se,loo$p)
write.table(b,output1,quote=F,row.names=F,col.names=T,sep="\t") #leave one out results

4. Statistical power calculation
i)Power statistic in a binary outcome
Args<-commandArgs(TRUE)
input1<-Args[1] # A file include n (Sample size of outcome), pve (explained variance of genetic instruments on exposure), ratio (Case to Control Ratio of outcome), PSY (psychiatric disorder name)
output2<-Args[2] # Output file
data = read.table(input1,header=T,sep='\t',na.strings = "NA")

pow = qnorm(0.8) # MR test at the 80% power
sig1 = 0.05 # significance level (alpha)
sig2 = 4.49e-5

testFunc <- function(n,pve,ratio,PSY,sig){
if(PSY=="PTSD_MVP"){
return(((pow + qnorm(1-sig/2))/sqrt(n*pve)))
}else{
return( ((pow + qnorm(1-sig/2))/sqrt(n*pve*(ratio/(1+ratio))*(1/(1+ratio)))))}
}

data$b1 = mapply(testFunc, data$n, data$pve,data$ratio,data$PSY,sig1)
data$b2 = mapply(testFunc, data$n, data$pve,data$ratio,data$PSY,sig2)
write.table(data, output2, col.name=c("exposure","outcome","pve","N_outcome","ratio","beta_05","beta_4.49e5"), row.name=F, sep="\t")

j)Power statistic in a continuous outcome
Args<-commandArgs(TRUE)
input1<-Args[1] # A file include n (Sample size of outcome), pve (explained variance of genetic instruments on exposure)
output2<-Args[2] # Output file
data = read.table(input1,header=T,sep='\t',na.strings = "NA")

pow = qnorm(0.8) # MR test at the 80% power
sig1 = 0.05 # significance level (alpha)
sig2 = 4.49e-5

testFunc <- function(n,pve,sig){
return(((pow + qnorm(1-sig/2))/sqrt(n*pve)))
}

data$b1 = mapply(testFunc, data$n, data$pve, sig1)
data$b2 = mapply(testFunc, data$n, data$pve, sig2)

write.table(data, output2, col.name=c("exposure","outcome","pve","N_outcome","ratio","beta_05","beta_4.49e5"), row.name=F, sep="\t")

.