library(qtl) library(graph) library(qpgraph) library(Rmpi) library(parallel) library(doParallel) ## parameters controling the entire simulation experiment useMPIcluster <- TRUE ## set n.cores to an appropriate value below if this is set to TRUE n.cores <- detectCores() ## use all available cores (you might want to change this!!) if (useMPIcluster) ## number of cores to use in parallel in an MPI cluster n.cores <- 120 ## THIS NUMBER SHOULD MATCH THE SGE SCRIPT LINE #$ -pe ompi 120 n <- 100 ## number of individuals per cross data set p <- 100 ## number of genes rho <- 0.50 ## mean marginal correlation of the confounding factor to the genes a <- 2.5 ## eQTL additive effect map <- sim.map(len=100, ## genetic map consisting of one chromosome 100 cM long n.mar=10, ## with 10 markers equally spaced along the chromosome anchor.tel=FALSE, eq.spacing=TRUE) n.simxeffect <- 10 ## number of simulated eQTL models per effect size n.sim <- 100 ## number of simulated data sets per simulated eQTL model qOrders <- 0:50 ## conditioning orders to examine cl <- NULL if (useMPIcluster) { cl <- makeCluster(n.cores, type="MPI") clusterEvalQ(cl, library(qtl)) clusterEvalQ(cl, library(graph)) clusterEvalQ(cl, library(qpgraph)) clusterSetRNGStream(cl) clusterExport(cl, list("n", "p", "rho", "a", "map", "n.sim", "qOrders")) registerDoParallel(cl=cl) } else registerDoParallel(cores=n.cores) ## ## Confounding effect affects all genes ## ## simulate an eQTL network with one cis-eQTL and no gene-gene associations sim.eqtl <- reQTLcross(eQTLcrossParam(map=map, genes=p, cis=1L, networkParam=dRegularGraphParam(d=0)), rho=rho, a=a) ## we want to test for eQTL1->g2 and g1-g3 eqtl.g1 <- ciseQTL(sim.eqtl)$QTL ## take the eQTL for testing g1 <- ciseQTL(sim.eqtl)$gene ## take the gene with the eQTL g2 <- setdiff(paste0("g", 1:p), g1)[1] ## gene g2 will be the first other gene g3 <- setdiff(paste0("g", 1:p), c(g1, g2))[1] ## gene g3 will be the second other gene bd.g1 <- edges(sim.eqtl$model$g, g1)[[1]] ## take the gene boundary of each of bd.g2 <- edges(sim.eqtl$model$g, g2)[[1]] ## the previous three genes bd.g3 <- edges(sim.eqtl$model$g, g3)[[1]] ## add a confounding factor as a continuous variable (it'll be the p+1 gene) sim.eqtl <- addGenes(sim.eqtl, 1) gC <- paste0("g", p+1) ## the confounding factor affects all genes for (i in 1:p) sim.eqtl <- qpgraph:::addGeneAssociation(sim.eqtl, gC, paste0("g", i)) pvalsAll <- vector(mode="list", length=n.simxeffect) for (i in 1:n.simxeffect) { cat("i=", i, "\n") pvalsAll[[i]] <- vector(mode="list", length=4) pvalsAll[[i]][["ConfoundedQTL"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) pvalsAll[[i]][["ConfoundedGenes"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) pvalsAll[[i]][["AdjustedQTL"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) pvalsAll[[i]][["AdjustedGenes"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) ## simulate the parameters again according to the updated model sim.eqtl <- reQTLcross(sim.eqtl, rho=rho, a=a) if (useMPIcluster) clusterExport(cl, list("sim.eqtl", "g1", "g2", "g3", "gC", "bd.g1", "bd.g2", "bd.g3")) for (j in seq(along=qOrders)) { q <- qOrders[j] cat("q=", q, "\n") if (q == 0) ## q=0 no conditioning Qq <- Qg <- Qqa <- Qga <- NULL else { ## q > 0 conditioning on a larger random subset Qq <- c(bd.g2, sample(setdiff(paste0("g", 1:p), c(bd.g2, g2, gC)), size=q, replace=FALSE)) Qqa <- c(Qq, gC) Qg <- c(bd.g3, sample(setdiff(paste0("g", 1:p), c(bd.g3, g1, g3, gC)), size=q, replace=FALSE)) Qga <- c(Qg, gC) } if (useMPIcluster) clusterExport(cl, list("q", "Qq", "Qg")) res <- foreach (icount(n.sim), .combine=rbind) %dopar% { cross <- sim.cross(map, sim.eqtl, n.ind=n) pq <- qpCItest(cross, i=eqtl.g1, j=g2, Q=Qq)$p.value pqa <- qpCItest(cross, i=eqtl.g1, j=g2, Q=Qqa)$p.value pg <- qpCItest(cross, i=g1, j=g3, Q=Qg)$p.value pga <- qpCItest(cross, i=g1, j=g3, Q=Qga)$p.value c(confoundedQTL=as.vector(pq), confoundedGenes=as.vector(pg), adjustedQTL=as.vector(pqa), adjustedGenes=as.vector(pga)) } pvalsAll[[i]][["ConfoundedQTL"]][j, ] <- res[, "confoundedQTL"] pvalsAll[[i]][["ConfoundedGenes"]][j, ] <- res[, "confoundedGenes"] pvalsAll[[i]][["AdjustedQTL"]][j, ] <- res[, "adjustedQTL"] pvalsAll[[i]][["AdjustedGenes"]][j, ] <- res[, "adjustedGenes"] } } ## ## Confounding effect affects only tested genes ## sim.eqtl <- reQTLcross(eQTLcrossParam(map=map, genes=p, cis=1L, networkParam=dRegularGraphParam(d=0)), rho=rho, a=a) eqtl.g1 <- ciseQTL(sim.eqtl)$QTL g1 <- ciseQTL(sim.eqtl)$gene g2 <- setdiff(paste0("g", 1:p), g1)[1] g3 <- setdiff(paste0("g", 1:p), c(g1, g2))[1] bd.g1 <- edges(sim.eqtl$model$g, g1)[[1]] bd.g2 <- edges(sim.eqtl$model$g, g2)[[1]] bd.g3 <- edges(sim.eqtl$model$g, g3)[[1]] bd.g1 <- edges(sim.eqtl$model$g, g1)[[1]] bd.g2 <- edges(sim.eqtl$model$g, g2)[[1]] bd.g3 <- edges(sim.eqtl$model$g, g3)[[1]] ## add a confounding factor as a continuous variable sim.eqtl <- qpgraph:::addGenes(sim.eqtl, 1) gC <- paste0("g", p+1) ## afecting only the tested genes sim.eqtl <- qpgraph:::addGeneAssociation(sim.eqtl, gC, g1) sim.eqtl <- qpgraph:::addGeneAssociation(sim.eqtl, gC, g2) sim.eqtl <- qpgraph:::addGeneAssociation(sim.eqtl, gC, g3) pvalsTested <- vector(mode="list", length=n.simxeffect) for (i in 1:n.simxeffect) { cat("i=", i, "\n") pvalsTested[[i]] <- vector(mode="list", length=4) pvalsTested[[i]][["ConfoundedQTL"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) pvalsTested[[i]][["ConfoundedGenes"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) pvalsTested[[i]][["AdjustedQTL"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) pvalsTested[[i]][["AdjustedGenes"]] <- matrix(NA, nrow=length(qOrders), ncol=n.sim) ## simulate the parameters again according to the updated model sim.eqtl <- reQTLcross(sim.eqtl, rho=rho, a=a) if (useMPIcluster) clusterExport(cl, list("sim.eqtl", "g1", "g2", "g3", "gC", "bd.g1", "bd.g2", "bd.g3")) for (j in seq(along=qOrders)) { q <- qOrders[j] cat("q=", q, "\n") if (q == 0) ## q=0 no conditioning Qq <- Qg <- Qqa <- Qga <- NULL else { ## q > 0 conditioning on a larger random subset Qq <- c(bd.g2, sample(setdiff(paste0("g", 1:p), c(bd.g2, g2, gC)), size=q, replace=FALSE)) Qqa <- c(Qq, gC) Qg <- c(bd.g3, sample(setdiff(paste0("g", 1:p), c(bd.g3, g1, g3, gC)), size=q, replace=FALSE)) Qga <- c(Qg, gC) } if (useMPIcluster) clusterExport(cl, list("q", "Qq", "Qg")) res <- foreach (icount(n.sim), .combine=rbind) %dopar% { cross <- sim.cross(map, sim.eqtl, n.ind=n) pq <- qpCItest(cross, i=eqtl.g1, j=g2, Q=Qq)$p.value pqa <- qpCItest(cross, i=eqtl.g1, j=g2, Q=Qqa)$p.value pg <- qpCItest(cross, i=g1, j=g3, Q=Qg)$p.value pga <- qpCItest(cross, i=g1, j=g3, Q=Qga)$p.value c(confoundedQTL=as.vector(pq), confoundedGenes=as.vector(pg), adjustedQTL=as.vector(pqa), adjustedGenes=as.vector(pga)) } pvalsTested[[i]][["ConfoundedQTL"]][j, ] <- res[, "confoundedQTL"] pvalsTested[[i]][["ConfoundedGenes"]][j, ] <- res[, "confoundedGenes"] pvalsTested[[i]][["AdjustedQTL"]][j, ] <- res[, "adjustedQTL"] pvalsTested[[i]][["AdjustedGenes"]][j, ] <- res[, "adjustedGenes"] } } ## calculate empirical type-I error rates type1errorAllConfoundedQTL <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorAllConfoundedGenes <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorTestedConfoundedQTL <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorTestedConfoundedGenes <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorAllAdjustedQTL <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorAllAdjustedGenes <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorTestedAdjustedQTL <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) type1errorTestedAdjustedGenes <- matrix(NA, nrow=n.simxeffect, ncol=length(qOrders)) for (i in 1:n.simxeffect) { type1errorAllConfoundedQTL[i, ] <- rowSums(pvalsAll[[i]][["ConfoundedQTL"]] < 0.05, na.rm=TRUE) / n.sim type1errorAllConfoundedGenes[i, ] <- rowSums(pvalsAll[[i]][["ConfoundedGenes"]] < 0.05, na.rm=TRUE) / n.sim type1errorTestedConfoundedQTL[i, ] <- rowSums(pvalsTested[[i]][["ConfoundedQTL"]] < 0.05, na.rm=TRUE) / n.sim type1errorTestedConfoundedGenes[i, ] <- rowSums(pvalsTested[[i]][["ConfoundedGenes"]] < 0.05, na.rm=TRUE) / n.sim type1errorAllAdjustedQTL[i, ] <- rowSums(pvalsAll[[i]][["AdjustedQTL"]] < 0.05, na.rm=TRUE) / n.sim type1errorAllAdjustedGenes[i, ] <- rowSums(pvalsAll[[i]][["AdjustedGenes"]] < 0.05, na.rm=TRUE) / n.sim type1errorTestedAdjustedQTL[i, ] <- rowSums(pvalsTested[[i]][["AdjustedQTL"]] < 0.05, na.rm=TRUE) / n.sim type1errorTestedAdjustedGenes[i, ] <- rowSums(pvalsTested[[i]][["AdjustedGenes"]] < 0.05, na.rm=TRUE) / n.sim } ## plot the figure labelPanel <- function(lab, font=2, cex=2, offsetx=0.05, offsety=0.05) { par(xpd=TRUE) w <- par("usr")[2] - par("usr")[1] h <- par("usr")[4] - par("usr")[3] text(par("usr")[1]-w*offsetx, par("usr")[4]+h*offsety, lab, font=font, cex=cex) par(xpd=FALSE) } pdf(file="Figure3.pdf", useDingbats=FALSE, height=5, width=12) par(mfrow=c(1, 2), mar=c(4, 4, 2, 2)) plot(qOrders, colMeans(type1errorAllConfoundedQTL), type="l", ylim=c(0, 1), cex.lab=1.2, xlab="Conditioning order q", ylab="Empirical type-I error", lty=2, lwd=3, las=1) lines(qOrders, colMeans(type1errorTestedConfoundedQTL), type="l", lwd=3, lty=1) lines(qOrders, colMeans(type1errorAllAdjustedQTL), type="l", lwd=3, lty=3) lines(qOrders, colMeans(type1errorTestedAdjustedQTL), type="l", lwd=3, lty=3) abline(h=0.05) legend("topright", c("Tested genes only are confounded", "All genes affected by confounding", "Confounding adjusted both cases"), lwd=3, lty=1:3, inset=0.05, bg="white") labelPanel(LETTERS[1], offsetx=0.1, offsety=0.05) plot(qOrders, colMeans(type1errorAllConfoundedGenes), type="l", ylim=c(0, 1), cex.lab=1.2, xlab="Conditioning order q", ylab="Empirical type-I error", lty=2, lwd=3, las=1) lines(qOrders, colMeans(type1errorTestedConfoundedGenes), type="l", lty=1, lwd=3) lines(qOrders, colMeans(type1errorAllAdjustedGenes), type="l", lwd=3, lty=3) lines(qOrders, colMeans(type1errorTestedAdjustedGenes), type="l", lwd=3, lty=3) abline(h=0.05) legend("center", c("Tested genes only are confounded", "All genes affected by confounding", "Confounding adjusted both cases"), lwd=3, lty=1:3, inset=0.05, bg="white") labelPanel(LETTERS[2], offsetx=0.1, offsety=0.05) dev.off()