library(graph) library(qpgraph) library(GenomicRanges) library(BiocParallel) library(rrBLUP) library(RColorBrewer) library(org.Sc.sgd.db) library(xtable) library(GOstats) load("param.RData") load("eqtlnet.q0.fdr.nrr.sel.RData") ## ## calculate for each gene the variance explained by its eQTLs ## varExp <- varExplained(param, eqtlnet.q0.fdr.nrr.sel) ## ## add the number of connections with other genes ## edg <- edges(eqtlnet.q0.fdr.nrr.sel@qpg@g)[rownames(varExp)] edg <- lapply(edg, function(x, g) intersect(x, g), geneNames(param)) varExp <- cbind(varExp, degree=sapply(edg, length)) ## ## calculate narrow sense heritability (h2) ## ## first, calculate the realized matrix of genetic similarity genotypes <- ggData(param)[, markerNames(param)] genotypes <- genotypes * 2 -3 ## recode from {1, 2} to {-1, +1} A <- A.mat(genotypes, shrink=FALSE) / 2 exprxgene <- as.data.frame(ggData(param)[, rownames(varExp)]) ## second, fit the mixed-effects model for each gene to estimate variance components h2 <- lapply(exprxgene, function(x, A) mixed.solve(x, K=A, method="REML"), A) ## third calculate h2 from the variance components h2 <- sapply(h2, function(x) x$Vu / (x$Vu + x$Ve)) stopifnot(identical(rownames(varExp), names(h2))) ## QC varExp <- cbind(varExp, h2=h2) ## ## calculate average distance of gene to its eQTLs ## eqtls <- alleQTL(eqtlnet.q0.fdr.nrr.sel, map="physical", gene.loc=TRUE) eqtlsdistxgene <- split(eqtls$distance, eqtls$gene) ## check whether all genes have their eQTLs either all in the same chromosome or all in different chromosomes to his one maskallfinitedists <- sapply(eqtlsdistxgene, function(d) all(is.finite(d))) maskallinfinitedists <- sapply(eqtlsdistxgene, function(d) all(!is.finite(d))) all(maskallfinitedists | maskallinfinitedists) ## [1] TRUE ## since all eQTLs in every gene are either in the same chromosome of the gene ## or in a different chromosomes (distance is Inf) we can simply take the mean distance eqtlsdistxgene <- sapply(eqtlsdistxgene, mean) varExp$avgdist2eQTL <- eqtlsdistxgene[rownames(varExp)] save(varExp, eqtls, file="varExp.RData") ## ## some descriptive analyses on the eQTLs ## ## distribution of genes with eQTLs located in the same or different chromosomes table(c("different chromosome", "same chromosome")[is.finite(varExp$avgdist2eQTL)+1]) ## ## different chromosome same chromosome ## 59 391 ## set variance explained eta2 to the expected maximum h2 when eta2 > h2 eta2 <- varExp$eta2eQTLs h2 <- varExp$h2 eta2[eta2 > h2] <- h2[eta2 > h2] ## distribution of missing h2 summary(h2-eta2) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0.00000 0.04114 0.15330 0.17200 0.27670 0.61880 ## distribution of variance explained for genes connected to 9 or more other genes sum(varExp$degree >= 9) ## total number of genes connected to 9 or more other genes ## [1] 24 summary(varExp$eta2eQTLs[varExp$degree >= 9]) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 0.6805 0.7728 0.8204 0.8006 0.8419 0.8952 ## distribution of average distance between TSS and eQTLs for genes connected to 9 or more other genes dist.breaks <- c(0, 500, 1000, 10000, 20000, max(varExp$avgdist2eQTL[is.finite(varExp$avgdist2eQTL)]), Inf) table(cut(varExp$avgdist2eQTL[varExp$degree > 8], dist.breaks)) ## (0,500] (500,1e+03] (1e+03,1e+04] (1e+04,2e+04] (2e+04,1.88e+05] (1.88e+05,Inf] ## 1 2 1 3 5 12 ## ## generate table of genes connected to 9 or more other genes (Table 2 in the paper) ## tabGenesDegGe9 <- varExp[varExp$degree >= 9, ] tabGenesDegGe9$Gene <- select(org.Sc.sgd.db, keys=geneAnnotation(param)[rownames(tabGenesDegGe9)]$tx_name, columns="GENENAME")$GENENAME tabGenesDegGe9$Gene[is.na(tabGenesDegGe9$Gene)] <- geneAnnotation(param)[rownames(tabGenesDegGe9)[is.na(tabGenesDegGe9$Gene)]]$tx_name tabGenesDegGe9$Chr <- gsub("chr", "", IRanges::as.vector(seqnames(geneAnnotation(param)[rownames(tabGenesDegGe9)]))) ## the Pathway column has to be manually entered later tabGenesDegGe9$Pathway <- rep(NA_character_, times=nrow(tabGenesDegGe9)) eqtlschr <- split(eqtls$chrom, eqtls$gene)[rownames(tabGenesDegGe9)] eqtlschr <- sapply(eqtlschr, function(x) paste(unique(as.character(as.roman(x))), collapse=", ")) tabGenesDegGe9$Location <- eqtlschr tabGenesDegGe9$avgdist2eQTL[!is.finite(tabGenesDegGe9$avgdist2eQTL)] <- NA tabGenesDegGe9 <- tabGenesDegGe9[, c("Gene", "Chr", "Pathway", "Location", "avgdist2eQTL", "h2", "eta2eQTLs", "degree")] tabGenesDegGe9 <- tabGenesDegGe9[order(tabGenesDegGe9$degree, decreasing=TRUE), ] colnames(tabGenesDegGe9) <- c("Gene", "Chr", "Pathway", "Location", "Distance", "h2", "eta2", "Deg.") rownames(tabGenesDegGe9) <- 1:nrow(tabGenesDegGe9) xtabGenesDegGe9 <- xtable(tabGenesDegGe9, caption="Hub genes connected to 9 or more other genes in the eQTL network.", digits=c(-1, -1, -1, -1, -1, 0, 2, 2, 0), label="tabGenesDegGe9") print(xtabGenesDegGe9, file="tabGenesDegGe9.tex", sanitize.tex.function=function(x) x, caption.placement="top", include.rownames=FALSE, NA.string="NA") ## ## three genes from the previous table have unknown function in http://www.yeastgenome.org as of Sept. 2014 ## we perform a GO enrichment analysis on their network partners to predict their function ## unkfungenes <- c("YCL065W", "YCR041W", "YCR097W.A") hgOverGOBP <- vector(mode="list", length=length(unkfungenes)) names(hgOverGOBP) <- unkfungenes ## perform GO enrichment analysis selecting GO categories at Holm's FWER < 0.05 for (g in unkfungenes) { partnerGenes <- intersect(edges(graph(eqtlnet.q0.fdr.nrr.sel))[[g]], geneNames(eqtlnet.q0.fdr.nrr.sel)) partnerGenes <- geneAnnotation(eqtlnet.q0.fdr.nrr.sel)[partnerGenes]$tx_name GOparams <- new("GOHyperGParams", geneIds=partnerGenes, conditional=TRUE, universeGeneIds=geneNames(param), annotation="org.Sc.sgd.db", ontology="BP", pvalueCutoff=0.01, testDirection="over") hgOverGOBP[[g]] <- hyperGTest(GOparams) hgOverGOBP[[g]] <- summary(hgOverGOBP[[g]]) hgOverGOBP[[g]] <- hgOverGOBP[[g]][p.adjust(hgOverGOBP[[g]]$Pvalue, method="holm") < 0.05, ] hgOverGOBP[[g]][p.adjust(hgOverGOBP[[g]]$Pvalue, method="holm") < 0.01, ] } ## all three genes seem to be involved in mating regulation lapply(hgOverGOBP, head, n=3) ## $YCL065W ## GOBPID Pvalue OddsRatio ExpCount Count Size Term ## 1 GO:0019236 1.428296e-12 135.60000 0.1914431 8 88 response to pheromone ## 2 GO:0022414 2.181951e-12 449.36585 0.1329847 7 89 reproductive process ## 3 GO:0000746 5.427084e-10 77.22857 0.2284264 7 105 conjugation ## ## $YCR041W ## GOBPID Pvalue OddsRatio ExpCount Count Size Term ## 1 GO:0019236 3.282120e-10 78.09465 0.20739666 7 88 response to pheromone ## 2 GO:0022414 4.855815e-10 190.22892 0.13298468 6 89 reproductive process ## 3 GO:0007530 9.484603e-09 114.02083 0.08248731 5 35 sex determination ## ## $YCR097W.A ## GOBPID Pvalue OddsRatio ExpCount Count Size Term ## 1 GO:0019236 2.386086e-11 156.27572 0.1595359 7 88 response to pheromone ## 2 GO:0022414 6.857009e-10 86.63374 0.4550399 8 251 reproductive process ## 3 GO:0000746 8.141075e-09 81.92424 0.1903553 6 105 conjugation ## ## plot Figure 5 ## labelPanel <- function(lab, font=2, cex=2, offsetx=0.05, offsety=0.05) { par(xpd=TRUE) w <- par("usr")[2] - par("usr")[1] h <- par("usr")[4] - par("usr")[3] text(par("usr")[1]-w*offsetx, par("usr")[4]+h*offsety, lab, font=font, cex=cex) par(xpd=FALSE) } pdf(file="Figure5.pdf", height=4, width=12, useDingbats=FALSE) par(mfrow=c(1, 3), mar=c(5, 5, 3, 2)) h <- hist(varExp$eta2eQTLs, main="", xlab="% Variance explained by eQTL", ylab="Number of genes", prob=TRUE, xaxt="n", yaxt="n", col=gray(0.8), cex.lab=1.4) axis(1, at=seq(0.1, 0.9, by=0.2), labels=seq(10, 90, by=20), cex.axis=1.1) axis(2, at=seq(0, 2.5, by=0.5), labels=seq(0, 100, by=20), las=1, cex.axis=1.1) par(xpd=TRUE) text(h$mids, h$density, sprintf("%.0f%%", 100*cumsum(h$counts)/sum(h$counts)), pos=3) par(xpd=FALSE) labelPanel(LETTERS[1], offsetx=0.15, offsety=0.05) cols <- rep(NA, times=max(varExp$degree)+1) cols[sort(unique(varExp$degree+1), decreasing=TRUE)] <- colorRampPalette(c("red2", "skyblue"))(length(unique(varExp$degree))) plot(varExp$eta2eQTLs, varExp$h2, col="white", pch=1, cex=1.3, xlim=c(0, 1), ylim=c(0, 1), cex.lab=1.4, ylab=expression(paste("Narrow-sense heritability (", h^2, ")")), xlab="% Variance explained by eQTL", xaxt="n", yaxt="n") axis(1, at=seq(0, 1.0, by=0.1), labels=seq(0, 100, by=10), cex.axis=1.1) axis(2, at=seq(0, 1.0, by=0.1), labels=seq(0, 100, by=10), cex.axis=1.1, las=1) segments(seq(0, 1, by=0.1), par("usr")[3], seq(0, 1, by=0.1), par("usr")[4], col=gray(0.75), lty="dotted", lwd=1) segments(par("usr")[1], seq(0, 1, by=0.1), par("usr")[2], seq(0, 1, by=0.1), col=gray(0.75), lty="dotted", lwd=1) abline(0, 1, lwd=2, col=gray(0.70)) points(varExp$eta2eQTLs[varExp$degree > 0], varExp$h2[varExp$degree > 0], pch=19, cex=1, col=cols[varExp$degree[varExp$degree > 0]+1]) points(varExp$eta2eQTLs[varExp$degree == 0], varExp$h2[varExp$degree == 0], pch=1, cex=1.3, col=cols[1]) legend("bottomright", legend=sort(unique(varExp$degree)), inset=0.01, title="Deg.", cex=0.85, pch=c(1, rep(19, length(unique(varExp$degree))-1)), col=cols[sort(unique(varExp$degree))+1], bg="white") labelPanel(LETTERS[2], offsetx=0.15, offsety=0.05) dist.breaks <- c(0, 500, 1000, 10000, 20000, max(varExp$avgdist2eQTL[is.finite(varExp$avgdist2eQTL)])) cols <- colorRampPalette(c("red2", "skyblue"))(length(dist.breaks)) cols <- cols[length(cols):1] typeQTLpch <- typeQTLcol <- as.integer(cut(varExp$avgdist2eQTL, dist.breaks)) typeQTLpch[!is.na(typeQTLpch)] <- 1 typeQTLpch[is.na(typeQTLpch)] <- 19 names(typeQTLpch) <- names(varExp$avgdist2eQTL) typeQTLcol[!is.na(typeQTLcol)] <- cols[typeQTLcol[!is.na(typeQTLcol)]] typeQTLcol[is.na(typeQTLcol)] <- cols[length(dist.breaks)] names(typeQTLcol) <- names(varExp$avgdist2eQTL) stripchart(100*eta2eQTLs ~ degree, data=varExp, vertical=TRUE, cex.lab=1.4, las=1, cex.axis=1.1, col="white", cex=1, xlab="Connectivity degree with other genes", ylab="% Variance explained by eQTL", xaxt="n", yaxt="n", xlim=c(range(varExp$degree))) axis(1, at=seq(0, max(varExp$degree), by=2), labels=seq(0, max(varExp$degree), by=2), cex.axis=1.1) axis(2, at=seq(10, 90, by=10), labels=seq(10, 90, by=10), cex.axis=1.1, las=1) segments(seq(0, max(varExp$degree), by=2), 0, seq(0, max(varExp$degree), by=2), 100, col=gray(0.75), lty="dotted", lwd=1) segments(par("usr")[1], seq(10, 90, by=10), par("usr")[2], seq(10, 90, by=10), col=gray(0.75), lty="dotted", lwd=1) deg2x <- varExp[is.finite(varExp$avgdist2eQTL), "degree"] points(jitter(deg2x, factor=1.5), 100*varExp[is.finite(varExp$avgdist2eQTL), "eta2eQTLs"], pch=typeQTLpch[is.finite(varExp$avgdist2eQTL)], col=typeQTLcol[is.finite(varExp$avgdist2eQTL)], cex=1.3) deg2x <- varExp[!is.finite(varExp$avgdist2eQTL), "degree"] points(jitter(deg2x, factor=1.5), 100*varExp[!is.finite(varExp$avgdist2eQTL), "eta2eQTLs"], pch=19, col=cols[length(dist.breaks)], cex=1) legend("bottomright", c("TSS < 500bp", "TSS < 1Kb", "TSS < 10Kb", "TSS < 20Kb", "TSS > 20Kb", "TSS other chr."), inset=0.05, pch=c(rep(1, 5), 19), col=cols, bg="white") labelPanel(LETTERS[3], offsetx=0.15, offsety=0.05) dev.off()