# this script reproduces the results on functional coherence from # the paper "Reverse engineering molecular regulatory networks from # microarray data with qp-graphs" by R. Castelo and A. Roverato, # Journal of Computational Biology, vol. 16, nr.2, 2008. library(Biobase) library(tools) library(GOstats) library(org.EcK12.eg.db) library(qpgraph) data(EcoliOxygen) p <- dim(gds680.eset)["Features"] # set the levels to use in the three strategies to reverse-engineer # a transcriptional regulatory network from microarray data recall_level <- 0.03 # a nominal recall level of 3% precision_level <- 0.50 # a nominal precision level of 50% size_level <- 1000 # the top-scoring 1000 interactions # load measurements taken from CLR, ARACNE and GENENET load("GDS680.clr.aracne.genenet.RData") # import previously calculated non-rejection rates for different # q-orders and calculate the average non-rejection rate as the # arithmetic mean avgnrr.estimates <- matrix(as.double(0), nrow=p, ncol=p) nrrfiles <- list.files("NRRsGDS680", full.names=TRUE) for (filename in nrrfiles) { # we knew before hand that we used nTests=100 nrr.estimates <- qpImportNrr(filename, nTests=100) avgnrr.estimates <- avgnrr.estimates + (1 / length(nrrfiles)) * nrr.estimates } rownames(avgnrr.estimates) <- featureNames(gds680.eset) colnames(avgnrr.estimates) <- featureNames(gds680.eset) # estimate Pearson correlation coefficients as a reverse-engineering # method pcc.estimates <- qpPCC(gds680.eset) # assign a random correlation as a reverse-engineering method rndcor <- matrix(runif(p*p, min=-1, max=+1), nrow=p, ncol=p) rownames(rndcor) <- colnames(rndcor) <- featureNames(gds680.eset) # build an incidence matrix of the transcriptional network from # RegulonDB as a reference to calculate later precision-recall curves allOrderedGenes <- featureNames(gds680.eset) regulonTFgenes <- as.character(unique(filtered.regulon6.1[, "EgID_TF"])) regulonTGgenes <- as.character(unique(c(regulonTFgenes, filtered.regulon6.1[, "EgID_TG"]))) filtered.regulon6.1.I <- matrix(FALSE, nrow=p, ncol=p) rownames(filtered.regulon6.1.I) <- allOrderedGenes colnames(filtered.regulon6.1.I) <- allOrderedGenes TFi <- match(filtered.regulon6.1[, "EgID_TF"], allOrderedGenes) TGi <- match(filtered.regulon6.1[, "EgID_TG"], allOrderedGenes) filtered.regulon6.1.I[cbind(TFi,TGi)] <- TRUE filtered.regulon6.1.I[cbind(TGi,TFi)] <- TRUE # calculate precision-recal curves avgnrr.pr <- qpPrecisionRecall(avgnrr.estimates, filtered.regulon6.1.I, decreasing=FALSE, pairup.i=regulonTFgenes, pairup.j=regulonTGgenes, recallSteps=c(seq(0,0.1,0.005), seq(0.1,1.0,0.1))) abspcc.pr <- qpPrecisionRecall(abs(pcc.estimates$R), filtered.regulon6.1.I, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=regulonTGgenes, recallSteps=c(seq(0,0.1,0.005), seq(0.1,1.0,0.1))) absrnd.pr <- qpPrecisionRecall(abs(rndcor), filtered.regulon6.1.I, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=regulonTGgenes, recallSteps=c(seq(0,0.1,0.005), seq(0.1,1.0,0.1))) clr.pr <- qpPrecisionRecall(abs(clr), filtered.regulon6.1.I, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=regulonTGgenes, recallSteps=c(seq(0,0.1,0.005), seq(0.1,1.0,0.1))) aracne.pr <- qpPrecisionRecall(aracne, filtered.regulon6.1.I, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=regulonTGgenes, recallSteps=c(seq(0,0.1,0.005), seq(0.1,1.0,0.1))) genenet.pr <- qpPrecisionRecall(abs(genenet), filtered.regulon6.1.I, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=regulonTGgenes, recallSteps=c(seq(0,0.1,0.005), seq(0.1,1.0,0.1))) # calculate score threshold at the given precision level for # each method and precision-recall curve avgnrr.thr <- qpPRscoreThreshold(avgnrr.pr, precision_level, recall.level=FALSE, max.score=0) abspcc.thr <- qpPRscoreThreshold(abspcc.pr, precision_level, recall.level=FALSE, max.score=1) absrnd.thr <- qpPRscoreThreshold(absrnd.pr, precision_level, recall.level=FALSE, max.score=1) clr.thr <- qpPRscoreThreshold(clr.pr, precision_level, recall.level=FALSE, max.score=max(abs(clr),na.rm=TRUE)) aracne.thr <- qpPRscoreThreshold(aracne.pr, precision_level, recall.level=FALSE, max.score=max(aracne,na.rm=TRUE)) genenet.thr <- qpPRscoreThreshold(genenet.pr, precision_level, recall.level=FALSE, max.score=max(abs(genenet),na.rm=TRUE)) # build the network at the given precision level using the previously # calculated threshold score for each method qpg50pre <- qpGraph(avgnrr.estimates, avgnrr.thr, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) pcc50pre <- qpAnyGraph(abs(pcc.estimates$R), abspcc.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) rnd50pre <- qpAnyGraph(abs(rndcor), absrnd.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) clr50pre <- qpAnyGraph(abs(clr), clr.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) ara50pre <- qpAnyGraph(aracne, aracne.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) gne50pre <- qpAnyGraph(abs(genenet), genenet.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) # estimate functional coherence of each of the previous networks # obtained at the given precision level qpg50preFC <- qpFunctionalCoherence(A=qpg50pre, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) pcc50preFC <- qpFunctionalCoherence(A=pcc50pre, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) rnd50preFC <- qpFunctionalCoherence(A=rnd50pre, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) clr50preFC <- qpFunctionalCoherence(A=clr50pre, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) ara50preFC <- qpFunctionalCoherence(A=ara50pre, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) gne50preFC <- qpFunctionalCoherence(A=gne50pre, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) # calculate score threshold at the given recall level for # each method and precision-recall curve avgnrr.thr <- qpPRscoreThreshold(avgnrr.pr, recall_level, recall.level=TRUE, max.score=0) abspcc.thr <- qpPRscoreThreshold(abspcc.pr, recall_level, recall.level=TRUE, max.score=1) absrnd.thr <- qpPRscoreThreshold(absrnd.pr, recall_level, recall.level=TRUE, max.score=1) clr.thr <- qpPRscoreThreshold(clr.pr, recall_level, recall.level=TRUE, max.score=max(abs(clr),na.rm=TRUE)) aracne.thr <- qpPRscoreThreshold(aracne.pr, recall_level, recall.level=TRUE, max.score=max(aracne,na.rm=TRUE)) genenet.thr <- qpPRscoreThreshold(genenet.pr, recall_level, recall.level=TRUE, max.score=max(abs(genenet),na.rm=TRUE)) # build the network at the given recall level using the previously # calculated threshold score for each method qpg3rec <- qpGraph(avgnrr.estimates, avgnrr.thr, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) pcc3rec <- qpAnyGraph(abs(pcc.estimates$R), abspcc.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) rnd3rec <- qpAnyGraph(abs(rndcor), absrnd.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) clr3rec <- qpAnyGraph(abs(clr), clr.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) ara3rec <- qpAnyGraph(aracne, aracne.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) gne3rec <- qpAnyGraph(abs(genenet), genenet.thr, remove="below", pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) # estimate functional coherence of each of the previous networks # obtained at the given recal level qpg3recFC <- qpFunctionalCoherence(A=qpg3rec, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) pcc3recFC <- qpFunctionalCoherence(A=pcc3rec, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) rnd3recFC <- qpFunctionalCoherence(A=rnd3rec, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) clr3recFC <- qpFunctionalCoherence(A=clr3rec, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) ara3recFC <- qpFunctionalCoherence(A=ara3rec, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) gne3recFC <- qpFunctionalCoherence(A=gne3rec, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) # build the network of the given size for each method qpg1000sze <- qpGraph(avgnrr.estimates, threshold=NULL, topPairs=size_level, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) pcc1000sze <- qpAnyGraph(abs(pcc.estimates$R), threshold=NULL, topPairs=size_level, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) rnd1000sze <- qpAnyGraph(abs(rndcor), threshold=NULL, topPairs=size_level, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) clr1000sze <- qpAnyGraph(abs(clr), threshold=NULL, topPairs=size_level, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) ara1000sze <- qpAnyGraph(aracne, threshold=NULL, topPairs=size_level, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) gne1000sze <- qpAnyGraph(abs(genenet), threshold=NULL, topPairs=size_level, decreasing=TRUE, pairup.i=regulonTFgenes, pairup.j=allOrderedGenes) # estimate functional coherence of each of the previous networks # obtained at the given size qpg1000szeFC <- qpFunctionalCoherence(A=qpg1000sze, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) pcc1000szeFC <- qpFunctionalCoherence(A=pcc1000sze, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) rnd1000szeFC <- qpFunctionalCoherence(A=rnd1000sze, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) clr1000szeFC <- qpFunctionalCoherence(A=clr1000sze, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) ara1000szeFC <- qpFunctionalCoherence(A=ara1000sze, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) gne1000szeFC <- qpFunctionalCoherence(A=gne1000sze, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) # estimate functional coherence on the RegulonDB transcriptional network # introducing different levels of noise # the noise will be introduced by replacing a fraction (the noise level) # of the target genes in each regulatory module defined as a transcription # factor and the set of target genes that regulates. We will use 5 different # levels of noise: 0% (no noise), 25%, 50%, 75% and 100%. The replacement # of targets will be done by using genes outside the regulatory module, # concretly those with the highest Pearson correlation with the transcription # factor. In order to accomplish this we will set to the minimum PCC value (0) # those pairs of genes corresponding to non-transcription factor relationships nonTFTGgenes <- setdiff(allOrderedGenes,regulonTFgenes) nonTFTGgenes <- match(nonTFTGgenes, allOrderedGenes) allpairs_nonTFTGgenes <- t(combn(nonTFTGgenes,2)) pcc.estimates <- qpPCC(gds680.eset) pcc.estimates$R[allpairs_nonTFTGgenes] <- 0.0 pcc.estimates$R[cbind(allpairs_nonTFTGgenes[,2],allpairs_nonTFTGgenes[,1])] <- 0.0 # we will store the resulting functional coherence values in one list and # also the values of the correlations of the genes replacing the targets # in other list regulonFCwithNoise <- list() regulonFCwithNoiseCor <- list() # we also need the index positions of the transcription factor genes in our # ordered list of genes regulonTFgenesIdx <- match(regulonTFgenes, allOrderedGenes) # we are set now to start the calculation for (noise in c(0.00, 0.25, 0.50, 0.75, 1.00)) { realIwithNoise <- filtered.regulon6.1.I regulonFCwithNoiseCor[[as.character(noise)]] <- c() if (noise > 0.0) { # only when there is noise for (TFi in regulonTFgenesIdx) { # for each regulatory module nTGs <- sum(filtered.regulon6.1.I[TFi,]) # take the number of targets if (nTGs >= 5) { # if they are more than 5 TGs <- (1:p)[filtered.regulon6.1.I[TFi,]] # replace the noise-fraction nTGs2replace <- floor(noise * nTGs) # of targets by the genes outside TGs2replace <- TGs[sample(1:nTGs, size=nTGs2replace, replace=FALSE)] otherTGs <- (1:p)[-c(TGs, TFi)] # the regulatory module with highest PCC sampleFromOtherTGs <- otherTGs[sort(abs(pcc.estimates$R[TFi, otherTGs]), decreasing=TRUE, index.return=TRUE)$ix[1:nTGs2replace]] cors <- sort(abs(pcc.estimates$R[TFi, otherTGs]), decreasing=TRUE, index.return=TRUE)$x[1:nTGs2replace] regulonFCwithNoiseCor[[as.character(noise)]] <- c(regulonFCwithNoiseCor[[as.character(noise)]], cors) # the matrix should remain symmetric realIwithNoise[TFi, TGs2replace] <- FALSE realIwithNoise[TGs2replace, TFi] <- FALSE realIwithNoise[TFi, sampleFromOtherTGs] <- TRUE realIwithNoise[sampleFromOtherTGs, TFi] <- TRUE } } } # estimate functional coherence regulonFCwithNoise[[as.character(noise)]] <- qpFunctionalCoherence(A=realIwithNoise, TFgenes=regulonTFgenes, chip="org.EcK12.eg.db", minRMsize=5, verbose=TRUE) } save(qpg3rec,qpg3recFC,pcc3rec,pcc3recFC,rnd3rec,rnd3recFC, clr3rec,clr3recFC,ara3rec,ara3recFC,gne3rec,gne3recFC, qpg50pre,qpg50preFC,pcc50pre,pcc50preFC,rnd50pre,rnd50preFC, clr50pre,clr50preFC,ara50pre,ara50preFC,gne50pre,gne50preFC, qpg1000sze,qpg1000szeFC,pcc1000sze,pcc1000szeFC,rnd1000sze,rnd1000szeFC, clr1000sze,clr1000szeFC,ara1000sze,ara1000szeFC,gne1000sze,gne1000szeFC, regulonFCwithNoise,regulonFCwithNoiseCor,file="FC4AllMethods.RData")