# this script reproduces the results on functional coherence from
  # the paper "Reverse engineering molecular regulatory networks from
  # microarray data with qp-graphs" by R. Castelo and A. Roverato,
  # Journal of Computational Biology, vol. 16, nr.2, 2008.

  library(Biobase)
  library(tools)
  library(GOstats)
  library(org.EcK12.eg.db)
  library(qpgraph)

  data(EcoliOxygen)
  p <- dim(gds680.eset)["Features"]

  # set the levels to use in the three strategies to reverse-engineer
  # a transcriptional regulatory network from microarray data
  recall_level <- 0.03      # a nominal recall level of 3%
  precision_level <- 0.50   # a nominal precision level of 50%
  size_level <- 1000        # the top-scoring 1000 interactions

  # load measurements taken from CLR, ARACNE and GENENET
  load("GDS680.clr.aracne.genenet.RData")

  # import previously calculated non-rejection rates for different
  # q-orders and calculate the average non-rejection rate as the
  # arithmetic mean
  avgnrr.estimates <- matrix(as.double(0), nrow=p, ncol=p)
  nrrfiles <- list.files("NRRsGDS680", full.names=TRUE)
  for (filename in nrrfiles) {
    # we knew before hand that we used nTests=100
    nrr.estimates <- qpImportNrr(filename, nTests=100)
    avgnrr.estimates <- avgnrr.estimates +
                        (1 / length(nrrfiles)) * nrr.estimates
  }
  rownames(avgnrr.estimates) <- featureNames(gds680.eset)
  colnames(avgnrr.estimates) <- featureNames(gds680.eset)

  # estimate Pearson correlation coefficients as a reverse-engineering
  # method
  pcc.estimates <- qpPCC(gds680.eset)

  # assign a random correlation as a reverse-engineering method
  rndcor <- matrix(runif(p*p, min=-1, max=+1), nrow=p, ncol=p)
  rownames(rndcor) <- colnames(rndcor) <- featureNames(gds680.eset)

  # build an incidence matrix of the transcriptional network from
  # RegulonDB as a reference to calculate later precision-recall curves
  allOrderedGenes <- featureNames(gds680.eset)
  regulonTFgenes <- as.character(unique(filtered.regulon6.1[, "EgID_TF"]))
  regulonTGgenes <- as.character(unique(c(regulonTFgenes,
                                        filtered.regulon6.1[, "EgID_TG"])))

  filtered.regulon6.1.I <- matrix(FALSE, nrow=p, ncol=p)
  rownames(filtered.regulon6.1.I) <- allOrderedGenes
  colnames(filtered.regulon6.1.I) <- allOrderedGenes
  TFi <- match(filtered.regulon6.1[, "EgID_TF"],
               allOrderedGenes)
  TGi <- match(filtered.regulon6.1[, "EgID_TG"],
               allOrderedGenes)
  filtered.regulon6.1.I[cbind(TFi,TGi)] <- TRUE
  filtered.regulon6.1.I[cbind(TGi,TFi)] <- TRUE

  # calculate precision-recal curves
  avgnrr.pr <- qpPrecisionRecall(avgnrr.estimates,
                 filtered.regulon6.1.I, decreasing=FALSE,
                 pairup.i=regulonTFgenes,
                 pairup.j=regulonTGgenes,
                 recallSteps=c(seq(0,0.1,0.005),
                               seq(0.1,1.0,0.1)))

  abspcc.pr <- qpPrecisionRecall(abs(pcc.estimates$R),
                 filtered.regulon6.1.I, decreasing=TRUE,
                 pairup.i=regulonTFgenes,
                 pairup.j=regulonTGgenes,
                 recallSteps=c(seq(0,0.1,0.005),
                               seq(0.1,1.0,0.1)))

  absrnd.pr <- qpPrecisionRecall(abs(rndcor),
                 filtered.regulon6.1.I, decreasing=TRUE,
                 pairup.i=regulonTFgenes,
                 pairup.j=regulonTGgenes,
                 recallSteps=c(seq(0,0.1,0.005),
                               seq(0.1,1.0,0.1)))

  clr.pr <- qpPrecisionRecall(abs(clr),
              filtered.regulon6.1.I, decreasing=TRUE,
              pairup.i=regulonTFgenes,
              pairup.j=regulonTGgenes,
              recallSteps=c(seq(0,0.1,0.005),
                            seq(0.1,1.0,0.1)))

  aracne.pr <- qpPrecisionRecall(aracne,
                 filtered.regulon6.1.I, decreasing=TRUE,
                 pairup.i=regulonTFgenes,
                 pairup.j=regulonTGgenes,
                 recallSteps=c(seq(0,0.1,0.005),
                               seq(0.1,1.0,0.1)))

  genenet.pr <- qpPrecisionRecall(abs(genenet),
                  filtered.regulon6.1.I, decreasing=TRUE,
                  pairup.i=regulonTFgenes,
                  pairup.j=regulonTGgenes,
                  recallSteps=c(seq(0,0.1,0.005),
                                seq(0.1,1.0,0.1)))

  # calculate score threshold at the given precision level for
  # each method and precision-recall curve

  avgnrr.thr <- qpPRscoreThreshold(avgnrr.pr, precision_level,
                  recall.level=FALSE, max.score=0)

  abspcc.thr <- qpPRscoreThreshold(abspcc.pr, precision_level,
                  recall.level=FALSE, max.score=1)

  absrnd.thr <- qpPRscoreThreshold(absrnd.pr, precision_level,
                  recall.level=FALSE, max.score=1)

  clr.thr <- qpPRscoreThreshold(clr.pr, precision_level,
                  recall.level=FALSE, max.score=max(abs(clr),na.rm=TRUE))

  aracne.thr <- qpPRscoreThreshold(aracne.pr, precision_level,
                  recall.level=FALSE, max.score=max(aracne,na.rm=TRUE))

  genenet.thr <- qpPRscoreThreshold(genenet.pr, precision_level,
                   recall.level=FALSE, max.score=max(abs(genenet),na.rm=TRUE))

  # build the network at the given precision level using the previously
  # calculated threshold score for each method

  qpg50pre <- qpGraph(avgnrr.estimates, avgnrr.thr,
                      pairup.i=regulonTFgenes,
                      pairup.j=allOrderedGenes)

  pcc50pre <- qpAnyGraph(abs(pcc.estimates$R), abspcc.thr, remove="below",
                         pairup.i=regulonTFgenes,
                         pairup.j=allOrderedGenes)

  rnd50pre <- qpAnyGraph(abs(rndcor), absrnd.thr, remove="below",
                      pairup.i=regulonTFgenes,
                      pairup.j=allOrderedGenes)

  clr50pre <- qpAnyGraph(abs(clr), clr.thr, remove="below",
                         pairup.i=regulonTFgenes,
                         pairup.j=allOrderedGenes)

  ara50pre <- qpAnyGraph(aracne, aracne.thr, remove="below",
                         pairup.i=regulonTFgenes,
                         pairup.j=allOrderedGenes)

  gne50pre <- qpAnyGraph(abs(genenet), genenet.thr, remove="below",
                      pairup.i=regulonTFgenes,
                      pairup.j=allOrderedGenes)

  # estimate functional coherence of each of the previous networks
  # obtained at the given precision level

  qpg50preFC <- qpFunctionalCoherence(A=qpg50pre,
                  TFgenes=regulonTFgenes,
                  chip="org.EcK12.eg.db",
                  minRMsize=5, verbose=TRUE)

  pcc50preFC <- qpFunctionalCoherence(A=pcc50pre,
                  TFgenes=regulonTFgenes,
                  chip="org.EcK12.eg.db",
                  minRMsize=5, verbose=TRUE)
  rnd50preFC <- qpFunctionalCoherence(A=rnd50pre,
                  TFgenes=regulonTFgenes,
                  chip="org.EcK12.eg.db",
                  minRMsize=5, verbose=TRUE)

  clr50preFC <- qpFunctionalCoherence(A=clr50pre,
                  TFgenes=regulonTFgenes,
                  chip="org.EcK12.eg.db",
                  minRMsize=5, verbose=TRUE)

  ara50preFC <- qpFunctionalCoherence(A=ara50pre,
                  TFgenes=regulonTFgenes,
                  chip="org.EcK12.eg.db",
                  minRMsize=5, verbose=TRUE)

  gne50preFC <- qpFunctionalCoherence(A=gne50pre,
                  TFgenes=regulonTFgenes,
                  chip="org.EcK12.eg.db",
                  minRMsize=5, verbose=TRUE)

  # calculate score threshold at the given recall level for
  # each method and precision-recall curve

  avgnrr.thr <- qpPRscoreThreshold(avgnrr.pr, recall_level,
                  recall.level=TRUE, max.score=0)

  abspcc.thr <- qpPRscoreThreshold(abspcc.pr, recall_level,
                  recall.level=TRUE, max.score=1)

  absrnd.thr <- qpPRscoreThreshold(absrnd.pr, recall_level,
                  recall.level=TRUE, max.score=1)

  clr.thr <- qpPRscoreThreshold(clr.pr, recall_level,
                  recall.level=TRUE, max.score=max(abs(clr),na.rm=TRUE))

  aracne.thr <- qpPRscoreThreshold(aracne.pr, recall_level,
                  recall.level=TRUE, max.score=max(aracne,na.rm=TRUE))

  genenet.thr <- qpPRscoreThreshold(genenet.pr, recall_level,
                  recall.level=TRUE, max.score=max(abs(genenet),na.rm=TRUE))

  # build the network at the given recall level using the previously
  # calculated threshold score for each method

  qpg3rec <- qpGraph(avgnrr.estimates, avgnrr.thr,
                     pairup.i=regulonTFgenes,
                     pairup.j=allOrderedGenes)

  pcc3rec <- qpAnyGraph(abs(pcc.estimates$R), abspcc.thr, remove="below",
                        pairup.i=regulonTFgenes,
                        pairup.j=allOrderedGenes)

  rnd3rec <- qpAnyGraph(abs(rndcor), absrnd.thr, remove="below",
                        pairup.i=regulonTFgenes,
                        pairup.j=allOrderedGenes)

  clr3rec <- qpAnyGraph(abs(clr), clr.thr, remove="below",
                        pairup.i=regulonTFgenes,
                        pairup.j=allOrderedGenes)

  ara3rec <- qpAnyGraph(aracne, aracne.thr, remove="below",
                        pairup.i=regulonTFgenes,
                        pairup.j=allOrderedGenes)

  gne3rec <- qpAnyGraph(abs(genenet), genenet.thr, remove="below",
                        pairup.i=regulonTFgenes,
                        pairup.j=allOrderedGenes)

  # estimate functional coherence of each of the previous networks
  # obtained at the given recal level

  qpg3recFC <- qpFunctionalCoherence(A=qpg3rec,
                 TFgenes=regulonTFgenes,
                 chip="org.EcK12.eg.db",
                 minRMsize=5, verbose=TRUE)

  pcc3recFC <- qpFunctionalCoherence(A=pcc3rec,
                 TFgenes=regulonTFgenes,
                 chip="org.EcK12.eg.db",
                 minRMsize=5, verbose=TRUE)

  rnd3recFC <- qpFunctionalCoherence(A=rnd3rec,
                 TFgenes=regulonTFgenes,
                 chip="org.EcK12.eg.db",
                 minRMsize=5, verbose=TRUE)

  clr3recFC <- qpFunctionalCoherence(A=clr3rec,
                 TFgenes=regulonTFgenes,
                 chip="org.EcK12.eg.db",
                 minRMsize=5, verbose=TRUE)

  ara3recFC <- qpFunctionalCoherence(A=ara3rec,
                 TFgenes=regulonTFgenes,
                 chip="org.EcK12.eg.db",
                 minRMsize=5, verbose=TRUE)

  gne3recFC <- qpFunctionalCoherence(A=gne3rec,
                 TFgenes=regulonTFgenes,
                 chip="org.EcK12.eg.db",
                 minRMsize=5, verbose=TRUE)

  # build the network of the given size for each method

  qpg1000sze <- qpGraph(avgnrr.estimates, threshold=NULL,
                        topPairs=size_level,
                        pairup.i=regulonTFgenes,
                        pairup.j=allOrderedGenes)

  pcc1000sze <- qpAnyGraph(abs(pcc.estimates$R), threshold=NULL,
                           topPairs=size_level, decreasing=TRUE,
                           pairup.i=regulonTFgenes,
                           pairup.j=allOrderedGenes)

  rnd1000sze <- qpAnyGraph(abs(rndcor), threshold=NULL,
                           topPairs=size_level, decreasing=TRUE,
                           pairup.i=regulonTFgenes,
                           pairup.j=allOrderedGenes)

  clr1000sze <- qpAnyGraph(abs(clr), threshold=NULL, topPairs=size_level,
                           decreasing=TRUE, pairup.i=regulonTFgenes,
                           pairup.j=allOrderedGenes)

  ara1000sze <- qpAnyGraph(aracne, threshold=NULL, topPairs=size_level,
                           decreasing=TRUE, pairup.i=regulonTFgenes,
                           pairup.j=allOrderedGenes)

  gne1000sze <- qpAnyGraph(abs(genenet), threshold=NULL, topPairs=size_level,
                           decreasing=TRUE, pairup.i=regulonTFgenes,
                           pairup.j=allOrderedGenes)

  # estimate functional coherence of each of the previous networks
  # obtained at the given size

  qpg1000szeFC <- qpFunctionalCoherence(A=qpg1000sze,
                    TFgenes=regulonTFgenes,
                    chip="org.EcK12.eg.db",
                    minRMsize=5, verbose=TRUE)

  pcc1000szeFC <- qpFunctionalCoherence(A=pcc1000sze,
                    TFgenes=regulonTFgenes,
                    chip="org.EcK12.eg.db",
                    minRMsize=5, verbose=TRUE)

  rnd1000szeFC <- qpFunctionalCoherence(A=rnd1000sze,
                    TFgenes=regulonTFgenes,
                    chip="org.EcK12.eg.db",
                    minRMsize=5, verbose=TRUE)

  clr1000szeFC <- qpFunctionalCoherence(A=clr1000sze,
                    TFgenes=regulonTFgenes,
                    chip="org.EcK12.eg.db",
                    minRMsize=5, verbose=TRUE)

  ara1000szeFC <- qpFunctionalCoherence(A=ara1000sze,
                    TFgenes=regulonTFgenes,
                    chip="org.EcK12.eg.db",
                    minRMsize=5, verbose=TRUE)

  gne1000szeFC <- qpFunctionalCoherence(A=gne1000sze,
                    TFgenes=regulonTFgenes,
                    chip="org.EcK12.eg.db",
                    minRMsize=5, verbose=TRUE)

  # estimate functional coherence on the RegulonDB transcriptional network
  # introducing different levels of noise

  # the noise will be introduced by replacing a fraction (the noise level)
  # of the target genes in each regulatory module defined as a transcription
  # factor and the set of target genes that regulates. We will use 5 different
  # levels of noise: 0% (no noise), 25%, 50%, 75% and 100%. The replacement
  # of targets will be done by using genes outside the regulatory module,
  # concretly those with the highest Pearson correlation with the transcription
  # factor. In order to accomplish this we will set to the minimum PCC value (0)
  # those pairs of genes corresponding to non-transcription factor relationships

  nonTFTGgenes <- setdiff(allOrderedGenes,regulonTFgenes)
  nonTFTGgenes <- match(nonTFTGgenes, allOrderedGenes)
  allpairs_nonTFTGgenes <- t(combn(nonTFTGgenes,2))

  pcc.estimates <- qpPCC(gds680.eset)
  pcc.estimates$R[allpairs_nonTFTGgenes] <- 0.0
  pcc.estimates$R[cbind(allpairs_nonTFTGgenes[,2],allpairs_nonTFTGgenes[,1])] <- 0.0

  # we will store the resulting functional coherence values in one list and
  # also the values of the correlations of the genes replacing the targets
  # in other list

  regulonFCwithNoise <- list()
  regulonFCwithNoiseCor <- list()

  # we also need the index positions of the transcription factor genes in our
  # ordered list of genes

  regulonTFgenesIdx <- match(regulonTFgenes, allOrderedGenes)

  # we are set now to start the calculation

  for (noise in c(0.00, 0.25, 0.50, 0.75, 1.00)) {
    realIwithNoise <- filtered.regulon6.1.I
    regulonFCwithNoiseCor[[as.character(noise)]] <- c()
    if (noise > 0.0) { # only when there is noise
      for (TFi in regulonTFgenesIdx) { # for each regulatory module
        nTGs <- sum(filtered.regulon6.1.I[TFi,]) # take the number of targets
        if (nTGs >= 5) { # if they are more than 5
          TGs <- (1:p)[filtered.regulon6.1.I[TFi,]] # replace the noise-fraction
          nTGs2replace <- floor(noise * nTGs)       # of targets by the genes outside
          TGs2replace <- TGs[sample(1:nTGs, size=nTGs2replace, replace=FALSE)]
          otherTGs <- (1:p)[-c(TGs, TFi)]           # the regulatory module with highest PCC
          sampleFromOtherTGs <- otherTGs[sort(abs(pcc.estimates$R[TFi, otherTGs]),
                                              decreasing=TRUE, index.return=TRUE)$ix[1:nTGs2replace]]
          cors <- sort(abs(pcc.estimates$R[TFi, otherTGs]), decreasing=TRUE,
                       index.return=TRUE)$x[1:nTGs2replace]
          regulonFCwithNoiseCor[[as.character(noise)]] <- c(regulonFCwithNoiseCor[[as.character(noise)]],
                                                            cors)
          # the matrix should remain symmetric
          realIwithNoise[TFi, TGs2replace] <- FALSE
          realIwithNoise[TGs2replace, TFi] <- FALSE
          realIwithNoise[TFi, sampleFromOtherTGs] <- TRUE
          realIwithNoise[sampleFromOtherTGs, TFi] <- TRUE
        }
      }
    }

    # estimate functional coherence
    regulonFCwithNoise[[as.character(noise)]] <- qpFunctionalCoherence(A=realIwithNoise,
                                                   TFgenes=regulonTFgenes,
                                                   chip="org.EcK12.eg.db",
                                                   minRMsize=5, verbose=TRUE)
  }


save(qpg3rec,qpg3recFC,pcc3rec,pcc3recFC,rnd3rec,rnd3recFC,
     clr3rec,clr3recFC,ara3rec,ara3recFC,gne3rec,gne3recFC,
     qpg50pre,qpg50preFC,pcc50pre,pcc50preFC,rnd50pre,rnd50preFC,
     clr50pre,clr50preFC,ara50pre,ara50preFC,gne50pre,gne50preFC,
     qpg1000sze,qpg1000szeFC,pcc1000sze,pcc1000szeFC,rnd1000sze,rnd1000szeFC,
     clr1000sze,clr1000szeFC,ara1000sze,ara1000szeFC,gne1000sze,gne1000szeFC,
     regulonFCwithNoise,regulonFCwithNoiseCor,file="FC4AllMethods.RData")