본문 바로가기

R관련/Rfunction

GSEA Enrichment Score calculation

728x90
반응형

#' Calculate Enrichment score for GSEA plot

#' 

#' This function is slightly modified from GSEA.1.0.R downloaded from msigDB.\cr

#' Specific site address is below.\cr

#' http://software.broadinstitute.org/gsea/msigdb/download_file.jsp?filePath=/resources/software/GSEA-P-R.1.0.zip\cr

#' Original functions name are 'GSEA.EnrichmentScore' and 'GSEA.GeneRanking'\cr

#' I combined two functions into single function calculate enrichment score vector.\cr

#' It won't produce any statics including p-values and normalized enrichment score. Parameters are default.

#' @param exp.mat(default=NULL) : Matrix/data.frame of gene expression values (rows are genes, columns are samples) 

#' @param cls(default=NULL) : List type of class\cr

#' (ex. list(GPC1=c('A','B','C','D'),GPC2=c('E','F','G','H')))

#' @param geneSet(default=NULL) :  GeneSet list\cr

#' (ex. list(GeneSetA=c('geneA','geneB','geneC','geneD'),GeneSetB=c('geneE','geneF','geneG')))

#' @keywords enrichment score, GSEA, ES

#' @export

#' @examples

#' exp.mat=matrix(rnorm(10000),1000,10)

#' rownames(exp.mat)=paste0('gene',1:nrow(exp.mat))

#' colnames(exp.mat)=paste0('sample',1:ncol(exp.mat))

#' genes=rownames(exp.mat)

#' geneSet=list('A'=sample(genes,100),'B'=sample(genes,50),'C'=sample(genes,150))

#' samples=sample(colnames(exp.mat),10)

#' cls=list('groupA'=samples[1:5],'groupB'=samples[6:10])

#' EScore=calc.GSEA_EScore(geneSet=geneSet,exp.mat=exp.mat,cls=cls)

#' barplot(EScore$S2N)

#' plot(1:1000,EScore$ES[[1]]$EnrichmentScore,'l')

#' abline(v=EScore$ES[[1]]$Location.gene.tag)


calc.GSEA_EScore=function(geneSet=NULL,exp.mat=NULL,cls=NULL){

  exp.mat=as.matrix(exp.mat[,unlist(cls)])

  # Calculate signal2Noise across all genes (Group1.mean-Group2.mean)/(Group1.std+Group2.std)

    message('Calculate Signal2Noise')

    # Group1 statics

      group1.mean=rowMeans(exp.mat[,cls[[1]]])

      group1.std=rowSds(exp.mat[,cls[[1]]])

    # Group2 statics

      group2.mean=rowMeans(exp.mat[,cls[[2]]])

      group2.std=rowSds(exp.mat[,cls[[2]]])

    # Numerator (Group1.mean-Group2.mean)

      numerator=group1.mean-group2.mean

    # Denominator (Group1.std+Group2.std)

      # According to GSEA homepage, they give 0.2, where a gene's mean is equal to 0.

        # Adjusting Group.1 std

          group1.std=ifelse(0.2*abs(group1.mean)<group1.std,group1.std,0.2*abs(group1.mean))

          group1.std=ifelse(group1.std==0,0.2,group1.std)

        # Adjusting Group.2 std

          group2.std=ifelse(0.2*abs(group2.mean)<group2.std,group2.std,0.2*abs(group2.mean))

          group2.std=ifelse(group2.std==0,0.2,group2.std)

        # Calculate denominator

          denominator=group1.std+group2.std

        # Calculate Signal2Noise

          S2N=sort(numerator/denominator,decreasing = T)

  # Calculate enrichment score

    message('Calculate enrichment score vector')

    k=geneSet[[3]]

    EScore=llply(geneSet,.progress='text',function(k){

        tag.indicator <- sign(match(names(S2N), k, nomatch=0))    # notice that the sign is 0 (no tag) or 1 (tag) 

        no.tag.indicator <- 1 - tag.indicator

        N <- length(S2N) 

        Nh <- length(k)

        Nm <-  N - Nh

        alpha <- 1

        correl.vector <- abs(S2N**alpha)

        sum.correl.tag    <- sum(correl.vector[tag.indicator == 1])

        norm.tag    <- 1.0/sum.correl.tag

        norm.no.tag <- 1.0/Nm

        RES <- cumsum(tag.indicator * correl.vector * norm.tag - no.tag.indicator * norm.no.tag)

        location.tag=which(tag.indicator==1)

        return(list(EnrichmentScore=RES,Location.gene.tag=location.tag))

      }

    )

    return(list(ES=EScore,S2N=S2N))

}



728x90
반응형

'R관련 > Rfunction' 카테고리의 다른 글

Hexbin scatter plot  (0) 2020.03.23
survival analysis function  (0) 2019.03.21
scanning ks-test  (0) 2018.03.02
domain_annotation  (0) 2017.07.21
IC50, drc  (0) 2017.07.21