#' Calculate Enrichment score for GSEA plot
#'
#' This function is slightly modified from GSEA.1.0.R downloaded from msigDB.\cr
#' Specific site address is below.\cr
#' http://software.broadinstitute.org/gsea/msigdb/download_file.jsp?filePath=/resources/software/GSEA-P-R.1.0.zip\cr
#' Original functions name are 'GSEA.EnrichmentScore' and 'GSEA.GeneRanking'\cr
#' I combined two functions into single function calculate enrichment score vector.\cr
#' It won't produce any statics including p-values and normalized enrichment score. Parameters are default.
#' @param exp.mat(default=NULL) : Matrix/data.frame of gene expression values (rows are genes, columns are samples)
#' @param cls(default=NULL) : List type of class\cr
#' (ex. list(GPC1=c('A','B','C','D'),GPC2=c('E','F','G','H')))
#' @param geneSet(default=NULL) : GeneSet list\cr
#' (ex. list(GeneSetA=c('geneA','geneB','geneC','geneD'),GeneSetB=c('geneE','geneF','geneG')))
#' @keywords enrichment score, GSEA, ES
#' @export
#' @examples
#' exp.mat=matrix(rnorm(10000),1000,10)
#' rownames(exp.mat)=paste0('gene',1:nrow(exp.mat))
#' colnames(exp.mat)=paste0('sample',1:ncol(exp.mat))
#' genes=rownames(exp.mat)
#' geneSet=list('A'=sample(genes,100),'B'=sample(genes,50),'C'=sample(genes,150))
#' samples=sample(colnames(exp.mat),10)
#' cls=list('groupA'=samples[1:5],'groupB'=samples[6:10])
#' EScore=calc.GSEA_EScore(geneSet=geneSet,exp.mat=exp.mat,cls=cls)
#' barplot(EScore$S2N)
#' plot(1:1000,EScore$ES[[1]]$EnrichmentScore,'l')
#' abline(v=EScore$ES[[1]]$Location.gene.tag)
calc.GSEA_EScore=function(geneSet=NULL,exp.mat=NULL,cls=NULL){
exp.mat=as.matrix(exp.mat[,unlist(cls)])
# Calculate signal2Noise across all genes (Group1.mean-Group2.mean)/(Group1.std+Group2.std)
message('Calculate Signal2Noise')
# Group1 statics
group1.mean=rowMeans(exp.mat[,cls[[1]]])
group1.std=rowSds(exp.mat[,cls[[1]]])
# Group2 statics
group2.mean=rowMeans(exp.mat[,cls[[2]]])
group2.std=rowSds(exp.mat[,cls[[2]]])
# Numerator (Group1.mean-Group2.mean)
numerator=group1.mean-group2.mean
# Denominator (Group1.std+Group2.std)
# According to GSEA homepage, they give 0.2, where a gene's mean is equal to 0.
# Adjusting Group.1 std
group1.std=ifelse(0.2*abs(group1.mean)<group1.std,group1.std,0.2*abs(group1.mean))
group1.std=ifelse(group1.std==0,0.2,group1.std)
# Adjusting Group.2 std
group2.std=ifelse(0.2*abs(group2.mean)<group2.std,group2.std,0.2*abs(group2.mean))
group2.std=ifelse(group2.std==0,0.2,group2.std)
# Calculate denominator
denominator=group1.std+group2.std
# Calculate Signal2Noise
S2N=sort(numerator/denominator,decreasing = T)
# Calculate enrichment score
message('Calculate enrichment score vector')
k=geneSet[[3]]
EScore=llply(geneSet,.progress='text',function(k){
tag.indicator <- sign(match(names(S2N), k, nomatch=0)) # notice that the sign is 0 (no tag) or 1 (tag)
no.tag.indicator <- 1 - tag.indicator
N <- length(S2N)
Nh <- length(k)
Nm <- N - Nh
alpha <- 1
correl.vector <- abs(S2N**alpha)
sum.correl.tag <- sum(correl.vector[tag.indicator == 1])
norm.tag <- 1.0/sum.correl.tag
norm.no.tag <- 1.0/Nm
RES <- cumsum(tag.indicator * correl.vector * norm.tag - no.tag.indicator * norm.no.tag)
location.tag=which(tag.indicator==1)
return(list(EnrichmentScore=RES,Location.gene.tag=location.tag))
}
)
return(list(ES=EScore,S2N=S2N))
}
'R관련 > Rfunction' 카테고리의 다른 글
Hexbin scatter plot (0) | 2020.03.23 |
---|---|
survival analysis function (0) | 2019.03.21 |
scanning ks-test (0) | 2018.03.02 |
domain_annotation (0) | 2017.07.21 |
IC50, drc (0) | 2017.07.21 |