본문 바로가기

카테고리 없음

Draw lolliplot function in R

728x90
반응형

draw_lolliplot.r
0.00MB

#' Draw lolliplot to visualize mutation distribution
#'
#' This function is to draw mutation distribution for each or single subtype\cr
#' @param x : matrix that contain following fields\cr
#' 1. symbol\cr
#' 2. location\cr
#' 3. consequence
#' 4. subtype\cr
#' 5. hotspot (O)
#' @param subtype.col (default=NULL) : Give subtype colors as vector with names (ex. c('GPC1'='skyblue','GPC2'='tomato'))\cr
#' if it is not provided, subtype.col will be black.
#' @param hotspot.col (default='yellow') : Give hotspot col to highlight hotspot.\cr
#' It will be applied to boundary of nodes
#' @import plyr
#' @import rjson
#' @import RCurl
#' @keywords domain, canonical protein
#' @export
#' @examples
#' variant.site=sample(200:300,10)
#' x=data.frame('symbol'=sample(c('EGFR','TP53'),10,replace=T),
#' 'location'=variant.site,
#' 'consequence'=paste0(sample(LETTERS,10),variant.site,sample(LETTERS,10)),
#' 'subtype'=paste0('GPC',sample(1:2,10,replace=T)),stringsAsFactors=F,
#' 'hotspot'=sample(c(NA,'O'),10,replace=T))
#' subtype.col=c('GPC1'='skyblue','GPC2'='tomato')
#' hotspot.col='yellow'
#' draw_lolliplot(x=x,subtype.col=subtype.col)
draw_lolliplot=function(x,subtype.col=NULL,hotspot.col='yellow'){
# Get domain structure information
genes=as.character(unique(x[,1]))
x1=ldply(genes,.progress='text',function(k){
x1=get_domain_info(symbol=k)
return(x1)
})
# Draw lolliplot for each gene
# Color setting for node
if(!is.null(subtype.col)){
col.mat=data.frame('subtype'=names(subtype.col),'col'=subtype.col,stringsAsFactors=F)
x2=mat.merge(x,col.mat,by='subtype')
}else{
x2=x1
x2$col='black'
x2$subtype=''
}
x2$hotspot=ifelse(x2$hotspot=='O',hotspot.col,'black')
# Calculate height of variants
tmp=count(paste0(x2$consequence,';',x2$subtype))
x2$x=paste0(x2$consequence,';',x2$subtype)
x2=mat.merge(x2,tmp,by='x')

# Draw the plot
library(trackViewer)
library('GenomicRanges')
library(grid)
l_ply(genes,.progress='text',function(k){
x3=x2[which(x2$symbol==k),]
# Variant protein range
snp_gr=GRanges(seqnames = x3$symbol,
IRanges(x3$location,width = 1,
names=x3$consequence),
color=x3$col,
border=x3$hotspot)
snp_gr$label.parameter.rot=45
snp_gr$label.parameter.gp=gpar(col='black')
snp_gr$label=NA;snp_gr$label[which(snp_gr$border=='yellow')]='H'
snp_gr$label.col='white'
snp_gr$score=as.numeric(x3$freq)
# Protein domain info
d.mat=x1[which(x1$hgnc_symbol==k),]
dom_gr=GRanges(seqnames = rep(k,nrow(d.mat)+2),
IRanges(start=c(1,d.mat$pfam_start,
as.numeric(unique(d.mat$length))),
width=c(1,d.mat$pfam_end-d.mat$pfam_start,1),
names=c("",d.mat$domain.name,"")),
fill=c(1,as.factor(d.mat$domain.name),1),
height=rep(0.05,nrow(d.mat)+2)
)
# Set legend
legend=list('labels'=c(names(subtype.col),'Hotspot'),fill=c(subtype.col,'white'),
col=c(rep('black',length(subtype.col)),hotspot.col))
trackViewer::lolliplot(snp_gr,dom_gr,ylab=k,legend = legend)
})
}

728x90
반응형