#==================================
# Get variant supporting read using Rsamtools
#==================================
get.SNV_support_Read=function(chr=NULL,pos=NULL,ref=NULL,alt=NULL,bamfile=NULL){
#BiocManager::install("Rsamtools")
# Set variant sites
pos.df=data.frame(chr=chr,pos=pos,ref=ref,alt=alt,stringsAsFactors=F)
library(Rsamtools)
# Setting variant position
message('Setting variant position')
pos.which=plyr::llply(1:nrow(pos.df),.progress='text',function(k){
k1=IRanges::IRangesList(x=IRanges(pos.df$pos[k]-200,pos.df$pos[k]+200))
names(k1)=pos.df$chr[k]
return(k1)
})
names(pos.which)=paste0(pos.df$chr,'@',pos.df$pos,'@',pos.df$ref,'@',pos.df$alt)
# Retrive variant region read
message('Retrive variant region read')
what=c('rname','strand','pos','qwidth','seq')
total.reads=plyr::llply(pos.which,.progress='text',function(k){
k1=ScanBamParam(which=k,what=what)
bam=scanBam(bamfile,param = k1)
return(bam)
})
summarized.read=plyr::llply(total.reads,.progress = 'text',function(k){
k1=data.frame(k,stringsAsFactors=F)
return(k1)
})
# Check whether retrieved read having the variants sites
message('Selecting read aligned on the variant position')
summarized.read2=plyr::llply(1:length(summarized.read),.progress='text',function(k){
k1=summarized.read[[k]]
k1$from=k1[,grep(colnames(k1),pattern = 'pos')]
k1$to=k1$from+k1[,grep(colnames(k1),pattern = 'qwidth')]-1
wh.pos=which(k1$to>=pos.df$pos[k] & k1$from<=pos.df$pos[k])
k2=k1[wh.pos,]
return(k2)
})
# Check whether a read supports reference or variant allele #k=2
summarized.read3=plyr::llply(1:length(summarized.read),.progress='text',function(k){
k1=summarized.read2[[k]]
k1$nsite=pos.df$pos[k]-k1$from+1
k1$nsite=ifelse(k1$nsite==0,1,k1$nsite)
colnames(k1)[grep(colnames(k1),pattern = 'seq')]='Seq'
k1$nsite.seq=unlist(plyr::llply(1:nrow(k1),function(j){
j1=unlist(strsplit(k1$Seq[j],''))[k1$nsite[j]]
return(j1)
}))
# Change anti-sense read into complement seq
colnames(k1)[grep(colnames(k1),pattern = 'strand')]='Strand'
k1$final.seq=k1$nsite.seq
# Determine whether the read support wildtype or variant or unknown
k1$group=ifelse(k1$final.seq==pos.df$ref[k],'Ref_supported',k1$final.seq)
k1$group=ifelse(k1$final.seq==pos.df$alt[k],'Alt_supported',k1$group)
k1$group=ifelse(k1$group %in% c('Ref_supported', 'Alt_supported'),k1$group,'Unknown')
# Select necessary columns
colnames(k1)=c('chr','strand','pos','seq.length','sequence','from','to','nsite','nsite.seq','final.seq','group')
k2=k1[,c('chr','strand','pos','seq.length','sequence','from','to','nsite','final.seq','group')]
# Return the result
return(k2)
})
# Set names for variant
names(summarized.read3)=names(pos.which)
return(summarized.read3)
}
카테고리 없음
Get variant supporting reads using R
728x90
반응형
728x90
반응형