본문 바로가기

카테고리 없음

Get variant supporting reads using R

728x90
반응형

#==================================
# Get variant supporting read using Rsamtools
#==================================
get.SNV_support_Read=function(chr=NULL,pos=NULL,ref=NULL,alt=NULL,bamfile=NULL){
  #BiocManager::install("Rsamtools")
  # Set variant sites
  pos.df=data.frame(chr=chr,pos=pos,ref=ref,alt=alt,stringsAsFactors=F)
  library(Rsamtools)
  # Setting variant position
  message('Setting variant position')
  pos.which=plyr::llply(1:nrow(pos.df),.progress='text',function(k){
    k1=IRanges::IRangesList(x=IRanges(pos.df$pos[k]-200,pos.df$pos[k]+200))
    names(k1)=pos.df$chr[k]
    return(k1)
  })
  names(pos.which)=paste0(pos.df$chr,'@',pos.df$pos,'@',pos.df$ref,'@',pos.df$alt)
  # Retrive variant region read
  message('Retrive variant region read')
  what=c('rname','strand','pos','qwidth','seq')
  total.reads=plyr::llply(pos.which,.progress='text',function(k){
    k1=ScanBamParam(which=k,what=what)
    bam=scanBam(bamfile,param = k1)
    return(bam)
  })
  
  summarized.read=plyr::llply(total.reads,.progress = 'text',function(k){
    k1=data.frame(k,stringsAsFactors=F)
    return(k1)
  })
  
  # Check whether retrieved read having the variants sites
  message('Selecting read aligned on the variant position')
  summarized.read2=plyr::llply(1:length(summarized.read),.progress='text',function(k){
    k1=summarized.read[[k]]
    k1$from=k1[,grep(colnames(k1),pattern = 'pos')]
    k1$to=k1$from+k1[,grep(colnames(k1),pattern = 'qwidth')]-1
    wh.pos=which(k1$to>=pos.df$pos[k] & k1$from<=pos.df$pos[k])
    k2=k1[wh.pos,]
    return(k2)
  })
  # Check whether a read supports reference or variant allele #k=2
  summarized.read3=plyr::llply(1:length(summarized.read),.progress='text',function(k){
    k1=summarized.read2[[k]]
    k1$nsite=pos.df$pos[k]-k1$from+1
    k1$nsite=ifelse(k1$nsite==0,1,k1$nsite)
    colnames(k1)[grep(colnames(k1),pattern = 'seq')]='Seq'
    k1$nsite.seq=unlist(plyr::llply(1:nrow(k1),function(j){
      j1=unlist(strsplit(k1$Seq[j],''))[k1$nsite[j]]
      return(j1)
    }))
    # Change anti-sense read into complement seq
      colnames(k1)[grep(colnames(k1),pattern = 'strand')]='Strand'
      k1$final.seq=k1$nsite.seq
    # Determine whether the read support wildtype or variant or unknown
      k1$group=ifelse(k1$final.seq==pos.df$ref[k],'Ref_supported',k1$final.seq)
      k1$group=ifelse(k1$final.seq==pos.df$alt[k],'Alt_supported',k1$group)
      k1$group=ifelse(k1$group %in% c('Ref_supported', 'Alt_supported'),k1$group,'Unknown')
    # Select necessary columns
      colnames(k1)=c('chr','strand','pos','seq.length','sequence','from','to','nsite','nsite.seq','final.seq','group')
      k2=k1[,c('chr','strand','pos','seq.length','sequence','from','to','nsite','final.seq','group')]
    # Return the result
      return(k2)
  })
  # Set names for variant
  names(summarized.read3)=names(pos.which)
  return(summarized.read3)
}

get.SNV_support_Read.r
0.00MB

728x90
반응형