# Get symbol and IDs for genes ----

#' Get symbol/ENSG/ENST/ENTREZ ids using biomaRt
#'
#' Queries the Ensembl `hsapiens_gene_ensembl` dataset through the biomaRt
#' package and returns, for the requested genes, the Ensembl gene id, Ensembl
#' transcript id, HGNC symbol and Entrez gene id.
#'
#' The Ensembl mart object is cached as a variable in the global environment
#' (`ensembl_GRCh<grch>`, or `ensembl_GRCh<grch>_v<version>` when `version` is
#' given), so repeated calls with the same settings reuse it.
#'
#' @param gene_id (default=NULL) : Gene id character vector\cr
#' (ex. BRCA1 or ENSG100001 or 100001(ENTREZ))
#' @param id_type (default='symbol') : Type of the ids given in `gene_id`.
#' Select one among 'symbol', 'ENSG', 'ENST', 'ENTREZ'\cr
#' (case-insensitive; an unambiguous prefix such as 'sym' is accepted)
#' @param grch (default=37) : version of GRCh, passed to `useEnsembl()`
#' @param version (default=NULL) : version of the Ensembl db, passed to
#' `useEnsembl()`; if not provided, the most recent version is used.
#' @return The value returned by `getBM()` for the attributes
#' `ensembl_gene_id`, `ensembl_transcript_id`, `hgnc_symbol` and
#' `entrezgene_id`, filtered on the attribute selected by `id_type`.
#' @keywords biomaRt
#' @export
#' @examples
#' \dontrun{
#' # Look up all id types for two gene symbols (requires network access)
#' gene_id <- c('TP53', 'BRCA1')
#' get_symbol_by_biomaRt(gene_id = gene_id, id_type = 'symbol', grch = 37,
#'                       version = NULL)
#' }
get_symbol_by_biomaRt <- function(gene_id = NULL, id_type = 'symbol',
                                  grch = 37, version = NULL) {
  # User-facing id type -> biomaRt attribute / filter name
  gene_id_type <- c('ensg' = 'ensembl_gene_id',
                    'enst' = 'ensembl_transcript_id',
                    'symbol' = 'hgnc_symbol',
                    'entrez' = 'entrezgene_id')

  # Resolve id_type first, so a bad value fails fast and before any remote
  # connection is attempted.
  if (!is.character(id_type) || length(id_type) != 1L || is.na(id_type)) {
    stop("`id_type` must be a single character string", call. = FALSE)
  }
  # Lower-case before matching: the documented values ('ENSG', 'ENST',
  # 'ENTREZ') are upper case while the lookup names are lower case.
  # pmatch() returns NA for no match and for ambiguous prefixes (e.g. 'ens'),
  # so exactly one filter is ever selected.
  type_idx <- pmatch(tolower(id_type), names(gene_id_type))
  if (is.na(type_idx)) {
    stop("`id_type` must be one of ",
         paste0("'", names(gene_id_type), "'", collapse = ", "),
         "; got '", id_type, "'", call. = FALSE)
  }
  key_gene_id <- gene_id_type[type_idx]

  # NOTE(review): attaching inside the function is kept so that callers which
  # rely on biomaRt being on the search path after this call keep working.
  library(biomaRt)

  # Name of the cached mart object. The version is part of the key so that a
  # mart created for one Ensembl version is not reused for another.
  grch.name <- paste0('ensembl_GRCh', grch)
  if (!is.null(version)) {
    grch.name <- paste0(grch.name, '_v', version)
  }

  # Create the Ensembl mart once and cache it in the global environment
  if (!exists(grch.name, envir = .GlobalEnv)) {
    ensembl <- useEnsembl(biomart = 'ensembl',
                          dataset = 'hsapiens_gene_ensembl',
                          GRCh = grch, version = version)
    assign(grch.name, ensembl, envir = .GlobalEnv)
  }

  # Fetch all four id types for the requested genes
  getBM(attributes = gene_id_type, filters = key_gene_id,
        values = gene_id, mart = get(grch.name, envir = .GlobalEnv))
}
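# Residue from the web page this file was copied from (not R code):
# category: none; file name: get_symbol_by_biomaRt.r; ad-slot placeholders (728x90, responsive)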