#' Merge two tables using dplyr package
#'
#' This function is the wrapper to merge two tables into one using dplyr package's join function.\cr
#' It is much faster than base::merge function.\cr
#' The associations between option and dplyr function are described bellow.\cr
#' merge.mat(x,y,all.x=F,all.y=F) = dplyr::inner_join\cr
#' merge.mat(x,y,all.x=T,all.y=F) = dplyr::left_join\cr
#' merge.mat(x,y,all.x=F,all.y=T) = dplyr::right_join\cr
#' merge.mat(x,y,all.x=T,all.y=T) = dplyr::full_join\cr
#'
#' @param x : Input matrix
#' @param y : Input matrix
#' @param all (default=F) : all = L is shorthand for all.x = L and all.y = L, where L is either TRUE or FALSE.
#' @param all.x (default=F) : if TRUE, then extra rows will be added to the output, \cr
#' one for each row in x that has no matching row in y. \cr
#' These rows will have NAs in those columns that are usually filled with values from y. \cr
#' The default is FALSE, so that only rows with data from both x and y are included in \cr
#' the output.
#' @param all.y (default=F) : Identical to all.x except it is option for y.
#' @param by : specifications of the columns used for merging.
#' @param by.x : specifications of the columns used for merging for x.
#' @param by.y : specifications of the columns used for merging for y.
#' @keywords merge, join, dplyr, tables, data.frame
#' @export
#' @import dplyr
#' @examples
#'tmp=expand.grid('A'=LETTERS,'B'=LETTERS,'C'=LETTERS);head(tmp)
#'tmp$key=paste0(tmp[,1],tmp[,2],tmp[,3])
#'tmp2=expand.grid('A'=LETTERS,'B'=LETTERS,'C'=LETTERS);head(tmp2)
#'tmp2$key2=sample(tmp$key,nrow(tmp2),replace=T)
#'# Basic merge
#'system.time({merge(tmp,tmp2,all=T,by.x='key',by.y='key2')})
#'# dplyr modified
#'system.time({merge.mat(tmp,tmp2,all=T,by.x='key',by.y='key2')})
#-----------------------------------------
# Merge two tables
#-----------------------------------------
merge.mat=function(x,y,all.x=F,all.y=F,all=F,by.x=NULL,by.y=NULL,by=NULL){
# Which column is key to be used for merging two tables?
if(!is.null(by.x) & !is.null(by.y)){
colnames(y)=suppressMessages(multi_sub(colnames(y),pattern = by.y,replacement = by.x,exact = T))
by=by.x
}
# Determine merge pattern all True?
# All data should be kept
if(all){
all.x=all.y=T
x1=dplyr::full_join(x,y,by=by)
}
# Only intersect should be kept
if(!all.x & !all.y){
x1=dplyr::inner_join(x,y,by=by)
}
# Only matrix X should be kept
if(all.x & !all.y){
x1=dplyr::right_join(x,y,by=by)
}
# Only matrix Y should be kept
if(!all.x & all.y){
x1=dplyr::left_join(x,y,by=by)
}
# Return matrix
x1=as.data.frame(x1,stringsAsFactors=F)
# Sort the table 1st column should key column
wh=which(colnames(x1)==by)
x1=x1[,c(wh,setdiff(1:ncol(x1),wh))]
return(x1)
}
#' Merge two tables using dplyr package
#'
#' This function is the wrapper to merge two tables into one using dplyr package's join function.\cr
#' It is much faster than base::merge function.\cr
#' The associations between option and dplyr function are described bellow.\cr
#' #merge.mat(x,y,all.x=F,all.y=F) = dplyr::inner_join\cr
#' #merge.mat(x,y,all.x=T,all.y=F) = dplyr::left_join\cr
#' #merge.mat(x,y,all.x=F,all.y=T) = dplyr::right_join\cr
#' #merge.mat(x,y,all.x=T,all.y=T) = dplyr::full_join\cr
#' @param x : Input matrix
#' @param y : Input matrix
#' @param all (default=F) : all = L is shorthand for all.x = L and all.y = L, where L is either TRUE or FALSE.
#' @param all.x (default=F) : if TRUE, then extra rows will be added to the output, \cr
#' one for each row in x that has no matching row in y. \cr
#' These rows will have NAs in those columns that are usually filled with values from y. \cr
#' The default is FALSE, so that only rows with data from both x and y are included in \cr
#' the output.
#' @param all.y (default=F) : Identical to all.x except it is option for y.
#' @param by : specifications of the columns used for merging.
#' @param by.x : specifications of the columns used for merging for x.
#' @param by.y : specifications of the columns used for merging for y.
#' @keywords merge, join, dplyr, tables, data.frame
#' @import dplyr
#' @export
#' @examples
#'tmp=expand.grid('A'=LETTERS,'B'=LETTERS,'C'=LETTERS);head(tmp)
#'tmp$key=paste0(tmp[,1],tmp[,2],tmp[,3])
#'tmp2=expand.grid('A'=LETTERS,'B'=LETTERS,'C'=LETTERS);head(tmp2)
#'tmp2$key2=sample(tmp$key,nrow(tmp2),replace=T)
#'# Basic merge
#'system.time({merge(tmp,tmp2,all=T,by.x='key',by.y='key2')})
#'# dplyr modified
#'system.time({mat.merge(tmp,tmp2,all=T,by.x='key',by.y='key2')})
#-----------------------------------------
# Merge two tables
#-----------------------------------------
mat.merge=function(x,y,all=F,all.x=F,all.y=F,by=NULL,by.x=NULL,by.y=NULL){
# Which column is key to be used for merging two tables?
if(!is.null(by.x) & !is.null(by.y)){
colnames(y)=suppressMessages(multi_sub(colnames(y),pattern = by.y,replacement = by.x,exact = T))
by=by.x
}
# Determine merge pattern all True?
# All data should be kept
if(all){
all.x=all.y=T
x1=dplyr::full_join(x,y,by=by)
}
# Only intersect should be kept
if(!all.x & !all.y){
x1=dplyr::inner_join(x,y,by=by)
}
# Only matrix X should be kept
if(all.x & !all.y){
x1=dplyr::right_join(x,y,by=by)
}
# Only matrix Y should be kept
if(!all.x & all.y){
x1=dplyr::left_join(x,y,by=by)
}
# Return matrix
x1=as.data.frame(x1,stringsAsFactors=F)
# Sort the table 1st column should key column
wh=which(colnames(x1)==by)
x1=x1[,c(wh,setdiff(1:ncol(x1),wh))]
return(x1)
}