# Align reads to the GRCh38 reference genome with the STAR aligner; the GTF annotation is version 94.
# Tool and reference locations used by the STAR invocation further down.
#
# Executable of the STAR aligner (STAR-2.5.4b source build).
star_path='/mnt/bigHDD/resource/ref_and_tools/tools/star_aligner/STAR-2.5.4b/source/STAR'
# Directory handed to STAR as --genomeDir.
ref_path='/mnt/bigHDD/resource/ref_and_tools/reference/human/ensembl/GRCh38/GTF_files/STAR_gtf'
# Ensembl GRCh38 annotation (release 94), handed to STAR as --sjdbGTFfile.
gtf_path='/mnt/bigHDD/resource/ref_and_tools/reference/human/ensembl/GRCh38/GTF_files/Homo_sapiens.GRCh38.94.gtf'
# Remember the directory the script was started from; the BAM output
# directory is created directly inside it.
current_path=$(pwd)
printf '%s\n' "$current_path"
# Output directory for the BAM files, expressed as an absolute path with a
# trailing slash so that a file-name prefix can be appended to it directly.
outbam_dir=/out.bam.dir/
outbam_path=$current_path$outbam_dir
# -p: a directory left over from a previous run is not an error.
# A directory that cannot be created is fatal, because every later STAR
# run would have nowhere to write its output.
mkdir -p -- "$outbam_path" || {
  printf 'Cannot create output directory: %s\n' "$outbam_path" >&2
  exit 1
}
# Text appended after the sample name when building --outFileNamePrefix.
# NOTE(review): this says "84" although gtf_path points at the release 94
# annotation. Left unchanged because it determines the output file names;
# confirm whether it should read 94.
outfile_prefix=_GRCh38.84_aligned
# Get input file pairs
#
# Ask for the two file-name suffixes that tell the 1st and the 2nd read file
# of each sample apart. The answers are stored in suffix1 and suffix2.
# -r keeps backslashes in the answer literal. An empty answer (or end of
# input) aborts the script, because an empty suffix cannot select any files.
echo 'Provide file 1st suffix to recognize 1st fasta-files (ex : 1.fastq.gz)'
read -r suffix1 || : # EOF leaves suffix1 empty; handled just below
if [ -z "$suffix1" ]; then
  printf '%s\n' 'No 1st suffix given; aborting.' >&2
  exit 1
fi
printf "The file 1st suffix is '%s'\n" "$suffix1"
echo 'Provide file 2nd suffix to recognize 2nd fasta-files (ex : 2.fastq.gz)'
read -r suffix2 || : # EOF leaves suffix2 empty; handled just below
if [ -z "$suffix2" ]; then
  printf '%s\n' 'No 2nd suffix given; aborting.' >&2
  exit 1
fi
printf "The file 2nd suffix is '%s'\n" "$suffix2"
# Each sample comes as two read files (1st and 2nd mate), told apart by the
# suffixes in suffix1 / suffix2. Some inputs exist in gzip-compressed form;
# this script assumes they have all been decompressed beforehand.

# Print, one per line, the regular files of the current directory whose
# names END with the literal string $1 (no regex or glob interpretation).
# Prints nothing for an empty suffix. Runs in a subshell so that its loop
# variable does not leak into the script.
_list_by_suffix() (
  [ -n "$1" ] || return 0
  for candidate in ./*"$1"; do
    # Skips the unexpanded pattern when nothing matches, and directories.
    [ -f "$candidate" ] || continue
    printf '%s\n' "${candidate#./}"
  done
)

# Write the helper lists used by the alignment step into the current
# directory.
# Arguments: $1 - suffix of the 1st read files, $2 - suffix of the 2nd
# Outputs:   input_file_1st_suffix_for_STAR.alinger.txt  (1st read files)
#            input_file_2nd_suffix_for_STAR.alinger.txt  (2nd read files)
#            input_file_prefix_for_STAR.alinger.txt      (names minus $1)
#            input_file_pairs_for_STAR.alinger.txt       ("1st,2nd" lines)
#            Warnings go to stderr.
# Runs in a subshell so that its working variables stay private.
_write_star_input_lists() (
  first_list=input_file_1st_suffix_for_STAR.alinger.txt
  second_list=input_file_2nd_suffix_for_STAR.alinger.txt
  prefix_list=input_file_prefix_for_STAR.alinger.txt
  pairs_list=input_file_pairs_for_STAR.alinger.txt

  # Generate input file list
  _list_by_suffix "$1" > "$first_list"
  _list_by_suffix "$2" > "$second_list"

  # Sample names: the 1st read file names with the trailing suffix removed.
  # This list is not read again by the visible part of this script.
  while IFS= read -r mate1_name; do
    printf '%s\n' "${mate1_name%"$1"}"
  done < "$first_list" > "$prefix_list"

  # Generate input file pairs. Lines are joined purely by position, so the
  # pairing is only right when both lists have the same length and order.
  paste -d',' "$first_list" "$second_list" > "$pairs_list"

  first_count=$(( $(wc -l < "$first_list") ))
  second_count=$(( $(wc -l < "$second_list") ))
  if [ "$first_count" -eq 0 ]; then
    printf "Warning: no file name ends with the 1st suffix '%s'.\n" "$1" >&2
  fi
  if [ "$first_count" -ne "$second_count" ]; then
    printf 'Warning: %s files match the 1st suffix but %s match the 2nd; the pairs below are misaligned.\n' \
      "$first_count" "$second_count" >&2
  fi
)

_write_star_input_lists "$suffix1" "$suffix2"
# Show the pairs so they can be checked by eye.
cat input_file_pairs_for_STAR.alinger.txt
# Run the aligner once for every "1st,2nd" line of a pairs file.
# Arguments: $1 - file with one comma-separated pair of read files per line
# Globals:   star_path, ref_path, gtf_path, outbam_path, outfile_prefix,
#            suffix1 (read); file1, file2, out_file, star_failures (written)
# Outputs:   each pair on stdout before it is aligned; problems on stderr
# Returns:   0 when every pair was aligned successfully, non-zero otherwise
_run_star_on_pairs() {
  star_failures=0
  # The list is read through fd 3 so that the aligner, which inherits
  # stdin, cannot consume the remaining lines.
  while IFS=, read -r file1 file2 <&3; do
    # A line with only one name comes from lists of unequal length; running
    # the aligner on it would silently treat the sample as single-end.
    if [ -z "$file1" ] || [ -z "$file2" ]; then
      printf 'Skipping incomplete pair: %s,%s\n' "$file1" "$file2" >&2
      star_failures=$((star_failures + 1))
      continue
    fi
    printf '%s,%s\n' "$file1" "$file2"
    # Sample name: 1st file name without its trailing literal suffix.
    out_file=${file1%"$suffix1"}
    #Do alignment
    # NOTE(review): gzip-compressed input would additionally need STAR's
    # --readFilesCommand option; not added because the inputs are assumed
    # to be decompressed - confirm.
    if ! "$star_path" --runThreadN 12 --genomeDir "$ref_path" \
      --sjdbGTFfile "$gtf_path" --twopassMode Basic \
      --outSAMtype BAM SortedByCoordinate \
      --outFileNamePrefix "${outbam_path}${out_file}${outfile_prefix}" \
      --readFilesIn "$file1" "$file2"; then
      printf 'STAR failed for pair: %s,%s\n' "$file1" "$file2" >&2
      star_failures=$((star_failures + 1))
    fi
  done 3< "$1"
  [ "$star_failures" -eq 0 ]
}

# Ask for confirmation of the pairs listed above; only y, yes or Y starts
# the alignment.
echo 'Is every pair correct?(y/n)'
read -r yes || : # EOF leaves the answer empty, which counts as "no"
printf 'Your decided %s\n' "$yes"
case $yes in
  y | yes | Y)
    echo "Initiating STAR for loop"
    _run_star_on_pairs input_file_pairs_for_STAR.alinger.txt
    ;;
  *)
    printf '%s\n' 'Pairs not confirmed; no alignment was run.' >&2
    ;;
esac