본문 바로가기

Bioinformatics(생정보학)

STAR alignment script in cluster

728x90
반응형

#!/bin/bash

# Align Samsung single-cell RNA-seq paired FASTQ files with STAR, one sample
# at a time. Intended to be submitted as a cluster batch job: the "#$" lines
# below are scheduler directives (Grid Engine qsub syntax).
# NOTE(review): confirm the scheduler in use before changing them.

# Interpret the job script with /bin/bash.
#$ -S /bin/bash

# Name under which the job appears in the queue.
#$ -N singleRseq

# Run the job from the directory it was submitted from; fq_prefix.txt is
# written to and read from that directory.
#$ -cwd


#----------------------------------------
# Configuration
#----------------------------------------

# STAR executable invoked for every sample.
star_path='/home/osj118/ref_and_tools/star_aligner/STAR-2.5.4b/source/STAR'

# Reference data: directory handed to --genomeDir and the annotation file
# handed to --sjdbGTFfile.
ref_genome_dir='/home/osj118/ref_and_tools/samsung_GTF'
ref_GTF='/home/osj118/ref_and_tools/samsung_GTF/ensGene.20151111.new_name.gtf'

# Input and output locations. Both are concatenated directly with the sample
# prefix, so the trailing slash is required.
in_dir='/scratch/sjoh/samsung/Singlecell_fastq/'
out_dir='/scratch/sjoh/samsung/Singlecell_fastq/aligned_bam/'

# File-name endings of the two read files of each sample
# (<prefix><suffix_1> and <prefix><suffix_2>).
suffix_1='_RSq.1.fq.gz'
suffix_2='_RSq.2.fq.gz'

#=======================================
# Get file prefix
#=======================================

# Print the unique sample prefixes found in a directory, one per line.
# A prefix is a file's base name with the mate-1 suffix stripped, so only
# samples that actually have a mate-1 file are listed and unrelated files
# never produce a bogus prefix.
# Arguments: $1 - directory to scan (defaults to the current directory)
#            $2 - mate-1 file-name suffix to match and strip
# Outputs:   sorted, de-duplicated prefixes on stdout (nothing if no match)
list_fq_prefixes() {
        local dir=${1:-.}
        local suffix=$2
        local fq base

        # Accept the directory with or without a trailing slash.
        dir=${dir%/}

        for fq in "$dir"/*"$suffix"; do
                # An unmatched glob stays literal; skip it instead of
                # reporting a non-existent sample.
                [[ -e "$fq" ]] || continue
                base=${fq##*/}
                printf '%s\n' "${base%"$suffix"}"
        done | sort -u
}

# Scan the same directory the alignment step reads its FASTQ files from,
# rather than whatever directory the job happens to run in.
# If your files follow a different naming scheme, change suffix_1/suffix_2;
# the prefix is whatever precedes suffix_1.
list_fq_prefixes "$in_dir" "$suffix_1" > fq_prefix.txt


#======================================
# Do alignment, one STAR run per sample prefix
#======================================

# Run STAR on the two read files of a single sample.
# Globals:   star_path, in_dir, out_dir, suffix_1, suffix_2, ref_GTF,
#            ref_genome_dir (all read)
# Arguments: $1 - sample prefix (file name without the mate suffix)
# Returns:   the exit status of the STAR command
align_sample() {
        local sample=$1
        # Build the command line as an array so every option reaches STAR as
        # one command and paths are never word-split.
        local -a star_args=(
                --runThreadN 20
                --twopassMode Basic
                --outSAMtype BAM SortedByCoordinate
                --readFilesCommand zcat
                --readFilesIn "${in_dir}${sample}${suffix_1}" "${in_dir}${sample}${suffix_2}"
                --sjdbGTFfile "$ref_GTF"
                --genomeDir "$ref_genome_dir"
                --outFileNamePrefix "${out_dir}${sample}"
        )
        "$star_path" "${star_args[@]}"
}

if [[ ! -r fq_prefix.txt ]]; then
        echo "fq_prefix.txt not found or unreadable; nothing to align" >&2
elif ! mkdir -p -- "$out_dir"; then
        echo "cannot create output directory: $out_dir" >&2
else
        # Read the list line by line so a prefix is never split or glob-expanded.
        # The second test keeps a final line that lacks a trailing newline.
        while IFS= read -r input || [[ -n "$input" ]]; do
                [[ -n "$input" ]] || continue
                printf '%s\n' "$input"
                # stdin is redirected so the aligner cannot consume the rest
                # of the prefix list; a failed sample is reported and the
                # remaining samples are still processed.
                align_sample "$input" < /dev/null ||
                        echo "STAR failed for sample: $input" >&2
        done < fq_prefix.txt
fi

728x90
반응형