본문 바로가기

카테고리 없음

STAR aligner for loop (star38.sh)

728x90
반응형

STAR aligner로 GRCh38 reference genome에 정렬하는 스크립트이며, GTF는 Ensembl 94 version임.


# STAR alinger program path

star_path=/mnt/bigHDD/resource/ref_and_tools/tools/star_aligner/STAR-2.5.4b/source/STAR

gtf_path=/mnt/bigHDD/resource/ref_and_tools/reference/human/ensembl/GRCh38/GTF_files/Homo_sapiens.GRCh38.94.gtf

ref_path=/mnt/bigHDD/resource/ref_and_tools/reference/human/ensembl/GRCh38/GTF_files/STAR_gtf


# Get current directory

current_path=$(pwd)

echo $current_path


# Make output directory with bam files

mkdir out.bam.dir

outbam_dir=/out.bam.dir/

outbam_path=$current_path$outbam_dir

outfile_prefix=_GRCh38.84_aligned


# Get input file pairs

echo 'Provide file 1st suffix to recognize 1st fasta-files (ex : 1.fastq.gz)'

read suffix1

echo "The file 1st suffix is '$suffix1'"


echo 'Provide file 2nd suffix to recognize 2nd fasta-files (ex : 2.fastq.gz)'

read suffix2

echo "The file 2nd suffix is '$suffix2'"


#fasta file은 2개이므로 1P와 2P로 해둠. fasta.gz인 경우도 있지만 압축을 다 풀었다고 가정함.


# Generate input file list

ls | grep $suffix1 > input_file_1st_suffix_for_STAR.alinger.txt

ls | grep $suffix2 > input_file_2nd_suffix_for_STAR.alinger.txt

#cat input_file_1st_suffix_for_STAR.alinger.txt | tr --delete $suffix1 > input_file_prefix_for_STAR.alinger.txt

sed "s/$suffix1//" input_file_1st_suffix_for_STAR.alinger.txt > input_file_prefix_for_STAR.alinger.txt


# Generate input file pairs

paste -d',' input_file_1st_suffix_for_STAR.alinger.txt input_file_2nd_suffix_for_STAR.alinger.txt > input_file_pairs_for_STAR.alinger.txt

cat input_file_pairs_for_STAR.alinger.txt

echo 'Is every pair correct?(y/n)'

read yes

echo "Your decided $yes"


if [ "$yes" = "y" ] || [ "$yes" = "yes" ] || [ "$yes" = "Y" ]; then

        echo "Initiating STAR for loop"

        for j in $(cat input_file_pairs_for_STAR.alinger.txt)

        do

                echo $j

                file1=$( echo $j | cut -d',' -f1)

                file2=$( echo $j | cut -d',' -f2)

                out_file=$( echo $file1 | sed "s/$suffix1//" )

                #Do alignment

                $star_path --runThreadN 12 --genomeDir $ref_path \

                        --sjdbGTFfile $gtf_path --twopassMode Basic \

                        --outSAMtype BAM SortedByCoordinate \

                        --outFileNamePrefix $outbam_path$out_file$outfile_prefix \

                        --readFilesIn $file1 $file2

        done

fi



728x90
반응형