본문 바로가기

카테고리 없음

STAR aligner 자동화 스크립트

728x90
반응형

#!/bin/bash
# Interactive driver script: asks for FASTQ file-name suffixes, pairs the
# matching files found in the current directory, and runs STAR (with STARsolo
# options) on each pair.
printf '%s\n' 'This script aligns single cell RNAseq fastq files to GRCH38 reference genome'

# STAR aligner program path

# Attached file: star_solo38.sh (0.00MB)

# Common roots shared by every tool / reference path below.
resource_root=/mnt/bigHDD/resource/ref_and_tools
tools_dir=$resource_root/tools
gtf_dir=$resource_root/reference/human/ensembl/GRCh38/GTF_files

# STAR executable, annotation GTF, STAR genome directory and the cell-barcode
# whitelist handed to STAR further down in the script.
star_path="$tools_dir/star_solo/STAR-2.7.9a/source/STAR"
gtf_path="$gtf_dir/Homo_sapiens.GRCh38.94.gtf"
ref_path="$gtf_dir/STAR_gtf/2.7.9a"
barcode_path="$tools_dir/star_solo/barcodes/3M-february-2018.txt"

# BBMap (bbduk.sh) settings for adapter cutting.
# NOTE(review): neither variable is referenced by the visible part of the
# script; presumably kept for the disabled adapter-cutting step.
bbmap_path="$tools_dir/bbmap/bbduk.sh"
adaptor_seq_path="$tools_dir/bbmap/resources/adapters.fa"
printf '%s\n' "You need to see whether bbmap's adaptor.fa has ur adaptor sequence"

# Get current directory: input FASTQ files are looked up here and the output
# directory is created below it.
current_path=$(pwd)
echo "$current_path"

# Out-path for adapter-trimmed fastq files (step currently disabled).
#mkdir 1.adaptor_cutted_fastq
#outfq_dir=/1.adaptor_cutted_fastq/
#outfq_path=$current_path$outfq_dir
#outfq_suffix=.cutted.fq.gz

# Make the output directory for the BAM files.
# -p lets the script be re-run in the same directory without an error, and a
# real failure (e.g. no write permission) stops the run before any alignment.
mkdir -p out.bam.dir || {
  echo "Cannot create output directory 'out.bam.dir' in $current_path" >&2
  exit 1
}
outbam_dir=/out.bam.dir/
outbam_path=$current_path$outbam_dir
# Tag appended to every output file-name prefix.
# NOTE(review): the tag says release 84 while gtf_path points at the
# GRCh38.94 annotation — confirm which one is intended before renaming.
outfile_prefix=_GRCh38.84_
# Get input file pairs.
# Three patterns are read from stdin; later steps use them to pick the files
# of each kind out of the current directory. `read -r` keeps backslashes in
# the typed text literal.

# 1st file of each pair (barcode read in a standard 10X run).
echo 'Provide suffix of the 1st fastq.gz file to bring 1st fasta-files (ex : R1.fastq.gz)'
echo 'Standard 10X runs have cDNA as Read2 and barcode as Read1.'
read -r suffix1
echo "The suffix of the 1st file (barcode) is '$suffix1'"
echo '#######################################'

# 2nd file of each pair (cDNA read).
echo 'Provide suffix of the 2nd fastq.gz file to bring 2nd fasta-files (ex : R2.fastq.gz)'
read -r suffix2
echo "The suffix of the 2nd file (cDNA sequence) is '$suffix2'"
echo '#######################################'

# Lane/index file suffix. The variable is called `barcode`, but it holds the
# suffix typed at this prompt, not a barcode sequence.
echo 'Provide suffix of the lane fastq.file to recognize lane files (ex : I1.fastq.gz)'
read -r barcode
echo "The suffix of the lane info (illumina lane info) fastq.file is '$barcode'"
echo '#######################################'

# Generate input file list

# list_matching PATTERN LISTFILE
#   Writes to LISTFILE, one per line, the entries of the current directory
#   whose name matches PATTERN. PATTERN is a grep basic regular expression
#   matched anywhere in the name. An empty PATTERN produces an empty list.
list_matching() {
  if [ -z "$1" ]; then
    : > "$2"
    return 0
  fi
  # Quoting plus -e keeps the pattern a single argument even when it contains
  # spaces or starts with '-'.
  ls | grep -e "$1" > "$2"
}

list_matching "$suffix1" input_file_1st_suffix_for_STAR_SOLO.aligner.txt
list_matching "$suffix2" input_file_2nd_suffix_for_STAR_SOLO.aligner.txt
list_matching "$barcode" input_file_barcode_suffix_for_STAR_SOLO.aligner.txt
#cat input_file_1st_suffix_for_STAR.alinger.txt | tr --delete $suffix1 > input_file_prefix_for_STAR.alinger.txt
#sed "s/$suffix1//" input_file_1st_suffix_for_STAR.alinger.txt > input_file_prefix_for_STAR.alinger.txt

# paste joins the lists line by line, so lists of different length give
# shifted or incomplete pairs; warn before the pairs are shown.
n_first=$(wc -l < input_file_1st_suffix_for_STAR_SOLO.aligner.txt)
n_second=$(wc -l < input_file_2nd_suffix_for_STAR_SOLO.aligner.txt)
if (( n_first != n_second )); then
  echo "Warning: $((n_first)) file(s) match the 1st suffix but $((n_second)) match the 2nd; check the pairs below carefully." >&2
fi

# Generate input file pairs (one line per pair: fastq1,fastq2,lane)
paste -d',' input_file_1st_suffix_for_STAR_SOLO.aligner.txt input_file_2nd_suffix_for_STAR_SOLO.aligner.txt input_file_barcode_suffix_for_STAR_SOLO.aligner.txt > input_file_pairs_for_STAR_SOLO.aligner.txt

cat input_file_pairs_for_STAR_SOLO.aligner.txt

# Ask for confirmation of the pair list, then run STAR once per pair.
echo 'Is every pair correct [fastq1,fastq2,lane]?(y/n)'
read -r yes
echo "Your decided $yes"

if [ "$yes" = "y" ] || [ "$yes" = "yes" ] || [ "$yes" = "Y" ]; then
  echo "Initiating STAR solo for loop"
  # The pair list is read line by line on file descriptor 3: file names are not
  # word-split or glob-expanded, and stdin stays untouched for the child process.
  while IFS= read -r j <&3 || [ -n "$j" ]; do
    [ -n "$j" ] || continue   # skip blank lines
    # STAR aligner alignment process
    echo "$j"
    # Line layout: fastq1,fastq2,lane — the third field is not used here.
    IFS=, read -r file1 file2 _ <<< "$j"
    # file1: Fastq file 1 (barcode file in a standard run); file2: Fastq file 2
    if [ -z "$file1" ] || [ -z "$file2" ]; then
      echo "Skipping incomplete pair: $j" >&2
      continue
    fi
    # Output name stem = first file name with the first match of the 1st
    # suffix pattern removed.
    out_file=$(printf '%s\n' "$file1" | sed "s/$suffix1//")

    # Do alignment.
    # The disabled "--twopassMode Basic" option used to sit after a line
    # continuation backslash, where '#' does not start a comment, so it leaked
    # into the STAR command line; it is kept here as a real comment instead.
    star_args=(
      --runThreadN 12
      --genomeDir "$ref_path"
      --sjdbGTFfile "$gtf_path"
      # --twopassMode Basic
      --soloType CB_UMI_Simple
      --soloUMIlen 12
      --soloCBwhitelist "$barcode_path"
      --outSAMtype BAM SortedByCoordinate
      --outFileNamePrefix "$outbam_path$out_file$outfile_prefix"
      --readFilesCommand zcat
      # second file of the pair first, then the first (barcode) file
      --readFilesIn "$file2" "$file1"
      --quantMode GeneCounts
    )
    "$star_path" "${star_args[@]}" \
      || echo "STAR failed for pair: $j" >&2
  done 3< input_file_pairs_for_STAR_SOLO.aligner.txt
fi

#===============================================================
#===============================================================
# RNA quantification using cufflinks
#===============================================================

# Make output directory for RNA quant files
#mkdir cufflinks_quant

# Get input bam files
#ls $outbam_path | grep sortedByCoord.out.bam > $outbam_path$(echo input_file_bam_files.txt)

# Made by Sejin Oh
# Grammar check by Minjee Kim

728x90
반응형