#!/bin/bash
# Combine the FPKM column of every sample's genes.fpkm_tracking file into one
# tab-separated matrix (written for Samsung's RNA-seq data).
#
# Inputs:
#   <com_dir><suffix_1>            tracking file that supplies columns 1-7
#   <out_dir><sample><suffix_1>    per-sample tracking file; column 10 = FPKM
# Outputs, both written to the current working directory:
#   fpkm_prefix.txt                        samples that were combined
#   GBM_bulkRseq_validation_20180521.txt   columns 1-7 + one column per sample
#
# NOTE(review): sample names are taken from the current working directory
# while the files are read from out_dir -- presumably the job is submitted
# from out_dir; confirm.
#$ -S /bin/bash
#$ -N bulkCombine
#$ -cwd
set -euo pipefail

#========================================
# Set up parameters
#========================================
out_dir=/path_to_FPKM_file_dir/
suffix_1=/genes.fpkm_tracking
com_dir=/path_to_FPKM_dir_header/header_dir
readonly out_dir suffix_1 com_dir
readonly prefix_list=fpkm_prefix.txt
readonly matrix=GBM_bulkRseq_validation_20180521.txt
readonly tab=$'\t'

# Scratch directory; global so the EXIT trap can still see it after main().
workdir=

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message
#######################################
die() {
  printf 'bulkCombine: %s\n' "$*" >&2
  exit 1
}

#######################################
# Remove the scratch directory (runs on every exit path).
# Globals: workdir (read)
#######################################
cleanup() {
  if [[ -n "$workdir" ]]; then
    rm -rf -- "$workdir"
  fi
}
trap cleanup EXIT

#######################################
# Print the data rows of a tracking file in a reproducible order.
# The header line is dropped and rows are ordered by descending tracking id.
# The ids are text, so a byte-wise (LC_ALL=C) string key is used instead of
# a numeric one; a numeric key would treat every id as 0.
# Arguments: $1 - path to a genes.fpkm_tracking file
# Outputs:   sorted data rows on stdout
#######################################
sorted_rows() {
  tail -n +2 -- "$1" | LC_ALL=C sort -t "$tab" -r -k1,1
}

#######################################
# List the sample names: entries of the current directory that are not
# scripts, text files, job logs or scratch files and that have a tracking
# file below out_dir.
# Globals: out_dir, suffix_1 (read)
# Outputs: one sample name per line on stdout; skipped entries on stderr
#######################################
list_samples() {
  local entry
  for entry in *; do
    # Literal substring matches; the dots are not regex wildcards here, so a
    # name such as "fresh_tumor" is no longer mistaken for a ".sh" file.
    case "$entry" in
      *.sh* | *.txt* | *bulkCombine* | *tmp*) continue ;;
    esac
    if [[ ! -f "${out_dir}${entry}${suffix_1}" ]]; then
      printf 'bulkCombine: skipping %s: no %s\n' \
        "$entry" "${out_dir}${entry}${suffix_1}" >&2
      continue
    fi
    printf '%s\n' "$entry"
  done
}

#######################################
# Build the combined FPKM matrix.
# Globals: out_dir, suffix_1, com_dir, prefix_list, matrix (read),
#          workdir (written)
# Returns: exits non-zero if an input is missing or a sample's tracking ids
#          do not line up with the header file
#######################################
main() {
  local header_file=${com_dir}${suffix_1}
  [[ -r "$header_file" ]] || die "cannot read header file: $header_file"

  workdir=$(mktemp -d) || die "mktemp failed"
  local ids=$workdir/ids
  local built=$workdir/matrix
  local sorted=$workdir/sorted
  local column=$workdir/column
  local joined=$workdir/joined

  #=======================================
  # Get file prefix
  #=======================================
  list_samples > "$prefix_list"
  [[ -s "$prefix_list" ]] \
    || die "no sample with a ${suffix_1#/} file found in $PWD"

  # Annotation columns: header line first, then the rows sorted by id.
  {
    head -n 1 -- "$header_file" | cut -f1-7
    sorted_rows "$header_file" | cut -f1-7
  } > "$built"
  tail -n +2 -- "$built" | cut -f1 > "$ids"

  #======================================
  # Combine the FPKM values sample by sample
  #======================================
  local sample sample_file
  while IFS= read -r sample; do
    printf '%s\n' "$sample"
    sample_file=${out_dir}${sample}${suffix_1}

    [[ "$(head -n 1 -- "$sample_file" | cut -f10)" == "FPKM" ]] \
      || die "column 10 of $sample_file is not FPKM"

    sorted_rows "$sample_file" > "$sorted"

    # Rows are pasted side by side, so the ids must match line for line.
    # Stop instead of silently writing misaligned values.
    if ! cut -f1 -- "$sorted" | diff -- "$ids" - >&2; then
      die "tracking ids of $sample do not match $header_file"
    fi

    # Column for this sample: its name as header, then the FPKM values.
    {
      printf '%s\n' "$sample"
      cut -f10 -- "$sorted"
    } > "$column"

    paste -- "$built" "$column" > "$joined"
    mv -- "$joined" "$built"
  done < "$prefix_list"

  # Publish only a complete matrix.
  mv -- "$built" "$matrix"
}

main
# Other posts in the 'Bioinformatics(생정보학)' category
# Cancer subclone calculation (0) | 2018.10.02 |
# ---|---|
# protein localization-related db (0) | 2018.06.22 |
# STAR alignment script in cluster (0) | 2018.05.12 |
# FPKM to TPM conversion (0) | 2018.05.01 |
# bcftools, working with VCF files (0) | 2018.04.23 |