#!/bin/sh
# https://hbctraining.github.io/Intro-to-rnaseq-hpc-O2/lessons/07_automating_workflow.html
#
# This script takes a fastq file of RNA-Seq data, runs FastQC on it, aligns the
# reads with STAR, indexes the sorted BAM and outputs a counts file for it.
#
# USAGE: sh rnaseq_analysis_on_input_file.sh <name of fastq file> [number of cores]
#
# The number of cores is optional; it falls back to the CORES environment
# variable and then to 1. All rnaseq/... paths are relative, so run the script
# from the directory that contains the rnaseq/ folder.

# Stop at the first failing step (so a failed alignment is never counted) and
# treat any unset variable as an error.
set -eu

if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
  printf 'USAGE: sh %s <name of fastq file> [number of cores]\n' "${0##*/}" >&2
  exit 2
fi

# initialize a variable with an intuitive name to store the name of the input fastq file
fq=$1
if [ ! -r "$fq" ]; then
  printf 'ERROR: cannot read fastq file: %s\n' "$fq" >&2
  exit 1
fi

# grab base of filename for naming outputs
base=$(basename -- "$fq" .subset.fq)
echo "Sample name is $base"

# specify the number of cores to use (must be a positive integer)
cores=${2:-${CORES:-1}}
case $cores in
  '' | *[!0-9]*)
    printf 'ERROR: number of cores must be a positive integer, got: %s\n' "$cores" >&2
    exit 2
    ;;
esac
if [ "$cores" -lt 1 ]; then
  printf 'ERROR: number of cores must be a positive integer, got: %s\n' "$cores" >&2
  exit 2
fi

# fail early, before any long-running step, if a required tool is not on PATH
for tool in fastqc STAR samtools featureCounts; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'ERROR: required tool not found on PATH: %s\n' "$tool" >&2
    exit 1
  fi
done

# directory with genome reference FASTA and index files + name of the gene annotation file
genome=rnaseq/reference_data
gtf=rnaseq/reference_data/chr1-hg19_genes.gtf

if [ ! -d "$genome" ]; then
  printf 'ERROR: genome directory not found: %s\n' "$genome" >&2
  exit 1
fi
if [ ! -f "$gtf" ]; then
  printf 'ERROR: annotation file not found: %s\n' "$gtf" >&2
  exit 1
fi

# set up output filenames and locations
results=rnaseq/results
fastqc_out=$results/fastqc
star_dir=$results/STAR
counts_dir=$results/counts
# STAR appends its own suffixes to this prefix, hence the trailing underscore
align_out=$star_dir/${base}_
counts_input_bam=$star_dir/${base}_Aligned.sortedByCoord.out.bam
counts=$counts_dir/${base}_featurecounts.txt

# make all of the output directories
# The -p option means mkdir will create the whole path if it
# does not exist and refrain from complaining if it does exist
mkdir -p "$fastqc_out" "$star_dir" "$counts_dir"

echo "Processing file $fq"

# Run FastQC, writing its report straight into the appropriate folder
fastqc --outdir "$fastqc_out" "$fq"

# Align the reads with STAR, producing a coordinate-sorted BAM that also keeps
# the unmapped reads
STAR --runThreadN "$cores" \
  --genomeDir "$genome" \
  --readFilesIn "$fq" \
  --outFileNamePrefix "$align_out" \
  --outSAMtype BAM SortedByCoordinate \
  --outSAMunmapped Within \
  --outSAMattributes Standard

# Create the BAM index for the sorted alignments
samtools index "$counts_input_bam"

# Count reads per gene; -s 2 selects reverse-stranded counting
featureCounts -T "$cores" -s 2 -a "$gtf" -o "$counts" "$counts_input_bam"