lesson1/rnaseq_analysis_on_input_file.sh

   1 #!/bin/bash
   2
   3 # Based on
   4 # https://hbctraining.github.io/Intro-to-rnaseq-hpc-O2/lessons/07_automating_workflow.html
   5 #
   6
   7 # This script takes a fastq file of RNA-Seq data, runs FastQC and outputs a counts file for it.
   8 # USAGE: sh rnaseq_analysis_on_input_file.sh <name of fastq file>
   9
  10 set -e
  11
  12 # initialize a variable with an intuitive name to store the name of the input fastq file
  13 fq=$1
  14
  15 # grab base of filename for naming outputs
  16 base=`basename $fq .subset.fq`
  17 echo "Sample name is $base"
  18
  19 # specify the number of cores to use
  20 cores=4
  21
  22 # directory with genome reference FASTA and index files + name of the gene annotation file
  23 genome=rnaseq/reference_data
  24 gtf=rnaseq/reference_data/chr1-hg19_genes.gtf
  25
  26 # make all of the output directories
  27 # The -p option means mkdir will create the whole path if it
  28 # does not exist and refrain from complaining if it does exist
  29 mkdir -p rnaseq/results/fastqc
  30 mkdir -p rnaseq/results/STAR
  31 mkdir -p rnaseq/results/counts
  32
  33 # set up output filenames and locations
  34 fastqc_out=rnaseq/results/fastqc
  35 align_out=rnaseq/results/STAR/${base}_
  36 counts_input_bam=rnaseq/results/STAR/${base}_Aligned.sortedByCoord.out.bam
  37 counts=rnaseq/results/counts/${base}_featurecounts.txt
  38
  39 echo "Processing file $fq"
  40
  41 # Run FastQC and move output to the appropriate folder
  42 fastqc $fq
  43
  44 # Run STAR
  45 STAR --runThreadN $cores --genomeDir $genome --readFilesIn $fq --outFileNamePrefix $align_out --outSAMtype BAM SortedByCoordinate --outSAMunmapped Within --outSAMattributes Standard
  46
  47 # Create BAM index
  48 samtools index $counts_input_bam
  49
  50 # Count mapped reads
  51 featureCounts -T $cores -s 2 -a $gtf -o $counts $counts_input_bam