From 3344791d2a684bc6fa6f1aa64e97c94d8d4522bb Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 25 Jan 2021 17:29:10 -0500 Subject: [PATCH] Paste content of script into lesson1 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- lesson1/lesson1.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/lesson1/lesson1.md b/lesson1/lesson1.md index cf10e40..8677c10 100644 --- a/lesson1/lesson1.md +++ b/lesson1/lesson1.md @@ -25,6 +25,62 @@ Next, import bio-cwl-tools with this command: git submodule add https://github.com/common-workflow-library/bio-cwl-tools.git ``` +## The shell script + +``` +#!/bin/bash + +# Based on +# https://hbctraining.github.io/Intro-to-rnaseq-hpc-O2/lessons/07_automating_workflow.html +# + +# This script takes a fastq file of RNA-Seq data, runs FastQC and outputs a counts file for it. +# USAGE: sh rnaseq_analysis_on_input_file.sh + +set -e + +# initialize a variable with an intuitive name to store the name of the input fastq file +fq=$1 + +# grab base of filename for naming outputs +base=`basename $fq .subset.fq` +echo "Sample name is $base" + +# specify the number of cores to use +cores=4 + +# directory with genome reference FASTA and index files + name of the gene annotation file +genome=rnaseq/reference_data +gtf=rnaseq/reference_data/chr1-hg19_genes.gtf + +# make all of the output directories +# The -p option means mkdir will create the whole path if it +# does not exist and refrain from complaining if it does exist +mkdir -p rnaseq/results/fastqc +mkdir -p rnaseq/results/STAR +mkdir -p rnaseq/results/counts + +# set up output filenames and locations +fastqc_out=rnaseq/results/fastqc +align_out=rnaseq/results/STAR/${base}_ +counts_input_bam=rnaseq/results/STAR/${base}_Aligned.sortedByCoord.out.bam +counts=rnaseq/results/counts/${base}_featurecounts.txt + +echo "Processing file $fq" + +# Run FastQC and move output to the appropriate folder +fastqc $fq + +# Run STAR +STAR --runThreadN $cores --genomeDir $genome --readFilesIn $fq --outFileNamePrefix $align_out --outSAMtype BAM SortedByCoordinate --outSAMunmapped Within --outSAMattributes Standard + +# Create BAM index +samtools index $counts_input_bam + +# Count mapped reads +featureCounts -T $cores -s 2 -a $gtf -o $counts $counts_input_bam +``` + ## Writing the workflow ### 1. File header -- 2.30.2