#!/usr/bin/env bash

#
# This program performs an alignment based RNA-Seq analysis.
#

# Stop on errors. Print the commands.
set -uex

# Download the reference genome.
wget -nc http://data.biostarhandbook.com/books/rnaseq/data/golden.genome.tar.gz

# Unpack the reference genome.
tar xzvf golden.genome.tar.gz

# Download the data
wget -nc http://data.biostarhandbook.com/books/rnaseq/data/golden.reads.tar.gz

# Unpack the data
tar zxvf golden.reads.tar.gz

# The indexed reference genome.
IDX=refs/genome.fa

# Build the hisat2 genome index.
hisat2-build $IDX $IDX

# Index the reference genome with samtools.
samtools faidx $IDX

# Create the root ids of the data layout.
parallel -j 1 echo {1}_{2} ::: BORED EXCITED ::: 1 2 3 > ids

# Create the BAM folder.
mkdir -p bam

# Align the FASTQ files to the reference genome.
cat ids | parallel "hisat2 -x $IDX -1 reads/{}_R1.fq -2 reads/{}_R2.fq | samtools sort > bam/{}.bam"

# Index each BAM file.
cat ids | parallel  "samtools index bam/{}.bam"

# First we turn the each BAM file into BedGraph coverage.
cat ids | parallel "bedtools genomecov -ibam  bam/{}.bam -split -bg  > bam/{}.bg"

# Then convert the BedGraph coverage into BigWig coverage.
cat ids | parallel "bedGraphToBigWig bam/{}.bg  ${IDX}.fai bam/{}.bw"

# Run featureCounts on BAM files in the right order.
featureCounts -p -a refs/features.gff -o counts.txt bam/BORED_?.bam bam/EXCITED_?.bam

# Download the edger R script.
curl http://data.biostarhandbook.com/books/rnaseq/code/edger.r > edger.r

# Download the heatmap R script.
curl http://data.biostarhandbook.com/books/rnaseq/code/heatmap.r > heatmap.r

# Perform the differential expression detection with edger.
cat counts.txt | Rscript edger.r 3x3 > results.csv

# Draw the heatmap from the results.
cat results.csv | Rscript heatmap.r > results.pdf