#!/usr/bin/env bash

#
# This program performs a classification-based RNA-Seq analysis.
#
# Details at: http://www.biostarhandbook.com
#

# Shell options for the whole run:
#   errexit - abort as soon as a command fails
#   nounset - treat expansion of an unset variable as an error
#   xtrace  - echo every command before it is executed
set -o errexit -o nounset -o xtrace

# Fetch and unpack the input archives: the reference first, then the reads.
# wget -nc (no-clobber) does not download an archive that already exists
# in the current directory.
data_url=http://data.biostarhandbook.com/books/rnaseq/data
for archive in golden.genome.tar.gz golden.reads.tar.gz; do
  wget -nc "${data_url}/${archive}"
  tar -xzvf "${archive}"
done

# Reference FASTA file that the reads are quantified against.
# (Presumably provided by the reference archive unpacked earlier - confirm.)
REF=refs/transcripts.fa

# Location of the kallisto index that is built from REF.
IDX=refs/transcript.idx

# Build the kallisto index for the reference sequences.
# Expansions are quoted so the paths survive word splitting and globbing.
kallisto index -i "$IDX" "$REF"

# Index the same FASTA file with samtools faidx.
samtools faidx "$REF"

# Answer GNU parallel's citation prompt up front ("will cite" on stdin);
# anything the command prints on stderr is discarded.
parallel --citation 2> /dev/null <<< 'will cite'

# Create the root ids of the data layout, one per line:
# BORED_1..BORED_3 followed by EXCITED_1..EXCITED_3.
# Brace expansion yields the combinations in a fixed left-to-right order, so
# no external process (and no reliance on job scheduling) is needed.
printf '%s\n' {BORED,EXCITED}_{1,2,3} > ids

# Make a directory for the results (-p: no error if it already exists).
mkdir -p output

# Run kallisto quant once per id listed in the file "ids". GNU parallel
# substitutes each id for {} to form the output directory and the paired
# read file names (reads/<id>_R1.fq and reads/<id>_R2.fq).
# The id list is redirected in rather than piped from cat, so a missing
# "ids" file stops the script instead of silently running zero jobs.
parallel kallisto quant -i "$IDX" -o output/{} reads/{}_R1.fq reads/{}_R2.fq < ids

# Download the custom script that combines kallisto outputs.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as combine.py.
curl --fail http://data.biostarhandbook.com/books/rnaseq/code/combine.py > combine.py

# Combine the outputs created by kallisto: combine.py gets the results
# directory as its argument and the id list on stdin, and its stdout is
# saved as counts.txt. Redirecting "ids" (instead of piping it from cat)
# makes a missing id list a hard error.
python combine.py output < ids > counts.txt

# Download the edger R script.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as edger.r.
curl --fail http://data.biostarhandbook.com/books/rnaseq/code/edger.r > edger.r

# Perform the differential expression detection with edger: the count table
# is fed on stdin and the script's stdout is saved as results.csv.
# NOTE(review): "3x3" presumably describes the design (three samples per
# condition) - confirm against edger.r.
# Redirecting counts.txt (instead of piping it from cat) makes a missing
# count table a hard error.
Rscript edger.r 3x3 < counts.txt > results.csv

# heatmap.r is run below, so fetch it when no usable local copy exists.
# An existing non-empty heatmap.r is left untouched.
# NOTE(review): the URL is assumed to follow the same layout as the other
# helper scripts (combine.py, edger.r) - confirm that it exists.
if [[ ! -s heatmap.r ]]; then
  # Download to a temporary name first so a failed transfer never leaves a
  # broken heatmap.r behind; --fail turns HTTP errors into a non-zero exit.
  curl --fail http://data.biostarhandbook.com/books/rnaseq/code/heatmap.r > heatmap.r.tmp
  mv heatmap.r.tmp heatmap.r
fi

# Draw the heatmap from the results: results.csv is fed on stdin and the
# script's stdout is saved as results.pdf. Redirecting the file (instead of
# piping it from cat) makes a missing results.csv a hard error.
Rscript heatmap.r < results.csv > results.pdf