#!/usr/bin/env bash

# Stop on errors.
set -euo pipefail

# Collect program output here.
RUNLOG=runlog.txt

echo "Run by `whoami` on `date`" > $RUNLOG

# This is the reference.
REF_ERCC=refs/ERCC92.fa
REF_HUMAN=refs/GRCh38.chrom22.cdna.fa

if [ ! -f $REF_HUMAN ]
then
    URL=http://data.biostarhandbook.com/rnaseq/projects/griffith/GRCh38.chrom22.cdna.fa.gz
    echo "*** Downloading human transcriptome: $REF_HUMAN"
    curl -s $URL | gunzip -c > refs/tmp
    mv refs/tmp $REF_HUMAN
fi

# This the name of the index
IDX_ERCC=refs/ERCC92.idx
IDX_HUMAN=refs/GRCh38.chrom22.cdna.fa.idx

# Build the indices if necessary.
if [ ! -f $IDX_ERCC ]
then
    echo "*** Building kallisto index: $IDX_ERCC"
    kallisto index -i $IDX_ERCC  $REF_ERCC 2>> $RUNLOG
fi

if [ ! -f $IDX_HUMAN ]
then
    echo "*** Building kallisto index: $IDX_HUMAN"
    kallisto index -i $IDX_HUMAN  $REF_HUMAN 2>> $RUNLOG
fi

# Two output directories for control and brain samples.
DIR_ERCC=ercc
DIR_BRAIN=brain

# Make the output directories.
mkdir -p $DIR_ERCC $DIR_BRAIN

for SAMPLE in HBR UHR;
do
    for REPLICATE in 1 2 3;
    do
        # Build the name of the files.
        R1=reads/${SAMPLE}_${REPLICATE}_R1.fq
        R2=reads/${SAMPLE}_${REPLICATE}_R2.fq

        echo "*** Running kallisto on ${SAMPLE}_${REPLICATE} vs $IDX_ERCC"
        OUT1=$DIR_ERCC/${SAMPLE}_${REPLICATE}
        kallisto quant -i $IDX_ERCC -o $OUT1 -b 100 $R1 $R2 2>> $RUNLOG

        echo "*** Running kallisto on ${SAMPLE}_${REPLICATE} vs $IDX_HUMAN"
        OUT2=$DIR_BRAIN/${SAMPLE}_${REPLICATE}
        kallisto quant -i $IDX_HUMAN -o $OUT2 -b 100 $R1 $R2 2>> $RUNLOG
    done
done

echo "*** Created counts for ERCC control samples."
paste ercc/H*/abundance.tsv ercc/U*/abundance.tsv | cut -f 1,4,9,14,19,24,29  > ercc_counts.txt

echo "*** Created counts for the brain samples."
paste brain/H*/abundance.tsv brain/U*/abundance.tsv | cut -f 1,4,9,14,19,24,29  > brain_counts.txt

echo "*** Running the DESeq1 and producing the final result: ercc_deseq1.txt"
curl -s -O http://data.biostarhandbook.com/rnaseq/code/deseq1.r
cat ercc_counts.txt | Rscript deseq1.r 3x3 > ercc_deseq1.txt  2>> $RUNLOG

echo "*** Running the DESeq1 and producing the final result: brain_deseq1.txt"
curl -s -O http://data.biostarhandbook.com/rnaseq/code/deseq1.r
cat brain_counts.txt | Rscript deseq1.r 3x3 > brain_deseq1.txt  2>> $RUNLOG