#!/usr/bin/env bash

# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Ensure the working directories are present (no-op when they already exist).
mkdir -p info refs

# Run-time messages from the tools are gathered in this file.
RUNLOG=runlog.txt
printf '%s\n' "# See the $RUNLOG file for run-time messages"
# Start a fresh log (truncating any previous one) with a who/when header line.
printf '%s\n' "# Run by $(whoami) on $(date)" > "$RUNLOG"

# Path of the transcriptome (cDNA) FASTA used as the reference.
REF=refs/GRCh38.cdna.fa

# Fetch the reference only when it is not already on disk.
if ! test -f "$REF"; then
    URL=ftp://ftp.ensembl.org/pub/release-86/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
    printf '%s\n' "# Downloading human transcriptome: $REF"
    # Decompress into a scratch file first and rename afterwards, so that
    # $REF only appears once the download step has finished.
    curl "$URL" | gunzip -c > refs/tmp
    mv refs/tmp "$REF"
fi

# Path of the kallisto index built from the reference.
IDX=refs/GRCh38.cdna.fa.idx

# Create the index only if it is not already present.
if ! test -f "$IDX"; then
    printf '%s\n' "# Building kallisto index: $IDX"
    # kallisto's stderr output is appended to the run log.
    kallisto index -i "$IDX" "$REF" 2>> "$RUNLOG"
fi


# Sample lists that must exist before quantification starts: one for
# single-end runs and one for paired-end runs.
SINGLE=info/single.csv
PAIRED=info/paired.csv

# Abort with a non-zero status when a sample list is missing, so that callers
# see a failure rather than an apparently successful run. The diagnostic goes
# to stderr to keep it apart from the progress messages on stdout.
for LIST in "$SINGLE" "$PAIRED"
do
    if [ ! -f "$LIST" ]
    then
        echo "# Missing file: $LIST. Did you run zika-getdata.sh?" >&2
        exit 1
    fi
done

# Declared as an output directory name, but nothing in the visible script
# reads it: kallisto output below goes to a directory named after each run id.
# NOTE(review): the value looks like a misspelling of "results" - confirm
# before relying on it.
DIR_OUT=resuts

# Run kallisto on PE reads: one quantification per run id listed in $PAIRED.
# The list is deliberately left unquoted so it splits into one word per id.
for SRR in $(cat "$PAIRED")
do
    # The two mate files of this run.
    R1=reads/${SRR}_1.fastq
    R2=reads/${SRR}_2.fastq
    echo "# Running kalisto on paired end sample: $SRR"
    # Pass both mates (first R1, then R2); stderr is appended to the run log.
    kallisto quant -i "$IDX" -o "$SRR" "$R1" "$R2" 2>> "$RUNLOG"
done

# Quantify every single-end run listed in info/single.csv.
# The list is deliberately left unquoted so it splits into one word per id.
for SRR in $(cat info/single.csv); do
    R1=reads/${SRR}.fastq
    printf '%s\n' "# Running kalisto on single end sample: $SRR"
    # Single-end mode is given a fragment length (-l) and its standard
    # deviation (-s) on the command line; the values are fixed here.
    QUANT_ARGS=(quant -i "$IDX" -o "$SRR" --single -l 187 -s 70 "$R1")
    # stderr from kallisto is appended to the run log.
    kallisto "${QUANT_ARGS[@]}" 2>> "$RUNLOG"
done