#!/usr/bin/env bash
#
# This script simulates a sequencing run from an accession number.
#
# You cam mutate the genome genome by hand - or as it is in the script.
# Then run this tool generate alignments and visualize these alignments in IGV.
#
set -ue

# Accession number for the reference.
ACC="AF086833"

# How many reads to generate.
N=10000

# How long should each read be.
L=100

# What is the DNA fragment size.
D=500

# Error rates.
E=0.02

# How to tag the alignments.
TAG="@RG\tID:variants\tSM:${ACC}\tLB:simulation"

# Make the genome directory
mkdir -p refs

# This will be the reference.
REF=refs/REFERENCE.fa

# This is will be real genome under study
GENOME=GENOME.fa

# Results will go here.
BAM=align.bam

# This file will keep track of messages printed during the run.
echo "*** Run by `whoami` on `date`" > runlog.txt

# Obtain the reference file if it does not already exists.
if [ ! -f $REF ];
then
    echo "*** Obtain data for $ACC and save it as $REF".
    efetch -db=nuccore -format=fasta -id=$ACC | seqret -filter -sid $ACC > $REF

    # Index reference with different tools needed later.
    echo "*** Index $REF with several tools."
    bwa index $REF 2>> runlog.txt
    samtools faidx $REF

    echo "*** Mutating $REF to $GENOME"
    #
    # Mutate the $GENOME file by hand or by biosed like so
    #
    # The base at position 2000 is mutated from an A to C
    #
    cat $REF | biosed -filter -target AGGGTGGACAACAGAAGAACA -replace AGGGTGGACAACAGAAGAACT > $GENOME
else
    echo "*** Found reference in $REF"
fi

#
# Generate a simulated genome.
echo "*** Generating simulated reads from: $GENOME"
dwgsim -c 0 -N $N -e $E -d $D -1 $L -2 $L -r 0 -R 0 $GENOME data > /dev/null  2> /dev/null

# Read pair names that are the result of the simulation
R1=data.bwa.read1.fastq
R2=data.bwa.read2.fastq

#
# This is how dwgsim will name its data.
#
R1=data.bwa.read1.fastq
R2=data.bwa.read2.fastq

#
# Align the reads and generate athe BAM file.
#
echo "*** Aligning the reads into: $BAM"
bwa mem -R $TAG $REF $R1 $R2 2>> runlog.txt | samtools sort > $BAM
samtools index $BAM