#
# Usage:
#
#   make refs testdata align call annotate
#
#   or
#
#   make refs alldata align call annotate
#

# Phony targets instruct make to use these as rules only.
.PHONY: refs testdata alldata align call annotate

# Accession number for reference genome.
ACC ?= NC_045512

# Accession number for the SRA run.
SRR ?= SRR12053850

# Number of compute cores.
CPU ?= 8

# Number of test reads used when running testdata.
LIMIT=100000

# GenBank reference file.
GBK=${ACC}.gbk

# Fasta reference file.
REF=refs/${ACC}.fa

# GFF reference file.
GFF=refs/${ACC}.gff

# Temporary directory for fasterq-dump.
# See also: https://www.moritzs.de/linux/thank_you_sra/
TMP=.

# All names will have this common element.
ROOT=${SRR}-${ACC}

# Alignment file.
BAM=bam/${ROOT}.bam

# Variant file.
VCF=vcf/${ROOT}.vcf.gz

# The annotated VCF file.
ANN=vcf/${ROOT}.annotated.vcf

# Read names.
R1=reads/${SRR}_1.fastq
R2=reads/${SRR}_2.fastq

# Print usage.
all:
	@echo "USAGE:"
	@echo
	@echo "  make refs testreads align call"
	@echo
	@echo "  make refs allreads align call"
	@echo
	@echo "Additional parameters:"
	@echo
	@echo    ACC=${ACC}, SRR=${SRR}, CPU=${CPU}

# A testrun with partial data.
test: refs testdata align call annotate

# A run with all data.
full: refs alldata align call annotate

# Removes all files created by operations in this file.
clean:
	rm -rf bam refs vcf reads temp data snpEff*

# Initialize the reference genome (and the snpEff database)
refs: init_snpeff
	mkdir -p refs
	bio fetch ${ACC} -format fasta > ${REF}
	bio fetch ${ACC} -format gff > ${GFF}
	bwa index ${REF}

# Download a subset of the reads.
testdata:
	mkdir -p reads
	fastq-dump -F -X ${LIMIT} --split-files -O reads ${SRR}

# Download all reads for the SRR run.
alldata:
	mkdir -p reads
	fasterq-dump -e ${CPU} -t ${TMP} -f -x -p  -O reads --split-files ${SRR}

# Run alignments for the downloaded reads.
align:
	mkdir -p bam
	# Note how we filter alignment for mapped reads only.
	bwa mem -t ${CPU} ${REF} ${R1} ${R2} | samtools view -b -F 4 | samtools sort -@ ${CPU} > ${BAM}
	samtools index ${BAM}
	samtools flagstat ${BAM}

# Perform snp calling.
call:
	mkdir -p vcf
	bcftools mpileup -O v -f ${REF} ${BAM}  | bcftools call --ploidy 1 -mv -O z -o ${VCF}
	bcftools index ${VCF}

# Initializes the snpEff database.
init_snpeff:
	# Build custom snpEff database

	# Snpeff needs the files in specific folders.
	mkdir -p data/${ACC}

	# Download the GenBank file, has to be called genes.gbk.
	bio fetch ${ACC} > data/${ACC}/genes.gbk

	# Set the snpEff config file.
	echo "${ACC}.genome : ${ACC}" > snpEff.config

	# Build the snpEff database.
	snpEff build -v ${ACC}

# Runs the snpEff tool.
annotate:
	snpEff -v ${ACC} ${VCF} > ${ANN}
	mv snpEff_*.*  vcf/