# # Usage: # # make refs testdata align call annotate # # or # # make refs alldata align call annotate # # Phony targets instruct make to use these as rules only. .PHONY: refs testdata alldata align call annotate # Accession number for reference genome. ACC ?= NC_045512 # Accession number for the SRA run. SRR ?= SRR12053850 # Number of compute cores. CPU ?= 8 # Number of test reads used when running testdata. LIMIT=100000 # GenBank reference file. GBK=${ACC}.gbk # Fasta reference file. REF=refs/${ACC}.fa # GFF reference file. GFF=refs/${ACC}.gff # Temporary directory for fasterq-dump. # See also: https://www.moritzs.de/linux/thank_you_sra/ TMP=. # All names will have this common element. ROOT=${SRR}-${ACC} # Alignment file. BAM=bam/${ROOT}.bam # Variant file. VCF=vcf/${ROOT}.vcf.gz # The annotated VCF file. ANN=vcf/${ROOT}.annotated.vcf # Read names. R1=reads/${SRR}_1.fastq R2=reads/${SRR}_2.fastq # Print usage. all: @echo "USAGE:" @echo @echo " make refs testreads align call" @echo @echo " make refs allreads align call" @echo @echo "Additional parameters:" @echo @echo ACC=${ACC}, SRR=${SRR}, CPU=${CPU} # A testrun with partial data. test: refs testdata align call annotate # A run with all data. full: refs alldata align call annotate # Removes all files created by operations in this file. clean: rm -rf bam refs vcf reads temp data snpEff* # Initialize the reference genome (and the snpEff database) refs: init_snpeff mkdir -p refs bio fetch ${ACC} -format fasta > ${REF} bio fetch ${ACC} -format gff > ${GFF} bwa index ${REF} # Download a subset of the reads. testdata: mkdir -p reads fastq-dump -F -X ${LIMIT} --split-files -O reads ${SRR} # Download all reads for the SRR run. alldata: mkdir -p reads fasterq-dump -e ${CPU} -t ${TMP} -f -x -p -O reads --split-files ${SRR} # Run alignments for the downloaded reads. align: mkdir -p bam # Note how we filter alignment for mapped reads only. bwa mem -t ${CPU} ${REF} ${R1} ${R2} | samtools view -b -F 4 | samtools sort -@ ${CPU} > ${BAM} samtools index ${BAM} samtools flagstat ${BAM} # Perform snp calling. call: mkdir -p vcf bcftools mpileup -O v -f ${REF} ${BAM} | bcftools call --ploidy 1 -mv -O z -o ${VCF} bcftools index ${VCF} # Initializes the snpEff database. init_snpeff: # Build custom snpEff database # Snpeff needs the files in specific folders. mkdir -p data/${ACC} # Download the GenBank file, has to be called genes.gbk. bio fetch ${ACC} > data/${ACC}/genes.gbk # Set the snpEff config file. echo "${ACC}.genome : ${ACC}" > snpEff.config # Build the snpEff database. snpEff build -v ${ACC} # Runs the snpEff tool. annotate: snpEff -v ${ACC} ${VCF} > ${ANN} mv snpEff_*.* vcf/