#!/usr/bin/env bash
#
# This recipe generates genomic variation information for the Corona virus.
#
# Inputs (must already exist):
#   refs/$ACC.fa           reference genome
#   refs/nCov-genomes.fa   query genomes aligned against the reference
#
# Outputs:
#   bam/align.bam (+ index), vcf/genotypes.vcf, vcf/variants.vcf, variants.txt
#
# Requires on PATH: minimap2, samtools, bcftools, bio.
#
# Need more help? See: https://www.biostarhandbook.com
#

# Stop on any error, on use of an unset variable, and trace each command.
# pipefail makes a failure in any pipeline stage (e.g. minimap2) abort the
# script instead of being masked by the exit status of the last stage.
set -uex -o pipefail

# Reference accession number.
ACC=NC_045512

# The reference genome.
REF=refs/$ACC.fa

# The query file.
QUERY=refs/nCov-genomes.fa

# Alignment file.
ALIGNED=bam/align.bam

# Genotype file.
GENOTYPE=vcf/genotypes.vcf

# Variant file.
VARIANTS=vcf/variants.vcf

# Final report.
# NOTE(review): REPORT is defined but nothing below writes to it; the text
# summary currently goes to variants.txt. Confirm the intended destination.
REPORT=vcf/report.txt

# Make the directories for the BAM and VCF files.
mkdir -p bam
mkdir -p vcf

# Align genomes and write the sorted alignments.
minimap2 -a "$REF" "$QUERY" \
  | samtools sort > "$ALIGNED"

# Index the alignment.
samtools index "$ALIGNED"

# Compute the genotypes from the alignment file with pileup.
bcftools mpileup -B -Ovu -f "$REF" "$ALIGNED" > "$GENOTYPE"

# Call the variants from the genotypes.
bcftools call -vc -Ov "$GENOTYPE" > "$VARIANTS"

# Turn the variants into a text file (one "CHROM POS REF ALT" line per record).
bcftools query -f '%CHROM %POS %REF %ALT\n' "$VARIANTS" > variants.txt

# Report the distribution of various mutations.
# Uses the variables defined above so the report always matches the alignment
# and accession this script actually produced.
bio genotype -b "$ALIGNED" -c "$ACC" -p 8782,28144