# Fetch two GenBank records, align them, and report the differences
# between them at the genome, CDS (DNA) and protein level.
#
# Requires the `bio` and `mafft` commands on PATH.
# Writes genomes.gb, genomes.fa, cds.gff, aligned.fa, mutations.txt and
# mutations.vcf into the current directory.

# Stop on error (-e), treat unset variables as errors (-u), and trace
# every command as it runs (-x).
set -uex

# Obtain the data.
bio fetch NC_045512 MT019529.1 > genomes.gb

# Inputs are fed with `< file` rather than `cat file |` so that a missing
# or unreadable file fails the command itself and trips `set -e`.

# Turn the genomes into a FASTA file.
bio fasta < genomes.gb > genomes.fa

# Turn the genomes into a GFF file, keeping only CDS features.
bio gff --type CDS < genomes.gb > cds.gff

# Align the genomes.
mafft --auto --quiet --preservecase genomes.fa > aligned.fa

# Create a simpler mutations file.
bio format --diff < aligned.fa > mutations.txt

# Create a VCF file.
bio format --vcf < aligned.fa > mutations.vcf

# What overlaps with our variants?
# NOTE: the long option is spelled `--fields`; the previous single-dash
# `-fields` did not match the other long options used in this script.
bio table --type CDS --olap 3778,8388,8987 --fields id,gene,date < genomes.gb

# Variation in DNA.
bio fasta --id YP_009724389.1,QHU36823.1 < genomes.gb \
  | bio align --diff

# Variation as proteins.
bio fasta --protein --id YP_009724389.1,QHU36823.1 < genomes.gb \
  | bio align --diff