#!/usr/bin/env bash
#
# Builds reference genome sets for three groups of coronaviruses:
#
#   refs/sars2.fa - SARS2 genomes
#   refs/sarsx.fa - coronaviruses found in non-human hosts (bats)
#   refs/sars1.fa - SARS1 genomes that infect humans
#
# For every group the matching metadata subset is written to meta/, the
# accession numbers are taken from its third column, and the sequences are
# pulled out of the local Betacoronavirus BLAST database.
#
# Requires: bio, blastdbcmd, seqkit.

# Error handling: fail on unset variables (-u) and trace every command (-x).
# NOTE(review): -e / pipefail are intentionally not added here. blastdbcmd may
# exit non-zero when some accessions are absent from the database, which would
# abort a run that currently completes - confirm before tightening this.
set -ux

# BLAST database the genomes are extracted from.
readonly BLAST_DB="${HOME}/db/Betacoronavirus"

# Metadata and reference directories.
mkdir -p meta refs

# Scratch file for the accession list. Using mktemp avoids clobbering a file
# named "acc" in the working directory; the trap removes it on every exit path.
acc=$(mktemp) || exit 1
trap 'rm -f -- "${acc}"' EXIT

#######################################
# Extracts the genomes listed in a metadata file and reports statistics.
# Globals:   BLAST_DB (read), acc (read; the file it names is overwritten)
# Arguments: $1 - metadata file; accession numbers are read from column 3
#            $2 - FASTA file to write
# Outputs:   writes $2; prints the seqkit statistics table to stdout
#######################################
extract_genomes() {
  local meta_file=$1
  local fasta_file=$2

  # Extract accession numbers.
  cut -f 3 "${meta_file}" > "${acc}"

  # Generate the full genome file.
  blastdbcmd -db "${BLAST_DB}" -entry_batch "${acc}" > "${fasta_file}"

  # Generate statistics.
  seqkit stats "${fasta_file}"
}

#
# We will repeat similar operations with slight twists.
#

# The master metadata file for Coronaviridae (taxid 11118).
bio taxon 11118 --metadata > meta/corona.txt

#
# Getting SARS2 genomes
#

# Filter for SARS2.
bio taxon --keep 2697049 --field 2 < meta/corona.txt > meta/sars2.txt
extract_genomes meta/sars2.txt refs/sars2.fa

#
# Getting coronaviruses that infect non-human hosts (let's concentrate on bats)
#

# NOTE(review): field 1 is presumably the host taxid and 9397 the bat clade;
# verify against the bio taxon metadata layout.
bio taxon --keep 9397 --field 1 < meta/corona.txt > meta/sarsx.txt
extract_genomes meta/sarsx.txt refs/sarsx.fa

#
# SARS1 that infects humans
#

# Filter the metadata for clades: keep taxid 694009, then drop the SARS2
# entries and the entries selected for the bat set above.
bio taxon --keep 694009 --field 2 < meta/corona.txt \
  | bio taxon --remove 2697049 --field 2 \
  | bio taxon --remove 9397 --field 1 \
  > meta/sars1.txt
extract_genomes meta/sars1.txt refs/sars1.fa

# Cleanup of the scratch accession file is handled by the EXIT trap.