#!/usr/bin/env bash
#
# This recipe clusters nCov genomes.
#
# Steps:
#   1. Report basic statistics for the input FASTA file.
#   2. Cluster the sequences with cd-hit at a 70% identity threshold.
#   3. Summarize the per-member identity values stored in the cluster
#      report (minimum, maximum and median).
#
# Requires: seqkit, cd-hit, awk, datamash.
#
# Need more help? See: https://www.biostarhandbook.com
#

# Strict error checking: stop on errors and unset variables, trace each
# command, and fail a pipeline when any of its stages fails.
set -euxo pipefail

# Input genomes (FASTA).
readonly GENOMES=refs/nCov-genomes.fa

# Sequence identity threshold passed to cd-hit (0.7 = 70%).
readonly IDENTITY=0.7

# Output prefix handed to cd-hit; the cluster report is read from
# "${OUT}.clstr".
readonly OUT=out

# How many sequences.
seqkit stats "${GENOMES}"

# Cluster the sequences at 70%.
# NOTE(review): cd-hit is the protein clustering program of the CD-HIT
# suite; confirm it (rather than cd-hit-est) is intended for genomes.
cd-hit -c "${IDENTITY}" -i "${GENOMES}" -o "${OUT}"

# What is the minimum, maximum and median similarity.
#
# Only member lines carry an identity value; they are expected to look like
#   1	2799aa, >name... at 99.50%
# so the fourth field must be exactly "at" and the fifth is the percentage.
# Matching the field (instead of grepping for the substring "at") keeps out
# cluster headers and representative lines whose sequence name merely
# contains "at", which would otherwise feed empty values to datamash.
awk '$4 == "at" { gsub(/%/, "", $5); print $5 }' "${OUT}.clstr" \
  | datamash min 1 max 1 median 1