#
# An example script to prepare reference genome data.
#
# Downloads the prebuilt Hisat2 index for GRCh38 together with the matching
# Ensembl (release 96) annotation and cDNA files into a shared reference
# directory, then unpacks them there.
#

# -u: treat unset variables as errors.
# -e: stop on any error.
# -x: print each command before running it.
set -uex

# The directory that stores the global references.
# It is meant to be used for multiple projects.
DIR=~/refs

# Make the DIR if it does not exist.
mkdir -p "$DIR"

# The URL for the prebuilt Hisat2 indices.
URL1=ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/grch38.tar.gz

# Download the prebuilt Hisat2 indices.
# -nc (no-clobber) skips the download when the file is already present.
(cd "$DIR" && wget -nc "$URL1")

# Unpack the Hisat2 index archive.
# Finds the file name from the URL as explained in:
# https://unix.stackexchange.com/questions/325490/how-to-get-last-part-of-http-link-in-bash
(cd "$DIR" && tar xzvf "${URL1##*/}")

# The Ensembl annotation (GTF) file.
URL2=ftp://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.chr.gtf.gz

# Download the GTF file.
(cd "$DIR" && wget -nc "$URL2")

# Unpack the GTF file. Finds the file name from the URL.
# -k keeps the compressed original so that wget -nc will not fetch it again;
# -f overwrites an already unpacked copy.
(cd "$DIR" && gunzip -k -f "${URL2##*/}")

# The Ensembl cDNA (transcript) file.
URL3=ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz

# Download the cDNA file.
(cd "$DIR" && wget -nc "$URL3")

# Unpack the cDNA file (same -k/-f behavior as for the GTF file above).
(cd "$DIR" && gunzip -k -f "${URL3##*/}")