#!/usr/bin/env bash set -ueo pipefail # # Cue the music: https://www.youtube.com/watch?v=JtyByefOvgQ # # # Edit (Sept 2015): due to an unreasonable slowness of ncbi the script now requires # that you fetch the runinfo.txt fetch once before starting the script # # It used to work with the runinfo fetch but the command is now unreasonably slow. # It should not really be like that, it is a simple query yet today it takes almost 30 seconsd. # # Basically you need to run this command before running the mission-impossible.sh # # esearch -db sra -query PRJNA257197 | efetch -format runinfo > runinfo.txt # if [ ! -f runinfo.txt ] then echo '' echo '*** Missing runinfo. Run this first' echo '' echo 'esearch -db sra -query PRJNA257197 | efetch -format runinfo > runinfo.txt' echo '' exit fi VCF=ebola-alignment.vcf ANN=ebola-annotated.vcf echo echo "**********************************************************" echo "** Your mission - should you choose to accept it - **" echo "** is to reproduce the SNP results of the Ebola Science **" echo "** paper in less than a minute. **" echo "**********************************************************" echo "" sleep 2 echo '***************************************' echo '** Somebody has set us up the BOMB! **' echo '** All your base are belong to us... **' echo '***************************************' echo "" sleep 1 # Set up directories mkdir -p sra bam refs echo "********************************************" echo "*** Accessing the NCBI remote firewall... **" echo "********************************************" echo "" sleep 1 READNUM=25000 # Create the reference genome ACC=KJ660346 REF=refs/$ACC.fa efetch -db nucleotide -id $ACC -format fasta | seqret -filter -sid $ACC > $REF bwa index $REF 2> /dev/null samtools faidx $REF echo "*******************************" echo "*** Hacking the mainframe... **" echo "*******************************" echo "" sleep 1 # #esearch -db sra -query PRJNA257197 | efetch -format runinfo > ~/refs/runinfo.txt # cat runinfo.txt | grep "04-14" | cut -f 1 -d ',' | grep SRR | head -15 > ids.txt echo "*****************************" echo "*** Downloading secrets... **" echo "*****************************" echo "" cat ids.txt | parallel fastq-dump -X $READNUM -O sra --split-files {} 1> /dev/null sleep 1 echo "*****************************" echo "*** Decoding secrets... **" echo "*****************************" echo "" cat ids.txt | parallel "bwa mem -R '@RG\tID:{}\tSM:{}\tLB:{}' $REF sra/{}_1.fastq sra/{}_2.fastq 2>/dev/null | samtools sort > bam/{}.bam" ls bam/*.bam | parallel samtools index {} sleep 1 echo "******************************" echo -e "\033[5m** DISARM THE EXPLOSIVES... **\033[0m" echo "******************************" sleep 1 echo "" printf "*** CUT THE \e[1mRED\e[m WIRE ..." sleep 2 echo "" printf "*** THE \e[1mOTHER RED\e[m WIRE ..." sleep 2 echo "" printf "*** \e[1mSNP\e[m ***\n\n" sleep 2 freebayes -p 1 -f $REF bam/*.bam > $VCF echo "*****************************" echo "*** Solving all secrets... **" echo "*****************************" echo "" snpEff -v ebola_zaire $VCF 2>> runlog.txt > $ANN echo "***************************************" echo "** Got it! Now let's get outta here! **" echo "***************************************" sleep 2 echo "" echo "*** WARNING! The data will self destruct in one minute! ***" echo ""