#!/usr/bin/env bash
#
# Quantify samples against the human transcriptome with kallisto.
#
# Steps:
#   1. Download the Ensembl GRCh38 cDNA FASTA into refs/ (if not present).
#   2. Build the kallisto index for it (if not present).
#   3. Run `kallisto quant` for every accession listed in info/paired.csv
#      (paired-end) and info/single.csv (single-end).
#
# Inputs:
#   info/paired.csv, info/single.csv  - run accessions, one per line.
#   reads/<SRR>_1.fastq, reads/<SRR>_2.fastq  - paired-end reads.
#   reads/<SRR>.fastq                          - single-end reads.
#
# Outputs:
#   One directory per sample, named after the accession, in the current
#   working directory. Messages written to stderr by kallisto are appended
#   to runlog.txt.

# Stop on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

# Make directories in case these don't exist.
mkdir -p info refs

# Collect program output here.
RUNLOG=runlog.txt

echo "# See the $RUNLOG file for run-time messages"
# Truncates any previous log so every invocation starts fresh.
echo "# Run by $(whoami) on $(date)" > "$RUNLOG"

# This is the transcriptome sequence.
REF=refs/GRCh38.cdna.fa

if [[ ! -f "$REF" ]]; then
  URL=ftp://ftp.ensembl.org/pub/release-86/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
  echo "# Downloading human transcriptome: $REF"
  # Decompress into a scratch file and rename only on success, so that an
  # interrupted download never leaves a truncated file under the final name.
  curl "$URL" | gunzip -c > refs/tmp
  mv refs/tmp "$REF"
fi

# This is the name of the index.
IDX=refs/GRCh38.cdna.fa.idx

# Build the index if necessary.
if [[ ! -f "$IDX" ]]; then
  echo "# Building kallisto index: $IDX"
  kallisto index -i "$IDX" "$REF" 2>> "$RUNLOG"
fi

# Lists of run accessions for the single-end and paired-end samples.
SINGLE=info/single.csv
PAIRED=info/paired.csv

# Both lists are read below, so verify both before starting any work.
# Exit with a non-zero status so callers can detect the failure.
for LIST in "$SINGLE" "$PAIRED"; do
  if [[ ! -f "$LIST" ]]; then
    echo "# Missing for file $LIST. Did you run zika-getdata.sh?" >&2
    exit 1
  fi
done

# Run kallisto on PE reads.
# The list is read on file descriptor 3 so that the commands inside the loop
# cannot consume it through stdin.
while read -r SRR <&3 || [[ -n "$SRR" ]]; do
  # Skip blank lines.
  [[ -n "$SRR" ]] || continue

  R1=reads/${SRR}_1.fastq
  R2=reads/${SRR}_2.fastq
  echo "# Running kalisto on paired end sample: $SRR"
  # Pass both mate files (R1 and R2) to the quantification.
  kallisto quant -i "$IDX" -o "$SRR" "$R1" "$R2" 2>> "$RUNLOG"
done 3< "$PAIRED"

# Run kallisto on SE reads.
while read -r SRR <&3 || [[ -n "$SRR" ]]; do
  # Skip blank lines.
  [[ -n "$SRR" ]] || continue

  R1=reads/${SRR}.fastq
  echo "# Running kalisto on single end sample: $SRR"
  # Single-end mode: -l and -s supply the fragment length mean and standard
  # deviation, here fixed at 187 and 70.
  kallisto quant -i "$IDX" -o "$SRR" --single -l 187 -s 70 "$R1" 2>> "$RUNLOG"
done 3< "$SINGLE"