#!/usr/bin/env bash
#
# Align the zika samples with hisat2 and store sorted, indexed BAM files.
#
# This script assumes that you have downloaded the zika data already
# and that you have the data and indices ready.
#
# Reads:  info/paired.csv, info/singles.csv (whitespace-separated sample ids)
#         reads/<id>_1.fastq + reads/<id>_2.fastq (paired end samples)
#         reads/<id>.fastq                        (single end samples)
# Writes: bam/<id>.bam (plus the index made by `samtools index`), runlog.txt
#

# Stop on any error.
set -ueo pipefail

# These will store the various SRR ids.
# RUNINFO is not used below; it is kept as part of the file-level configuration.
# shellcheck disable=SC2034
RUNINFO=info/zika.csv
SINGLE=info/singles.csv
PAIRED=info/paired.csv

# Store bam files here.
mkdir -p bam

# How many CPU cores on the system.
CPUS=4

# The hisat index name.
IDX=refs/grch38/genome

# Keep track of what the aligners printed out.
RUNLOG=runlog.txt

#######################################
# Align every sample named in an id list and write sorted, indexed BAM files.
# Globals:   CPUS, IDX, RUNLOG (read)
# Arguments: $1 - read layout, either "paired" or "single"
#            $2 - path of the file holding the sample ids
# Outputs:   progress lines on stdout; aligner messages appended to $RUNLOG
# Returns:   0 on success; under `set -e` a failing aligner stops the script
#######################################
align_samples() {
  local layout=$1
  local list=$2
  local -a samples=()
  local -a inputs=()
  local sample bam

  # A missing list is reported and skipped rather than treated as fatal.
  if [[ ! -r "$list" ]]; then
    echo "*** Cannot read sample list: $list (skipping)" >&2
    return 0
  fi

  # Split the whole file on whitespace into an array without glob expansion.
  # `read -d ''` returns non-zero when it reaches end of file, hence `|| true`.
  read -r -d '' -a samples < "$list" || true
  if (( ${#samples[@]} == 0 )); then
    return 0
  fi

  for sample in "${samples[@]}"; do
    bam=bam/${sample}.bam

    if [[ "$layout" == "paired" ]]; then
      inputs=(-1 "reads/${sample}_1.fastq" -2 "reads/${sample}_2.fastq")
    else
      inputs=(-U "reads/${sample}.fastq")
    fi

    echo "*** Running Hisat2 on $layout end sample: $sample"

    # Both stages append (2>>) to the run log. A plain `2>` would truncate the
    # log for every sample, losing the header and all earlier aligner output.
    hisat2 -p "$CPUS" -x "$IDX" "${inputs[@]}" 2>> "$RUNLOG" \
      | samtools sort > "$bam" 2>> "$RUNLOG"
    samtools index "$bam"
  done
}

# Start a fresh log for this run.
echo "Run started by $(whoami) on $(date)" > "$RUNLOG"

# Align the paired end samples.
align_samples paired "$PAIRED"

# Align the single end samples.
align_samples single "$SINGLE"