#!/usr/bin/env bash # Stop on errors. set -euo pipefail # Collect program output here. RUNLOG=runlog.txt echo "Run by `whoami` on `date`" > $RUNLOG # This is the reference. REF_ERCC=refs/ERCC92.fa REF_HUMAN=refs/GRCh38.chrom22.cdna.fa if [ ! -f $REF_HUMAN ] then URL=http://data.biostarhandbook.com/rnaseq/projects/griffith/GRCh38.chrom22.cdna.fa.gz echo "*** Downloading human transcriptome: $REF_HUMAN" curl -s $URL | gunzip -c > refs/tmp mv refs/tmp $REF_HUMAN fi # This the name of the index IDX_ERCC=refs/ERCC92.idx IDX_HUMAN=refs/GRCh38.chrom22.cdna.fa.idx # Build the indices if necessary. if [ ! -f $IDX_ERCC ] then echo "*** Building kallisto index: $IDX_ERCC" kallisto index -i $IDX_ERCC $REF_ERCC 2>> $RUNLOG fi if [ ! -f $IDX_HUMAN ] then echo "*** Building kallisto index: $IDX_HUMAN" kallisto index -i $IDX_HUMAN $REF_HUMAN 2>> $RUNLOG fi # Two output directories for control and brain samples. DIR_ERCC=ercc DIR_BRAIN=brain # Make the output directories. mkdir -p $DIR_ERCC $DIR_BRAIN for SAMPLE in HBR UHR; do for REPLICATE in 1 2 3; do # Build the name of the files. R1=reads/${SAMPLE}_${REPLICATE}_R1.fq R2=reads/${SAMPLE}_${REPLICATE}_R2.fq echo "*** Running kallisto on ${SAMPLE}_${REPLICATE} vs $IDX_ERCC" OUT1=$DIR_ERCC/${SAMPLE}_${REPLICATE} kallisto quant -i $IDX_ERCC -o $OUT1 -b 100 $R1 $R2 2>> $RUNLOG echo "*** Running kallisto on ${SAMPLE}_${REPLICATE} vs $IDX_HUMAN" OUT2=$DIR_BRAIN/${SAMPLE}_${REPLICATE} kallisto quant -i $IDX_HUMAN -o $OUT2 -b 100 $R1 $R2 2>> $RUNLOG done done echo "*** Created counts for ERCC control samples." paste ercc/H*/abundance.tsv ercc/U*/abundance.tsv | cut -f 1,4,9,14,19,24,29 > ercc_counts.txt echo "*** Created counts for the brain samples." paste brain/H*/abundance.tsv brain/U*/abundance.tsv | cut -f 1,4,9,14,19,24,29 > brain_counts.txt echo "*** Running the DESeq1 and producing the final result: ercc_deseq1.txt" curl -s -O http://data.biostarhandbook.com/rnaseq/code/deseq1.r cat ercc_counts.txt | Rscript deseq1.r 3x3 > ercc_deseq1.txt 2>> $RUNLOG echo "*** Running the DESeq1 and producing the final result: brain_deseq1.txt" curl -s -O http://data.biostarhandbook.com/rnaseq/code/deseq1.r cat brain_counts.txt | Rscript deseq1.r 3x3 > brain_deseq1.txt 2>> $RUNLOG