# Stop on any error. set -uex # Reference genome accession number. ACC=AF086833 # The name of the output BAM file. BAM=simulation.bam # Directory for the reference files. REF=refs # Make the reference directory. mkdir -p $REF # Reference genome file name. REFERENCE=$REF/${ACC}.fa # The "real" genome that will be modified. GENOME=genome.fa # Fetch the genome from the accession. You may comment this out on second runs. efetch -db nucleotide -id $ACC -format fasta > $REFERENCE # Index the genome with bwa. You may comment this out on second runs. bwa index $REFERENCE # Initially the real genome is the same as the reference. # Must comment out this line in second runs and modify the genome yourself. #efetch -db nucleotide -id $ACC -format fasta > $GENOME # Read names produced by the read simulator. R1=R1.fq R2=R2.fq # Run the simulation. Note how it runs on the real genome, not on the reference. wgsim -e 0 -r 0 -N 2000 $GENOME $R1 $R2 # Align in paired end mode to the reference. bwa mem $REFERENCE $R1 $R2 | samtools sort > $BAM # Index the resulting bam file. samtools index $BAM