# Iterates over a BAM file and prints reads that match a genotype at a location. # Works for matches and mistmatching bases only. import pysam import csv import os import sys def match(fname, chrom, pos, metadata="genbank.txt"): if os.path.isfile(metadata): lines = csv.reader(open(metadata, 'rt'), delimiter="\t") pairs = [ (p[0], p[2:]) for p in lines] store = dict(pairs) else: store = dict() # The position is zero based. pos = pos - 1 bam = pysam.AlignmentFile(fname, 'rb') group = dict() for column in bam.pileup(chrom): if column.pos == pos: for read in column.pileups: name = read.alignment.query_name base = read.alignment.query_sequence[read.query_position] group.setdefault(base, []).append(name) for base, values in group.items(): for name in values: fields = store.get(name, []) meta = "\t".join(fields) data = [name, base, meta] print("\t".join(data)) if __name__ == '__main__': if len(sys.argv)< 3: sys.exit("**** usage: %s BAM CHROM POSITION" % sys.argv[0]) fname = sys.argv[1] chrom = sys.argv[2] pos = int(sys.argv[3]) match(fname=fname, chrom=chrom, pos=pos)