#!/usr/bin/env python
"""
Usage is:
bamerrors_cutoff.py -c cutoff list_of_bamerrors_output_files

Writes output to bamerrors_cutoff.txt in the current directory.
Output identifies columns that are >= cutoff.
Format is the following 4 columns: filename, rowID, columnID, value
"""

import sys
import os
import getopt
import subprocess
import re
from decimal import *


def main(argv=None):
    """Parse the command line and write the cutoff report.

    argv -- full argument vector including the program name; defaults to
            sys.argv. Recognizes "-c CUTOFF"; every remaining argument is
            taken as an input file name.

    Prints the module usage text to stderr and returns without writing
    anything when no -c option was supplied. Always returns None.
    """
    if argv is None:
        argv = sys.argv

    outputFilename = "bamerrors_cutoff.txt"
    cutoff = None

    options, errfiles = getopt.getopt(argv[1:], "c:")
    for opt, arg in options:
        if opt == "-c":
            cutoff = Decimal(arg)

    # Compare against None rather than testing truthiness: Decimal(0) is
    # falsy, so "-c 0" would otherwise be mistaken for a missing cutoff.
    if cutoff is None:
        # Plain write() behaves the same under Python 2 and Python 3.
        sys.stderr.write("%s\n" % __doc__)
        return

    findHighErrors(outputFilename, cutoff, errfiles)


def findHighErrors(outputFilename, cutoff, errFiles):
    """Write every scanned value that is >= cutoff to outputFilename.

    outputFilename -- path of the report to create (overwritten if present).
    cutoff         -- threshold each scanned value is compared against.
    errFiles       -- iterable of input file paths.

    For each input file the first line is skipped as a header. Every other
    non-blank line is split on whitespace; the first field is the row name
    and the following fields are parsed with Decimal. One tab-separated
    output line "filename, rowname, column index, value" is written per
    value that meets the cutoff. A field that Decimal cannot parse raises
    (decimal.InvalidOperation under the default context).
    """
    with open(outputFilename, "w") as outfile:
        for filename in errFiles:
            with open(filename, "r") as fp:
                next(fp, None)  # skip the header
                for line in fp:
                    fields = line.split()
                    if not fields:
                        # Blank line: nothing to report, and no row name.
                        continue
                    rowname = fields[0]
                    # Columns 1 .. len(fields) - 2 are scanned; the final
                    # column is left out, exactly as before.
                    # NOTE(review): confirm the last column is meant to be
                    # excluded and this is not an off-by-one.
                    for col, value in enumerate(fields[1:-1], 1):
                        if Decimal(value) >= cutoff:
                            outfile.write("%s\t%s\t%d\t%s\n" % (
                                filename, rowname, col, value))


if __name__ == "__main__":
    sys.exit(main())