# Function definitions for opening the output file and writing lines to
# that file, for the script ab-ind.py.
#
# All print calls take a single pre-formatted string so the module behaves
# the same under Python 2 (where `print` is a statement) and Python 3.


def open_outfile(out_f, in_f):
    """Open the indel report file for writing and return the file object.

    A single newline is written immediately to confirm the file is
    writable.  Progress messages are printed to stdout.

    Parameters:
        out_f: output file name; used only in the error messages.
            NOTE(review): the file actually opened is the hard-coded
            'ab-ind.txt', not out_f -- confirm whether that is intended
            before changing it, since callers may rely on the fixed name.
        in_f: input file name; currently unused (it only appeared in
            header lines that had been commented out).

    Returns:
        The open, writable file object.

    Raises:
        SystemExit: if the file cannot be opened or written.
    """
    # Local import: nothing else in this module brings `sys` into scope,
    # so the error paths would otherwise fail with NameError.
    import sys

    try:
        fo = open('ab-ind.txt', 'w')
    except IOError:
        print("%s%s" % ("Error: can't open output file ", out_f))
        sys.exit("\nOutput file open error\n\n")
    print("Opened the output file successfully")

    try:
        fo.write("\n")
    except IOError:
        print("%s%s" % ("Error: can't write to output file ", out_f))
        fo.close()  # do not leak the handle on the way out
        sys.exit("Output file write error\n\n")
    print("Successful write to output file\n\n")
    return fo


def write_outline(qname, chro, q_start, q_end, s_start, s_end,
                  final_query_string, final_sbjct_string, fo):
    """Write one indel record for an aligned query/subject pair to fo.

    A run of "-" in the query string is reported as a deletion (DEL); a
    run of "-" in the subject string is reported as an insertion (INS).
    The gap is taken to span from the first "-" to the last "-" of the
    string.  Diagnostic messages are printed to stdout along the way.

    Parameters:
        qname: query (contig) name; surrounding whitespace is stripped.
        chro: value written in the first output column.
        q_start, q_end: strings holding the start and end positions of
            the query; q_start > q_end marks the record as "-" strand,
            otherwise "+".
        s_start, s_end: strings holding the start and end positions of
            the subject; s_start is the base for the output coordinates.
        final_query_string: concatenated aligned query sequence.
        final_sbjct_string: concatenated aligned subject sequence.
        fo: writable file-like object receiving the record.

    Output (tab-separated, newline-terminated):
        chro, ".", "DEL"|"INS", <empty>, start, end, size, strand,
        "qname:from-to", allele
    where size is "0:N" / allele is "-/SEQ" for a deletion and size is
    "N:0" / allele is "SEQ/-" for an insertion.  If both strings contain
    gaps a one-line data-error message is written instead; if neither
    does, nothing is written.

    Raises:
        ValueError: if any of the four position arguments is not an
            integer string.
    """
    s_start_i = int(s_start)
    # s_end is parsed only so a malformed value still raises ValueError;
    # the subject orientation does not enter the coordinates below.
    int(s_end)
    q_start_i = int(q_start)
    q_minus = q_start_i > int(q_end)

    print("%s%s" % ("Concatenated Query string:", final_query_string))
    indel_start_q = final_query_string.find("-")
    indel_end_q = final_query_string.rfind("-")

    print("%s%s" % ("Concatenated Sbjct string:", final_sbjct_string))
    indel_start_s = final_sbjct_string.find("-")
    indel_end_s = final_sbjct_string.rfind("-")

    # find() returns -1 when there is no gap and 0 when the gap is the
    # very first character, so presence must be tested with >= 0.
    has_del = indel_start_q >= 0
    has_ins = indel_start_s >= 0

    if not has_del and not has_ins:
        print("No indels this Query\n")
        return
    if has_del and has_ins:
        print("Data error this Query\n")
        fo.write(qname.strip()
                 + " has a data error (both INS and DEL are present!\n")
        return

    if has_del:
        kind = "DEL"
        print("%s%i" % ("indel_start_q:", indel_start_q))
        print("%s%i" % ("indel_end_q:", indel_end_q))
        print("%s%s" % ("Length of deletion:",
                        indel_end_q - indel_start_q + 1))
        print("-Del\n" if q_minus else "+Del\n")

        # The deleted bases are the subject bases opposite the query gap.
        segment = final_sbjct_string[indel_start_q:indel_end_q + 1].upper()
        seg_len = len(segment)
        ref_start = s_start_i + indel_start_q
        ref_end = ref_start + seg_len - 1
        # In the query the deletion lies between two adjacent positions.
        if q_minus:
            ctg_from = q_start_i - (indel_start_q - 1)
            ctg_to = q_start_i - indel_start_q
        else:
            ctg_from = q_start_i + indel_start_q - 1
            ctg_to = q_start_i + indel_start_q
        size = "0:" + str(seg_len)
        allele = "-/" + segment
    else:
        kind = "INS"
        print("%s%i" % ("indel_start_s:", indel_start_s))
        print("%s%i" % ("indel_end_s:", indel_end_s))
        print("%s%s" % ("Length of insertion:",
                        indel_end_s - indel_start_s + 1))
        print("-Ins\n" if q_minus else "+Ins\n")

        # The inserted bases are the query bases opposite the subject gap.
        segment = final_query_string[indel_start_s:indel_end_s + 1].upper()
        seg_len = len(segment)
        # In the subject the insertion lies between two adjacent positions.
        ref_start = s_start_i + indel_start_s - 1
        ref_end = s_start_i + indel_start_s
        if q_minus:
            ctg_from = q_start_i - indel_start_s
            ctg_to = q_start_i - indel_start_s - seg_len + 1
        else:
            ctg_from = q_start_i + indel_start_s
            ctg_to = q_start_i + indel_start_s + seg_len - 1
        size = str(seg_len) + ":0"
        allele = segment + "/-"

    columns = [
        chro,
        ".",
        kind,
        "",  # the established format has an empty column here; keep it
        str(ref_start),
        str(ref_end),
        size,
        "-" if q_minus else "+",
        qname.strip() + ":" + str(ctg_from) + "-" + str(ctg_to),
        allele,
    ]
    fo.write("\t".join(columns) + "\n")