#!/usr/bin/env python
"""
"""
import sys
import os
import re
import string
import subprocess
import tempfile
import getopt
import fastaExtract
from datetime import datetime
import time

class DataException(Exception) : 
    def __init__(self, msg):
        self.message = msg
    def __str__(self):
        return self.message


def readAtacHeader(file):
    """
        Seek to beginning of file, skip lines that don't start with "/".
        Create dictionary of name/val pairs from lines starting with "/". 
        Return dictionary and leave file positioned to read first data row.
    """
    retval = {}
    startOfLine = 0
    file.seek(0)
    while True:
        startOfLine = file.tell()
        line = file.readline()
        if not line:
            break
        line = line.strip()
        if line[0] != "/":
            if len(retval) == 0:
                # Skip the comments at head of file
                continue
            else:
                # Done with header, put the line back so we're positioned at start of first data row.
                file.seek(startOfLine)
                break
        # Store the header rows
        pair = line.split("=")
        key = pair[0][1:]
        value = pair[1] 
        retval[key] = value
    return retval


class AtacRow:
    def __init__(   self, row):
        global refFile, contigFile

        self.fields = row.split()
        self.row = row
        self.rowClass = self.fields[0]
        self.matchType = self.fields[1]
        self.matchIndex = self.fields[2]
        self.parentIndex = self.fields[3]
        self.contigID = self.fields[4]
        self.cMatchStart = int(self.fields[5])
        self.cMatchLen = int(self.fields[6])
        self.cDir = int(self.fields[7])
        self.refID = self.fields[8]
        self.rMatchStart = int(self.fields[9])
        self.rMatchLen = int(self.fields[10])
        self.rDir = int(self.fields[11])

        self.cMatchEnd = self.cMatchStart + self.cMatchLen - 1
        self.rMatchEnd = self.rMatchStart + self.rMatchLen - 1

        self.cMatchSeq = contigFile.getSequence(self.contigID, self.cMatchStart, self.cMatchLen)
        self.rMatchSeq = refFile.getSequence(self.refID, self.rMatchStart, self.rMatchLen)
        if self.rDir == -1:
            self.rMatchSeq = fastaExtract.reverse_complement(self.rMatchSeq)

    def __str__(self):
        return self.row

class AtacPair:
    def __init__(   self, row1, row2):
        global refFile, contigFile

        if (row1.rDir != row2.rDir):
            raise DataException("%s and %s have different reference directions" % (row1, row2));
        if (row1.cDir != row2.cDir) or (row2.cDir != 1):
            raise DataException("%s and %s have unexpected contig directions" % (row1, row2));
        if (row1.refID != row2.refID):
            raise DataException("%s and %s have different refIDs" % (row1, row2));

        self.row1 = row1
        self.row2 = row2
        self.cGap = (row2.cMatchStart - row1.cMatchEnd) - 1
        if (row1.rDir == 1):
            self.rGap = (row2.rMatchStart - row1.rMatchEnd) - 1
        else: 
            self.rGap = (row1.rMatchStart - row2.rMatchEnd) - 1

        if (self.cGap < 0):
            raise DataException("cGap is negative")
        if (self.rGap < 0):
            raise DataException("rGap is negative")
        if (self.cGap > 1000):
            raise DataException("cGap is %d" % self.cGap)
        if (self.rGap > 1000):
            raise DataException("rGap is %d" % self.rGap)

        self.cGapStart = row1.cMatchEnd + 1
        self.cGapEnd = row2.cMatchStart - 1
        self.cEndLowerMatch = row1.cMatchEnd
        self.cStartUpperMatch = row2.cMatchStart
        self.cGapSeq = contigFile.getSequence(row1.contigID, self.cGapStart, self.cGap)

        if (row1.rDir == 1):
            self.rGapStart = row1.rMatchEnd + 1
            self.rGapEnd = row2.rMatchStart - 1
            self.rEndLowerMatch = row1.rMatchEnd
            self.rStartUpperMatch = row2.rMatchStart
            self.rGapSeq = refFile.getSequence(row1.refID, self.rGapStart, self.rGap)
        else:
            self.rGapStart = row2.rMatchEnd + 1 
            self.rGapEnd = row1.rMatchStart - 1 
            self.rEndLowerMatch = row2.rMatchEnd
            self.rStartUpperMatch = row1.rMatchStart
            self.rGapSeq = fastaExtract.reverse_complement(refFile.getSequence(row1.refID, self.rGapStart, self.rGap))

        # If we correct a BRI or BRD case we store the new pair here.
        self.correctedPair = None 

        # If True, indicates that this pair is not original; it's a correction.  
        self.isCorrected = False


"""
    Starting at current offset in the file, read groups of rows that have the same contig ID and that are of type "M u".
    Leave the file position ready to read the first row with a different contig ID.
    Returns a list containing one or more atac rows.  Returns an empty list on EOF.
"""
def getFromAtac(atacFile):
    atacRows = []
    contig = prevContig = None

    while True:
        pos = atacFile.tell()
        line = atacFile.readline()

        # eof
        if not line:
            break
           
        line = line.strip()
        row = line.split()

        # skip lines that aren't the right format
        if not ((len(row) > 4) and (row[0] == 'M' and row[1] == 'u')):
            continue

        contig = row[4]
        if (prevContig):
            if prevContig == contig:
                atacRows.append(AtacRow(line))
            else:
                atacFile.seek(pos)
                break
        else:
            atacRows.append(AtacRow(line))
    
        prevContig = contig
            
    if len(atacRows):
        contig = atacRows[0].contigID
    return atacRows, contig


def printAligned(cStr, rStr, oStr, rFirstPos, rLastPos):

    print formatString(cStr)
    aList = [] 
    for i in range(0, len(cStr)):
        if ((cStr[i] == "-") or (rStr[i] == "-") or (cStr[i] != rStr[i])):
            aList.append(" ")
        else:
            aList.append("|")
    print formatString("".join(aList))
    print formatString(rStr)
    print "%d -> %d" % (rFirstPos, rLastPos) 
    print
    print oStr

def formatString(str):
    retval = ""
    for i in range(0, len(str)):
        if i and ((i % 10) == 0):
            retval += " "
        retval += str[i]
    return retval

def correctBR0(pair):
    print("Original rgap=%d, original cgap=%d" % (pair.rGap, pair.cGap))

    len = pair.rGap
    mismatches = 0

    # Find out how many bps of cgap and rgap are different
    for i in range(0, len):
        if pair.rGapSeq[i] != pair.cGapSeq[i]:
            mismatches += 1
    
    # If there are any mismatches, we can't correct the indel, but if cgap = rgap, we log some info.
    if not mismatches:
        percentage = 0
    else:
        percentage = (float(mismatches) / float(pair.rGap)) * 100.0 
    print "BR0:%s\t%d\t%d\t%.2f" % (pair.row1.contigID, pair.rGap, mismatches, percentage)
    if not mismatches:
        return True
    return False

"""
             atac              atac
             row1              row2
             (f1)              (f2)

    contig: 0-----10 g g g 14-------n
    ref:    0-----10 g g - 14-------n

    Extend the first match (f1) on both contig and reference.
"""
def justifyPlusRight(pair, f1, f2, smallerGapLen):
    # Add to the length of the contig
    f1[6] = str(int(f1[6]) + smallerGapLen)
    # Add to the length of the ref 
    f1[10] = str(int(f1[10]) + smallerGapLen)

    pair.correctedPair = AtacPair(AtacRow("\t".join(f1)), AtacRow("\t".join(f2)))
    pair.correctedPair.isCorrected = True
    return "right"

"""
             atac              atac
             row1              row2
             (f1)              (f2)

    contig: 0-----10 g g g 14-------n
    ref:    0-----10 - g g 14-------n

    Extend the second match (f2) back toward the left in the
    diagram above, on contig and reference
"""
def justifyPlusLeft(pair, f1, f2, smallerGapLen):
    # adust cmatchstart and cmatchlen
    f2[5] = str(int(f2[5]) - smallerGapLen)
    f2[6] = str(int(f2[6]) + smallerGapLen)
    # adjust rmatchstart and rmatchlen
    f2[9] = str(int(f2[9]) - smallerGapLen)
    f2[10] = str(int(f2[10]) + smallerGapLen)

    pair.correctedPair = AtacPair(AtacRow("\t".join(f1)), AtacRow("\t".join(f2)))
    pair.correctedPair.isCorrected = True
    return "left"

"""
             atac              atac
             row1              row2
             (f1)              (f2)

    contig: 0-----10 g g g 14-------n
    ref:    n-----14 - g g 10-------0

    Extend the second match (f2) back toward the left 
"""
def justifyMinusRight(pair, f1, f2, smallerGapLen):
    # adjust cmatch start and len
    f2[5] = str(int(f2[5]) - smallerGapLen)
    f2[6] = str(int(f2[6]) + smallerGapLen)
    # adjust rmatchLen (e.g. instead of being 11 log, it's now 13 long, in the diagram above).
    f2[10] = str(int(f2[10]) + smallerGapLen)

    pair.correctedPair = AtacPair(AtacRow("\t".join(f1)), AtacRow("\t".join(f2)))
    pair.correctedPair.isCorrected = True
    return "right"

"""
             atac              atac
             row1              row2
             (f1)              (f2)

    contig: 0-----10 g g g 14-------n
    ref:    n-----14 g g - 10-------0

    Extend the first match (f1) on both contig and reference.
"""
def justifyMinusLeft(pair, f1, f2, smallerGapLen):
    # Add to the length of the contig, on the first match row
    f1[6] = str(int(f1[6]) + smallerGapLen)
    # extend the rmatch 
    f1[10] = str(int(f1[10]) + smallerGapLen)
    f1[9] = str(int(f1[9]) - smallerGapLen)

    pair.correctedPair = AtacPair(AtacRow("\t".join(f1)), AtacRow("\t".join(f2)))
    pair.correctedPair.isCorrected = True
    return "left"

"""
Right justification means push the hyphen to the right, sequence to the left.
In other words, hyphens are pushed toward the higher number positions.
When the reference direction is "-", we display the reference sequence from n - 0,
but apparently Sanger indels and other programs would display the reference from 0 - n,
so they right justify by putting the hyphen toward the higher number.  To be consistent
with them we need to do the same, even though we show the higher numbered ref positions
on the left.

"""
def findBestAlignment(pair):

    global preferredJustification
    smallerGapLen = min(pair.rGap, pair.cGap)
    prefix_mismatches = suffix_mismatches = 0

    for i in range(0, smallerGapLen):
        if pair.rGapSeq[i] != pair.cGapSeq[i]:
            prefix_mismatches += 1
    for i in range(1, smallerGapLen + 1):
        if pair.rGapSeq[-i] != pair.cGapSeq[-i]:
            suffix_mismatches += 1

    print("Original rgap=%d, original cgap=%d, prefix_mismatches = %d, suffix_mismatches = %d" % (pair.rGap, 
        pair.cGap, prefix_mismatches, suffix_mismatches))

    f1 = list(pair.row1.fields)
    f2 = list(pair.row2.fields)

    justify = preferredJustification 
    if (pair.row1.rDir == 1):
        if (preferredJustification == "right"):
            if not prefix_mismatches:
                print "case 1"
                justify = justifyPlusRight(pair, f1, f2, smallerGapLen) 
            elif not suffix_mismatches:
                print "case 2"
                justify = justifyPlusLeft(pair, f1, f2, smallerGapLen) 
        else:
            if not suffix_mismatches:
                print "case 3"
                justify = justifyPlusLeft(pair, f1, f2, smallerGapLen) 
            elif not prefix_mismatches:
                print "case 4"
                justify = justifyPlusRight(pair, f1, f2, smallerGapLen) 
    else: 
        if (preferredJustification == "right"):
            if not suffix_mismatches:
                print "case 5"
                justify = justifyMinusRight(pair, f1, f2, smallerGapLen) 
            elif not prefix_mismatches:
                print "case 6"
                justify = justifyMinusLeft(pair, f1, f2, smallerGapLen) 
        else:
            if not prefix_mismatches:
                print "case 7"
                justify = justifyMinusLeft(pair, f1, f2, smallerGapLen) 
            elif not suffix_mismatches:
                print "case 8"
                justify = justifyMinusRight(pair, f1, f2, smallerGapLen) 

    if pair.cGap > pair.rGap:
        type = "BRI"
        cStr = "%s%s%s" % (pair.row1.cMatchSeq, pair.cGapSeq.lower(), pair.row2.cMatchSeq)
        if (justify == "right" and pair.row1.rDir == 1) or (justify == "left" and pair.row1.rDir == -1): 
            rStr = "%s%s%s%s" % (
                pair.row1.rMatchSeq, pair.rGapSeq.lower(), "-" * (pair.cGap - pair.rGap), pair.row2.rMatchSeq)
        else:
            rStr = "%s%s%s%s" % (
                pair.row1.rMatchSeq, "-" * (pair.cGap - pair.rGap), pair.rGapSeq.lower(), pair.row2.rMatchSeq)
    else:
        type = "BRD"
        rStr = "%s%s%s" % (pair.row1.rMatchSeq, pair.rGapSeq.lower(), pair.row2.rMatchSeq)
        if (justify == "right" and pair.row1.rDir == 1) or (justify == "left" and pair.row1.rDir == -1): 
            cStr = "%s%s%s%s" % (
                pair.row1.cMatchSeq, pair.cGapSeq.lower(), "-" * (pair.rGap - pair.cGap), pair.row2.cMatchSeq)
        else:
            cStr = "%s%s%s%s" % (
                pair.row1.cMatchSeq, "-" * (pair.rGap - pair.cGap), pair.cGapSeq.lower(), pair.row2.cMatchSeq)

    oStr = "B\t%s\t%d\t%s\t%d\t%d\t%s" %  (
        pair.row1.refID, pair.rGapStart, pair.row1.rDir, pair.rGapStart, pair.rGapEnd + 1, pair.row1.rDir)


    gffStr = "%s\t%s\t%s\t%d\t%d\t%d:%d\t%s\t%s:%d-%d\t%s/%s" % (
        pair.row1.refID,
        ".",
        type,
        pair.rEndLowerMatch + 2,
        pair.rStartUpperMatch,
        pair.cGap, pair.rGap,
        ("+", "-")[pair.row1.rDir == -1],
        pair.row1.contigID, pair.cEndLowerMatch + 2, pair.cStartUpperMatch,
        (pair.cGapSeq, "-")[len(pair.cGapSeq) == 0], (pair.rGapSeq, "-")[len(pair.rGapSeq) == 0])

    return cStr, rStr, oStr, gffStr

    
def processPair(pair):
    if (not pair.cGap and not pair.rGap):
        raise DataException("No indel found, cGap=0 and rGap=0")

    if (pair.cGap == pair.rGap):
        cStr, rStr, oStr, gffStr  = handleBR0(pair)
        if (correctBR0(pair)):
            raise DataException("No indel found, cGap=0 and rGap=0")
    elif (pair.rGap != 0 and pair.cGap != 0):
        cStr, rStr, oStr, gffStr  = handleBRIndel(pair)
    elif (pair.cGap != 0):
        cStr, rStr, oStr, gffStr  = handleINS(pair)
    else:
        cStr, rStr, oStr, gffStr  = handleDEL(pair)
    if (pair.row1.rDir == 1):
        printAligned(cStr, rStr, oStr, pair.row1.rMatchStart, pair.row2.rMatchEnd)
    else:
        printAligned(cStr, rStr, oStr, pair.row1.rMatchEnd, pair.row2.rMatchStart)
    print
    return pair.correctedPair, oStr, gffStr

def handleBR0(pair):
    global bFlag
    if not bFlag: 
        raise DataException("cgap=%d and rgap=%d" % (pair.cGap, pair.rGap))

    rStr = "%s%s%s" % (pair.row1.rMatchSeq, pair.rGapSeq.lower(), pair.row2.rMatchSeq)
    cStr = "%s%s%s" % (pair.row1.cMatchSeq, pair.cGapSeq.lower(), pair.row2.cMatchSeq)

    oStr = "B\t%s\t%d\t%s\t%d\t%d\t%s" %  (
        pair.row1.refID, pair.rGapStart, pair.row1.rDir, pair.rGapStart, pair.rGapEnd + 1, pair.row1.rDir)

    type = "BR0"
    gffStr = "%s\t%s\t%s\t%d\t%d\t%d:%d\t%s\t%s:%d-%d\t%s/%s" % (
        pair.row1.refID,
        ".",
        type,
        pair.rEndLowerMatch + 2,
        pair.rStartUpperMatch,
        pair.cGap, pair.rGap,
        ("+", "-")[pair.row1.rDir == -1],
        pair.row1.contigID, pair.cEndLowerMatch + 2, pair.cStartUpperMatch,
        (pair.cGapSeq, "-")[len(pair.cGapSeq) == 0], (pair.rGapSeq, "-")[len(pair.rGapSeq) == 0])
    return cStr, rStr, oStr, gffStr

def handleBRIndel(pair):
    global bFlag
    if not bFlag: 
        raise DataException("cgap=%d and rgap=%d" % (pair.cGap, pair.rGap))
    return findBestAlignment(pair)


def handleINS(pair):
    cStr = "%s%s%s" % (pair.row1.cMatchSeq, pair.cGapSeq, pair.row2.cMatchSeq) 
    rStr = "%s%s%s" % (pair.row1.rMatchSeq, "-" * pair.cGap, pair.row2.rMatchSeq)
    oStr = "I\t%s\t%d\t%s" %  (pair.row1.refID, pair.rGapStart, pair.row1.rDir)

    type = ("INS", "INSC")[pair.isCorrected]
    gffStr = "%s\t%s\t%s\t%d\t%d\t%d:%d\t%s\t%s:%d-%d\t%s/%s" % (
        pair.row1.refID,
        ".",
        type,
        pair.rEndLowerMatch + 1,
        pair.rStartUpperMatch + 1,
        pair.cGap, pair.rGap,
        ("+", "-")[pair.row1.rDir == -1],
        pair.row1.contigID, pair.cEndLowerMatch + 2, pair.cStartUpperMatch,
        (pair.cGapSeq, "-")[len(pair.cGapSeq) == 0], (pair.rGapSeq, "-")[len(pair.rGapSeq) == 0])
    return cStr, rStr, oStr, gffStr

def handleDEL(pair):
    cStr = "%s%s%s" % (pair.row1.cMatchSeq, "-" * pair.rGap, pair.row2.cMatchSeq)
    rStr = "%s%s%s" % (pair.row1.rMatchSeq, pair.rGapSeq, pair.row2.rMatchSeq)
    oStr = "D\t%s\t%d\t%d\t%s" %  (pair.row1.refID, pair.rGapStart, pair.rGapEnd + 1,  pair.row1.rDir)

    type = ("DEL", "DELC")[pair.isCorrected]
    gffStr = "%s\t%s\t%s\t%d\t%d\t%d:%d\t%s\t%s:%d-%d\t%s/%s" % (
        pair.row1.refID,
        ".",
        type,
        pair.rEndLowerMatch + 2,
        pair.rStartUpperMatch,
        pair.cGap, pair.rGap,
        ("+", "-")[pair.row1.rDir == -1],
        pair.row1.contigID, pair.cEndLowerMatch + 1, pair.cStartUpperMatch + 1,
        (pair.cGapSeq, "-")[len(pair.cGapSeq) == 0], (pair.rGapSeq, "-")[len(pair.rGapSeq) == 0])
    return cStr, rStr, oStr, gffStr


def leftJustifyIndel(pair):
    if (pair.rGap != 0 and pair.cGap != 0):
        print "Not left justifying BR rows"
        return None
    shifted = False
    corrected = pair.isCorrected

    f1 = list(pair.row1.fields)
    f2 = list(pair.row2.fields)


    if (pair.row1.rDir == 1):
        if (pair.cGap != 0):
            bp = pair.cGapSeq[-1]
        else:
            bp = pair.rGapSeq[-1]
        # Shift indel left, making first match shorter and second match longer
        while bp  == pair.row1.rMatchSeq[-1]:
            if pair.row1.cMatchLen == 1 or pair.row1.rMatchLen == 1:
                print "leftJustifyIndel at end of atac match"
                break
            f1[6] = str(int(f1[6]) - 1)
            f1[10] = str(int(f1[10]) - 1)

            f2[5] = str(int(f2[5]) - 1)
            f2[9] = str(int(f2[9]) - 1)
            f2[6] = str(int(f2[6]) + 1)
            f2[10] = str(int(f2[10]) + 1)

            shifted = True
            pair = AtacPair(AtacRow("\t".join(f1)), AtacRow("\t".join(f2)))
            if (pair.cGap != 0):
                bp = pair.cGapSeq[-1]
            else:
                bp = pair.rGapSeq[-1]
    else:
        # Shift indel toward lower number bp on ref to be consistent with "+" match case. 
        # When we show the alignment with reference running from n to 0, this means we're 
        # actually shifting the indel toward the right.  Making the first match longer and
        # and the 2nd shorter.
        if (pair.cGap != 0):
            bp = pair.cGapSeq[0]
        else:
            bp = pair.rGapSeq[0]
        while bp  == pair.row2.rMatchSeq[0]:
            print "bp=%s, end of ref match=%s" % (bp, pair.row2.rMatchSeq)
            if pair.row2.cMatchLen == 1 or pair.row2.rMatchLen == 1:
                print "leftJustifyIndel at end of atac match"
                break
            f1[6] = str(int(f1[6]) + 1)
            f1[9] = str(int(f1[9]) - 1)
            f1[10] = str(int(f1[10]) + 1)

            f2[5] = str(int(f2[5]) + 1)
            f2[6] = str(int(f2[6]) - 1)
            f2[10] = str(int(f2[10]) - 1)

            shifted = True
            pair = AtacPair(AtacRow("\t".join(f1)), AtacRow("\t".join(f2)))
            if (pair.cGap != 0):
                bp = pair.cGapSeq[0]
            else:
                bp = pair.rGapSeq[0]
    if shifted:
        pair.isCorrected = corrected
        return pair
    return None

# For negative matches, return modified gffline, where inserted/deleted sequence is reverse complemented,
# For positive matches, just return the gffLine as is.
def reverseRefGff(gffLine):
    fields = gffLine.split()
    if (fields[6] == "-"):
        lastFields = fields[8].split("/")
        if not lastFields[0].startswith("-"):
            lastFields[0] = fastaExtract.reverse_complement(lastFields[0])
        if not lastFields[1].startswith("-"):
            lastFields[1] = fastaExtract.reverse_complement(lastFields[1])
        fields[8] = "/".join(lastFields)
        return "\t".join(fields)
    return gffLine

contigFile = refFile = None
bFlag = True
preferredJustification = "right"

def main(argv=None):
    if argv is None:
        argv = sys.argv

    global contigFile, refFile, atacFile, bFlag, preferredJustification
    contigFilename = refFilename = indelFilename = refDirectory = atacFilename = outFilename = None 

    options, remainder = getopt.getopt(argv[1:], "br:c:i:a:o:j:")
    for opt, arg in options:
        if opt in ("-r"):
            refFilename  = arg  
        elif opt in ("-c"):
            contigFilename = arg
        elif opt in ("-i"):
            indelFilename = arg
        elif opt in ("-a"):
            atacFilename = arg
        elif opt in ("-o"):
            outFilename = arg
        elif opt in ("-b"):
            bFlag = False
        elif opt in ("-j"):
            preferredJustification = arg

    print time.asctime(time.localtime(time.time()))
    print "preferred justification of BRI/BRD cases is %s." % preferredJustification
    sys.stdout.flush()

    atacFile = open(atacFilename, "r")
    atacHeader = readAtacHeader(atacFile)
    # fd.close()

    print time.asctime(time.localtime(time.time()))
    print "Indexing the reference, this will take a few minutes ..."
    sys.stdout.flush()
    fastaExtract.FastaExtract.debug = True
    refFile = fastaExtract.FastaExtract(refFilename)

    print time.asctime(time.localtime(time.time()))
    print "Reading contigs"
    sys.stdout.flush()
    fastaExtract.FastaExtract.debug = False
    contigFile = fastaExtract.FastaExtract(contigFilename, fixedLineLength = False)

    print time.asctime(time.localtime(time.time()))
    print "Processing indel candidates."
    sys.stdout.flush()

    if not outFilename:
        outFilename = "%s.vs.%s.atac.variants" % (atacHeader["assemblyId1"], atacHeader["assemblyId2"])
    outFile = open(outFilename, "w")

    gffFilename = outFilename + ".gff"
    gffFile = open(gffFilename, "w")
    gffFile.write("#contig/chr_assembly2\t.\tvariant_type\tSTART\tEND\tsize(assembly1:assembly2)\t" + 
        "strand\tassembly1_coordinates\tsequence(assembly1:assembly2)\n")
    gffRFilename = outFilename + ".r.gff"
    gffRFile = open(gffRFilename, "w")
    gffRFile.write("#contig/chr_assembly2\t.\tvariant_type\tSTART\tEND\tsize(assembly1:assembly2)\t" +
        "strand\tassembly1_coordinates\tsequence(assembly1:assembly2)\n")


    contigID = None

    # Get groups of rows with same contig IDs from atac output (they are consecutive in the atac.uids file).
    while True:
        atacRows, contigID = getFromAtac(atacFile)
        # Check for eof
        if not len(atacRows):
            break
        if len(atacRows) < 2:
            # print("\tSkipping %s, only found %d atac rows with this contigID" % (contigID, len(atacRows)))
            continue

        print
        print "PROCESSING CONTIG %s.  There are %d atac rows with this contig" % (contigID, len(atacRows))
        sys.stdout.flush()

        for i in range(0, len(atacRows) - 1):
            row1 = atacRows[i]
            row2 = atacRows[i + 1]
            try:
                print "Showing alignment of atac rows:\n\t%s\n\t%s" % (row1, row2)
                pair = AtacPair(row1, row2)
                correctedPair, oStr, gffStr  = processPair(pair)
                if correctedPair:
                    print "Corrected Alignment:"
                    if (correctedPair.rGap != 0 and correctedPair.cGap != 0):
                        raise DataException("Internal Error, still have BR after doing correction."
                            "Corrected rgap=%d, Corrected cgap=%d.\nCorrected row1=%s\nCorrected row2=%s\n" % (
                            correctedPair.rGap, correctedPair.cGap, correctedPair.row1, correctedPair.row2))
                    pair = correctedPair
                    correctedPair, oStr, gffStr = processPair(correctedPair)

                outFile.write(oStr + "\n")
                outFile.flush()

                # If we can left justify it, print the new alignment and gffstr.  Only justifies pure INS[C]/DEL[C] rows.
                justifiedPair = leftJustifyIndel(pair)
                if justifiedPair:
                    print "Left Justified Alignment:"
                    justifiedPair, oStr, justifiedGffStr = processPair(justifiedPair)
                    gffFile.write(justifiedGffStr + "\n")
                    gffRFile.write(reverseRefGff(justifiedGffStr) + "\n")
                else:
                    gffFile.write(gffStr + "\n")
                    gffRFile.write(reverseRefGff(gffStr) + "\n")

                gffFile.flush()
                gffRFile.flush()

            except DataException, err : 
                print "Skipping %s because: %s" % (contigID, err)
                print
                continue


if __name__ == "__main__":
    sys.exit(main())