#!/usr/bin/env python
# Time-stamp: <aln2cdt.py 2013-02-08 15:44:56 Mark Voorhies>
# Script for mapping a pair of .aln and .phb files produced by CLUSTAL[WX]
#  (in CLUSTAL and New Hampshire formats respectively) to a pair of
#  .cdt and .gtr files suitable for viewing in JavaTreeView.
# This script is intended to be a BioPerl independent replacement for
#  Alok's aln2cdt.pl

from ClustalTools import *
from FastaFile import FastaFile

class NameStretcher:
    """Callable that expands a truncated name to its unique full name.

    An instance is built from a collection of full-length names.  Calling
    it with a (possibly clipped) name returns the single full name that
    starts with that string.
    """

    def __init__(self, longnames):
        """Remember the candidate full names.

        longnames -- iterable of full-length name strings; it is stored
                     as given and re-scanned on every call.
        """
        self.longnames = longnames

    def __call__(self, shortname):
        """Return the one name in self.longnames that starts with shortname.

        Raises KeyError if no name matches, or if more than one does
        (the prefix is then ambiguous).
        """
        names = [name for name in self.longnames
                 if name.startswith(shortname)]
        if len(names) == 1:
            return names[0]
        # Call-style raise: valid on both Python 2 and Python 3, unlike the
        # comma form ("raise KeyError, msg"), which Python 3 rejects.
        if not names:
            raise KeyError("No match for %s" % shortname)
        raise KeyError("Multiple matches for %s" % shortname)

if __name__ == "__main__":
    # Command-line driver: read prefix.phb (tree) and prefix.aln (alignment),
    # write prefix.gtr and prefix.cdt.  An optional FASTA file supplies the
    # full sequence names used to expand names in the alignment.
    import sys, re

    if len(sys.argv) < 2:
        # The tree file named here is prefix.phb, matching what is opened below.
        sys.stderr.write("Usage: %s prefix [fasta]\n" % sys.argv[0])
        sys.stderr.write("  To map prefix.aln and prefix.phb\n")
        sys.stderr.write("      to prefix.cdt and prefix.gtr\n")
        sys.stderr.write("  If fasta is provided, it is used to\n")
        sys.stderr.write("      expand truncated aln names\n")
        sys.exit(1)

    # TODO: the full names in the phb should be sufficient to fix
    #       CLUSTAL clipping, without needing to resort to the FASTA.

    prefix = sys.argv[1]

    # Every file is opened in a with-block so it is closed (and, for the
    # outputs, flushed) deterministically instead of whenever the anonymous
    # file object happens to be garbage collected.
    # NOTE(review): assumes fromPhb/fromClustal finish reading their file
    # before returning -- confirm against ClustalTools.
    with open(prefix + ".phb") as phb_file:
        tree = NewHampshireGraph.fromPhb(phb_file)
    tree.setRoot()
    tree.makeBtree()
    with open(prefix + ".gtr", "w") as gtr_file:
        tree.writeGtr(gtr_file)

    with open(prefix + ".aln") as aln_file:
        alignment = MultipleAlignment.fromClustal(aln_file)
    if len(sys.argv) > 2:
        # The FASTA handle stays open while fasta.seqs is consulted, in case
        # FastaFile reads lazily.
        with open(sys.argv[2]) as fasta_file:
            fasta = FastaFile(fasta_file)
            alignment.remap_names(NameStretcher(fasta.seqs.keys()))
    with open(prefix + ".cdt", "w") as cdt_file:
        tree.writeCdt(cdt_file, alignment)

