#!/usr/bin/env python

# -*- coding: utf-8 -*-

# Created on Sun Dec 23 10:53:21 2018

# Author: XiaoTao Wang

from __future__ import division, print_function
import os.path as op
import glob, sys, argparse, runHiC

# Version string of the installed runHiC package; shown by the -v/--version option.
currentVersion = runHiC.__version__

def getargs():
    """Build the command-line parser and parse ``sys.argv[1:]``.

    When the program is started without any argument, ``-h`` is substituted
    so that the help text is printed instead of an argparse error.

    Returns
    -------
    args : argparse.Namespace
        The parsed options (``output``, ``chromsizes_path``, ``fasta_path``,
        ``enzyme``).
    commands : list of str
        The argument list that was actually parsed, i.e. ``sys.argv[1:]`` or
        ``['-h']`` when that list was empty.
    """
    ## Construct an ArgumentParser object for command-line arguments
    parser = argparse.ArgumentParser(description = '''Generate fragment-delimited genomic bins.
                                     Output a genome segmentation of restriction fragments as a
                                     BED file.''',
                                     formatter_class = argparse.ArgumentDefaultsHelpFormatter)

    # Version
    parser.add_argument('-v', '--version', action = 'version',
                        version = ' '.join(['%(prog)s', currentVersion]),
                        help = 'Print version number and exit')

    # With no -O/--output the option stays None.
    parser.add_argument('-O', '--output', help='Output file name.')
    # Both paths are handed to os.path.expanduser() by the caller, which
    # cannot take None, so let argparse report a missing path as a usage
    # error instead of letting the run die later with a TypeError.
    parser.add_argument('-C', '--chromsizes-path', required=True,
                        help='''UCSC-like chromsizes file, with
                        chromosomes in desired order.''')
    parser.add_argument('--fasta-path', required=True,
                        help='''Genome assembly FASTA file or folder containing
                        FASTA files (uncompressed).''')
    # NOTE(review): --enzyme is probably mandatory in practice as well; it is
    # left optional because how the digestion helper treats None is not
    # visible from this file -- confirm before tightening.
    parser.add_argument('--enzyme', help='''Name of restriction enzyme. Set it to "Arima" if you
                        want to get in-silico fragments digested by Arima-HiC enzyme cocktail.''')

    ## Parse the command-line arguments
    commands = sys.argv[1:]
    if not commands:
        # A bare invocation shows the help text.
        commands.append('-h')
    args = parser.parse_args(commands)

    return args, commands

def _write_fragments(frags, out):
    """Write the fragment table to ``out`` as tab-separated text.

    The table is written without a header row and without the index. When
    ``out`` is ``None`` the records go to standard output. A broken pipe
    (the process reading the output went away) ends the write silently; any
    other error propagates to the caller. The handle, ``sys.stdout``
    included, is closed on every path.

    ``frags`` only needs a pandas-style ``to_csv`` method.
    """
    import errno

    handle = sys.stdout if out is None else open(out, 'wt')
    try:
        frags.to_csv(handle, sep='\t', index=False, header=False)
        # Closing flushes buffered data, so it belongs inside the try block:
        # a broken pipe that only shows up on the final flush is then
        # handled exactly like one raised in the middle of the write.
        handle.close()
    except (IOError, OSError) as e:
        if e.errno != errno.EPIPE:
            raise
    finally:
        # Release the handle whatever happened above. Closing an already
        # closed file is a no-op, and a secondary close error must not mask
        # the exception that is already propagating.
        try:
            handle.close()
        except (IOError, OSError):
            pass


def digest():
    """Command-line entry point: digest a genome into restriction fragments.

    Parses the command line, reads the chromosome sizes, loads the FASTA
    records for those chromosomes (from a single file, or from every
    ``*.fa`` / ``*.fasta`` file of a folder), digests them with the requested
    enzyme and writes the resulting table to the output file, or to standard
    output when no output file was given.
    """
    # Parse Arguments
    args, commands = getargs()

    # Nothing to compute for a help/version request, so return before the
    # runHiC.utilities import below is paid for.
    if commands[0] in ['-h', '-v', '--help', '--version']:
        return

    import runHiC.utilities as util

    chromsizes_path = op.abspath(op.expanduser(args.chromsizes_path))
    fasta = op.abspath(op.expanduser(args.fasta_path))

    chromsizes = util.read_chromsizes(chromsizes_path, all_names=True)
    chroms = list(chromsizes.keys())

    # Load sequences
    if op.isdir(fasta):
        # A folder: collect both common FASTA extensions, *.fa first.
        filepaths = glob.glob(op.join(fasta, '*.fa'))
        filepaths.extend(glob.glob(op.join(fasta, '*.fasta')))
    else:
        filepaths = [fasta]
    fasta_records = util.load_fasta(chroms, *filepaths)

    # Digest sequences
    frags = util.digest(fasta_records, args.enzyme)

    # Write output
    _write_fragments(frags, args.output)

# Run the digestion only when this file is executed as a script.
if __name__ == '__main__':
    digest()