#!/usr/bin/env python

import argparse
import logging
import os
import sys

from cctyper.controller import Controller
from cctyper.prodigal import Prodigal
from cctyper.hmmer import HMMER
from cctyper.castyping import Typer
from cctyper.minced import Minced
from cctyper.xgb import XGB
from cctyper.crisprcas import CRISPRCas
from cctyper.plot import Map

########## Command-line interface ##########
# The parser is built at module level; the workflow further down calls
# ap.parse_args() to obtain the run configuration.
ap = argparse.ArgumentParser()

# --- Positional (mandatory) arguments ---
ap.add_argument(
    'input',
    help='Input fasta file',
)
ap.add_argument(
    'output',
    help='Prefix for output directory',
)

# --- General options ---
ap.add_argument(
    '-t', '--threads',
    type=int,
    default=4,
    help='Number of parallel processes [%(default)s].',
)
ap.add_argument(
    '--prodigal',
    type=str,
    default='single',
    choices=['single', 'meta'],
    help='Which mode to run prodigal in [%(default)s].',
)
ap.add_argument(
    '--circular',
    action='store_true',
    help='Input should be treated as circular.',
)
ap.add_argument(
    '--skip_check',
    action='store_true',
    help='Skip check of input.',
)
ap.add_argument(
    '--keep_tmp',
    action='store_true',
    help='Keep temporary files (prodigal, hmmer, minced).',
)
ap.add_argument(
    '--log_lvl',
    type=str,
    default='INFO',
    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
    help='Logging level [%(default)s].',
)
ap.add_argument(
    '--redo_typing',
    action='store_true',
    help='Redo the typing. Skip prodigal and HMMER and load the hmmer.tab from the output dir.',
)
ap.add_argument(
    '--simplelog',
    action='store_true',
    help='No color or progress bar in log.',
)

# --- Database location ---
apd = ap.add_argument_group('data arguments')
apd.add_argument(
    '--db',
    type=str,
    default='',
    help='Path to database.',
)

# --- Cas operon thresholds ---
apt = ap.add_argument_group('cas threshold arguments')
apt.add_argument(
    '--dist',
    type=int,
    default=3,
    help='Max allowed distance between genes in operon [%(default)s].',
)
apt.add_argument(
    '--overall_eval',
    type=float,
    default=0.01,
    help='Overall E-value threshold [%(default)s].',
)
apt.add_argument(
    '--overall_cov_seq',
    type=float,
    default=0.3,
    help='Overall sequence coverage threshold [%(default)s].',
)
apt.add_argument(
    '--overall_cov_hmm',
    type=float,
    default=0.3,
    help='Overall HMM coverage threshold [%(default)s].',
)

# --- CRISPR array thresholds ---
apc = ap.add_argument_group('crispr threshold arguments')
apc.add_argument(
    '--ccd',
    type=int,
    default=10000,
    help='Distance (bp) threshold to connect Cas operons and CRISPR arrays [%(default)s].',
)
apc.add_argument(
    '--pred_prob',
    type=float,
    default=0.75,
    help='Prediction probability cut-off for assigning subtype to CRISPR repeats [%(default)s].',
)
apc.add_argument(
    '--kmer',
    type=int,
    default=4,
    help='kmer size. Has to match training kmer size! [%(default)s].',
)

# --- Plot appearance ---
app = ap.add_argument_group('plotting arguments')
app.add_argument(
    '--no_plot',
    action='store_true',
    help='Do not draw a map of CRISPR-Cas.',
)
app.add_argument(
    '--scale',
    type=int,
    default=10,
    help='Scaling of plot [%(default)s].',
)
app.add_argument(
    '--no_grid',
    action='store_true',
    help='Do not add grid to plot.',
)
app.add_argument(
    '--expand',
    type=int,
    default=0,
    help='Expand operons with un-annotated genes. The value determines by how many bp in each end to expand. 0 only fills gaps [%(default)s].',
)
app.add_argument(
    '--custom_hmm',
    type=str,
    default='',
    help='Path to custom HMM database to decorate plot. Warning: This overwrites plotting of low-quality matches to Cas HMMs',
)

########## Workflow ##########
# The controller is created from the parsed command line. Every later stage
# is constructed from the stage before it and then started through its own
# entry method.
master = Controller(ap.parse_args())

# (stage class, name of the method that runs the stage), in execution order.
pipeline = (
    (Prodigal, 'run_prod'),      # Prodigal
    (HMMER, 'main_hmm'),         # Hmmer
    (Typer, 'typing'),           # Operons
    (Minced, 'run_minced'),      # CRISPRs
    (XGB, 'xgb_run'),            # RepeatType
    (CRISPRCas, 'crisprcas'),    # CRISPR-Cas
    (Map, 'plot'),               # Plot
)

# Finished stages stay referenced in this list until the script ends, so no
# stage object is released earlier than it would be with one global per stage.
stages = [master]
for stage_class, entry_point in pipeline:
    current = stage_class(stages[-1])
    stages.append(current)
    getattr(current, entry_point)()

# Clean: final call on the controller once all stages have run.
master.clean()


