#!/usr/bin/env python
import argparse
from io import StringIO

from gimmemotifs import __version__
from gimmemotifs.preprocessing import coverage_table

def build_parser(version: str) -> argparse.ArgumentParser:
    """Create the command-line parser for the coverage_table script.

    Parameters
    ----------
    version : str
        Version string shown in the program description.

    Returns
    -------
    argparse.ArgumentParser
        Parser exposing the peak file, data files, window, transformation,
        normalization, region selection, read filtering and thread options.
    """
    description = "GimmeMotifs v{0} - coverage_table".format(version)
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "-p",
        "--peaks",
        required=True,
        dest="peakfile",
        help="BED file containing peaks",
        metavar="FILE",
        default=None,
    )
    parser.add_argument(
        "-d",
        "--datafile",
        required=True,
        dest="datafiles",
        help="data files (BAM, BED or bigWig format)",
        metavar="FILE",
        # "+" rather than "*": a bare "-d" without any file is a usage error
        # and should be reported by argparse instead of passing an empty list.
        nargs="+",
    )
    parser.add_argument(
        "-w",
        "--window",
        dest="window",
        help="window size (default 200)",
        metavar="WINDOW",
        type=int,
        default=200,
    )
    parser.add_argument(
        "-l",
        "--logtransform",
        dest="log_transform",
        help="Log transform",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "-n",
        "--normalization",
        dest="normalization",
        help="Normalization: none, quantile or scale",
        default="none",
        metavar="METHOD",
    )
    parser.add_argument(
        "-t", "--top", dest="top", help="Select regions.", default=0, type=int
    )
    parser.add_argument(
        "-T",
        "--topmethod",
        dest="topmethod",
        help="Method to select regions (var, std, mean or random)",
        default="var",
    )
    # -D and -R are "keep" switches: the filters are on by default and
    # giving the flag turns the corresponding filter off.
    parser.add_argument(
        "-D",
        dest="rmdup",
        help="keep duplicate reads (removed by default)",
        default=True,
        action="store_false",
    )
    parser.add_argument(
        "-R",
        dest="rmrepeats",
        help="keep reads with mapq 0 (removed by default) ",
        action="store_false",
        default=True,
    )
    parser.add_argument(
        "--nthreads",
        dest="ncpus",
        help="Number of threads",
        metavar="INT",
        type=int,
        default=12,
    )
    return parser


def format_header(args: argparse.Namespace, version: str) -> str:
    """Return the '#'-prefixed comment header describing how the table was made.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed options as produced by the parser from ``build_parser``.
    version : str
        Version string recorded in the first header line.

    Returns
    -------
    str
        Newline-terminated header lines. The "Top ... regions" line is only
        included when ``args.top`` is greater than zero.
    """
    yesno = {True: "yes", False: "no"}

    lines = [
        "# Table created by coverage_table (GimmeMotifs {})\n".format(version),
        "# Input file: {}\n".format(args.peakfile),
        "# Data files: {}\n".format(args.datafiles),
        "# Window: {}\n".format(args.window),
        "# Duplicates removed: {}\n".format(yesno[args.rmdup]),
        "# MAPQ 0 removed: {}\n".format(yesno[args.rmrepeats]),
        "# Log transformed: {}\n".format(yesno[args.log_transform]),
        "# Normalization: {}\n".format(args.normalization),
    ]
    if args.top > 0:
        lines.append(
            "# Top {} regions selected by {}\n".format(args.top, args.topmethod)
        )
    return "".join(lines)


def main(argv=None) -> None:
    """Parse the command line, build the coverage table and print it.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; when ``None`` argparse reads ``sys.argv[1:]``.

    The header and the tab-separated table (floats formatted with five
    decimals) are written to standard output.
    """
    args = build_parser(__version__).parse_args(argv)

    df = coverage_table(
        args.peakfile,
        args.datafiles,
        args.window,
        log_transform=args.log_transform,
        normalization=args.normalization,
        top=args.top,
        topmethod=args.topmethod,
        rmdup=args.rmdup,
        rmrepeats=args.rmrepeats,
        ncpus=args.ncpus,
    )

    # Assemble header and table in one buffer so they are emitted together.
    output = StringIO()
    output.write(format_header(args, __version__))
    df.to_csv(output, sep="\t", float_format="%0.5f")
    print(output.getvalue(), end="")


# Everything that needs parsed arguments runs only when executed as a script;
# importing this file must not touch the command line or produce output.
if __name__ == "__main__":
    main()
