#!/usr/bin/env python
import argparse
from io import StringIO

from gimmemotifs import __version__
from gimmemotifs.preprocessing import coverage_table

def _build_parser():
    """Create the argument parser for the coverage_table command-line tool.

    Returns:
        argparse.ArgumentParser: parser exposing the peak file, the data
        files and all coverage/normalization/selection options.
    """
    parser = argparse.ArgumentParser(
        description="GimmeMotifs v{0} - coverage_table".format(__version__)
    )

    parser.add_argument(
        dest="peakfile",
        help="BED file containing peaks",
        metavar="PEAK_FILE",
    )
    # NOTE(review): nargs="*" also accepts zero data files; confirm whether
    # coverage_table copes with an empty list or whether "+" is intended.
    parser.add_argument(
        dest="datafiles",
        help="BAM, BED or bigWig file(s) with read data",
        metavar="DATA_FILE(S)",
        nargs="*",
    )
    parser.add_argument(
        "-w",
        "--window",
        dest="window",
        help="window size (default 200)",
        metavar="INT",
        type=int,
        default=200,
    )
    parser.add_argument(
        "-l",
        "--log-transform",
        dest="log_transform",
        help="apply a natural log transform",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "-n",
        "--normalization",
        metavar="STR",
        dest="normalization",
        help="apply 'quantile' or 'scale' normalization",
        default=None,
    )
    # The value is parsed as an integer, so advertise INT (not STR) in the
    # usage text. The header only reports a selection when the value is > 0.
    parser.add_argument(
        "-t",
        "--top",
        metavar="INT",
        dest="top",
        help="select this many top regions (see -T/--top-method)",
        default=-1,
        type=int,
    )
    parser.add_argument(
        "-T",
        "--top-method",
        metavar="STR",
        dest="topmethod",
        help="select top regions by 'var', 'std', 'mean' or 'random'",
        default="var",
    )
    # -D and -R are "keep" switches: passing them turns the corresponding
    # removal flag (True by default) off.
    parser.add_argument(
        "-D",
        dest="rmdup",
        help="keep duplicate reads (removed by default)",
        default=True,
        action="store_false",
    )
    parser.add_argument(
        "-R",
        dest="rmrepeats",
        help="keep reads with mapq 0 (removed by default)",
        action="store_false",
        default=True,
    )
    parser.add_argument(
        "--nthreads",
        dest="ncpus",
        metavar="INT",
        help="Number of threads (default 12)",
        type=int,
        default=12,
    )
    return parser


def _format_header(args):
    """Return the '#'-prefixed comment header describing the run settings.

    Args:
        args: parsed command-line namespace produced by the parser from
            ``_build_parser``.

    Returns:
        str: one newline-terminated comment line per reported setting.
    """
    yesno = {True: "yes", False: "no"}
    lines = [
        "# Table created by coverage_table (GimmeMotifs {})".format(__version__),
        "# Input file: {}".format(args.peakfile),
        "# Data files: {}".format(args.datafiles),
        "# Window: {}".format(args.window),
        "# Duplicates removed: {}".format(yesno[args.rmdup]),
        "# MAPQ 0 removed: {}".format(yesno[args.rmrepeats]),
        "# Log transformed: {}".format(yesno[args.log_transform]),
        "# Normalization: {}".format(args.normalization),
    ]
    # Region selection is only reported when a positive count was requested.
    if args.top > 0:
        lines.append(
            "# Top {} regions selected by {}".format(args.top, args.topmethod)
        )
    return "".join(line + "\n" for line in lines)


def main(argv=None):
    """Run the coverage_table command-line tool.

    Parses the arguments, calls ``coverage_table`` with them and prints the
    settings header followed by the tab-separated table to standard output.

    Args:
        argv: optional list of argument strings; when ``None`` argparse
            reads the arguments from ``sys.argv``.
    """
    args = _build_parser().parse_args(argv)

    # presumably returns a pandas DataFrame (it is written with ``to_csv``
    # below) -- verify against gimmemotifs.preprocessing.coverage_table.
    df = coverage_table(
        peakfile=args.peakfile,
        datafiles=args.datafiles,
        window=args.window,
        log_transform=args.log_transform,
        normalization=args.normalization,
        top=args.top,
        topmethod=args.topmethod,
        rmdup=args.rmdup,
        rmrepeats=args.rmrepeats,
        ncpus=args.ncpus,
    )

    # Assemble the complete output in memory and emit it in one go.
    output = StringIO()
    output.write(_format_header(args))
    df.to_csv(output, sep="\t", float_format="%0.5f")
    print(output.getvalue(), end="")


if __name__ == "__main__":
    main()
