"""Load, transform, and write out input data to deidentified FHIR"""

import argparse
import datetime
import shutil
import sys

import cumulus_fhir_support as cfs

import cumulus_etl
from cumulus_etl import cli_utils, deid, errors, loaders
from cumulus_etl.etl import pipeline

###############################################################################
#
# External requirements (like the MS anonymizer tool)
#
###############################################################################


def check_mstool() -> None:
    """
    Ensures the MS anonymizer tool can be found in PATH.

    If it cannot, install instructions are printed to stderr and the process
    exits with the errors.MSTOOL_MISSING code.
    """
    # Happy path first: nothing to do when the executable resolves.
    if shutil.which(deid.MSTOOL_CMD):
        return

    install_hint = (
        f"No executable found for {deid.MSTOOL_CMD}.\n\n"
        "Please see https://github.com/microsoft/Tools-for-Health-Data-Anonymization\n"
        "and install it into your PATH."
    )
    print(install_hint, file=sys.stderr)
    raise SystemExit(errors.MSTOOL_MISSING)

###############################################################################
#
# Main
#
###############################################################################


def define_etl_parser(parser: argparse.ArgumentParser) -> None:
    """
    Registers every ETL command line option on the given parser.

    Options are added in this order: the common ETL args, --version, --philter,
    --allow-missing-resources, task selection, AWS, bulk export (plus --export-to),
    and finally the "external export identification" group.
    """
    parser.usage = "%(prog)s [OPTION]... INPUT OUTPUT PHI"

    # Options shared with other commands come first.
    pipeline.add_common_etl_args(parser)

    # Standalone flags specific to the ETL command.
    version_text = f"cumulus-etl {cumulus_etl.__version__}"
    parser.add_argument("--version", action="version", version=version_text)
    parser.add_argument(
        "--philter",
        action="store_true",
        help="run philter on all freeform text fields",
    )
    parser.add_argument(
        "--allow-missing-resources",
        action="store_true",
        help="run tasks even if their resources are not present",
    )

    cli_utils.add_task_selection(parser, etl_mode=True)
    cli_utils.add_aws(parser)

    # The bulk export helper hands back its argument group, so we can extend it.
    bulk_export_group = cli_utils.add_bulk_export(parser)
    bulk_export_group.add_argument(
        "--export-to",
        metavar="DIR",
        help="where to put exported files (default is to delete after use)",
    )

    # Manual overrides for the export metadata.
    group_help = (
        "name of the FHIR Group that was exported (default is to grab this from an "
        "export log file in the input folder, but you can also use this to assign a "
        "nickname as long as you consistently set the same nickname)"
    )
    timestamp_help = (
        "when the data was exported from the FHIR Group (default is to grab this from an "
        "export log file in the input folder)"
    )
    identification = parser.add_argument_group("external export identification")
    identification.add_argument("--export-group", metavar="NAME", help=group_help)
    identification.add_argument("--export-timestamp", metavar="TIMESTAMP", help=timestamp_help)


def handle_completion_args(
    args: argparse.Namespace, loader_results: loaders.LoaderResults
) -> tuple[str, datetime.datetime]:
    """
    Determines the export group name and export timestamp for the loaded data.

    Values from the CLI (--export-group / --export-timestamp) win over the values
    found by the loader. If either piece of information is still missing afterward,
    the problem is reported through errors.fatal() with errors.COMPLETION_ARG_MISSING.

    :param args: parsed CLI arguments (reads `export_group` and `export_timestamp`)
    :param loader_results: loader output (reads `group_name` and `export_datetime`)
    :returns: (group_name, datetime)
    """
    # Grab completion options from CLI or loader
    export_group_name = args.export_group or loader_results.group_name
    export_datetime = (
        # The CLI value is parsed as an ISO 8601 string (ValueError if malformed).
        datetime.datetime.fromisoformat(args.export_timestamp)
        if args.export_timestamp
        else loader_results.export_datetime
    )

    # Error out if we have missing args.
    # Only None counts as a missing group name (an empty string is accepted as a name).
    missing_group_name = export_group_name is None
    missing_datetime = not export_datetime
    if missing_group_name and missing_datetime:
        errors.fatal(
            "Missing Group name and timestamp export information for the input data.",
            errors.COMPLETION_ARG_MISSING,
            extra="This is likely because you don’t have an export log in your input folder.\n"
            "This log file (log.ndjson) is generated by some bulk export tools.\n"
            "Instead, please manually specify the Group name and timestamp of the export "
            "with the --export-group and --export-timestamp options.\n"
            "These options are necessary to track whether all the required data from "
            "a Group has been imported and is ready to be used.\n"
            "See https://docs.smarthealthit.org/cumulus/etl/bulk-exports.html for more "
            "information.\n",
        )
    # These next two errors can be briefer because the user clearly knows about the args.
    elif missing_datetime:
        # Name the option exactly as it is registered on the parser (--export-timestamp).
        errors.fatal("Missing --export-timestamp argument.", errors.COMPLETION_ARG_MISSING)
    elif missing_group_name:
        errors.fatal("Missing --export-group argument.", errors.COMPLETION_ARG_MISSING)

    return export_group_name, export_datetime


async def etl_main(args: argparse.Namespace) -> None:
    """
    Runs the ETL pipeline with already-parsed command line arguments.

    Verifies the MS anonymizer tool is available, gathers the keyword options passed
    along as `i2b2_args` and `ndjson_args`, and hands a scrubber-preparing callback to
    pipeline.run_pipeline().
    """
    check_mstool()

    expanded_resources = cli_utils.expand_inline_resources(args.inline_resource)
    expanded_mimetypes = cli_utils.expand_inline_mimetypes(args.inline_mimetype)

    i2b2_options = {"export_to": args.export_to}
    ndjson_options = {
        "export_to": args.export_to,
        "since": args.since,
        "until": args.until,
        "resume": args.resume,
        "inline": args.inline,
        "inline_resources": expanded_resources,
        "inline_mimetypes": expanded_mimetypes,
    }

    async def _build_scrubber(
        _client: cfs.FhirClient, results: loaders.LoaderResults
    ) -> tuple[deid.Scrubber, dict]:
        """Callback for the pipeline: returns the scrubber plus export metadata."""
        # Resolve the export metadata first, since it can abort on missing information.
        group_name, exported_at = handle_completion_args(args, results)

        # Swap the loaded data directory for the scrubbed output.
        results.directory = await deid.Scrubber.scrub_bulk_data(results.path)

        export_metadata = {
            "export_group_name": group_name,
            "export_datetime": exported_at,
        }
        return deid.Scrubber(args.dir_phi, use_philter=args.philter), export_metadata

    await pipeline.run_pipeline(
        args,
        prep_scrubber=_build_scrubber,
        i2b2_args=i2b2_options,
        ndjson_args=ndjson_options,
    )


async def run_etl(parser: argparse.ArgumentParser, argv: list[str]) -> None:
    """Adds the ETL options to `parser`, parses `argv` with it, and runs the ETL."""
    define_etl_parser(parser)
    await etl_main(parser.parse_args(argv))
