#!/usr/bin/env python
"""
This script checks if any IAM or s3 bucket policy changes made in the terraform config

Usage:
    plan_check.py [options] <path>

Arguments:
    path    The path of the config directory to check

Options:
    -h, --help                              Show this message and exit.[default: False]
    -s, --skip-iam                          Skip iam changes for this run.[default: False]
    --modified-only                         Only run plan check on directories that were changed in git.
                                            Requires that this command be run from a git repository directory
    --print-diff                            Print the diff for any directories with changed configurations.
                                            [default: False]
    --with-colors                           Print the diff with colors [default: False]
    -j NUM_JOBS, --parallel-jobs=NUM_JOBS   The number of Terraform operations to run in parallel.
                                            [default: 4].
    --output-dir=<dir>                      Directory to write plan output to.
     --version                              Show the version.
"""

import os
import re
import concurrent.futures
from enum import Enum
from pathlib import Path
from typing import Optional, Tuple, List, Dict

from docopt import docopt

from terrawrap.utils.version import version_check
from terrawrap.version import __version__
from terrawrap.utils.cli import execute_command
from terrawrap.utils.config import parse_wrapper_configs, find_wrapper_config_files, resolve_envvars
from terrawrap.utils.git_utils import get_git_changed_files, get_git_root
from terrawrap.utils.module import get_module_usage_graph
from terrawrap.utils.path import get_file_graph
from terrawrap.utils.tf_variables import get_auto_var_usage_graph

from networkx import compose_all, descendants

# Marker text in the plan output; check_for_iam_changes only inspects lines from the first line
# containing this text onwards.
TERRAFORM_PERFORM_ACTIONS = "Terraform will perform the following actions"
# Matches a plan line that has a '-', '~' or '+' followed by a space and then mentions an
# aws_iam_*, aws_s3_bucket_policy or aws_organizations_policy resource.
IAM_POLICY_RE = re.compile('[-~+] .*(aws_iam_|aws_s3_bucket_policy|aws_organizations_policy).*')
# Absolute path of the directory containing this script; used to locate the sibling 'tf' wrapper.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
# Working directory captured at import time; main() resolves a relative <path> argument against it.
CURRENT_DIRECTORY = os.getcwd()


class WrapperExitCode(Enum):
    """Outcome of running the plan check against a single Terraform directory."""
    # Nothing to report for the directory.
    SUCCESS = 0
    # The plan output contains lines matching IAM_POLICY_RE.
    IAM_CHANGES = 2
    # One of the wrapped Terraform commands failed for the directory.
    TERRAFORM_FAILURE = 3


class PlanExitCode(Enum):
    """Exit codes of the wrapped 'plan -detailed-exitcode' command, as interpreted by this script."""
    # The plan succeeded and there is no diff.
    SUCCESS_NO_DIFF = 0
    # The command failed.
    FAILURE = 1
    # The plan succeeded and there is a diff.
    SUCCESS_WITH_DIFF = 2


def get_subdirectories(root_dir: str) -> Tuple[List[str], List[str]]:
    """
    Make a list of subdirectories (with '.tf' files) of the path provided as the argument.
    Directories whose path contains '.terraform' and directories for which plan check is disabled
    are left out.
    :param root_dir: The directory to run the script on.
    :return: lists of regular and symlinked directories
    """
    regular_directories = []
    symlinked_directories = []

    for current_dir, dirs, files in os.walk(root_dir, followlinks=True):
        # Prune '.terraform' directories in place so os.walk never descends into them. Every path
        # below such a directory would be rejected by the substring check below anyway, so this
        # only saves the cost of walking Terraform's (potentially large) working directories.
        dirs[:] = [name for name in dirs if name != ".terraform"]

        if ".terraform" in current_dir:
            continue

        if not any(entry.endswith(".tf") for entry in files):
            continue

        if not is_plan_check_enabled(current_dir):
            continue

        # Only the walked path itself is tested: a directory reached *through* a symlinked parent
        # is not a link and therefore lands in the regular list.
        if os.path.islink(current_dir):
            symlinked_directories.append(current_dir)
        else:
            regular_directories.append(current_dir)

    return regular_directories, symlinked_directories


def convert_plan_to_json(
    plan_binary_file: Path,
    source_directory: Path,
    additional_envvars: Dict[str, str],
) -> Path:
    """
    Converts binary terraform plan to json. Saves it as 'tfplan.json' next to the binary plan.
    :param plan_binary_file: File with a plan saved in a binary format
    :param source_directory: Directory with source terraform files
    :param additional_envvars: A dictionary representing additional environment variables to supply
    :return: Path object pointing to a json plan file
    :raises RuntimeError: If the wrapped 'show' command exits with a non-zero code
    """
    wrapper_py = os.path.join(SCRIPT_DIR, 'tf')
    command_env = os.environ.copy()
    command_env.update(additional_envvars)

    # str() keeps the command a plain list of strings whether the caller passed a Path or a str.
    exit_code, stdout = execute_command(
        [wrapper_py, str(source_directory), "show", "-json", str(plan_binary_file)],
        print_output=False,
        env=command_env,
    )

    show_stdout = "\n".join(stdout[1:])  # the first line of `tf` output is a command itself

    # Any non-zero exit code means the command did not succeed; checking only for 1 would let
    # other failures fall through and write error output into the json file.
    if exit_code:
        raise RuntimeError(f"'terraform show' failed for {plan_binary_file}:\n{show_stdout}")

    plan_json_file = plan_binary_file.parent / "tfplan.json"
    # JSON is written as UTF-8 explicitly rather than relying on the platform's locale encoding.
    with plan_json_file.open("w", encoding="utf-8") as plan_json_stream:
        plan_json_stream.write(show_stdout)

    return plan_json_file


def init_and_plan_directory(
        directory: str,
        skip_iam: bool,
        print_diff: bool,
        with_colors: bool,
        additional_envvars: Dict[str, str],
        output_directory: Optional[str] = None
) -> WrapperExitCode:
    """
    Run 'init' and 'plan' against the passed in directory
    :param directory: A directory with the terraform config
    :param skip_iam: A boolean to skip iam changes check
    :param print_diff: A boolean to print diffs for changes
    :param with_colors: A boolean to print diffs using ansi colors
    :param additional_envvars: A dictionary representing additional environment variables to supply
    :param output_directory: Optional directory to save plans to. When given, the binary plan is
        written to a subdirectory mirroring the directory's path relative to the git root, and a
        json version of the plan is saved next to it.
    :return: One of the WrapperExitCode enums
    """
    arguments = ['-input=false']

    if not with_colors:
        arguments.append('-no-color')

    wrapper_py = os.path.join(SCRIPT_DIR, 'tf')

    command_env = os.environ.copy()
    command_env.update(additional_envvars)

    # We're using --no-resolve-envvars here because the environment variables are expected to be
    # already resolved by the caller and supplied via additional_envvars; they are passed
    # explicitly in the execute_command call below.
    init_exit_code, init_stdout = execute_command(
        [wrapper_py, "--no-resolve-envvars", directory, 'init', '-upgrade'] + arguments,
        print_output=False,
        env=command_env
    )

    if init_exit_code:
        print("'terraform init' failed for {}:\n{}".format(directory, "".join(init_stdout)))
        return WrapperExitCode.TERRAFORM_FAILURE

    # Only pass the PR checker var file to 'plan' when the directory actually provides one.
    if os.path.exists(os.path.join(directory, 'pr_checker.tfvars')):
        arguments.append('-var-file=pr_checker.tfvars')

    # If output_directory is provided, creates a subdirectory to save a plan.
    # Add -out parameter to a terraform plan command.
    plan_binary_file = None
    if output_directory:
        root_directory = get_git_root(directory)
        rel_directory = Path(directory).relative_to(root_directory)
        plan_directory = Path(output_directory) / rel_directory
        plan_directory.mkdir(parents=True, exist_ok=True)
        plan_binary_file = plan_directory / "tfplan.binary"
        arguments.append(f"-out={plan_binary_file}")

    # Same reasoning as for 'init' above: envvars are supplied explicitly through command_env.
    plan_exit_code, plan_stdout = execute_command(
        [wrapper_py, "--no-resolve-envvars", directory, 'plan', '-detailed-exitcode', '-lock=false'] + arguments,
        print_output=False,
        env=command_env
    )

    # With -detailed-exitcode only 0 (no diff) and 2 (diff present) mean the plan succeeded.
    # Anything else - not just 1 - is treated as a failure so that unexpected exit codes
    # are never mistaken for a clean plan.
    successful_plan_codes = (
        PlanExitCode.SUCCESS_NO_DIFF.value,
        PlanExitCode.SUCCESS_WITH_DIFF.value,
    )
    if plan_exit_code not in successful_plan_codes:
        print(
            "'terraform plan' failed for {}:\n{}".format(
                directory,
                "".join(plan_stdout)
            )
        )
        return WrapperExitCode.TERRAFORM_FAILURE

    if print_diff and plan_exit_code == PlanExitCode.SUCCESS_WITH_DIFF.value:
        print(
            "'terraform plan' generated following changes for {}:\n{}".format(
                directory,
                "".join(plan_stdout)
            )
        )

    if plan_binary_file is not None:
        try:
            convert_plan_to_json(
                plan_binary_file,
                directory,
                additional_envvars,
            )
        except RuntimeError as exception:
            print("\n".join(exception.args))
            return WrapperExitCode.TERRAFORM_FAILURE

    # Check output for IAM changes
    if skip_iam:
        return WrapperExitCode.SUCCESS
    return check_for_iam_changes(plan_stdout, directory)


# Matches ANSI CSI escape sequences such as the colour codes present in coloured plan output.
_ANSI_ESCAPE_RE = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')


def check_for_iam_changes(stdout: List[str], directory: str) -> WrapperExitCode:
    """
    Look for the IAM changes in stdout
    :param stdout: Stdout of the 'plan' command for IAM changes lookup
    :param directory: A directory with the config to be printed along with the notification about IAM changes
    :return: WrapperExitCode.IAM_CHANGES if any line after the "perform the following actions" marker
    matches IAM_POLICY_RE, otherwise WrapperExitCode.SUCCESS
    """
    iam_resources = []
    reached_actual_changes = False

    for raw_line in stdout:
        # Strip colour codes first: in coloured output an escape sequence can sit between the
        # change symbol and the following space, which would stop IAM_POLICY_RE from matching
        # and let IAM changes go undetected.
        line = _ANSI_ESCAPE_RE.sub('', raw_line)

        # TODO: Refactor this to use JSON at some point
        # https://www.terraform.io/docs/internals/json-format.html
        if not reached_actual_changes and TERRAFORM_PERFORM_ACTIONS in line:
            reached_actual_changes = True

        # Lines before the marker are ignored; the marker line itself is inspected too.
        if reached_actual_changes:
            match = IAM_POLICY_RE.search(line)
            if match:
                iam_resources.append(match.group(0))

    if iam_resources:
        print("Detected IAM resources modified in {0}:\n{1}\n".format(directory, "\n".join(iam_resources)))
        return WrapperExitCode.IAM_CHANGES

    return WrapperExitCode.SUCCESS


def execute_init_and_plan(
        regular_directories: List[str],
        symlinked_directories: List[str],
        skip_iam: bool,
        print_diff: bool,
        with_colors: bool,
        num_parallel: int,
        output_directory: Optional[str] = None
) -> Tuple[List[str], List[str]]:
    """
    Run init and plan for every directory and collect the ones that need attention
    :param regular_directories: Non symlinked directories, handled by a pool of worker threads
    :param symlinked_directories: Symlinked directories, handled sequentially after the pool is done
    :param skip_iam: A boolean to skip iam changes check
    :param print_diff: A boolean to control printing diffs for changes
    :param with_colors: A boolean to control printing diffs with ansi colors
    :param num_parallel: Number of worker threads to use for the regular directories
    :param output_directory: Optional directory forwarded to init_and_plan_directory for saving plans
    :return: Tuple of two lists, the first list is of directories with terraform failures, the second is of
    directories with IAM failures.
    """
    # Resolve the environment variables of all directories up front, in one pass, so that we get the
    # benefit of Parameter Store calls being cached.
    envvars_by_directory = {}
    for directory in regular_directories + symlinked_directories:
        wrapper_config = parse_wrapper_configs(find_wrapper_config_files(directory))
        envvars_by_directory[directory] = resolve_envvars(wrapper_config.envvars)

    terraform_failures = []
    iam_failures = []

    def record_outcome(checked_directory, outcome):
        # File the directory under the list matching its exit code; successes are not tracked.
        if outcome == WrapperExitCode.IAM_CHANGES:
            iam_failures.append(checked_directory)
        if outcome == WrapperExitCode.TERRAFORM_FAILURE:
            terraform_failures.append(checked_directory)

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_parallel) as executor:
        # Map every submitted future back to the directory it is working on.
        pending = {
            executor.submit(
                init_and_plan_directory,
                directory,
                skip_iam,
                print_diff,
                with_colors,
                envvars_by_directory[directory],
                output_directory,
            ): directory
            for directory in regular_directories
        }

        for finished in concurrent.futures.as_completed(pending):
            outcome = finished.result()
            record_outcome(pending[finished], outcome)

    # Symlinked directories are run one at a time, outside of the thread pool.
    for directory in symlinked_directories:
        outcome = init_and_plan_directory(
            directory,
            skip_iam,
            print_diff,
            with_colors,
            envvars_by_directory[directory],
            output_directory,
        )
        record_outcome(directory, outcome)

    return terraform_failures, iam_failures


def get_modified_subdirectories(plan_path: str) -> Tuple[List[str], List[str]]:
    """
    Use Git to work out which directories need a plan run because of changed files.
    A changed directory is a directory that has files that changed, or has symlinks to files that
    changed, or uses a module that changed
    :param plan_path: root to search for changed subdirectories from
    :return: A list of "regular" directories and a list of directories that are symlinks, both limited
    to directories that plan is allowed to run for
    """
    changed_files = get_git_changed_files(plan_path)
    git_root = get_git_root(plan_path)

    # Combine the module usage, file and auto-var usage graphs into a single graph.
    dependency_graph = compose_all([
        get_module_usage_graph(git_root),
        get_file_graph(git_root),
        get_auto_var_usage_graph(git_root),
    ])

    plannable_directories = set()
    for changed_path in changed_files:
        # Changed files that the graph knows nothing about cannot affect any directory.
        if changed_path in dependency_graph.nodes:
            # Keep only the descendants of the changed path that we are allowed to run plan for.
            plannable_directories.update(
                dependent
                for dependent in descendants(dependency_graph, changed_path)
                if should_run_plan_for(dependent, plan_path)
            )

    # Split into regular and symlink paths so downstream code can treat them differently.
    regular_directories = []
    symlinked_directories = []
    for directory in plannable_directories:
        if os.path.islink(directory):
            symlinked_directories.append(directory)
        else:
            regular_directories.append(directory)

    return regular_directories, symlinked_directories


def should_run_plan_for(directory: str, plan_path: str) -> bool:
    """
    Decide whether plan may be run for a given directory
    :param directory: The directory to check if we should run plan there
    :param plan_path: The root used for plan_check. All directories outside of this dir shouldn't run plan
    :return: A truthy value only if every check below passes
    """
    # Skip directories that live outside of the path arg used to run this command.
    if os.path.commonpath([plan_path, directory]) != plan_path:
        return False

    # Skip paths that no longer exist (the directory could have been deleted).
    if not os.path.exists(directory):
        return False

    # Skip paths that are not directories.
    if not os.path.isdir(directory):
        return False

    # Finally, plan must not be disabled in .tf_wrapper and the directory must hold TF files.
    return (
        is_plan_check_enabled(directory)
        and any(name.endswith('.tf') for name in os.listdir(directory))
    )


def is_plan_check_enabled(directory: str) -> bool:
    """Look up the plan_check setting of the .tf_wrapper config that applies to the directory"""
    absolute_directory = os.path.abspath(directory)
    return parse_wrapper_configs(
        wrapper_config_files=find_wrapper_config_files(path=absolute_directory)
    ).plan_check


def main():
    """
    Entry point: parse the command line, run plan check for the selected directories and report.
    Exits with status 1 if any directory has a Terraform failure or IAM changes, otherwise 0.
    :raises RuntimeError: If --parallel-jobs is not an integer or is smaller than 1
    """
    version_check(current_version=__version__)
    arguments = docopt(__doc__, version="Terrawrap %s" % __version__)

    skip_iam = arguments['--skip-iam']
    modified_only = arguments['--modified-only']
    print_diff = arguments['--print-diff']
    with_colors = arguments['--with-colors']
    output_directory = arguments['--output-dir']

    try:
        num_parallel = int(arguments['--parallel-jobs'])
    except ValueError as error:
        raise RuntimeError(
            "Unable to parse number of parallel jobs, '%s' is not an integer." % arguments['--parallel-jobs']
        ) from error

    # Fail early with a clear message; the thread pool rejects a non-positive worker count anyway.
    if num_parallel < 1:
        raise RuntimeError(
            "Number of parallel jobs must be at least 1, got '%s'." % num_parallel
        )

    # Get the directory with Terraform config passed to this script as an argument
    config_dir = arguments['<path>']
    if not os.path.isabs(config_dir):
        config_dir = os.path.abspath(os.path.join(CURRENT_DIRECTORY, config_dir))

    if modified_only:
        regular_directories, symlinked_directories = get_modified_subdirectories(config_dir)
    else:
        regular_directories, symlinked_directories = get_subdirectories(config_dir)

    print(
        "Running plan check for regular directories %s and symlink directories %s"
        % (regular_directories, symlinked_directories)
    )

    failing_directories, iam_directories = execute_init_and_plan(
        regular_directories, symlinked_directories, skip_iam, print_diff, with_colors, num_parallel, output_directory
    )

    if failing_directories:
        print("General Terraform failures detected. Check the output above and please resolve any issues.")
        print("Directories with Terraform failures:")
        for directory in failing_directories:
            print("\t%s" % directory)

    if iam_directories:
        print(
            "\nIAM changes have been detected. If this is intended, please contact the DevOps team to merge."
        )
        print("Directories with IAM changes:")
        for directory in iam_directories:
            print("\t%s" % directory)

    # Raise SystemExit directly instead of calling the exit() helper, which is injected by the
    # site module for interactive use and is not guaranteed to exist in every runtime.
    raise SystemExit(1 if iam_directories or failing_directories else 0)


if __name__ == '__main__':
    main()
