#!/usr/bin/env python
"""
This script checks whether any IAM or S3 bucket policy changes are made in the Terraform config.

Usage:
    plan_check.py [options] <path>

Arguments:
    path    The path of the config directory to check

Options:
    -h, --help                              Show this message and exit.[default: False]
    -s, --skip-iam                          Skip iam changes for this run.[default: False]
    --modified-only                         Only run plan check on directories that were changed in git.
                                            Requires that this command be run from a git repository directory
    --print-diff                            Print the diff for any directories with changed configurations.
                                            [default: False]
    --with-colors                           Print the diff with colors [default: False]
    -j NUM_JOBS, --parallel-jobs=NUM_JOBS   The number of Terraform operations to run in parallel.
                                            [default: 4].
     --version                              Show the version.
"""

import os
import re
import concurrent.futures
from enum import Enum
from typing import Tuple, List, Dict

from docopt import docopt

from terrawrap.utils.version import version_check
from terrawrap.version import __version__
from terrawrap.utils.cli import execute_command
from terrawrap.utils.config import parse_wrapper_configs, find_wrapper_config_files, resolve_envvars
from terrawrap.utils.git_utils import get_git_changed_files, get_git_root
from terrawrap.utils.module import get_module_usage_graph
from terrawrap.utils.path import get_file_graph
from terrawrap.utils.tf_variables import get_auto_var_usage_graph

from networkx import compose_all, descendants

# Text in the 'plan' output after which check_for_iam_changes starts scanning lines for resource changes.
TERRAFORM_PERFORM_ACTIONS = "Terraform will perform the following actions"

# Matches a '-', '~' or '+' marker followed by a space, then the rest of the line, provided the line
# mentions an 'aws_iam_' prefixed name or 'aws_s3_bucket_policy'.
IAM_POLICY_RE = re.compile(r'[-~+] .*(aws_iam_|aws_s3_bucket_policy).*')

# Absolute directory that contains this script; the 'tf' wrapper is looked up relative to it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Working directory captured at import time; relative <path> arguments are resolved against it.
CURRENT_DIRECTORY = os.getcwd()


class WrapperExitCode(Enum):
    """
    Outcome of running 'init' and 'plan' against a single directory, as returned by
    init_and_plan_directory and check_for_iam_changes.
    """
    # Both commands ran and no IAM changes were flagged (or the IAM check was skipped)
    SUCCESS = 0
    # The plan output contained lines matching IAM_POLICY_RE
    IAM_CHANGES = 2
    # 'terraform init' or 'terraform plan' failed
    TERRAFORM_FAILURE = 3


class PlanExitCode(Enum):
    """
    Exit codes that the 'plan' command, run with '-detailed-exitcode', is compared against in
    init_and_plan_directory.
    """
    # Plan succeeded and found nothing to change
    SUCCESS_NO_DIFF = 0
    # Plan failed
    FAILURE = 1
    # Plan succeeded and there are changes to apply
    SUCCESS_WITH_DIFF = 2


def get_subdirectories(root_dir: str) -> Tuple[List[str], List[str]]:
    """
    Walk the given path (following symlinks) and collect every directory that holds '.tf' files and
    has plan check enabled.
    :param root_dir: The directory to run the script on.
    :return: lists of regular and symlinked directories
    """
    regular_directories = []
    symlinked_directories = []

    for current_dir, _dirs, files in os.walk(root_dir, followlinks=True):
        # Skip anything whose path mentions Terraform's working directory
        if ".terraform" in current_dir:
            continue

        # Only directories that directly contain Terraform config are of interest
        has_terraform_files = any(entry.endswith(".tf") for entry in files)
        if not has_terraform_files:
            continue

        # Respect the plan check switch from the .tf_wrapper config
        if not is_plan_check_enabled(current_dir):
            continue

        target = symlinked_directories if os.path.islink(current_dir) else regular_directories
        target.append(current_dir)

    return regular_directories, symlinked_directories


def init_and_plan_directory(
        directory: str,
        skip_iam: bool,
        print_diff: bool,
        with_colors: bool,
        additional_envvars: Dict[str, str]
) -> WrapperExitCode:
    """
    Run 'init' and 'plan' against the passed in directory
    :param directory: A directory with the terraform config
    :param skip_iam: A boolean to skip iam changes check
    :param print_diff: A boolean to print diffs for changes
    :param with_colors: A boolean to print diffs using ansi colors
    :param additional_envvars: A dictionary representing additional environment variables to supply
    :return: WrapperExitCode.TERRAFORM_FAILURE if 'init' fails or 'plan' exits with anything other than
    one of its two success codes; otherwise WrapperExitCode.SUCCESS when skip_iam is set, or the result
    of check_for_iam_changes on the plan output.
    """
    arguments = ['-input=false']

    if not with_colors:
        arguments.append('-no-color')

    wrapper_py = os.path.join(SCRIPT_DIR, 'tf')

    # Layer the additional variables on top of a copy of the current environment so that the
    # process-wide environment is left untouched.
    command_env = os.environ.copy()
    command_env.update(additional_envvars)

    # We're using --no-resolve-envvars here because the environment variables are supplied to this
    # function already resolved (additional_envvars) and are passed explicitly through env below.
    init_exit_code, init_stdout = execute_command(
        [wrapper_py, "--no-resolve-envvars", directory, 'init'] + arguments,
        print_output=False,
        env=command_env
    )

    if init_exit_code:
        print("'terraform init' failed for {}:\n{}".format(directory, "".join(init_stdout)))
        return WrapperExitCode.TERRAFORM_FAILURE

    # The extra var file is only passed to 'plan', and only when the directory provides one
    if os.path.exists(os.path.join(directory, 'pr_checker.tfvars')):
        arguments.append('-var-file=pr_checker.tfvars')

    # Same reasoning as for 'init' above: the variables are already resolved and passed through env.
    plan_exit_code, plan_stdout = execute_command(
        [wrapper_py, "--no-resolve-envvars", directory, 'plan', '-detailed-exitcode', '-lock=false'] + arguments,
        print_output=False,
        env=command_env
    )

    # With -detailed-exitcode only these two codes mean that plan succeeded. Anything else, not just
    # FAILURE (e.g. the process being killed by a signal), must not be reported as a success.
    plan_success_codes = (PlanExitCode.SUCCESS_NO_DIFF.value, PlanExitCode.SUCCESS_WITH_DIFF.value)
    if plan_exit_code not in plan_success_codes:
        print(
            "'terraform plan' failed for {}:\n{}".format(
                directory,
                "".join(plan_stdout)
            )
        )
        return WrapperExitCode.TERRAFORM_FAILURE

    if print_diff and plan_exit_code == PlanExitCode.SUCCESS_WITH_DIFF.value:
        print(
            "'terraform plan' generated following changes for {}:\n{}".format(
                directory,
                "".join(plan_stdout)
            )
        )

    # Check output for IAM changes
    if skip_iam:
        return WrapperExitCode.SUCCESS
    return check_for_iam_changes(plan_stdout, directory)


def check_for_iam_changes(stdout: List[str], directory: str) -> WrapperExitCode:
    """
    Scan the output of the 'plan' command for changes to IAM resources
    :param stdout: Lines of the 'plan' command output to search
    :param directory: The config directory, only used in the printed notification about IAM changes
    :return: WrapperExitCode.IAM_CHANGES if any matching line was found, otherwise WrapperExitCode.SUCCESS
    """
    flagged_lines = []
    in_changes_section = False

    # TODO: Refactor this to use JSON at some point
    # https://www.terraform.io/docs/internals/json-format.html
    for output_line in stdout:
        # Everything before the marker is ignored; the marker line itself is already scanned
        in_changes_section = in_changes_section or TERRAFORM_PERFORM_ACTIONS in output_line
        if not in_changes_section:
            continue

        found = IAM_POLICY_RE.search(output_line)
        if found is not None:
            flagged_lines.append(found.group(0))

    if not flagged_lines:
        return WrapperExitCode.SUCCESS

    print("Detected IAM resources modified in {0}:\n{1}\n".format(directory, "\n".join(flagged_lines)))
    return WrapperExitCode.IAM_CHANGES


def execute_init_and_plan(
        regular_directories: List[str],
        symlinked_directories: List[str],
        skip_iam: bool,
        print_diff: bool,
        with_colors: bool,
        num_parallel: int
) -> Tuple[List[str], List[str]]:
    """
    Run init and plan for every directory: regular ones on a thread pool, symlinked ones afterwards
    in sequence
    :param regular_directories: Non symlinked directories to be processed in parallel
    :param symlinked_directories: Symlinked directories to be processed one-by-one
    :param skip_iam: A boolean to skip iam changes check
    :param print_diff: A boolean to control printing diffs for changes
    :param with_colors: A boolean to control printing diffs with ansi colors
    :param num_parallel: Number of workers to use
    :return: Tuple of two lists, the first list is of directories with terraform failures, the second is of
    directories with IAM failures.
    """
    # Lookup everyone's environment variables at once so that we get the benefit of Parameter Store calls
    # being cached.
    envvars_by_directory = {}
    for directory in regular_directories + symlinked_directories:
        config_files = find_wrapper_config_files(directory)
        wrapper_config = parse_wrapper_configs(config_files)
        envvars_by_directory[directory] = resolve_envvars(wrapper_config.envvars)

    terraform_failures = []
    iam_failures = []

    def record_outcome(checked_directory, outcome):
        # File the directory under the matching failure list; successes are not tracked.
        if outcome == WrapperExitCode.IAM_CHANGES:
            iam_failures.append(checked_directory)
        elif outcome == WrapperExitCode.TERRAFORM_FAILURE:
            terraform_failures.append(checked_directory)

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_parallel) as pool:
        # Map each submitted job back to its directory so results can be attributed on completion
        pending = {
            pool.submit(
                init_and_plan_directory,
                directory,
                skip_iam,
                print_diff,
                with_colors,
                envvars_by_directory[directory],
            ): directory
            for directory in regular_directories
        }

        for finished in concurrent.futures.as_completed(pending):
            record_outcome(pending[finished], finished.result())

    # Symlinked directories are handled outside the pool, one after another
    for directory in symlinked_directories:
        outcome = init_and_plan_directory(
            directory, skip_iam, print_diff, with_colors, envvars_by_directory[directory]
        )
        record_outcome(directory, outcome)

    return terraform_failures, iam_failures


def get_modified_subdirectories(plan_path: str) -> Tuple[List[str], List[str]]:
    """
    Ask Git which files changed and work out which directories are affected by them
    A directory counts as affected when it is a descendant of a changed file in the combined graph of
    module usage, files and auto var usage
    :param plan_path: root to search for changed subdirectories from
    :return: A list of "regular" directories and directories that are symlinks which have changed
    """
    changed_files = get_git_changed_files(plan_path)
    repo_root = get_git_root(plan_path)

    # One combined graph lets a single descendants() lookup cover every kind of dependency
    dependency_graph = compose_all([
        get_module_usage_graph(repo_root),
        get_file_graph(repo_root),
        get_auto_var_usage_graph(repo_root),
    ])

    directories_to_check = set()
    for changed_path in changed_files:
        if changed_path in dependency_graph.nodes:
            # keep only the dependents that we are actually allowed to run plan for
            directories_to_check.update(
                dependent
                for dependent in descendants(dependency_graph, changed_path)
                if should_run_plan_for(dependent, plan_path)
            )

    # group directories into regular and symlink paths so downstream code can treat them differently
    regular_directories = []
    symlinked_directories = []
    for directory in directories_to_check:
        if os.path.islink(directory):
            symlinked_directories.append(directory)
        else:
            regular_directories.append(directory)

    return regular_directories, symlinked_directories


def should_run_plan_for(directory: str, plan_path: str) -> bool:
    """
    Return True if we are allowed to run plan for a given directory
    :param directory: The directory to check if we should run plan there
    :param plan_path: The root used for plan_check. All directories outside of this dir shouldn't run plan
    :return: True only if the directory is inside plan_path, still exists, is a directory, has plan check
    enabled and contains at least one '.tf' file
    """
    # os.path.commonpath never returns a trailing separator, so comparing its result with an
    # un-normalised plan_path such as '/repo/config/' would reject every directory. Normalise both
    # sides before comparing.
    plan_root = os.path.normpath(plan_path)
    candidate = os.path.normpath(directory)

    # We don't want to run plan if the directory is outside of the path arg used to run this command
    # Or if the directory doesn't exist anymore (it could have been deleted)
    # Or if plan has been disabled for that dir in .tf_wrapper
    # Or if there are no TF files in it
    return (
        os.path.commonpath([plan_root, candidate]) == plan_root
        and os.path.exists(directory)
        and os.path.isdir(directory)
        and is_plan_check_enabled(directory)
        and any(file.endswith('.tf') for file in os.listdir(directory))
    )


def is_plan_check_enabled(directory: str) -> bool:
    """
    Return the plan_check setting of the .tf_wrapper config that applies to the directory
    :param directory: The directory to look up wrapper config files for; made absolute before the lookup
    :return: The plan_check attribute of the parsed wrapper config
    """
    absolute_directory = os.path.abspath(directory)
    config_files = find_wrapper_config_files(path=absolute_directory)
    return parse_wrapper_configs(wrapper_config_files=config_files).plan_check


def main():
    """
    Command line entry point: parse the arguments, run init and plan for the selected directories,
    print a summary of the directories with failures or IAM changes, and exit
    :raises RuntimeError: if the number of parallel jobs is not a positive integer
    :raises SystemExit: always; with status 1 if any directory failed or has IAM changes, otherwise 0
    """
    version_check(current_version=__version__)
    arguments = docopt(__doc__, version="Terrawrap %s" % __version__)

    skip_iam = arguments['--skip-iam']
    modified_only = arguments['--modified-only']
    print_diff = arguments['--print-diff']
    with_colors = arguments['--with-colors']

    try:
        num_parallel = int(arguments['--parallel-jobs'])
    except ValueError as error:
        # Keep the original parsing error attached as the cause
        raise RuntimeError(
            "Unable to parse number of parallel jobs, '%s' is not an integer." % arguments['--parallel-jobs']
        ) from error

    # Reject this up front: the thread pool would refuse a non-positive worker count anyway, but only
    # after the directories have been collected and their environment variables resolved.
    if num_parallel < 1:
        raise RuntimeError(
            "Number of parallel jobs must be at least 1, got '%s'." % arguments['--parallel-jobs']
        )

    # Get the directory with Terraform config passed to this script as an argument
    config_dir = arguments['<path>']
    if not os.path.isabs(config_dir):
        config_dir = os.path.abspath(os.path.join(CURRENT_DIRECTORY, config_dir))

    if modified_only:
        regular_directories, symlinked_directories = get_modified_subdirectories(config_dir)
    else:
        regular_directories, symlinked_directories = get_subdirectories(config_dir)

    print(
        "Running plan check for regular directories %s and symlink directories %s"
        % (regular_directories, symlinked_directories)
    )

    failing_directories, iam_directories = execute_init_and_plan(
        regular_directories, symlinked_directories, skip_iam, print_diff, with_colors, num_parallel
    )

    if failing_directories:
        print("General Terraform failures detected. Check the output above and please resolve any issues.")
        print("Directories with Terraform failures:")
        for directory in failing_directories:
            print("\t%s" % directory)

    if iam_directories:
        print(
            "\nIAM changes have been detected. If this is intended, please contact the DevOps team to merge."
        )
        print("Directories with IAM changes:")
        for directory in iam_directories:
            print("\t%s" % directory)

    # Raise SystemExit directly instead of calling the exit() helper, which is injected by the site
    # module for interactive use and is not guaranteed to be available in every runtime.
    raise SystemExit(1 if iam_directories or failing_directories else 0)


# Run the plan check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
