"""
SQS_Task_Token_Listener Lambda in Step Functions
Listens to SQS queue for TaskTokens and calls StepFunctions
"""
import json
import logging
import os
import re
import boto3

# Module-level logger; its level is (re)set from LOG_LEVEL on every call to
# lambda_handler.
logger = logging.getLogger(__name__)

# boto3 clients are created once, when the module is imported, and shared by
# every call to lambda_handler.
# Step Functions: used to report task success/failure for a task token.
step_functions_client = boto3.client(service_name='stepfunctions')
# DynamoDB: used to look up the task token stored under the job tag.
dynamo_db_client = boto3.client('dynamodb')
# Textract: used to fetch the paginated analysis result of a job.
textract = boto3.client('textract')
# S3: used to store the combined Textract result as a JSON object.
s3 = boto3.client('s3')


def _build_output_key(prefix, test_run_id, s3_filename):
    """Return the S3 key under which the combined Textract JSON is written.

    The layout is ``<prefix>/<test_run_id>/<s3_filename>.json``; the
    ``test_run_id`` segment is left out when it is empty.  An empty
    ``prefix`` therefore yields a key starting with "/", exactly as an
    explicitly empty OUTPUT_BUCKET_PREFIX always did.
    """
    segments = [prefix]
    if test_run_id:
        segments.append(test_run_id)
    segments.append(s3_filename + ".json")
    return "/".join(segments)


def _collect_textract_blocks(job_id):
    """Return the 'Blocks' of every result page of Textract job ``job_id``."""
    blocks = []
    pagination = {}
    while True:
        page = textract.get_document_analysis(JobId=job_id, **pagination)
        blocks.extend(page['Blocks'])
        next_token = page.get('NextToken')
        # A missing (or empty) token marks the last page; stopping on an empty
        # token as well avoids requesting the first page again forever.
        if not next_token:
            return blocks
        pagination['NextToken'] = next_token


def _process_sqs_record(record, token_store_ddb, output_bucket, key_prefix):
    """Handle one SQS record and report the job outcome to Step Functions.

    The record body is expected to be JSON with a 'Message' field that itself
    holds the JSON job notification ('Status', 'JobTag', 'JobId' and, for
    successful jobs, 'DocumentLocation.S3ObjectName').

    Raises:
        Exception: if no item is stored for the job tag, or if the job
            succeeded but no output bucket is configured.
    """
    body = json.loads(record["body"])
    message = json.loads(body['Message'])
    logger.debug(f"message: {message}")

    job_status = message['Status']
    job_tag = message['JobTag']
    job_id = message['JobId']

    logger.info(f"job_tag: '{job_tag}'")

    ddb_response = dynamo_db_client.get_item(
        TableName=token_store_ddb, Key={"ID": {'S': job_tag}})
    logger.debug(f"ddb_response: {ddb_response}")

    # get_item returns no 'Item' key when nothing matches; fail with a message
    # that names the job instead of a bare KeyError.
    item = ddb_response.get('Item')
    if not item:
        raise Exception(
            f"no item found in {token_store_ddb} for job_tag '{job_tag}'")
    task_token = item['Token']['S']

    if job_status != 'SUCCEEDED':
        # Nothing to store for an unsuccessful job: just fail the task.
        step_functions_client.send_task_failure(taskToken=task_token,
                                                error=job_status,
                                                cause="see output")
        return

    # Checked before any Textract call so a misconfiguration fails early and
    # with a clear message rather than a TypeError after fetching all pages.
    if not output_bucket:
        raise Exception("no OUTPUT_BUCKET set")

    input_bucket_key = message['DocumentLocation']['S3ObjectName']
    s3_filename, _ = os.path.splitext(input_bucket_key)
    test_run_id = item.get('test_run_id', {}).get('S', '')
    output_bucket_key = _build_output_key(key_prefix, test_run_id, s3_filename)

    result_value = {"Blocks": _collect_textract_blocks(job_id)}

    message['textract_result_location'] = output_bucket + "/" + output_bucket_key

    s3.put_object(Body=json.dumps(result_value, indent=4).encode('UTF-8'),
                  Bucket=output_bucket,
                  Key=output_bucket_key)

    step_functions_client.send_task_success(taskToken=task_token,
                                            output=json.dumps(message))


def lambda_handler(event, context):
    """Process job notifications delivered via SQS and resume Step Functions.

    For every record in ``event['Records']`` whose ``eventSource`` is
    "aws:sqs", the task token stored in DynamoDB under the message's JobTag is
    looked up.  If the job status is 'SUCCEEDED', all Textract result blocks
    are collected, written as one JSON object to the output bucket, and the
    task is completed with the message (extended by
    'textract_result_location') as output.  Any other status fails the task
    with that status as the error.  Records from other event sources are
    logged as errors and skipped.

    Environment variables:
        LOG_LEVEL: logger level, defaults to 'INFO'.
        TOKEN_STORE_DDB: DynamoDB table holding the task tokens (required).
        OUTPUT_BUCKET: bucket for the result JSON (required for successful
            jobs).
        OUTPUT_BUCKET_PREFIX: key prefix for the result JSON; every "/" in it
            is removed.  Optional, defaults to an empty prefix.

    Args:
        event: Lambda event containing a 'Records' list.
        context: Lambda context object (unused).

    Raises:
        Exception: if TOKEN_STORE_DDB is not set, or a record cannot be
            processed (see _process_sqs_record).
    """
    log_level = os.environ.get('LOG_LEVEL', 'INFO')
    logger.setLevel(log_level)
    logger.info(json.dumps(event))

    token_store_ddb = os.environ.get('TOKEN_STORE_DDB', None)
    if not token_store_ddb:
        raise Exception("no TOKEN_STORE_DDB set")

    output_bucket = os.environ.get('OUTPUT_BUCKET')
    output_bucket_prefix = os.environ.get('OUTPUT_BUCKET_PREFIX')

    logger.info(f"LOG_LEVEL: {log_level} \n \
                TOKEN_STORE_DDB: {token_store_ddb} \n \
                OUTPUT_BUCKET: {output_bucket} \n \
                OUTPUT_BUCKET_PREFIX: {output_bucket_prefix}")

    # Sanitise the prefix once, outside the loop.  An unset prefix is treated
    # as empty instead of crashing on None.
    key_prefix = (output_bucket_prefix or '').replace('/', '')

    for record in event['Records']:
        event_source = record["eventSource"]
        if event_source != "aws:sqs":
            logger.error(f"unsupported event_source: {event_source}")
            continue
        _process_sqs_record(record, token_store_ddb, output_bucket, key_prefix)
