#!/bin/bash

# NOTES: requirements
# gcloud components install kubectl
# gcloud auth configure-docker
#

# Abort as soon as a command fails outside of a condition or '||' list.
set -e

# Command-line arguments, flattened into one space-separated string.
ARGS="$*"

VERSION=          # docker image version to deploy (set by parse_args)
PROG=$(basename -- "$0")
DO_TEST=1         # non-empty: run the acceptance tests against staging
STAGING_ONLY=     # non-empty: do not deploy to the live cluster
ROLLBACK=         # non-empty: rollback mode

# Generated files are written to a path relative to the current directory,
# so insist on being started from the root of the git clone.
ROOTDIR=$(git rev-parse --show-toplevel)
if [ "$PWD" != "$ROOTDIR" ]; then
    echo "ERROR: current dir is not the clone's root directory"
    exit 1
fi

check_rc() {
    # Abort the deploy when a command reported a failure.
    #
    # Arguments:
    #   $1 - exit status of the command that just ran
    #   $2 - human readable name of that command, used in the error message
    #
    # Returns normally when $1 is 0, otherwise prints an error and exits 1.
    #
    # NOTE: the script runs under 'set -e', so this is only reached for a
    # failing command when the call is guarded: cmd || check_rc $? 'cmd'
    local rc=$1
    local cmd=$2
    if [ "$rc" -ne 0 ]; then
        echo "ERROR: Command '${cmd}' failed - deploy aborted"
        exit 1
    fi
}

get_cluster_location() {
    # Print the second column of 'gcloud container clusters list' for the
    # cluster whose name (first column) is exactly $1. Prints nothing when no
    # such cluster is listed.
    #
    # The name is compared for equality rather than grep'ed, so that a cluster
    # called 'x-foo' is not picked up when looking for 'foo'.
    local cluster_name=$1
    gcloud container clusters list \
        | awk -v name="$cluster_name" '$1 == name { print $2 }'
}

get_ip() {
    # Print the public IP of the ingress '<app name>-<selector>-ingress',
    # followed by a newline. Prints an empty line when the ingress has no
    # address yet or cannot be queried.
    #
    # Arguments:
    #   $1 - the string 'live' or 'staging', selecting which ingress to query
    #
    # The ingress is looked up in the namespace of the current kubectl context.
    local selector=$1
    local app_name
    app_name=$(pymconfig --name)

    # Ask for the address field directly instead of picking a column out of
    # the human-readable table: while no address is assigned that column is
    # blank and whitespace-splitting would return a neighbouring column.
    # '|| true' keeps a failing lookup from aborting callers under 'set -e'.
    kubectl get ingress "${app_name}-${selector}-ingress" \
        -o jsonpath='{.status.loadBalancer.ingress[0].ip}' || true
    echo
}

usage() {
    # Print the command-line help text to stdout. Reads the global PROG for
    # the program name shown in the synopsis.
    cat << EOF
USAGE: $PROG [--no-test] [--staging-only] [--rollback] [--debug] <version>
       $PROG --live-ip | --staging-ip | --help

Deploy a Docker image to GKE in two steps, first to a staging service, then to
a live service if the tests against staging all passed.

OPTIONS:
  --no-test         Skip tests, but still deploy to staging first, then live.
  --staging-only    Deploy only to staging cluster, not to the live one.
  --live-ip         Print the public IP of the live environment and exit.
  --staging-ip      Print the public IP of the staging environment and exit.
  --rollback        Rollback to the given version (an extra fast deploy, without
                    testing or waiting for resource readiness)
  --debug           Debug verbosity.
  --help            This text.

EOF
}

parse_args() {
    # Parse the command-line arguments given as "$@".
    #
    # Sets (and, for the first three, exports) the globals DO_TEST,
    # STAGING_ONLY, ROLLBACK, DEBUG and VERSION. The --live-ip, --staging-ip
    # and --help options print their output and exit 0 immediately.
    #
    # Any other argument starting with '-' is rejected with exit status 1
    # instead of being taken as the version: a docker tag cannot start with a
    # dash, so such an argument is a mistyped option. Empty arguments are
    # ignored. When several versions are given, the last one wins.
    while [ $# -gt 0 ]; do
        case "$1" in
            "")              ;;
            --no-test)       export DO_TEST= ;;
            --staging-only)  export STAGING_ONLY=1 ;;
            --rollback)      export ROLLBACK=1 ;;
            --live-ip)       get_ip 'live'; exit 0 ;;
            --staging-ip)    get_ip 'staging'; exit 0 ;;
            --debug)         set -x; DEBUG='true' ;;
            -h | --help)     usage; exit 0 ;;
            -*)
                echo "ERROR: unknown option '$1'" >&2
                usage >&2
                exit 1
                ;;
            *)               VERSION=$1 ;;
        esac
        shift
    done
}

# Hand over the original arguments one by one; passing them quoted avoids the
# word-splitting and glob expansion an unquoted string would undergo.
parse_args "$@"

if [ -z "$VERSION" ]; then
    echo "ERROR: please specify a docker image version"
    exit 1
fi

# Project settings, all read from pymconfig.
APP_NAME=$(pymconfig --name)
DOMAIN_STAGING=$(pymconfig --staging-host)
DOMAIN_LIVE=$(pymconfig --live-host)
DOCKER_ROOT_REPO=$(pymconfig --docker-repo)
DOCKER_REPO=$DOCKER_ROOT_REPO/$APP_NAME
# The docker root repo value is also used as the GCP project.
GCP_PROJECT=$DOCKER_ROOT_REPO
GCP_REGION=$(pymconfig --gcp-region)
# Only the last line of the output is kept as the memory limit.
MEMORY_LIMIT=$(pymconfig --memory-limit | tail -n 1)
NAME_STAGING=${APP_NAME}-staging
NAME_LIVE=${APP_NAME}-live

# Stop previous versions?
# gcloud config set app/stop_previous_version true

do_create_namespace() {
    # Make sure the namespace $1 exists, then make it the default namespace
    # of the current kubectl context.
    #
    # The namespace is looked up by its exact name, so that e.g. 'app' is not
    # considered present just because 'myapp' exists.
    local namespace=$1
    if ! kubectl get namespace "$namespace" > /dev/null 2>&1; then
        echo "=> Creating namespace $namespace"
        kubectl create namespace "$namespace"
    fi
    echo "=> Using namespace $namespace"
    kubectl config set-context --current --namespace="$namespace"
}

do_apply_deployment() {
    # Generate the Deployment manifest for one environment and submit it to
    # the cluster of the current kubectl context. The container listens on
    # port 8080.
    #
    # Arguments:
    #   $1 - deployment name, also used for the 'app'/'run' labels and PYM_ENV
    #   $2 - kubernetes namespace
    # Globals read: APP_NAME, VERSION, DOCKER_REPO, MEMORY_LIMIT
    # Files: writes .pym/deployment-<name>.yaml (the .pym directory must exist)
    #
    # Every variable listed by 'pymconfig --env-secrets' is copied from the
    # environment into the container's env; the function exits 1 if one of
    # them is empty or unset.
    # NOTE: the secret values end up in plain text in the generated file.
    local LOCAL_NAME=$1
    local NAMESPACE=$2
    local CONTAINER_NAME=container-$APP_NAME
    local DEP_FILE=.pym/deployment-$LOCAL_NAME.yaml
    local VAR VALUE

    echo "=> Creating pod config"
    # apps/v1 is the stable Deployment API; extensions/v1beta1 is no longer
    # served by current Kubernetes versions.
    cat <<EOF > "$DEP_FILE"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: $LOCAL_NAME
  namespace: $NAMESPACE
  labels:
    app: $LOCAL_NAME
    version: $VERSION
spec:
  selector:
    matchLabels:
      run: $LOCAL_NAME
      app: $LOCAL_NAME
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        run: $LOCAL_NAME
        app: $LOCAL_NAME
        version: $VERSION
    spec:
      terminationGracePeriodSeconds: 180
      containers:
      - name: $CONTAINER_NAME
        image: gcr.io/$DOCKER_REPO:${VERSION}
        resources:
          requests:
            memory: "${MEMORY_LIMIT}Mi"
          limits:
            memory: "${MEMORY_LIMIT}Mi"
        readinessProbe:
          initialDelaySeconds: 150
          periodSeconds: 10
          failureThreshold: 3
          timeoutSeconds: 5
          httpGet:
            path: /ping
            port: 8080
        livenessProbe:
          initialDelaySeconds: 150
          periodSeconds: 10
          failureThreshold: 3
          timeoutSeconds: 5
          httpGet:
            path: /ping
            port: 8080
        lifecycle:
          preStop:
            exec:
              command: ["/bin/bash", "-c", "sleep 30"]
        ports:
        - containerPort: 8080
        env:
        - name: PORT
          value: "8080"
        - name: VERSION
          value: "${VERSION}"
        - name: PYM_ENV
          value: "${LOCAL_NAME}"
EOF

    # The list of names is deliberately left unquoted: it is split on
    # whitespace into one variable name per iteration.
    for VAR in $(pymconfig --env-secrets)
    do
        echo "   Adding $VAR"
        # printenv returns the whole value, including any '=' it contains;
        # '|| true' lets the explicit check below report an unset variable.
        VALUE=$(printenv "$VAR" || true)
        if [ -z "$VALUE" ]; then
            echo "ERROR: variable $VAR has no value in env"
            exit 1
        fi
        # Escape what would otherwise break or alter a YAML double-quoted
        # scalar: backslashes first, then double quotes and newlines.
        VALUE=${VALUE//\\/\\\\}
        VALUE=${VALUE//\"/\\\"}
        VALUE=${VALUE//$'\n'/\\n}
        printf '        - name: %s\n' "$VAR" >> "$DEP_FILE"
        printf '          value: "%s"\n' "$VALUE" >> "$DEP_FILE"
    done

    echo "=> Checking if deployment exists or is new..."
    # Look the deployment up by exact name in the target namespace.
    if kubectl get deployment "$LOCAL_NAME" --namespace="$NAMESPACE" > /dev/null 2>&1; then
        echo "=> Deployment already exists. Applying change..."
        # '|| check_rc' so the failure is reported before 'set -e' can abort.
        kubectl apply -f "$DEP_FILE" --namespace="$NAMESPACE" \
            || check_rc $? 'kubectl apply'
    else
        echo "=> Deployment is new. Creating it..."
        kubectl create --save-config -f "$DEP_FILE" --namespace="$NAMESPACE" \
            || check_rc $? 'kubectl create'
    fi
}

do_apply_ingress() {
    # Deploy an ingress opening port 80 and 443, redirected to the container's port 8080
    # Accept HTTPS traffic on port 443 via a managed SSL certificate
    #
    # Arguments:
    #   $1 - base name; the resources are named <name>-backend, <name>-service
    #        and <name>-ingress
    #   $2 - domain served by the managed certificate (must not be empty)
    #   $3 - kubernetes namespace
    # Globals read: VERSION
    # Files: writes cert-, backend-, nodeport- and ingress-<name>.yaml in .pym/
    #
    # Does nothing when the ingress <name>-ingress already exists in the
    # namespace; otherwise applies, in order, the managed certificate, the
    # backend config, the NodePort service and the ingress.
    local LOCAL_NAME=$1
    local LOCAL_DOMAIN=$2
    local NAMESPACE=$3
    local YAML_FILE

    if [ -z "$LOCAL_DOMAIN" ]; then
        echo "BUG!! set LOCAL_DOMAIN please!"
        exit 1
    fi

    # Look the ingress up under the name it is created with further down.
    if kubectl get ingress "$LOCAL_NAME-ingress" --namespace="$NAMESPACE" > /dev/null 2>&1; then
        echo "=> The ingress already exists"
        return
    fi

    # Create a managed cert
    # See: https://cloud.google.com/kubernetes-engine/docs/how-to/managed-certs
    echo "=> Applying managed certificate (domain: ${LOCAL_DOMAIN})..."
    YAML_FILE=.pym/cert-$LOCAL_NAME.yaml
    cat <<EOF > "$YAML_FILE"
apiVersion: networking.gke.io/v1beta1
kind: ManagedCertificate
metadata:
  name: $LOCAL_DOMAIN-certificate
spec:
  domains:
    - $LOCAL_DOMAIN
EOF
    # '|| check_rc' so the failure is reported before 'set -e' can abort.
    kubectl apply -f "$YAML_FILE" --namespace="$NAMESPACE" \
        || check_rc $? 'kubectl apply'

    # NOTE: waiting for cert is only interesting the first time it's generated,
    # and even then, the cert won't become active until the ingress starts
    # using it, and then with a delay of up to 15 min. So waiting won't help us
    # get around that downtime delay...
    # See: https://stackoverflow.com/questions/53886750/google-managed-ssl-certificate-stuck-on-failed-not-visible
    # NOTE: leaving this code here as a reminder to self on how to check certificate status
    #
    # # Wait for certificate to be provisioned
    # echo "=> Waiting for certificate to be provisioned"
    # STATUS='Provisioning'
    # while [ "$STATUS" != "Active" ]; do
    #     sleep 1;
    #     STATUS=$(kubectl get managedcertificates ${LOCAL_DOMAIN}-certificate -o jsonpath='{..certificateStatus}' | tr -s '[[:space:]]' '\n')
    #     echo -n '*'
    # done
    # echo ''

    # Configure the timeout and connection draining of the GCP load balancer
    # See: https://cloud.google.com/kubernetes-engine/docs/how-to/configure-backend-service#creating_a_backendconfig
    echo "=> Applying backend..."
    YAML_FILE=.pym/backend-$LOCAL_NAME.yaml
    cat <<EOF > "$YAML_FILE"
apiVersion: cloud.google.com/v1beta1
kind: BackendConfig
metadata:
  name: $LOCAL_NAME-backend
spec:
  timeoutSec: 50
  connectionDraining:
    drainingTimeoutSec: 60
EOF
    kubectl apply -f "$YAML_FILE" --namespace="$NAMESPACE" \
        || check_rc $? 'kubectl apply'

    # Proceed setting up the ingress
    echo "=> Applying service..."
    YAML_FILE=.pym/nodeport-$LOCAL_NAME.yaml
    cat <<EOF > "$YAML_FILE"
apiVersion: v1
kind: Service
metadata:
  name: $LOCAL_NAME-service
  namespace: $NAMESPACE
  labels:
    app: $LOCAL_NAME
    version: $VERSION
  annotations:
    beta.cloud.google.com/backend-config: '{"ports": {"8080":"$LOCAL_NAME-backend"}}'
spec:
  type: NodePort
  selector:
    app: $LOCAL_NAME
    run: $LOCAL_NAME
  ports:
  - port: 8080
    protocol: TCP
    targetPort: 8080
EOF
    kubectl apply -f "$YAML_FILE" --namespace="$NAMESPACE" \
        || check_rc $? 'kubectl apply'

    # Verify the backend settings with:
    # gcloud compute backend-services list -> find the backend
    # gcloud compute backend-services describe <BACKEND_NAME> --global

    # Note: relevant articles are:
    # https://cloud.google.com/kubernetes-engine/docs/how-to/managed-certs
    # https://cloud.google.com/kubernetes-engine/docs/concepts/ingress
    echo "=> Applying ingress..."
    YAML_FILE=.pym/ingress-$LOCAL_NAME.yaml
    # networking.k8s.io/v1 replaces the removed extensions/v1beta1 Ingress;
    # 'spec.backend' is spelled 'spec.defaultBackend' in that version.
    # NOTE(review): the GKE v1beta1 resources above are left untouched -
    # confirm they are still served by the target clusters.
    cat <<EOF > "$YAML_FILE"
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: $LOCAL_NAME-ingress
  namespace: $NAMESPACE
  annotations:
    kubernetes.io/ingress.global-static-ip-name: $LOCAL_NAME-ip
    networking.gke.io/managed-certificates: $LOCAL_DOMAIN-certificate
  labels:
    app: $LOCAL_NAME
    version: $VERSION
spec:
  defaultBackend:
    service:
      name: $LOCAL_NAME-service
      port:
        number: 8080
EOF
    kubectl apply -f "$YAML_FILE" --namespace="$NAMESPACE" \
        || check_rc $? 'kubectl apply'
}

do_wait_for_rollout() {
    # Block until a deployment is rolled out and reachable.
    #
    # Arguments:
    #   $1 - deployment name
    #   $2 - the string 'live' or 'staging', passed on to get_ip
    #   $3 - kubernetes namespace
    #
    # Sets the global IP to the value printed by get_ip; code outside this
    # function reads it, so it is intentionally not declared local.
    # NOTE: neither polling loop has a timeout.
    local LOCAL_NAME=$1
    local SELECTOR=$2
    local NAMESPACE=$3
    local COUNT

    # A deployment is rolling out: wait till we can reach it on the staging/live IP
    echo "=> Waiting for deployment rollout..."
    # '|| check_rc' so the failure is reported before 'set -e' can abort.
    kubectl rollout status "deployment/$LOCAL_NAME" --namespace="$NAMESPACE" \
        || check_rc $? 'kubectl rollout'

    # Then wait for rollout to be complete: poll until the running pods of the
    # namespace carry exactly one distinct 'version' label.
    echo "=> Waiting for all pods to run new image..."
    COUNT=2
    while [ "$COUNT" -ne 1 ]; do
        sleep 1
        echo -n '*'
        COUNT=$(kubectl get pods -o jsonpath='{range .items[*]}{@.metadata.labels.version}{" "}{@..phase}{"\n"}{end}' --namespace "$NAMESPACE" \
            | grep Running | sort -u | wc -l)
    done
    echo ''

    # Wait for external IP to be assigned
    echo "=> Waiting for external IP..."
    IP=""
    while [ -z "$IP" ]; do
        sleep 1
        IP=$(get_ip "$SELECTOR")
    done
    echo ''

    echo "=> $LOCAL_NAME is now live at $IP"
}

#
# General settings
#

# Announce what is about to happen: a rollback or a regular deploy. Both
# banners share the same frame and differ only in the title line.
BANNER_EDGE="***********************************************"
BANNER_SIDE="*                                             *"
if [ -n "$ROLLBACK" ]; then
    BANNER_TITLE="* ROLLING BACK TO VERSION $VERSION"
else
    BANNER_TITLE="* DEPLOYING VERSION $VERSION"
fi
printf '%s\n' \
    "" \
    "$BANNER_EDGE" \
    "$BANNER_SIDE" \
    "$BANNER_TITLE" \
    "$BANNER_SIDE" \
    "$BANNER_EDGE" \
    ""

# Scratch directory that receives the generated manifests.
echo "=> Creating tmp dir .pym/"
mkdir -p .pym

# ------------------------------------------------------------------------------
#
# DEPLOY TO STAGING CLUSTER
#
# The staging cluster should have only 1 zone, have auto-scaling with minimum
# instance count = 1 and maximum = 2
#
# ------------------------------------------------------------------------------

echo "=> Configuring gcloud..."
gcloud config set project "$GCP_PROJECT"
gcloud config set compute/region "$GCP_REGION"

echo "=> Using cluster $NAME_STAGING"
gcloud config set container/cluster "$NAME_STAGING"

# Using --zone since the staging environment is supposed to be in one zone only
LOCATION=$(get_cluster_location "$NAME_STAGING")
if [ -z "$LOCATION" ]; then
    # Fail here with a clear message rather than passing an empty --zone on.
    echo "ERROR: cannot find the location of cluster $NAME_STAGING"
    exit 1
fi
echo "=> Deploying to zone $LOCATION"
gcloud container clusters get-credentials "$NAME_STAGING" --zone "$LOCATION" --project "$GCP_PROJECT"

# And create/apply deployment, service and ingress, when needed

echo ""
echo "=> Deploying $VERSION to $NAME_STAGING"

do_create_namespace "$APP_NAME"
do_apply_deployment "$NAME_STAGING" "$APP_NAME"
do_apply_ingress "$NAME_STAGING" "$DOMAIN_STAGING" "$APP_NAME"
if [ -z "$ROLLBACK" ]; then
    # Also sets IP, which the acceptance tests are run against.
    do_wait_for_rollout "$NAME_STAGING" 'staging' "$APP_NAME"
else
    echo "ROLLBACK: not waiting for staging to be ready"
fi

# ------------------------------------------------------------------------------
#
# RUN ACCEPTANCE TESTS
#
# ------------------------------------------------------------------------------

cd "$ROOTDIR"
# Tests run only when enabled (DO_TEST non-empty) and not in rollback mode.
if [ -n "$DO_TEST" ]; then
    if [ -z "$ROLLBACK" ]; then
        # Resolve the port first so that the message below can show it.
        TEST_PORT=$(pymconfig --staging-port)
        echo ""
        echo "=> Executing tests against $IP:$TEST_PORT"

        # Run pymtest as the 'if' condition: under 'set -e' a bare failing
        # command would end the script before the error could be printed.
        if ! pymtest --host "$IP" --port "$TEST_PORT" --no-ssl-check; then
            echo "ERROR: Acceptance tests failed against $IP - deploy aborted"
            exit 1
        fi

    else
        echo "ROLLBACK: skip staging tests"
    fi
fi

# ------------------------------------------------------------------------------
#
# DEPLOY TO LIVE CLUSTER
#
# ------------------------------------------------------------------------------

if [ -n "$STAGING_ONLY" ]; then
    echo "=> Not deploying to live cluster"
else

    echo "=> Using cluster $NAME_LIVE"
    gcloud config set container/cluster "$NAME_LIVE"

    # Using --region since the live environment is supposed to be in a regional one
    LOCATION=$(get_cluster_location "$NAME_LIVE")
    if [ -z "$LOCATION" ]; then
        # Fail here with a clear message rather than passing an empty --region on.
        echo "ERROR: cannot find the location of cluster $NAME_LIVE"
        exit 1
    fi
    echo "=> Deploying to region $LOCATION"
    gcloud container clusters get-credentials "$NAME_LIVE" --region "$LOCATION" --project "$GCP_PROJECT"

    echo ""
    echo "=> Deploying $VERSION to $NAME_LIVE"

    do_create_namespace "$APP_NAME"
    do_apply_deployment "$NAME_LIVE" "$APP_NAME"
    do_apply_ingress "$NAME_LIVE" "$DOMAIN_LIVE" "$APP_NAME"
    # The live rollout is waited for in every mode, rollback included.
    do_wait_for_rollout "$NAME_LIVE" 'live' "$APP_NAME"
fi
