#!/bin/bash
#
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Top-level harness which deploys a ready-to-use Hadoop cluster including
# starting GCE VMs, installing Hadoop binaries, configuring HDFS, installing
# GHFS libraries, and configuring GHFS.
#
# Usage: ./ghadoop [deploy|delete] [optional: <paths to env files>]...

# Prints the usage for this script and exits.
# Prints the usage for this script to stderr (it is an error/diagnostic
# message, so it must not pollute stdout) and exits with status 1.
function print_usage() {
  cat <<EOF >&2
      Usage: ./ghadoop [deploy|delete] [optional: <paths to env files>]...
             ./ghadoop deploy foo/ghadoop_env.sh  # Specify a config file.
             ./ghadoop delete
             ./ghadoop delete foo/ghadoop_env.sh foo/my_env_overrides.sh

             The environment variable files are sourced in order, so variables
             present in multiple config files will end up defined by the last
             appearance in a specified config file.
EOF
  exit 1
}

# Generates a string which simply runs the filename denoted by $1.sh as root
# while redirecting its stdout and stderr to $1.stdout and $1.stderr,
# respectively.
# Prints the string to stdout.
function get_remote_cmd() {
  # Compose the remote invocation: run the named script as root, capturing
  # its stdout/stderr in per-script files on the VM for later debugging.
  local script_name=$1
  printf '%s\n' "sudo ./${script_name}.sh > ${script_name}.stdout 2>${script_name}.stderr"
}

# Simple wrapper around "echo" so that it's easy to add log messages with a
# date/time prefix.
function loginfo() {
  # Prefix the message with the current date/time for log readability.
  local timestamp
  timestamp=$(date)
  echo "${timestamp}: $*"
}

# Given $1 describing the command to confirm (deploy|delete), prints and reads
# a confirmation prompt from the console.
function prompt_confirmation() {
  # Summarize the effective settings and ask the user to confirm the action
  # named by $1 (Deploy|Delete). Aborts with exit 1 unless the answer is
  # exactly 'y'. The ${VAR?} expansions abort the script if a required
  # variable is unset.
  local msg="$1 cluster with following settings?
      CONFIGBUCKET='${CONFIGBUCKET?}'
      PROJECT='${PROJECT?}'
      GCE_ZONE='${GCE_ZONE?}'
      PREFIX='${PREFIX?}'
      NUM_WORKERS=${NUM_WORKERS?}
      NAMENODE_HOSTNAME='${NAMENODE_HOSTNAME}'
      WORKERS='${WORKERS[@]}'
      "
  # Attached-disk settings are only relevant (and only shown) when enabled.
  if (( ${USE_ATTACHED_PDS} )); then
    msg+="NAMENODE_ATTACHED_PD='${NAMENODE_ATTACHED_PD}'
      WORKER_ATTACHED_PDS='${WORKER_ATTACHED_PDS[@]}'
      "
  fi
  msg+='(y/n) '
  read -p "${msg}" PROMPT_RESPONSE
  case "${PROMPT_RESPONSE}" in
    y) ;;
    *)
      loginfo "Aborting command '$1', exiting..."
      exit 1
      ;;
  esac

  # Running as root is unusual enough to warrant a second confirmation.
  if (( EUID == 0 )); then
    read -p 'Are you sure you want to run the command as root? (y/n)' \
        PROMPT_RESPONSE
    if [[ "${PROMPT_RESPONSE}" != 'y' ]]; then
      loginfo "Aborting command '$1', exiting..."
      exit 1
    fi
  fi
}

# Composes (but does not run) the full gcutil command line for subcommand $1,
# including project/zone flags and any trailing arguments, and prints it to
# stdout as a single space-separated string.
function get_gcutil_cmd() {
  local subcommand=$1
  shift
  local words=(gcutil)

  # Only add the --permit_root_ssh flag if running as root.
  if (( EUID == 0 )); then
    words+=(--permit_root_ssh)
  fi

  # --max_wait_time and --sleep_between_polls are not applicable to gcutil
  # ssh, push, or pull.
  if ! [[ "${subcommand}" =~ ^(pull|push|ssh)$ ]]; then
    words+=(--max_wait_time=${GCUTIL_TIMEOUT_SECONDS})
    words+=(--sleep_between_polls=${GCUTIL_POLL_INTERVAL_SECONDS})
  fi

  words+=(--project=${PROJECT} ${subcommand} --zone=${GCE_ZONE})
  echo "${words[@]}" ${@}
}

# The gcutil command with global flags and some common command flags to use for
# all GCE operations.
# Builds (via get_gcutil_cmd) and runs a gcutil command with global flags and
# common command flags; on failure, appends diagnostics to DEBUG_FILE and
# returns the command's exit code to the caller.
function run_gcutil_cmd() {
  local gcutil_cmd=$1
  # The command line is executed below via unquoted word-splitting, so
  # arguments must not contain whitespace that should be preserved.
  local full_cmd=$(get_gcutil_cmd ${@})
  loginfo "Running ${full_cmd}"
  ${full_cmd}

  local exitcode=$?
  if (( ${exitcode} != 0 )); then
    # Record the failure for later inspection; DEBUG_FILE lives under the
    # temporary script directory created by the deploy flow.
    loginfo "Exited ${exitcode} : ${full_cmd}" >> ${DEBUG_FILE}
    # If ssh failed after the cluster first became reachable (see
    # INITIAL_SSH_SUCCEEDED set at top level), pull the remote scripts'
    # stderr output into the debug file to aid diagnosis.
    if [[ "${gcutil_cmd}" == 'ssh' ]] && (( ${INITIAL_SSH_SUCCEEDED} )); then
      loginfo "Fetching on-VM failure logs with cmd ${full_cmd}"
      echo "tail *.stderr" | ${full_cmd} >> ${DEBUG_FILE}
    fi
  fi
  return ${exitcode}
}

# Checks for obvious issues like missing "required" fields.
function run_sanity_checks() {
  # Validates the configuration sourced from the env files before any cloud
  # resources are touched. Exits with status 1 on the first problem found;
  # returns 0 when every check passes.
  # Reads: CONFIGBUCKET, PROJECT, COMMAND_GROUPS, COMMAND_STEPS, WORKERS,
  # NAMENODE_HOSTNAME, PREFIX, NUM_WORKERS, USE_ATTACHED_PDS,
  # WORKER_ATTACHED_PDS, NAMENODE_ATTACHED_PD, DEFAULT_FS, ENABLE_HDFS,
  # HADOOP_TARBALL_URI.
  if [[ -z "${CONFIGBUCKET}" ]]; then
    loginfo 'CONFIGBUCKET must be provided'
    exit 1
  fi

  if [[ -z "${PROJECT}" ]]; then
    loginfo 'PROJECT must be provided'
    exit 1
  fi

  # Make sure groupings of shell scripts for running on VMs are defined.
  if (( ${#COMMAND_GROUPS[@]} <= 0 )); then
    loginfo 'COMMAND_GROUPS must be non-empty.'
    exit 1
  fi

  # Make sure the series of steps to run on VMs are defined.
  if (( ${#COMMAND_STEPS[@]} <= 0 )); then
    loginfo 'COMMAND_STEPS must be non-empty.'
    exit 1
  fi

  # Make sure the hostnames all abide by the PREFIX.
  for NODE in ${WORKERS[@]} ${NAMENODE_HOSTNAME?}; do
    if ! [[ "${NODE}" =~ ^${PREFIX}.* ]]; then
      loginfo "Error: VM instance name ${NODE} doesn't start with ${PREFIX}."
      exit 1
    fi
  done

  # Check for the right number of elements in WORKERS.
  if (( ${#WORKERS[@]} != ${NUM_WORKERS?} )); then
    loginfo "WORKERS must contain ${NUM_WORKERS} elements; got ${#WORKERS[@]}"
    exit 1
  fi

  # Check for disk names being defined if USE_ATTACHED_PDS is true.
  if (( ${USE_ATTACHED_PDS} )); then
    if (( ${#WORKER_ATTACHED_PDS[@]} != ${NUM_WORKERS?} )); then
      local actual=${#WORKER_ATTACHED_PDS[@]}
      local varname='WORKER_ATTACHED_PDS'
      loginfo "${varname} has ${actual} elements, expected ${NUM_WORKERS}"
      exit 1
    fi
    if [[ -z "${NAMENODE_ATTACHED_PD}" ]]; then
      loginfo 'NAMENODE_ATTACHED_PD must be defined since USE_ATTACHED_PDS==1.'
      exit 1
    fi
  fi

  # Make sure HDFS is enabled if it is the default FS.
  if [[ "${DEFAULT_FS}" == 'hdfs' ]] && (( ! "${ENABLE_HDFS}" )); then
    # Fixed user-facing message typo ("must 1" -> "must be 1").
    loginfo 'ENABLE_HDFS must be 1 if DEFAULT_FS is hdfs.'
    exit 1
  fi

  # Make sure HADOOP_TARBALL_URI uses a supported scheme (gs, http, https).
  local scheme=${HADOOP_TARBALL_URI%%://*}
  if [[ ! "${scheme}" =~ ^(gs|https?)$ ]] ; then
    loginfo "Unsupported scheme: \"$scheme\" in" \
        "HADOOP_TARBALL_URI: ${HADOOP_TARBALL_URI}."
    exit 1
  fi

  return 0
}

# Checks for more heavyweight but obvious issues like CONFIGBUCKET
# inaccessibility prior to turning on any VMs.
function validate_deployment_settings() {
  # Perform gsutil checks last, because they are slow.
  loginfo "Checking for existence of gs://${CONFIGBUCKET}..."
  gsutil ls -b gs://${CONFIGBUCKET}

  # Catch the exitcode so that we can provide more user-friendly error messages
  # while still propagating the return value out for consolidated error-trap
  # handling.
  local exitcode=$?
  if (( ${exitcode} != 0 )); then
    loginfo "Failed to access bucket gs://${CONFIGBUCKET}."
    loginfo 'Please make sure the bucket exists and is accessible with gsutil.'
    return ${exitcode}
  fi

  # Make sure HADOOP_TARBALL_URI exists if it is stored in GCS (gs://
  # scheme). http(s) tarball URIs are not validated here.
  local scheme=${HADOOP_TARBALL_URI%%://*}
  if [[ "${scheme}" == 'gs' ]]; then
    loginfo "Checking for existence of ${HADOOP_TARBALL_URI}..."
    gsutil stat ${HADOOP_TARBALL_URI}
    local exitcode=$?
    if (( ${exitcode} != 0 )); then
      loginfo "Failed to find file ${HADOOP_TARBALL_URI}."
      loginfo 'Please make sure it exists and is accessible with gsutil.'
      return ${exitcode}
    fi
  fi

  return 0
}

# Handler for errors occurring during the deployment to print useful info
# before
# exiting.
function handle_error() {
  # Save the error code responsible for the trap.
  # NOTE: this must be the very first statement, before anything resets $?.
  local errcode=$?

  # Wait for remaining async things to finish, otherwise our error message may
  # get lost among other logspam.
  wait
  # BASH_COMMAND/BASH_LINENO describe the command that tripped the ERR trap.
  loginfo "Command failed: '${BASH_COMMAND}' on line ${BASH_LINENO[0]}."
  loginfo "Exit code of failed command: ${errcode}"
  # DEBUG_FILE is only created when an earlier command failed (see
  # run_gcutil_cmd), so check before mentioning it.
  if [[ -a ${DEBUG_FILE} ]]; then
    loginfo "Detailed debug info available in file: ${DEBUG_FILE}"
  fi
  loginfo 'Check console output for error messages and/or retry your command.'
  # Propagate the original failure code as this script's exit status.
  exit ${errcode}
}

# In the case of a single-node setup, we expect $WORKERS and $NAMENODE_HOSTNAME
# to refer to the same thing, so some logic must call this function to avoid
# duplicating certain steps (e.g. addinstance/deleteinstance).
function is_single_node_setup() {
  # Succeeds (status 0) iff there is exactly one worker and it is the same
  # instance as the namenode.
  [[ ${#WORKERS[@]} -eq 1 && "${WORKERS[0]}" == "${NAMENODE_HOSTNAME}" ]]
}

# Repeatedly try to ssh into node until success or limit is reached.
# Will fail if node takes too long.
function wait_for_ssh() {
  # $1: hostname of the VM instance to probe.
  local node=$1
  local max_attempts=5
  # Command string run via word-splitting below; 'exit 0' on the remote side
  # just tests that an ssh session can be established.
  local is_sshable="run_gcutil_cmd ssh ${node} exit 0"
  local sleep_time=${GCUTIL_POLL_INTERVAL_SECONDS}
  for (( i=0; i < ${max_attempts}; i++ )); do
    if ${is_sshable}; then
      return 0
    else
      # $? here is the exit status of the failed ssh attempt tested by the
      # 'if' condition above; remembered for the final return.
      local errcode=$?
      loginfo "'${node}' not yet sshable (${errcode}); sleeping ${sleep_time}."
      # NOTE(review): we also sleep once after the final failed attempt.
      sleep ${sleep_time}
    fi
  done
  # All attempts failed; report and return the last attempt's exit code.
  loginfo "Node '${node}' did not become ssh-able after ${max_attempts} attempts"
  return ${errcode}
}

# Use handle_error() for any errors in deployment commands.
# Use handle_error() for any errors in deployment commands.
# NOTE(review): without 'set -E' the ERR trap is not inherited by shell
# functions, so it fires only for failing top-level commands (including
# 'wait <pid>' barriers below).
trap handle_error ERR

loginfo 'Starting...'

# SCRIPT_DIR is the directory containing the partial scripts such as
# setup_hadoop_user.sh. It can be modified if the directory is not the same as
# the directory containing this script.
GHADOOP_DIR="`dirname $0`"
SCRIPT_DIR="${GHADOOP_DIR}/libexec"

# The temporary directory in which to place generated scripts for running
# on remote VMs. Can be modified if a different directory is preferred.
SCRIPT_TMPDIR="/tmp/deploy-$(date +%Y%m%d-%H%M%S)"

# A file containing more detailed debug info, only created on failure.
DEBUG_FILE="${SCRIPT_TMPDIR}/debuginfo.txt"

# Check the primary command; anything other than deploy/delete prints usage
# and exits.
if [[ "$1" != 'deploy' ]] && [[ "$1" != 'delete' ]]; then
  print_usage
fi

# Check whether to use custom environment-variable file(s): any positional
# arguments after the command ($2 onward) are treated as env files, otherwise
# fall back to the default ghadoop_env.sh next to this script.
ENV_FILES=${GHADOOP_DIR}/ghadoop_env.sh
if (( ${#@} >= 2 )); then
  ENV_FILES=${@:2}
  loginfo "Using custom environment-variable file(s): ${ENV_FILES}"
else
  loginfo "Using default environment-variable file: ${ENV_FILES}"
fi

# Pull in all the environment variables from the files, or exit if we can't
# find one of them. Files are sourced in order, so later files override
# variables defined by earlier ones.
for ENV_FILE in ${ENV_FILES}; do
  if stat ${ENV_FILE} > /dev/null; then
    loginfo "Reading environment-variable file: ${ENV_FILE}"
    source ${ENV_FILE}
  else
    loginfo "Cannot find environment-variable file: ${ENV_FILE}"
    exit 1
  fi
done

# Resolve variables that depend on other variables. NOTE(review): this
# function is not defined in this script; presumably it is provided by the
# sourced env files -- verify.
evaluate_late_variable_bindings

# Even before asking for confirmation, do basic sanity checks.
run_sanity_checks

# If we're just deleting, run the delete and exit early without actually
# generating remote scripts.
if [[ "$1" == 'delete' ]]; then
  prompt_confirmation 'Delete'
  loginfo 'Deleting hadoop cluster...'
  # Delete worker VMs in parallel. In a single-node setup the namenode and
  # the sole worker are the same instance, so it is deleted only once below.
  if ! is_single_node_setup; then
    for NODE in ${WORKERS[@]}; do
      run_gcutil_cmd deleteinstance --force --delete_boot_pd ${NODE} &
    done
  fi
  run_gcutil_cmd deleteinstance --force --delete_boot_pd ${NAMENODE_HOSTNAME} &
  # Barrier: wait on each background job individually so a nonzero status
  # from 'wait <pid>' trips the top-level ERR trap.
  for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done
  loginfo 'Done deleting VMs!'

  # Optionally delete all the attached disks as well now that the instances
  # have been deleted.
  if (( ${USE_ATTACHED_PDS} && ${DELETE_ATTACHED_PDS_ON_DELETE} )); then
    if ! is_single_node_setup; then
      loginfo "Deleting attached worker disks: ${WORKER_ATTACHED_PDS[@]}"
      for DISK in ${WORKER_ATTACHED_PDS[@]}; do
        run_gcutil_cmd deletedisk --force ${DISK} &
      done
    fi
    loginfo "Deleting attached master disk: ${NAMENODE_ATTACHED_PD}"
    run_gcutil_cmd deletedisk --force ${NAMENODE_ATTACHED_PD} &
    for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done
    loginfo 'Done deleting disks!'
  fi
  exit 0
fi

# Before proceeding, check for confirmation from the user.
prompt_confirmation 'Deploy'

# Run the more heavyweight validation after the confirmation prompt.
validate_deployment_settings

loginfo "Using local tmp dir for flag-specified env variables: ${SCRIPT_TMPDIR}"
mkdir ${SCRIPT_TMPDIR}

# Dump a temporary script which "sets" all the env variables. This will act as
# preamble for all the other remote scripts.
cat <<EOF > ${SCRIPT_TMPDIR}/hadoop-env-setup.sh
#!/bin/bash
set -e
EOF
cat ${ENV_FILES} >> ${SCRIPT_TMPDIR}/hadoop-env-setup.sh
echo 'evaluate_late_variable_bindings' >> ${SCRIPT_TMPDIR}/hadoop-env-setup.sh

# Iterate over the COMMAND_GROUPS array defined by the ENV_FILES. Each entry
# has the form '<groupname>:<whitespace-separated script files>'; the
# generated <groupname>.sh is the env-setup preamble followed by each listed
# file from SCRIPT_DIR, concatenated in order.
for COMMAND_GROUP in "${COMMAND_GROUPS[@]}"; do
  FILEGROUP="${COMMAND_GROUP%%:*}"
  loginfo "Generating command group '${FILEGROUP}':"
  GENFILE="${SCRIPT_TMPDIR}/${FILEGROUP}.sh"
  cat ${SCRIPT_TMPDIR}/hadoop-env-setup.sh > ${GENFILE}
  FILES=(${COMMAND_GROUP#*:})
  for FILE in "${FILES[@]}"; do
    loginfo "    Appending file '${FILE}'..."
    cat ${SCRIPT_DIR}/${FILE} >> ${GENFILE}
  done
  # Make it runnable.
  chmod 750 ${SCRIPT_TMPDIR}/${FILEGROUP}.sh
done

loginfo 'Done generating remote shell scripts.'

# Optionally create the disks to be attached to the VMs.
if (( ${USE_ATTACHED_PDS} && ${CREATE_ATTACHED_PDS_ON_DEPLOY} )); then
  # In a single-node setup only the master disk is created (below).
  if ! is_single_node_setup; then
    loginfo "Creating attached worker disks: ${WORKER_ATTACHED_PDS[@]}"
    for DISK in ${WORKER_ATTACHED_PDS[@]}; do
      run_gcutil_cmd \
          adddisk \
          --size_gb=${WORKER_ATTACHED_PDS_SIZE_GB} \
          ${DISK} &
    done
  fi
  loginfo "Creating attached master disk: ${NAMENODE_ATTACHED_PD}"
  run_gcutil_cmd \
      adddisk \
      --size_gb=${NAMENODE_ATTACHED_PD_SIZE_GB} \
      ${NAMENODE_ATTACHED_PD} &
  # Barrier: wait for each disk-creation job so any failure is surfaced.
  for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done
  loginfo 'Done creating disks!'
fi

# Start datanodes and namenode.
# For now, we will always auto-create a persistent boot disk and auto-delete it
# on shutdown; truly persistent volumes will be used as a non-root mount point.
# We can preserve the persistent boot disk once the setup is idempotent.
if ! is_single_node_setup; then
  for ((i=0; i < NUM_WORKERS; i++)); do
    if (( ${USE_ATTACHED_PDS} )); then
      OPTIONAL_DISK_ARG="--disk=${WORKER_ATTACHED_PDS[${i}]},mode=rw"
    fi
    run_gcutil_cmd \
        addinstance \
        --machine_type=${GCE_MACHINE_TYPE} \
        --service_account=default \
        --image=${GCE_IMAGE} \
        --network=default \
        --service_account_scopes=${GCE_SERVICE_ACCOUNT_SCOPES} \
        --persistent_boot_disk \
        --external_ip_address=ephemeral \
        ${OPTIONAL_DISK_ARG} ${WORKERS[${i}]} &
  done
fi
# The namenode gets its own (possibly different) attached disk.
if (( ${USE_ATTACHED_PDS} )); then
  OPTIONAL_DISK_ARG="--disk=${NAMENODE_ATTACHED_PD},mode=rw"
fi
run_gcutil_cmd \
    addinstance \
    --machine_type=${GCE_MACHINE_TYPE} \
    --service_account=default \
    --image=${GCE_IMAGE} \
    --network=default \
    --service_account_scopes=${GCE_SERVICE_ACCOUNT_SCOPES} \
    --persistent_boot_disk \
    --external_ip_address=ephemeral \
    ${OPTIONAL_DISK_ARG} ${NAMENODE_HOSTNAME} &
# Barrier: wait for all addinstance jobs before polling for ssh-ability.
for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done

loginfo 'Instances all created. Entering polling loop to wait for ssh-ability'

# This wait is necessary due to VMs not being immediately ssh-able. It may
# still fail if a VM is particularly slow in becoming ssh-able.
for NODE in ${WORKERS[@]}; do
  wait_for_ssh ${NODE} &
done
wait_for_ssh ${NAMENODE_HOSTNAME} &

# Wait for all nodes to be ready.
for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done

# From here on, run_gcutil_cmd may fetch on-VM stderr logs after ssh failures.
INITIAL_SSH_SUCCEEDED=1
loginfo 'Instances all ssh-able'
loginfo 'Starting upload of remote scripts.'

# Use the same COMMAND_GROUPS array to get all the files to upload.
for COMMAND_GROUP in "${COMMAND_GROUPS[@]}"; do
  FILEGROUP="${COMMAND_GROUP%%:*}"
  GENFILE="${SCRIPT_TMPDIR}/${FILEGROUP}.sh"
  for NODE in ${WORKERS[@]}; do
    run_gcutil_cmd push ${NODE} ${GENFILE} '~/' &
  done
  run_gcutil_cmd push ${NAMENODE_HOSTNAME} ${GENFILE} '~/' &
done

# Wait for all uploads to finish.
for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done

loginfo 'Uploads of shell scripts finished...'

# Now iterate over the deployment-specification's COMMAND_STEPS to run the
# setup. Each entry is '<master_cmd>,<workers_cmd>' where '*' means "skip"
# for that role. Worker commands run in parallel in the background, the
# master command runs synchronously, then we barrier before the next step.
for COMMAND_STR in ${COMMAND_STEPS[@]}; do
  WORKERS_CMD=$(echo ${COMMAND_STR} | cut -d ',' -f 2)
  MASTER_CMD=$(echo ${COMMAND_STR} | cut -d ',' -f 1)
  if [[ "${WORKERS_CMD}" != "*" ]]; then
    loginfo "Invoking on workers: ${WORKERS_CMD}"
    for NODE in ${WORKERS[@]}; do
      get_remote_cmd ${WORKERS_CMD} | run_gcutil_cmd ssh ${NODE} &
    done
  fi
  if [[ "${MASTER_CMD}" != "*" ]]; then
    loginfo "Invoking on master: ${MASTER_CMD}"
    get_remote_cmd ${MASTER_CMD} | run_gcutil_cmd ssh ${NAMENODE_HOSTNAME}
  fi
  # Wait for all the async stuff to finish before moving on.
  for SUBPROC in $(jobs -p); do wait ${SUBPROC}; done
  loginfo "Step '${COMMAND_STR}' done..."
done

loginfo 'Cluster setup complete. Cleaning up temporary files...'
rm -r ${SCRIPT_TMPDIR}

# Print a ready-to-paste gcutil ssh command for the master node.
LOGIN_CMD=$(get_gcutil_cmd ssh ${NAMENODE_HOSTNAME})
loginfo "Deployment complete! To log in to the master: ${LOGIN_CMD}"
