#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.



###############################################################################
# Runs the following jobs to validate a hadoop cluster; suitable for running
# over ssh automatically/without a terminal.
## teragen
## terasort
## teravalidate
# If they all pass, 0 is returned; otherwise 1.
################################################################################

# Usage:
#  gcutil --project <project> ssh hadoopnamenode-0 < hadoop-validate-setup.sh
#

# Start a shell as the hadoop user. With the ssh invocation shown in the Usage
# comment this script arrives on stdin, so the new shell presumably inherits
# that stdin and runs the remaining lines as hadoop.
# NOTE(review): this depends on the script being fed via stdin; executed as a
# regular file, the lines below would only run after that shell exits, and as
# the invoking user. It also means that when the hadoop shell exits early, the
# invoking shell may resume reading whatever input is left — confirm how
# failures propagate before restructuring any of the exit paths.
sudo su hadoop

# Work from the install directory so the relative path to hadoop-config.sh
# resolves.
# NOTE(review): the result of cd is not checked; if the directory is missing,
# the source below is attempted relative to the current directory instead.
cd /home/hadoop/hadoop-install
# Source the Hadoop environment setup. HADOOP_PREFIX and HADOOP_CONF_DIR are
# used further down and are presumably defined by this file — verify against
# hadoop-config.sh.
. libexec/hadoop-config.sh

# Hadoop command line used for every job below, pinned to the configuration
# directory taken from the environment.
HADOOP_CMD="${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR}"

# Locate the hadoop examples jar; it is the jar the teragen, terasort and
# teravalidate jobs are launched from.
HADOOP_EXAMPLES_JAR=''

# Look under HADOOP_PREFIX first (tarball install). The prefix is quoted so a
# path containing whitespace stays a single argument, and the search is
# skipped when the prefix is empty: with no starting point GNU find would
# search the current directory and could pick up an unrelated jar.
if [[ -n "${HADOOP_PREFIX}" ]]; then
  HADOOP_EXAMPLES_JAR=$(find "${HADOOP_PREFIX}" -name 'hadoop-examples-*.jar' | head -n1)
fi

# If it was not found, fall back to /usr/share/hadoop (rpm/deb installs).
if [[ -z "${HADOOP_EXAMPLES_JAR}" ]]; then
  HADOOP_EXAMPLES_JAR=$(find /usr/share/hadoop -name 'hadoop-examples-*.jar' | head -n1)
fi

# Still empty: none of the jobs can be run, so stop here.
if [[ -z "${HADOOP_EXAMPLES_JAR}" ]]; then
  echo "Did not find hadoop-examples-*.jar under '${HADOOP_PREFIX}' or '/usr/share/hadoop'"
  exit 1
fi

# All test data is kept under a single HDFS directory whose name carries the
# current epoch time. The path is relative, i.e. it is resolved against the
# user's home directory on HDFS.
run_stamp=$(date +%s)
PARENT_DIR="validate_deploy_${run_stamp}"

# One sub-directory per job.
TERA_GEN_OUTPUT_DIR="${PARENT_DIR}/tera_gen_data"
TERA_SORT_OUTPUT_DIR="${PARENT_DIR}/tera_sort_data"
TERA_VALIDATE_OUTPUT_DIR="${PARENT_DIR}/tera_validate_data"

# Common prefix of all three job command lines: run a program from the
# examples jar.
examples_jar_cmd="${HADOOP_CMD} jar ${HADOOP_EXAMPLES_JAR}"

# teragen: produces the input data (10000 is the size argument handed to
# teragen).
TERA_GEN_CMD="${examples_jar_cmd} teragen 10000 ${TERA_GEN_OUTPUT_DIR}"

# terasort: reads the teragen output and writes the sorted data.
TERA_SORT_CMD="${examples_jar_cmd} terasort ${TERA_GEN_OUTPUT_DIR} ${TERA_SORT_OUTPUT_DIR}"

# teravalidate: reads the terasort output and writes its own report directory.
TERA_VALIDATE_CMD="${examples_jar_cmd} teravalidate ${TERA_SORT_OUTPUT_DIR} ${TERA_VALIDATE_OUTPUT_DIR}"

# The helpers were only needed to build the strings above.
unset run_stamp examples_jar_cmd

# Run teragen, terasort and teravalidate in that order, stopping at the first
# job that returns a non-zero status. Each command string is printed before it
# is run. The expansions are double-quoted so the string reaches printf/eval
# exactly as it was built, instead of being word-split and glob-expanded by
# the shell first.
# NOTE(review): every exit is deliberately left directly inside its own
# if/fi, as before; with the stdin-driven invocation the behaviour of the
# invoking shell after an early exit may depend on that shape — confirm before
# refactoring into a helper.
echo 'Starting teragen....'

# teragen
printf '%s\n' "${TERA_GEN_CMD}"
if ! eval "${TERA_GEN_CMD}"; then
  echo 'tera gen failed.'
  exit 1
fi
echo 'Teragen passed starting terasort....'


# terasort
printf '%s\n' "${TERA_SORT_CMD}"
if ! eval "${TERA_SORT_CMD}"; then
  echo 'tera sort failed.'
  exit 1
fi
echo 'Terasort passed starting teravalidate....'

# teravalidate
printf '%s\n' "${TERA_VALIDATE_CMD}"
if ! eval "${TERA_VALIDATE_CMD}"; then
  echo 'tera validate failed.'
  exit 1
fi

# Reached only when all three jobs returned success.
echo 'teragen, terasort, teravalidate passed.'
echo "Cleaning the data created by tests: ${PARENT_DIR}"

# Remove the test data directory from HDFS, bypassing the trash. The command
# string is quoted for printf/eval so it is not word-split or glob-expanded
# beforehand.
CLEANUP_CMD="${HADOOP_CMD} dfs -rmr -skipTrash ${PARENT_DIR}"
printf '%s\n' "${CLEANUP_CMD}"
if ! eval "${CLEANUP_CMD}"; then
  # Cleanup stays best effort: the validation itself passed, so the exit
  # status is not changed, but the leftover data is reported instead of being
  # ignored silently.
  echo "Warning: failed to remove test data: ${PARENT_DIR}" >&2
fi

exit 0
