# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generates the config files needed by the Hadoop servers, such as 'slaves'
# (listing all datanode hostnames), 'masters' (listing the namenode), and the
# XML files which go in the Hadoop configuration directory.

# Stop at the first command that exits with a non-zero status.
set -o errexit

# If disks are mounted under /mnt, use the first one to hold the target of the
# /hadoop symlink. With nothing under /mnt the glob is normally passed to find
# unexpanded, find exits non-zero, and the else branch runs instead.
if MOUNTED_DISKS=($(find /mnt/* -maxdepth 0)); then
  MOUNTED_HADOOP_DIR="${MOUNTED_DISKS[0]}/hadoop"
  mkdir -p "${MOUNTED_HADOOP_DIR}"
  # -f and -n replace an existing /hadoop symlink in place, so a re-run does
  # not drop a second link inside the already-linked directory or fail with
  # "File exists".
  ln -sfn "${MOUNTED_HADOOP_DIR}" /hadoop
else
  # No mounted disks: keep a single empty entry so that suffix-appending
  # expansions over MOUNTED_DISKS still yield exactly one (unprefixed) path.
  MOUNTED_DISKS=('')
fi

# Backing directory for hadoop.tmp.dir; make sure it exists.
HADOOP_TMP_DIR='/hadoop/tmp'
mkdir -p "${HADOOP_TMP_DIR}"

# Configure HDFS-specific variables and create HDFS-specific directories, but
# only when ENABLE_HDFS evaluates to a non-zero number.
if (( ${ENABLE_HDFS} )); then
  # Locations of HDFS data blocks on datanodes: every MOUNTED_DISKS entry with
  # /hadoop/dfs/data appended, joined into one space-separated string.
  HDFS_DATA_DIRS="${MOUNTED_DISKS[@]/%//hadoop/dfs/data}"

  # Location of HDFS metadata on the namenode. Deliberately not created here,
  # or Hadoop will think it is already formatted.
  HDFS_NAME_DIR='/hadoop/dfs/name'

  # Intentionally unquoted: each data directory becomes its own argument.
  mkdir -p ${HDFS_DATA_DIRS}
fi

# Give the hadoop user ownership of ${TEMP_DIR}, ${HADOOP_TMP_DIR} and every
# path among /hadoop and /mnt/*/hadoop that find reports as a directory.
# The exit status of the command substitution is not checked, so missing
# candidates are skipped rather than aborting the script.
chown -R hadoop:hadoop \
    ${TEMP_DIR} \
    ${HADOOP_TMP_DIR} \
    $(find /hadoop /mnt/*/hadoop -maxdepth 0 -type d)

# Ideally we expect WORKERS to be an actual array, but if it's a
# space-separated string instead, we'll just cast it as an array first.
# declare -p output starts with "declare -a" for indexed arrays; the pattern
# needs no backslash before the dash because it begins with '^'.
if ! declare -p WORKERS | grep -q '^declare -a'; then
  # Unquoted on purpose: word splitting turns the string into array elements.
  WORKERS=(${WORKERS})
fi

# 'slaves' gets one worker hostname per line; 'masters' gets the namenode.
printf '%s\n' "${WORKERS[@]}" > "${HADOOP_CONF_DIR}/slaves"
printf '%s\n' "${NAMENODE_HOSTNAME}" > "${HADOOP_CONF_DIR}/masters"

# Basic configuration that applies regardless of GHFS or HDFS.
# Default task counts scale with the cluster size: 10 maps and 4 reduces per
# worker, a rough rule of thumb taken from
# http://wiki.apache.org/hadoop/HowManyMapsAndReduces
DEFAULT_NUM_MAPS=$(( ${NUM_WORKERS} * 10 ))
DEFAULT_NUM_REDUCES=$(( ${NUM_WORKERS} * 4 ))

# One map slot and one reduce slot per /proc/cpuinfo line that contains
# "processor".
NUM_CORES=$(grep -c processor /proc/cpuinfo)
MAP_SLOTS="${NUM_CORES}"
REDUCE_SLOTS="${NUM_CORES}"

# Resolve the real path of the java binary found on PATH, then strip everything
# from the first path component starting with "jre" or "jdk" through the
# trailing /bin/java.
# NOTE(review): for a binary directly under a jdk* directory this yields that
# directory's parent - confirm this is the intended JAVA_HOME.
JAVA_HOME=$(
  which java \
    | xargs readlink -f \
    | sed -E 's:/(jre|jdk).*/bin/java$::'
)

# Download and install ghconfig, which is used for the XML configuration,
# unless a ghconfig command is already available.
# command -v is a shell builtin, so the check does not depend on the external
# `which` utility being installed.
if ! command -v ghconfig > /dev/null; then
  wget "${GHCONFIG}" -O /tmp/ghconfig.tar.gz
  tar -C /usr/share/google -xzvf /tmp/ghconfig.tar.gz
  # Link the unpacked ghconfig executable into /usr/local/bin.
  ln -s /usr/share/google/ghconfig*/ghconfig /usr/local/bin
fi

# Run ghconfig's configure_hadoop step with the settings gathered earlier in
# this script. Every value is quoted so that an empty or whitespace-containing
# value stays attached to its own flag instead of vanishing or splitting and
# shifting the remaining arguments.
ghconfig configure_hadoop \
    --hadoop_conf_dir "${HADOOP_CONF_DIR}" \
    --java_home "${JAVA_HOME}" \
    --job_tracker_uri "${JOB_TRACKER_URI}" \
    --java_opts "${JAVAOPTS}" \
    --default_num_maps "${DEFAULT_NUM_MAPS}" \
    --default_num_reduces "${DEFAULT_NUM_REDUCES}" \
    --map_slots "${MAP_SLOTS}" \
    --reduce_slots "${REDUCE_SLOTS}" \
    --hadoop_tmp_dir "${HADOOP_TMP_DIR}"


# Run ghconfig's configure_hdfs step, but only when ENABLE_HDFS evaluates to a
# non-zero number.
if (( ${ENABLE_HDFS} )); then
  # HDFS_DATA_DIRS is a space-separated list, while the data directories are
  # passed here as a single comma-separated value. The '//' form replaces
  # every space; a single '/' would convert only the first one and leave any
  # further directories to be split off as stray arguments.
  ghconfig configure_hdfs \
      --hadoop_conf_dir "${HADOOP_CONF_DIR}" \
      --namenode_uri "${NAMENODE_URI}" \
      --hdfs_data_dirs "${HDFS_DATA_DIRS// /,}" \
      --hdfs_name_dir "${HDFS_NAME_DIR}"
fi

# Give the hadoop user ownership of the configuration directory and everything
# in it. Quoted so the path is always passed as exactly one argument.
chown -R hadoop:hadoop "${HADOOP_CONF_DIR}"
