Skip to content

Commit

Permalink
HADOOP-12857. rework hadoop-tools (aw)
Browse files Browse the repository at this point in the history
  • Loading branch information
aw-was-here committed Mar 23, 2016
1 parent 8f85e5d commit 7381550
Show file tree
Hide file tree
Showing 33 changed files with 651 additions and 132 deletions.
182 changes: 182 additions & 0 deletions dev-support/bin/dist-tools-hooks-maker
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Positional arguments; each falls back to a default when missing or empty:
#   $1 - version string used in the hadoop-${VERSION} directory name and
#        in the generated <module>-${VERSION}.jar classpath entries
#   $2 - directory that holds the hadoop-${VERSION} tree being populated
#   $3 - directory searched for *.tools-builtin.txt and
#        *.tools-optional.txt dependency listings
VERSION=${1:-3.0.0-SNAPSHOT}
TARGETDIR=${2:-/tmp/target}
TOOLSDIR=${3:-/tmp/tools}

# Print the path of the hook script that is generated for a module.
#   $1 - module name; becomes the base name of the script
#   $2 - module type; "builtin" selects libexec/tools, any other value
#        selects libexec/shellprofile.d
# Reads the globals TARGETDIR and VERSION.
function getfilename
{
  declare module=$1
  declare modtype=$2
  declare subdir=shellprofile.d

  case ${modtype} in
    builtin)
      subdir=tools
    ;;
  esac

  echo "${TARGETDIR}/hadoop-${VERSION}/libexec/${subdir}/${module}.sh"
}

# Append the shebang, the license banner and the "automatically generated"
# notice to a file.
#   $1 - file to append to
function header
{
  declare target=$1

  # quoted delimiter: the banner is emitted verbatim, nothing is expanded
  cat <<'TOKEN' >> "${target}"
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
# IMPORTANT: This file is automatically generated by hadoop-dist at
# -Pdist time.
#
#
TOKEN
}

# Start the generated script for an optional module.
#   $1 - script file to append to
#   $2 - module name
# Adds the module to the comma-separated global OPTMODS, then appends the
# profile registration stanza and the opening lines of the module's
# _<module>_hadoop_classpath function. The function body is left open.
function optional_prologue
{
  declare fn=$1
  declare module=$2

  # prefix a comma only when the list already has content
  OPTMODS="${OPTMODS:+${OPTMODS},}${module}"

  printf '%s\n' \
    "if hadoop_verify_entry HADOOP_TOOLS_OPTIONS \"${module}\"; then" \
    " hadoop_add_profile \"${module}\"" \
    "fi" \
    "" \
    "function _${module}_hadoop_classpath" \
    "{" >> "${fn}"
}

# Start the generated script for a builtin module.
#   $1 - script file to append to
#   $2 - module name
# Appends a blank line and the opening lines of the module's
# hadoop_classpath_tools_<module> function. The function body is left open.
function builtin_prologue
{
  declare fn=$1
  declare module=$2

  printf '%s\n' \
    "" \
    "function hadoop_classpath_tools_${module}" \
    "{" >> "${fn}"
}

# Fill in and close the classpath function that a *_prologue call opened.
#   $1 - script file to append to
#   $2 - module name
#   $3 - dependency listing; only lines containing "compile" are used
# For each used line the jar name is built from the 2nd and 4th
# colon-separated fields as <field2>-<field4>.jar. A jar found under
# share/hadoop/tools/lib gets a guarded hadoop_add_classpath entry; a jar
# found under share/hadoop/common or share/hadoop/common/lib is skipped;
# anything else is reported on stdout. Finally the module's own jar is added
# and the function is closed. Reads the globals TARGETDIR and VERSION.
function dependencywork
{
  declare fn=$1
  declare module=$2
  declare depfn=$3

  declare sharedir="${TARGETDIR}/hadoop-${VERSION}/share/hadoop"
  declare entry
  declare artifact
  declare version
  declare jar
  declare ref

  while read -r entry; do
    # split on ':' and keep fields two and four
    IFS=: read -r _ artifact _ version _ <<< "${entry}"
    jar="${artifact}-${version}.jar"

    if [[ -f "${sharedir}/tools/lib/${jar}" ]]; then
      # the variable references stay unexpanded in the generated script
      ref="\${HADOOP_TOOLS_HOME}/\${HADOOP_TOOLS_LIB_JARS_DIR}/${jar}"
      printf '%s\n' \
        " if [[ -f \"${ref}\" ]]; then" \
        " hadoop_add_classpath \"${ref}\"" \
        " fi" >> "${fn}"
      continue
    fi

    if [[ -f "${sharedir}/common/${jar}"
       || -f "${sharedir}/common/lib/${jar}" ]]; then
      continue
    fi

    echo "ERROR: ${module} has missing dependencies: ${jar}"
  done < <(grep compile "${depfn}")

  printf '%s\n' \
    " hadoop_add_classpath \"\${HADOOP_TOOLS_HOME}/\${HADOOP_TOOLS_LIB_JARS_DIR}/${module}-${VERSION}.jar\"" \
    "}" \
    "" >> "${fn}"
}

# Replace the @@@HADOOP_OPTIONAL_TOOLS@@@ placeholder in the distribution's
# etc/hadoop/hadoop-env.sh with the contents of the global OPTMODS.
# The result is written to a sibling ".new" file which then replaces the
# original. Reads the globals TARGETDIR, VERSION and OPTMODS.
function document_optionals
{
  declare envfile="${TARGETDIR}/hadoop-${VERSION}/etc/hadoop/hadoop-env.sh"

  echo "Rewriting ${envfile}"
  # '^' is the sed delimiter here
  sed -e "s^@@@HADOOP_OPTIONAL_TOOLS@@@^${OPTMODS}^" "${envfile}" > "${envfile}.new"
  mv "${envfile}.new" "${envfile}"
}

# Generate one hook script per dependency listing found under TOOLSDIR,
# then rewrite hadoop-env.sh via document_optionals.
# A listing is named <module>.tools-<type>.txt; <type> picks both the output
# location (getfilename) and the <type>_prologue function that is called.
function process
{
  declare listing
  declare leaf
  declare name
  declare kind
  declare hook

  while read -r listing; do
    leaf=${listing##*/}

    # first dot-separated field is the module, second is "tools-<type>"
    name=${leaf%%.*}
    kind=${leaf#*.}
    kind=${kind%%.*}
    kind=${kind##tools-}

    hook=$(getfilename "${name}" "${kind}")
    mkdir -p "$(dirname "${hook}")"

    # always start from an empty file; the helpers only append
    if [[ -f "${hook}" ]]; then
      rm "${hook}"
    fi
    touch "${hook}"

    header "${hook}" "${name}"
    "${kind}_prologue" "${hook}" "${name}"
    dependencywork "${hook}" "${name}" "${listing}"

    chmod a+rx "${hook}"
  done < <(find "${TOOLSDIR}" -name '*.tools-builtin.txt' -o -name '*.tools-optional.txt')

  document_optionals
}

# Script entry point: generate every hook script, then update hadoop-env.sh.
process
10 changes: 6 additions & 4 deletions hadoop-common-project/hadoop-common/src/main/bin/hadoop
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ case ${COMMAND} in
;;
archive)
CLASS=org.apache.hadoop.tools.HadoopArchives
hadoop_add_to_classpath_toolspath
hadoop_add_to_classpath_tools hadoop-archives
;;
checknative)
CLASS=org.apache.hadoop.util.NativeLibraryChecker
Expand All @@ -133,11 +133,11 @@ case ${COMMAND} in
;;
distch)
CLASS=org.apache.hadoop.tools.DistCh
hadoop_add_to_classpath_toolspath
hadoop_add_to_classpath_tools hadoop-extras
;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_to_classpath_toolspath
hadoop_add_to_classpath_tools hadoop-distcp
;;
envvars)
echo "JAVA_HOME='${JAVA_HOME}'"
Expand All @@ -146,7 +146,9 @@ case ${COMMAND} in
echo "HADOOP_COMMON_LIB_JARS_DIR='${HADOOP_COMMON_LIB_JARS_DIR}'"
echo "HADOOP_COMMON_LIB_NATIVE_DIR='${HADOOP_COMMON_LIB_NATIVE_DIR}'"
echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
echo "HADOOP_TOOLS_PATH='${HADOOP_TOOLS_PATH}'"
echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
exit 0
;;
fs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,44 @@ function hadoop_debug
fi
}

## @description  Given variable $1 delete $2 from it.
## @description  Entries are stored padded with a space on each side
## @description  (" entry "); every occurrence of the padded entry is removed.
## @audience     public
## @stability    stable
## @replaceable  no
## @param        name of the variable that holds the entries
## @param        entry to remove
function hadoop_delete_entry
{
  declare __hadoop_entry=" ${2} "
  declare __hadoop_value=${!1}

  # Quoting the operand makes this a literal substring test, so characters
  # such as '.' or '*' in the entry are not treated as a pattern.
  if [[ ${__hadoop_value} == *"${__hadoop_entry}"* ]]; then
    hadoop_debug "Removing ${2} from ${1}"
    # the quoted pattern is matched literally here as well
    __hadoop_value=${__hadoop_value//"${__hadoop_entry}"/}
    # printf -v stores the value as-is; eval would re-expand any '$',
    # backquote or double quote it contains
    printf -v "${1}" '%s' "${__hadoop_value}"
  fi
}

## @description  Given variable $1 add $2 to it.
## @description  The entry is appended padded with a space on each side
## @description  (" entry ") unless that padded entry is already present.
## @audience     public
## @stability    stable
## @replaceable  no
## @param        name of the variable that holds the entries
## @param        entry to add
function hadoop_add_entry
{
  # Quoting the operand makes this a literal substring test, so an entry
  # such as "a.b" is no longer considered present because "axb" is.
  if [[ ${!1} != *" ${2} "* ]]; then
    hadoop_debug "Adding ${2} to ${1}"
    # printf -v stores the value as-is; eval would re-expand any '$',
    # backquote or double quote it contains
    printf -v "${1}" '%s' "${!1} ${2} "
  fi
}

## @description  Given variable $1 determine if $2 is in it.
## @description  An entry counts as present when the variable contains it
## @description  padded with a space on each side (" entry ").
## @audience     public
## @stability    stable
## @replaceable  no
## @param        name of the variable that holds the entries
## @param        entry to look for
## @return       0 = yes, 1 = no
function hadoop_verify_entry
{
  # this unfortunately can't really be tested by bats. :(
  # so if this changes, be aware that unit tests effectively
  # do this function in them

  # Quoting the operand makes this a literal substring test, so characters
  # such as '.' or '*' in the entry are not treated as a pattern.
  [[ ${!1} == *" ${2} "* ]]
}

## @description Add a subcommand to the usage output
## @audience private
## @stability evolving
Expand Down Expand Up @@ -264,10 +302,9 @@ function hadoop_bootstrap
YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}

# setup a default HADOOP_TOOLS_PATH
hadoop_deprecate_envvar TOOL_PATH HADOOP_TOOLS_PATH
HADOOP_TOOLS_PATH=${HADOOP_TOOLS_PATH:-${HADOOP_PREFIX}/share/hadoop/tools/lib/*}
HADOOP_TOOLS_HOME=${HADOOP_TOOLS_HOME:-${HADOOP_PREFIX}}
HADOOP_TOOLS_DIR=${HADOOP_TOOLS_DIR:-"share/hadoop/tools"}
HADOOP_TOOLS_LIB_JARS_DIR=${HADOOP_TOOLS_LIB_JARS_DIR:-"${HADOOP_TOOLS_DIR}/lib"}

# usage output set to zero
hadoop_reset_usage
Expand Down Expand Up @@ -322,6 +359,7 @@ function hadoop_exec_hadoopenv
if [[ -z "${HADOOP_ENV_PROCESSED}" ]]; then
if [[ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]]; then
export HADOOP_ENV_PROCESSED=true
# shellcheck disable=SC1090
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
fi
Expand All @@ -334,6 +372,7 @@ function hadoop_exec_hadoopenv
function hadoop_exec_userfuncs
{
  # hadoop-user-functions.sh is optional: succeed quietly when it is absent,
  # otherwise source it into the current shell
  [[ -e "${HADOOP_CONF_DIR}/hadoop-user-functions.sh" ]] || return 0

  # shellcheck disable=SC1090
  . "${HADOOP_CONF_DIR}/hadoop-user-functions.sh"
}
Expand All @@ -348,6 +387,7 @@ function hadoop_exec_hadooprc
{
if [[ -f "${HOME}/.hadooprc" ]]; then
hadoop_debug "Applying the user's .hadooprc"
# shellcheck disable=SC1090
. "${HOME}/.hadooprc"
fi
}
Expand All @@ -373,11 +413,22 @@ function hadoop_import_shellprofiles
files2=(${HADOOP_CONF_DIR}/shellprofile.d/*.sh)
fi

# enable bundled shellprofiles that come
# from hadoop-tools. This converts the user-facing HADOOP_OPTIONAL_TOOLS
# to the HADOOP_TOOLS_OPTIONS that the shell profiles expect.
# See dist-tools-hooks-maker for how the example HADOOP_OPTIONAL_TOOLS
# gets populated into hadoop-env.sh

for i in ${HADOOP_OPTIONAL_TOOLS//,/ }; do
hadoop_add_entry HADOOP_TOOLS_OPTIONS "${i}"
done

for i in "${files1[@]}" "${files2[@]}"
do
if [[ -n "${i}"
&& -f "${i}" ]]; then
hadoop_debug "Profiles: importing ${i}"
# shellcheck disable=SC1090
. "${i}"
fi
done
Expand Down Expand Up @@ -945,34 +996,25 @@ function hadoop_add_common_to_classpath
hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"'/*'
}

## @description Add the HADOOP_TOOLS_PATH to the classpath
## @description Run libexec/tools/module.sh to add to the classpath
## @description environment
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_add_to_classpath_toolspath
## @param module
function hadoop_add_to_classpath_tools
{
declare -a array
declare -i c=0
declare -i j
declare -i i
declare idx
declare module=$1

if [[ -n "${HADOOP_TOOLS_PATH}" ]]; then
hadoop_debug "Adding HADOOP_TOOLS_PATH to CLASSPATH"
oldifs=${IFS}
IFS=:
for idx in ${HADOOP_TOOLS_PATH}; do
array[${c}]=${idx}
((c=c+1))
done
IFS=${oldifs}
((j=c-1)) || ${QATESTMODE}

for ((i=0; i<=j; i++)); do
hadoop_add_classpath "${array[$i]}" after
done
if [[ -f "${HADOOP_LIBEXEC_DIR}/tools/${module}.sh" ]]; then
# shellcheck disable=SC1090
. "${HADOOP_LIBEXEC_DIR}/tools/${module}.sh"
else
hadoop_error "ERROR: Tools helper ${HADOOP_LIBEXEC_DIR}/tools/${module}.sh was not found."
fi

if declare -f hadoop_classpath_tools_${module} >/dev/null 2>&1; then
"hadoop_classpath_tools_${module}"
fi
}

Expand Down
10 changes: 7 additions & 3 deletions hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
# Copyright 2014 The Apache Software Foundation
#
# Licensed to the Apache Software Foundation (ASF) under one
Expand Down Expand Up @@ -87,7 +88,10 @@
# Misc paths
####

# setup a default HADOOP_TOOLS_PATH, where things like distcp lives
# This is where things like distcp, S3, and other things live
# note that this path only gets added for certain commands and not
# part of the general classpath
# export HADOOP_TOOLS_PATH="${HADOOP_PREFIX}/share/hadoop/tools/lib/*"
# part of the general classpath unless HADOOP_OPTIONAL_TOOLS is used
# to configure them in
# export HADOOP_TOOLS_HOME=${HADOOP_PREFIX}
# export HADOOP_TOOLS_DIR=${HADOOP_TOOLS_DIR:-"share/hadoop/tools"}
# export HADOOP_TOOLS_LIB_JARS_DIR=${HADOOP_TOOLS_LIB_JARS_DIR:-"${HADOOP_TOOLS_DIR}/lib"}
Loading

0 comments on commit 7381550

Please sign in to comment.