用户可以在集群之外搭建一台独立的客户机,用于向 EMR 集群提交任务、执行客户端命令等。
网络:客户机需与 EMR 集群保持网络互通,一般应位于同一 VPC、同一安全组下;
系统:CentOS 7.x 64bit;
JAVA: JDK 1.8 版本;
1. 将如下脚本内容拷贝至需要安装客户端的客户机,并保存为 emr-install-clients.sh:
#!/bin/bash
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/bin:/root/bin

# Private-network IP address of the EMR cluster's master1 node, and its root
# password, taken from the two positional arguments.
masterip="$1"
masterpwd="$2"

# Both arguments are mandatory; a usage error must not look like success.
if [[ -z "$masterip" || -z "$masterpwd" ]]; then
  echo "usage: $0 masterip rootpassword"
  exit 1
fi

# Check the effective uid rather than $USER: $USER can be unset or stale
# (cron, some sudo setups), which made the old unquoted test misbehave.
if [[ "$(id -u)" -ne 0 ]]; then
  echo "must run with root, quit now."
  exit 1
fi

#######################################
# Make sure a command is available, installing the yum package of the same
# name when it is missing.
# Arguments: $1 - command (and package) name
# Outputs:   progress / failure messages on stdout
# Returns:   0 if the command is available afterwards, 1 otherwise
#######################################
ensure_installed() {
  local tool=$1
  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi
  echo "install $tool ..."
  # yum output is intentionally discarded; success is judged by the re-check.
  yum -y install "$tool" &>/dev/null
  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi
  echo "install $tool failed."
  return 1
}

ensure_installed expect || exit 1
ensure_installed rsync || exit 1
#######################################
# Mirror a path with "rsync -aP --delete", answering the ssh host-key and
# password prompts through expect.
# Globals:   masterpwd (read) - password sent at the ssh password prompt
# Arguments: $1 - rsync source (e.g. host:/path/)
#            $2 - rsync destination
# Outputs:   rsync progress on stdout; on failure one of TimedOut,
#            ConnectionRefused or WrongPassword on stderr
# Returns:   1 on timeout / refused connection / rejected password,
#            otherwise the exit status of rsync itself
#######################################
rsync_file() {
  local src=$1 dst=$2
  # The values are handed to expect through the environment and the heredoc
  # is quoted, so characters such as $ [ ] " \ in the password or paths are
  # never interpreted as Tcl (or shell) syntax.
  EMR_RSYNC_SRC="$src" EMR_RSYNC_DST="$dst" EMR_RSYNC_PWD="$masterpwd" expect <<'EOF'
set timeout -1
spawn rsync -aP --delete $env(EMR_RSYNC_SRC) $env(EMR_RSYNC_DST)
expect {
  timeout { puts stderr "TimedOut"; exit 1 }
  "*yes/no" { send "yes\r"; exp_continue }
  "refused" { puts stderr "ConnectionRefused"; exit 1 }
  "*assword" {
    send -- "$env(EMR_RSYNC_PWD)\r"
    expect {
      "denied" { puts stderr "WrongPassword"; exit 1 }
    }
  }
  eof
}
# Propagate rsync's own exit status; without this a failed transfer would
# still make expect (and therefore this function) return 0.
set status [wait]
exit [lindex $status 3]
EOF
}
#######################################
# Ask for confirmation before an existing directory gets overwritten.
# Arguments: $1 - directory that is about to be synchronised
# Outputs:   prompt (and, on refusal, a notice) on stdout
# Returns:   0 when the directory does not exist or the user typed "yes";
#            terminates the script with status 0 on any other answer
#######################################
confirm_overwrite() {
  local dir=$1 answer
  [[ -d "$dir" ]] || return 0
  echo "$dir already exists, will overwrite all files, are you sure? (yes|no)"
  # -r keeps backslashes in the reply literal.
  read -r answer
  if [[ "$answer" != yes ]]; then
    echo "You choose not to continue, quit now."
    exit 0
  fi
}

# rsync --delete will replace the contents of both directories.
confirm_overwrite /usr/local/service
confirm_overwrite /usr/local/jdk
# Create the hadoop account when it is not present yet.
if ! id hadoop &>>/dev/null; then
  echo "add hadoop user ..."
  useradd hadoop
fi
# Look the account up again to confirm that it really exists now.
if ! id hadoop &>>/dev/null; then
  echo "add hadoop user failed."
  exit 1
fi
sync
# Copy the JDK from the master node.
echo "copy jdk ..."
# The source argument is quoted so it always reaches rsync_file as one word.
if ! rsync_file "${masterip}:/usr/local/jdk/" /usr/local/jdk/; then
  echo "copy jdk failed."
  exit 1
fi
# Copy the client packages of all services from the master node.
echo "copy clients ..."
if ! rsync_file "${masterip}:/usr/local/service/" /usr/local/service/; then
  echo "copy clients failed."
  exit 1
fi
# Create the working directory and hand ownership to the hadoop user.
mkdir -p /data/emr
chown -R hadoop:hadoop /data/emr
# Append the client environment variables to /etc/profile, but only when
# HADOOP_HOME is not mentioned there yet. The heredoc delimiter is quoted so
# the $VAR references are written out literally.
if ! grep -wq HADOOP_HOME /etc/profile; then
  cat >>/etc/profile <<'EOF'
export JAVA_HOME=/usr/local/jdk
export HADOOP_HOME=/usr/local/service/hadoop
export HIVE_HOME=/usr/local/service/hive
export HBASE_HOME=/usr/local/service/hbase
export SPARK_HOME=/usr/local/service/spark
export STORM_HOME=/usr/local/service/storm
export SQOOP_HOME=/usr/local/service/sqoop
export KYLIN_HOME=/usr/local/service/kylin
PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$HBASE_HOME/bin:$SPARK_HOME/bin:$STORM_HOME/bin:$SQOOP_HOME/bin:$KYLIN_HOME/bin:$PATH
EOF
fi
echo "clients install finished !"
2. 以 root 用户执行脚本,部署客户端:
su - root
chmod u+x emr-install-clients.sh
./emr-install-clients.sh 主节点内网ip地址 主节点root密码
# 切换hadoop用户,执行客户端命令
su - hadoop
source /etc/profile
hive -e "SHOW DATABASES;"
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。