首页
学习
活动
专区
工具
TVP
发布
精选内容/技术社群/优惠产品,尽在小程序
立即前往

cdh6.3.2 Spark 多版本共存

一 部署Spark客户端

1.1 部署spark3客户端

tar -zxvf spark-3.3.1-bin-3.0.0-cdh6.3.2.tgz -C /opt/cloudera/parcels/CDH/lib

cd /opt/cloudera/parcels/CDH/lib

mv spark-3.3.1-bin-3.0.0-cdh6.3.2/ spark3

将 CDH 集群的 spark-env.sh 复制到 /opt/cloudera/parcels/CDH/lib/spark3/conf 下:

cp /etc/spark/conf/spark-env.sh  /opt/cloudera/parcels/CDH/lib/spark3/conf

chmod +x /opt/cloudera/parcels/CDH/lib/spark3/conf/spark-env.sh

#修改 spark-env.sh

vim /opt/cloudera/parcels/CDH/lib/spark3/conf/spark-env.sh

export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark3

HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}

将 gateway 节点的 hive-site.xml 复制到 spark3/conf 目录下,不需要做变动:

cp /etc/hive/conf/hive-site.xml /opt/cloudera/parcels/CDH/lib/spark3/conf/

1.2 部署Spark2客户端

tar -zxvf spark-2.4.0-bin-hadoop2.7.tgz -C /opt/cloudera/parcels/CDH/lib

cd /opt/cloudera/parcels/CDH/lib

mv spark-2.4.0-bin-hadoop2.7/ spark2

将 CDH 集群的 spark-env.sh 复制到 /opt/cloudera/parcels/CDH/lib/spark2/conf 下:

cp /etc/spark/conf/spark-env.sh  /opt/cloudera/parcels/CDH/lib/spark2/conf

chmod +x /opt/cloudera/parcels/CDH/lib/spark2/conf/spark-env.sh

#修改 spark-env.sh

vim /opt/cloudera/parcels/CDH/lib/spark2/conf/spark-env.sh

export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark2

HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}

将 gateway 节点的 hive-site.xml 复制到 spark2/conf 目录下,不需要做变动:

cp /etc/hive/conf/hive-site.xml /opt/cloudera/parcels/CDH/lib/spark2/conf/

二 创建spark-sql

2.1 spark3

vim /opt/cloudera/parcels/CDH/bin/spark3-sql

#!/bin/bash
#
# spark3-sql: start the Spark SQL CLI (SparkSQLCLIDriver) through the
# spark-submit located under lib/spark3, relative to the real location of
# this script. All command-line arguments are forwarded unchanged.

# Point Hadoop and YARN clients at the client configuration directory.
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=/etc/hadoop/conf

# Follow symlinks until SOURCE names a non-link file, so that BIN_DIR ends up
# being the physical directory that contains this script.
SOURCE="${BASH_SOURCE[0]}"
BIN_DIR="$(dirname "$SOURCE")"
while [ -h "$SOURCE" ]; do
  SOURCE="$(readlink "$SOURCE")"
  # A relative link target is resolved against the directory of the link.
  [[ $SOURCE != /* ]] && SOURCE="$BIN_DIR/$SOURCE"
  BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
done
BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"

# The lib directory is a sibling of the directory holding this script.
LIB_DIR="$BIN_DIR/../lib"
export HADOOP_HOME="$LIB_DIR/hadoop"

# Autodetect JAVA_HOME if not defined
. "$LIB_DIR/bigtop-utils/bigtop-detect-javahome"

# Expansions are quoted so that an install path containing whitespace or glob
# characters still reaches exec as a single word.
exec "$LIB_DIR/spark3/bin/spark-submit" --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"

配置 spark-sql 快捷方式

chmod +x /opt/cloudera/parcels/CDH/bin/spark3-sql

alternatives --install /usr/bin/spark-sql spark-sql /opt/cloudera/parcels/CDH/bin/spark3-sql 1

2.2 spark2

vim /opt/cloudera/parcels/CDH/bin/spark2-sql

#!/bin/bash
#
# spark2-sql: start the Spark SQL CLI (SparkSQLCLIDriver) through the
# spark-submit located under lib/spark2, relative to the real location of
# this script. All command-line arguments are forwarded unchanged.

# Point Hadoop and YARN clients at the client configuration directory.
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=/etc/hadoop/conf

# Follow symlinks until SOURCE names a non-link file, so that BIN_DIR ends up
# being the physical directory that contains this script.
SOURCE="${BASH_SOURCE[0]}"
BIN_DIR="$(dirname "$SOURCE")"
while [ -h "$SOURCE" ]; do
  SOURCE="$(readlink "$SOURCE")"
  # A relative link target is resolved against the directory of the link.
  [[ $SOURCE != /* ]] && SOURCE="$BIN_DIR/$SOURCE"
  BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
done
BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"

# The lib directory is a sibling of the directory holding this script.
LIB_DIR="$BIN_DIR/../lib"
export HADOOP_HOME="$LIB_DIR/hadoop"

# Autodetect JAVA_HOME if not defined
. "$LIB_DIR/bigtop-utils/bigtop-detect-javahome"

# Expansions are quoted so that an install path containing whitespace or glob
# characters still reaches exec as a single word.
exec "$LIB_DIR/spark2/bin/spark-submit" --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"

配置 spark-sql 快捷方式

chmod +x /opt/cloudera/parcels/CDH/bin/spark2-sql

alternatives --install /usr/bin/spark-sql spark-sql /opt/cloudera/parcels/CDH/bin/spark2-sql 2

三 配置conf

3.1 spark3

cd /opt/cloudera/parcels/CDH/lib/spark3/conf

## 开启日志

mv log4j2.properties.template log4j2.properties

## spark-defaults.conf 配置

cp /opt/cloudera/parcels/CDH/lib/spark/conf/spark-defaults.conf ./

# 修改 spark-defaults.conf

vim /opt/cloudera/parcels/CDH/lib/spark3/conf/spark-defaults.conf

删除 spark.extraListeners、spark.sql.queryExecutionListeners、spark.yarn.jars

添加 spark.yarn.jars=hdfs:///spark/3versionJars/*

hadoop fs -mkdir -p /spark/3versionJars

cd /opt/cloudera/parcels/CDH/lib/spark3/jars

hadoop fs -put *.jar /spark/3versionJars

3.2 spark2

cd /opt/cloudera/parcels/CDH/lib/spark2/conf

## 开启日志

mv log4j.properties.template log4j.properties

## spark-defaults.conf 配置

cp /opt/cloudera/parcels/CDH/lib/spark/conf/spark-defaults.conf ./

# 修改 spark-defaults.conf

vim /opt/cloudera/parcels/CDH/lib/spark2/conf/spark-defaults.conf

删除 spark.extraListeners、spark.sql.queryExecutionListeners、spark.yarn.jars

添加 spark.yarn.jars=hdfs:///spark/2versionJars/*

hadoop fs -mkdir -p /spark/2versionJars

cd /opt/cloudera/parcels/CDH/lib/spark2/jars

hadoop fs -put *.jar /spark/2versionJars

四 创建spark-submit

4.1 spark3

vim /opt/cloudera/parcels/CDH/bin/spark3-submit

#!/usr/bin/env bash
#
# spark3-submit: run org.apache.spark.deploy.SparkSubmit through the
# spark-class launcher under the spark3 directory of the CDH parcel.
# All command-line arguments are forwarded unchanged.

# Point Hadoop and YARN clients at the client configuration directory.
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=/etc/hadoop/conf

# Follow symlinks until SOURCE names a non-link file and record the physical
# directory that contains it.
# NOTE(review): BIN_DIR is not referenced again in this script because LIB_DIR
# below is a fixed path; the computation is kept as in the original.
SOURCE="${BASH_SOURCE[0]}"
BIN_DIR="$(dirname "$SOURCE")"
while [ -h "$SOURCE" ]; do
  SOURCE="$(readlink "$SOURCE")"
  # A relative link target is resolved against the directory of the link.
  [[ $SOURCE != /* ]] && SOURCE="$BIN_DIR/$SOURCE"
  BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
done
BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"

# Fixed parcel library directory.
LIB_DIR=/opt/cloudera/parcels/CDH/lib
export HADOOP_HOME="$LIB_DIR/hadoop"

# Autodetect JAVA_HOME if not defined
. "$LIB_DIR/bigtop-utils/bigtop-detect-javahome"

# disable randomized hash for string in Python 3.3+
export PYTHONHASHSEED=0

# Quoted so the path is always passed to exec as a single word.
exec "$LIB_DIR/spark3/bin/spark-class" org.apache.spark.deploy.SparkSubmit "$@"

配置 spark3-submit 快捷方式:

chmod 755 /opt/cloudera/parcels/CDH/bin/spark3-submit

alternatives --install /usr/bin/spark-submit spark-submit /opt/cloudera/parcels/CDH/bin/spark3-submit 1

4.2 spark2

vim /opt/cloudera/parcels/CDH/bin/spark2-submit

#!/usr/bin/env bash
#
# spark2-submit: run org.apache.spark.deploy.SparkSubmit through the
# spark-class launcher under the spark2 directory of the CDH parcel.
# All command-line arguments are forwarded unchanged.

# Point Hadoop and YARN clients at the client configuration directory.
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=/etc/hadoop/conf

# Follow symlinks until SOURCE names a non-link file and record the physical
# directory that contains it.
# NOTE(review): BIN_DIR is not referenced again in this script because LIB_DIR
# below is a fixed path; the computation is kept as in the original.
SOURCE="${BASH_SOURCE[0]}"
BIN_DIR="$(dirname "$SOURCE")"
while [ -h "$SOURCE" ]; do
  SOURCE="$(readlink "$SOURCE")"
  # A relative link target is resolved against the directory of the link.
  [[ $SOURCE != /* ]] && SOURCE="$BIN_DIR/$SOURCE"
  BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
done
BIN_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"

# Fixed parcel library directory.
LIB_DIR=/opt/cloudera/parcels/CDH/lib
export HADOOP_HOME="$LIB_DIR/hadoop"

# Autodetect JAVA_HOME if not defined
. "$LIB_DIR/bigtop-utils/bigtop-detect-javahome"

# disable randomized hash for string in Python 3.3+
export PYTHONHASHSEED=0

# Quoted so the path is always passed to exec as a single word.
exec "$LIB_DIR/spark2/bin/spark-class" org.apache.spark.deploy.SparkSubmit "$@"

配置 spark2-submit 快捷方式:

chmod 755 /opt/cloudera/parcels/CDH/bin/spark2-submit

alternatives --install /usr/bin/spark-submit spark-submit /opt/cloudera/parcels/CDH/bin/spark2-submit 2

  • 发表于:
  • 原文链接https://page.om.qq.com/page/OQRBUeigjMRz5Azfk--7QzZQ0
  • 腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号(企鹅号)传播渠道之一,根据《腾讯内容开放平台服务协议》转载发布内容。
  • 如有侵权,请联系 cloudcommunity@tencent.com 删除。

扫码

添加站长 进交流群

领取专属 10元无门槛券

私享最新 技术干货

扫码加入开发者社群
领券