前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >史上最强hadoop分布式集群的搭建

史上最强hadoop分布式集群的搭建

作者头像
润森
发布2020-03-12 11:38:22
5420
发布2020-03-12 11:38:22
举报
史上最强hadoop分布式集群的搭建

@Author:by Runsen

@Date: 2020-02-23

原创文章,禁止转载

原文首发CSDN:https://blog.csdn.net/weixin_44510615/article/details/104625802

9.1 Hadoop分布式集群的搭建

9.1.1 修改hosts文件

在上章中 CentOS 7已经配置了Java环境,采用搭建elasticsearch集群的三台 Linux CentOS 7机器,搭建三节点 Hadoop分布式集群,其中node01作为Master,node02和node03作为slaves。参考:http://hadoop.apache.org/docs/r3.2.1/hadoop-project-dist/hadoop-common/ClusterSetup.html

NodeName

IP地址

node01

192.168.92.90

node02

192.168.92.91

node03

192.168.92.92

永久设置主机名,需要修改hosts文件,设置虚拟机的ip和主机名的映射关系,并关闭防火墙

[root@node01 ~]# vim /etc/sysconfig/network
#########
HOSTNAME=node01
[root@node01 ~]# vim /etc/hosts
#########
192.168.92.90 node01
192.168.92.91 node02
192.168.92.92 node03
[root@node01 ~]# systemctl stop firewalld
[root@node01 ~]# systemctl disable firewalld.service

[root@node02 ~]# vim /etc/sysconfig/network
#########
HOSTNAME=node02
[root@node02 ~]# vim /etc/hosts
#########
192.168.92.90 node01
192.168.92.91 node02
192.168.92.92 node03
[root@node02 ~]# systemctl stop firewalld
[root@node02 ~]# systemctl disable firewalld.service

[root@node03 ~]# vim /etc/sysconfig/network
#########
HOSTNAME=node03
[root@node03 ~]# vim /etc/hosts
#########
192.168.92.90 node01
192.168.92.91 node02
192.168.92.92 node03
[root@node03 ~]# systemctl stop firewalld
[root@node03 ~]# systemctl disable firewalld.service

9.1.2 配置ssh免密码登录

hadoop通过SSH实现对各节点的管理,因此需要配置ssh免密码登录,实现node01免密码登录到node02和node03。

[root@node01 ~]# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:UGcKoMkBmrZQXNzVTKoyOMFEWXPfY0LmZSZ7xfSwLrI root@node01
The key's randomart image is:
+---[RSA 2048]----+
|.+===oo.*+Bo+    |
|.*o+.o.B %o..+   |
|+.*   . B = . .  |
|o .o   o + o     |
| .o o . S . .    |
|   . o   o .     |
|        E        |
|                 |
|                 |
+----[SHA256]-----+
[root@node01 ~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@node01 ~]# ssh node01
Last login: Mon Feb 24 12:50:34 2020
[root@node01 ~]# scp ~/.ssh/id_rsa.pub root@node02:~/
root@node02's password:
id_rsa.pub             100%  393   351.3KB/s   00:00

[root@node02 ~]$ mkdir ~/.ssh
[root@node02 ~]$ cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
[root@node02 ~]# rm -rf ~/id_rsa.pub

[root@node01 ~]# ssh node02
Last login: Mon Feb 24 15:49:50 2020 from node01
[root@node02 ~]#

[root@node01 ~]# scp ~/.ssh/id_rsa.pub root@node03:~/
root@node03's password:
id_rsa.pub             100%  393   351.3KB/s   00:00

[root@node03 ~]# mkdir ~/.ssh
[root@node03 ~]# cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
[root@node03 ~]# rm -rf ~/id_rsa.pub

[root@node01 ~]# ssh node03
Last login: Mon Feb 24 15:45:09 2020 from 192.168.92.1
[root@node03 ~]#

9.1.3 下载hadoop

下载hadoop官方二进制的版本,这里下载hadoop3.2.1版本,下载链接:http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz,下载完成后,我们选择解压安装至/usr/local/hadoop/

[root@node01 ~]# mkdir -p /root/opt/module/hadoop/
[root@node01 ~]# cd opt/module/hadoop/
[root@node01 hadoop ~]# wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz
[root@node01 hadoop ~] # tar -zxvf hadoop-3.2.1.tar.gz
[root@node01 hadoop ~] # cd hadoop-3.2.1
[root@node01 hadoop-3.2.1]# ll
总用量 180
drwxr-xr-x. 2 hadoop hadoop    203 9月  11 00:51 bin
drwxr-xr-x. 3 hadoop hadoop     20 9月  10 23:58 etc
drwxr-xr-x. 2 hadoop hadoop    106 9月  11 00:51 include
drwxr-xr-x. 3 hadoop hadoop     20 9月  11 00:51 lib
drwxr-xr-x. 4 hadoop hadoop    288 9月  11 00:51 libexec
-rw-rw-r--. 1 hadoop hadoop 150569 9月  10 22:35 LICENSE.txt
-rw-rw-r--. 1 hadoop hadoop  22125 9月  10 22:35 NOTICE.txt
-rw-rw-r--. 1 hadoop hadoop   1361 9月  10 22:35 README.txt
drwxr-xr-x. 3 hadoop hadoop   4096 9月  10 23:58 sbin
drwxr-xr-x. 4 hadoop hadoop     31 9月  11 01:11 share

9.1.4 修改core-site.xml

需要在/etc/hadoop/目录下,修改配置文件core-site.xml。在配置文件中添加了两项内容,一个是fs.defaultFS,它是指定HDFS的主节点,即node01的所在的centos7机器。另一个是hadoop.tmp.dir,用于指定Hadoop缓存数据的目录,需要手工创建该目录:mkdir -p /root/opt/data/tep

[root@node01 hadoop-3.2.1]# cd etc/hadoop/
[root@node01 hadoop]# vim core-site.xml
#############
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node01:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/root/opt/data/tep</value>
    </property>
    <property>
  		<name>dfs.http.address</name>
  		<value>0.0.0.0:50070</value>
	</property>
</configuration>

9.1.5 修改 hdfs-site.xml

配置HDFS上的数据块副本个数的参数,默认设置下,副本个数为3,设置的副本数量必须小于等于机器数量。

[root@node01 hadoop]# vim hdfs-site.xml
#############
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node01:50090</value>
    </property>
    <property>
    	<name>dfs.namenode.http-address</name>
    	<value>node01:9870</value>
    </property>
</configuration>

9.1.6 修改mapred-site.xml

为了确保MapReduce使用YARN来进行资源管理和调度,需要修改mapred-site.xml

[root@node01 hadoop]# vim mapred-site.xml
#############
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>

注:如果搭建的是只有一个节点的伪分布集群,则需要把 hdfs 中文件副本的数量设置为 1;本文是三节点分布式集群,因此副本数保持为 3。

9.1.7 修改 yarn-site.xml

指定NodeManager的辅助服务为mapreduce_shuffle,在Hadoop 3.X中配置Hadoop相关环境变量白名单,并指定resourcemanager所在的主机名为node01。

[root@node01 hadoop]# vim yarn-site.xml
#############
<configuration>
	<property>
		<name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
    	<name>yarn.nodemanager.env-whitelist</name>	   
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    <property>
    	<name>yarn.resourcemanager.hostname</name>
    	<value>node01</value>
    </property>
</configuration>

9.1.8 Hadoop环境变量配置

[root@node01 hadoop]# vim /etc/profile
############
export HADOOP_HOME=/root/opt/module/hadoop/hadoop-3.2.1/
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@node01 hadoop]# source /etc/profile
[root@node01 hadoop]# hadoop version
Hadoop 3.2.1
Source code repository https://gitbox.apache.org/repos/asf/hadoop.git -r b3cbbb467e22ea829b3808f4b7b01d07e0bf3842
Compiled by rohithsharmaks on 2019-09-10T15:56Z
Compiled with protoc 2.5.0
From source with checksum 776eaf9eee9c0ffc370bcbc1888737
This command was run using /root/opt/module/hadoop/hadoop-3.2.1/share/hadoop/common/hadoop-common-3.2.1.jar
[root@node01 hadoop]# vim hadoop-env.sh
############
export JAVA_HOME=/usr/local/java/jdk1.8.0_231
export HADOOP_LOG_DIR=/root/opt/data/tep

[root@node01 hadoop]# vim yarn-env.sh
############
export JAVA_HOME=/usr/local/java/jdk1.8.0_231

[root@node01 hadoop]# vim workers
############
node02
node03

[root@node01 hadoop]# cd ../..
[root@node01 hadoop-3.2.1]# vim sbin/start-dfs.sh
############
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root

[root@node01 hadoop-3.2.1]# vim sbin/stop-dfs.sh
############
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root


[root@node01 hadoop-3.2.1]# vim sbin/start-yarn.sh
############
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root

[root@node01 hadoop-3.2.1]# vim sbin/stop-yarn.sh
############
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root

9.1.9 搭建Zookeeper

ZooKeeper是一个分布式的,开放源码的分布式应用程序协调服务,是Google的Chubby一个开源的实现,是Hadoop的重要组件。

下面搭建Zookeeper

[root@node01] mkdir -p opt/module/zookeeper
[root@node01] cd  opt/module/zookeeper
[root@node01 zookeeper]# wget http://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.5.6/apache-zookeeper-3.5.6-bin.tar.gz
[root@node01 zookeeper]# tar -zxvf apache-zookeeper-3.5.6-bin.tar.gz
[root@node01 zookeeper]# cd apache-zookeeper-3.5.6-bin
[root@node01 apache-zookeeper-3.5.6-bin]# ll
总用量 32
drwxr-xr-x. 2 elsearch elsearch   232 10月  9 04:14 bin
drwxr-xr-x. 2 elsearch elsearch    70 2月  27 11:20 conf
drwxr-xr-x. 5 elsearch elsearch  4096 10月  9 04:15 docs
drwxr-xr-x. 2 root     root      4096 2月  27 11:02 lib
-rw-r--r--. 1 elsearch elsearch 11358 10月  5 19:27 LICENSE.txt
drwxr-xr-x. 2 root     root        46 2月  27 11:17 logs
-rw-r--r--. 1 elsearch elsearch   432 10月  9 04:14 NOTICE.txt
-rw-r--r--. 1 elsearch elsearch  1560 10月  9 04:14 README.md
-rw-r--r--. 1 elsearch elsearch  1347 10月  5 19:27 README_packaging.txt
drwxr-xr-x. 3 root     root        35 2月  27 11:30 zkdata
drwxr-xr-x. 3 root     root        23 2月  27 11:23 zklog
[root@node01 apache-zookeeper-3.5.6-bin]# pwd
/root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin
[root@node01 apache-zookeeper-3.5.6-bin]# mkdir zkdata
[root@node01 apache-zookeeper-3.5.6-bin]# mkdir zklog
[root@node01 apache-zookeeper-3.5.6-bin]# cd conf/
[root@node01 conf]# mv zoo_sample.cfg zoo.cfg
[root@node01 conf]# vim zoo.cfg
#############
dataDir=/root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin/zkdata
dataLogDir=/root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin/zklog
server.1=192.168.92.90:2888:3888
server.2=192.168.92.91:2888:3888
server.3=192.168.92.92:2888:3888
[root@node01 conf]# cd ../zkdata/
[root@node01 zkdata]# echo "1" >> myid
[root@node01 zkdata]# vim /etc/profile
##############
export ZOOKEEPER_HOME=/root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin/
export PATH=$PATH:$ZOOKEEPER_HOME/bin
[root@node01 zkdata] # source /etc/profile

9.1.10 启动hadoop和Zookeeper集群

启动hadoop集群前,我们先拷贝主节点node01到node02和node03,第一次开启Hadoop集群需要格式化HDFS

[root@node02 ]# mkdir -p /root/opt
[root@node03 ]# mkdir -p /root/opt

[root@node01 ]# #拷贝到两个从节点
[root@node01 ]# scp -rp opt/ root@node02:/root/opt/
[root@node01 ]# scp -rp opt/ root@node03:/root/opt/

[root@node01 ]# scp -rp /etc/profile node02:/etc/profile
[root@node01 ]# scp -rp /etc/profile node03:/etc/profile

[root@node02]# source /etc/profile
[root@node03]# source /etc/profile

[root@node01 ]# cd ../../
[root@node01 hadoop-3.2.1]# #格式化HDFS
[root@node01 hadoop-3.2.1]# bin/hdfs namenode -format
# 如果在后面的日志信息中能看到这一行,则说明 namenode 格式化成功。
2020-02-24 15:21:28,893 INFO common.Storage: Storage directory /root/opt/data/tep/dfs/name has been successfully formatted.
# 启动hadoop
[root@node01 hadoop-3.2.1]# sbin/start-all.sh

我们可以在三台机器分别输入jps命令,来判断集群是否启动成功,如果看到以下服务则启动成功。在node01节点上可以看到NameNode、ResourceManager、SecondaryNameNode、Jps进程

[root@node01 ~]# jps
3601 ResourceManager
3346 SecondaryNameNode
4074 Jps
3069 NameNode
[root@node02 ~]# jps
3473 Jps
3234 NodeManager
3114 DataNode
[root@node03 ~]# jps
3031 NodeManager
3256 Jps
2909 DataNode

这时,我们可以访问http://192.168.92.90:9870或者http://node01:9870,Hadoop页面如下图9-1所示

Hadoop页面9-1

hadoop2.x向hadoop3.x进化过程中,网页访问端口也进行了更改,由50070端口更改为9870端口

下面是Hadoop3.x版本改变的端口号:

类别

应用

Hadoop2.x

Hadoop3.x

NameNodePorts

NameNode

8020

9820

NameNodePorts

NameNode HTTP UI

50070

9870

NameNodePorts

NameNode HTTPS UI

50470

9871

SecondaryNameNode ports

SecondaryNameNode HTTP

50091

9869

SecondaryNameNode ports

SecondaryNameNode HTTP UI

50090

9868

DataNode ports

DataNode IPC

50020

9867

DataNode ports

DataNode

50010

9866

DataNode ports

DataNode HTTP UI

50075

9864

DataNode ports

DataNode HTTPS UI

50475

9865

我们可以访问:http://192.168.92.90:8088/,查看集群状态,如下图9-2所示

hadoop集群状态9-2

如果我们想停止集群,执行命令sbin/stop-all.sh

# 停止hadoop
[root@node01 hadoop-3.2.1]# sbin/stop-all.sh

至此,Hadoop分布式集群搭建成功。

下面启动Zookeeper集群,首先修改分发的myid文件,再分别启动Zookeeper。

[root@node02 apache-zookeeper-3.5.6-bin]# vim zkdata/myid
2
[root@node03 apache-zookeeper-3.5.6-bin]# vim zkdata/myid
3

#分别启动Zookeeper
[root@node01 apache-zookeeper-3.5.6-bin]# bin/zkServer.sh start
[root@node02 apache-zookeeper-3.5.6-bin]# bin/zkServer.sh start
[root@node03 apache-zookeeper-3.5.6-bin]# bin/zkServer.sh start

# 查看Zookeeper启动出现的节点QuorumPeerMain
[root@node01]# jps
16962 Jps
16005 NameNode
16534 ResourceManager
16903 QuorumPeerMain
16282 SecondaryNameNode

[root@node02]# jps
8402 Jps
8037 NodeManager
7914 DataNode
8202 QuorumPeerMain

#查看zookeeper选举状态
[root@node01 apache-zookeeper-3.5.6-bin]# bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower
[root@node02 apache-zookeeper-3.5.6-bin]# bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: leader
[root@node03 apache-zookeeper-3.5.6-bin]# bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /root/opt/module/zookeeper/apache-zookeeper-3.5.6-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower

至此,Zookeeper集群搭建成功。在zookeeper集群中,node02为leader,node01和node03为follower

下面使用客户端命令简单操作zookeeper

[root@node02 apache-zookeeper-3.5.6-bin]# bin/zkCli.sh
[zk: localhost:2181(CONNECTED) 0]# ls查看当前 ZooKeeper 中所包含的内容
[zk: localhost:2181(CONNECTED) 0] ls /
[zookeeper]
[zk: localhost:2181(CONNECTED) 1] # ls2查看当前节点详细数据
[zk: localhost:2181(CONNECTED) 1] ls2 /
[zookeeper]
cZxid = 0x0
ctime = Thu Jan 01 08:00:00 CST 1970
mZxid = 0x0
mtime = Thu Jan 01 08:00:00 CST 1970
pZxid = 0x0
cversion = -1
dataVersion = 0
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 0
numChildren = 1
[zk: localhost:2181(CONNECTED) 2] # create创建节点
[zk: localhost:2181(CONNECTED) 2] create /zk myData
Created /zk
[zk: localhost:2181(CONNECTED) 3] # get获得节点的值
[zk: localhost:2181(CONNECTED) 3] get /zk
myData
[zk: localhost:2181(CONNECTED) 4] # set修改节点的值
[zk: localhost:2181(CONNECTED) 4] set /zk myData1
[zk: localhost:2181(CONNECTED) 5] get /zk
myData1
[zk: localhost:2181(CONNECTED) 6] # create创建子节点
[zk: localhost:2181(CONNECTED) 6] create /zk/zk01 myData2
Created /zk/zk01
[zk: localhost:2181(CONNECTED) 7] # stat检查状态
[zk: localhost:2181(CONNECTED) 7] stat /zk
cZxid = 0x100000008
ctime = Thu Feb 27 12:39:43 CST 2020
mZxid = 0x100000009
mtime = Thu Feb 27 12:42:37 CST 2020
pZxid = 0x10000000b
cversion = 1
dataVersion = 1
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 7
numChildren = 1
[zk: localhost:2181(CONNECTED) 8] # rmr移除节点
[zk: localhost:2181(CONNECTED) 8] rmr /zk

至此,我们对zookeeper就算有了一个入门的了解,当然zookeeper远比我们这里描述的功能多,比如用zookeeper实现集群管理,分布式锁,分布式队列,zookeeper集群leader选举,Java API编程等。更多的zookeeper教程参考官方文档:https://zookeeper.apache.org/doc/r3.5.7/index.html

本文参与 腾讯云自媒体分享计划,分享自微信公众号。
原始发表:2020-03-03,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 小刘IT教程 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 9.1 Hadoop分布式集群的搭建
    • 9.1.1 修改hosts文件
      • 9.1.2 配置ssh免密码登录
        • 9.1.3 下载hadoop
          • 9.1.4 修改core-site.xml
            • 9.1.5 修改 hdfs-site.xml
              • 9.1.6 修改mapred-site.xml
                • 9.1.7 修改 yarn-site.xml
                  • 9.1.8 Hadoop环境变量配置
                    • 9.1.9 搭建Zookeeper
                      • 9.1.10 启动hadoop和Zookeeper集群
                      相关产品与服务
                      Elasticsearch Service
                      腾讯云 Elasticsearch Service(ES)是云端全托管海量数据检索分析服务,拥有高性能自研内核,集成X-Pack。ES 支持通过自治索引、存算分离、集群巡检等特性轻松管理集群,也支持免运维、自动弹性、按需使用的 Serverless 模式。使用 ES 您可以高效构建信息检索、日志分析、运维监控等服务,它独特的向量检索还可助您构建基于语义、图像的AI深度应用。
                      领券
                      问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档