Hadoop—HA集群搭建

hadoop版本2.10.2

下载地址:https://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.10.2/hadoop-2.10.2.tar.gz

zookeeper版本3.8.1

下载地址:https://dlcdn.apache.org/zookeeper/zookeeper-3.8.1/apache-zookeeper-3.8.1-bin.tar.gz

安装目录/opt

启动用户hadoop

服务部署

vm01 namenode resourcemanager datanode nodemanager zkfc JournalNode jobhistory zk
vm02 namenode resourcemanager datanode nodemanager zkfc JournalNode zk
vm03 datanode nodemanager zkfc JournalNode zk

三台节点修改hosts

[root@vm01 ~]# vi /etc/hosts
192.168.119.11 vm01
192.168.119.12 vm02
192.168.119.13 vm03

配置免密

[hadoop@vm01 opt]$ ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa): 
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:7GpT9PIBRe6o7seH4RctrHLAvepLgFl9aOakRtCGLws hadoop@vm01
The key's randomart image is:
+---[RSA 2048]----+
|  .o     ..      |
|  ..o. . ..      |
|   oo * ...      |
|E .=.* ooo       |
| .oo+..oS+..     |
|  .. .oo= * .    |
|      o=.B +     |
|     o+.O +      |
|     +BB o       |
+----[SHA256]-----+
[hadoop@vm01 ~]$ ssh-copy-id  vm01
[hadoop@vm01 ~]$ ssh-copy-id  vm02
[hadoop@vm01 ~]$ ssh-copy-id  vm03

添加权限

sudo chmod 777 /opt

添加JAVA_HOME

export JAVA_HOME=/usr/java/jdk1.8.0_131
export PATH=$JAVA_HOME/bin:$PATH

解压安装包

tar -zxvf apache-zookeeper-3.8.1-bin.tar.gz
tar -zxvf hadoop-2.10.2.tar.gz

安装zookeeper

[hadoop@vm01 /opt]$ mv apache-zookeeper-3.8.1-bin zookeeper-3.8.1
[hadoop@vm01 /opt]$ cd /opt/zookeeper-3.8.1/conf
[hadoop@vm01 conf]$ mv zoo_sample.cfg zoo.cfg 

修改下面配置

[hadoop@vm01 zookeeper-3.8.1]$ vi conf/zoo.cfg 

dataDir=/opt/zookeeper-3.8.1/data
server.1=vm01:2888:3888
server.2=vm02:2888:3888
server.3=vm03:2888:3888

分发安装包到其他机器

[hadoop@vm01 zookeeper-3.8.1]$ mkdir data
[hadoop@vm01 zookeeper-3.8.1]$ echo 1 > data/myid
[hadoop@vm01 zookeeper-3.8.1]$ cd ..
[hadoop@vm01 opt]$ scp -rp zookeeper-3.8.1/ vm02:/opt/
[hadoop@vm01 opt]$ scp -rp zookeeper-3.8.1/ vm03:/opt/

vm02 vm03 修改id

[hadoop@vm02 data]$ echo 2 > /opt/zookeeper-3.8.1/data/myid
[hadoop@vm03 data]$ echo 3 > /opt/zookeeper-3.8.1/data/myid

启动三台节点

bin/zkServer.sh start
bin/zkServer.sh status

修改环境变量

[hadoop@vm01 opt]$ vi ~/.bashrc
export ZK_PATH=/opt/zookeeper-3.8.1
export PATH=$ZK_PATH/bin:$PATH
[hadoop@vm01 opt]$ source ~/.bashrc

安装hadoop

修改配置文件

路径/opt/hadoop-2.10.2/etc/hadoop/

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>

  <property>
    <name>ha.zookeeper.quorum</name>
    <value>vm01:2181,vm02:2181,vm03:2181</value>
  </property>

  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-2.10.2/data/tmp</value>
  </property>
</configuration>

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- 完全分布式集群名称 -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <!-- 集群中NameNode节点都有哪些 -->
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <!-- nn1的RPC通信地址 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>vm01:9000</value>
  </property>
  <!-- nn2的RPC通信地址 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>vm02:9000</value>
  </property>
  <!-- nn1的http通信地址 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>vm01:50070</value>
  </property>
  <!-- nn2的http通信地址 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>vm02:50070</value>
  </property>
  <!-- 指定NameNode元数据在JournalNode上的存放位置 -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://vm01:8485;vm02:8485;vm03:8485/mycluster</value>
  </property>
  <!-- 配置隔离机制,即同一时刻只能有一台服务器对外响应 -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <!-- 使用隔离机制时需要ssh无秘钥登录-->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <!--journalnode存储目录-->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop-2.10.2/data/journal</value>
  </property>
  <!--namenode目录-->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/opt/hadoop-2.10.2/data/name</value>
  </property>
  <!--datanode目录-->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/opt/hadoop-2.10.2/data/data</value>
  </property>
  <!--默认副本数-->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- 开启权限检查-->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>
  <!-- 开启ACL权限控制-->
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
  </property>
  <!-- 访问代理类:client,mycluster,active配置失败自动切换实现方式-->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
</configuration>

yarn-site.xml

<?xml version="1.0"?>
<configuration>
  <!--关闭虚拟内存检查-->
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!--开启日志聚合-->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!--聚合日志在hdfs上的保留时间,以秒为单位-->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
  <!--日志在hdfs保存位置-->
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/logs</value>
  </property>
  <!-- log server的地址-->
  <property>
    <name>yarn.log.server.url</name>
    <value>http://vm01:19888/jobhistory/logs</value>
  </property>
  <!--本地日志在任务结束后保留多久-->
  <property>
    <name>yarn.nodemanager.delete.debug-delay-sec</name>
    <value>86400</value>
  </property>
  <!--开启RM-HA-->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <!-- RM的Active/Standby的自动切换-->
  <property>  
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>  
    <value>true</value>  
  </property>
  <!--RM故障自动恢复-->  
  <property>  
    <name>yarn.resourcemanager.recovery.enabled</name>   
    <value>true</value>   
  </property> 
  <!--集群标识-->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>rmha-1</value>
  </property>
  <!--RM 的逻辑ID列表。例如,“rm1,rm2”。-->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <!--指定 RM 对应的主机名或IP地址-->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>vm01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>vm02</value>
  </property>
  <!--指定 RM web 应用程序对应的 host:port-->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>vm01:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>vm02:8088</value>
  </property>
  <!--管理员通过该地址向RM发送管理命令-->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>vm01:8033</value>
  </property>
<!--NodeManager可用内存-->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>61440</value>
  </property>
<!--NodeManager可用核心数-->
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>30</value>
  </property>
  <!--单个容器可以申请的最小内存-->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
  </property>
  <!--单个容器可以申请的最大内存-->
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>10240</value>
  </property>
  <!--FailoverProxyProvider 应尝试故障转移的最大次数-->
  <property>
    <name>yarn.client.failover-max-attempts</name>
    <value>3</value>
  </property>
  <!--zk地址,用于状态存储和leader选举-->
  <property>
    <name>hadoop.zk.address</name>
    <value>vm01:2181,vm02:2181,vm03:2181</value>
  </property>
  <!--配置classpath路径,value为hadoop classpath命令结果-->
  <property>
    <name>yarn.application.classpath</name>
    <value>/opt/hadoop-2.10.2/etc/hadoop:/opt/hadoop-2.10.2/share/hadoop/common/lib/*:/opt/hadoop-2.10.2/share/hadoop/common/*:/opt/hadoop-2.10.2/share/hadoop/hdfs:/opt/hadoop-2.10.2/share/hadoop/hdfs/lib/*:/opt/hadoop-2.10.2/share/hadoop/hdfs/*:/opt/hadoop-2.10.2/share/hadoop/yarn:/opt/hadoop-2.10.2/share/hadoop/yarn/lib/*:/opt/hadoop-2.10.2/share/hadoop/yarn/*:/opt/hadoop-2.10.2/share/hadoop/mapreduce/lib/*:/opt/hadoop-2.10.2/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar</value>
    </property>
  </configuration>

mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/tmp</value>
  </property>
</configuration>

slaves

vm01
vm02
vm03

复制到其他机器

scp -rp hadoop-2.10.2/ vm02:/opt/
scp -rp hadoop-2.10.2/ vm03:/opt/

修改环境变量

[hadoop@vm01 hadoop-2.10.2]$ vi ~/.bashrc
export HADOOP_HOME=/opt/hadoop-2.10.2
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
[hadoop@vm01 opt]$ source ~/.bashrc

启动三台journalnode

hadoop-daemons.sh  start journalnode

在vm01机器格式化namenode并启动

hdfs namenode -format
hadoop-daemon.sh start namenode

vm02机器执行同步namenode信息并启动

hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode

启动三台datanode

hadoop-daemons.sh start datanode

初始化HA在Zookeeper中状态

stop-dfs.sh
hdfs zkfc -formatZK
start-dfs.sh

vm01启动yarn

start-yarn.sh 

vm02执行

yarn-daemon.sh start resourcemanager


Hadoop—HA集群搭建
https://www.hechunyu.com/archives/1698215502144
作者
chunyu
发布于
2018年07月16日
许可协议