Hadoop HA configuration (core-site.xml / hdfs-site.xml / yarn-site.xml below)
hadoop version: 2.10.2 (note: this was originally stated as 3.2.3, but every install path and the 50070 NameNode web-UI port below correspond to Hadoop 2.10.2)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- core-site.xml: core settings for the HA cluster -->
<configuration>
  <!-- Default filesystem: the logical HA nameservice "mycluster"
       (resolved to the active NameNode via the dfs.* settings in hdfs-site.xml) -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <!-- ZooKeeper quorum used by the ZKFailoverController for automatic NameNode failover -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>vm01:2181,vm02:2181,vm03:2181</value>
  </property>
  <!-- Base for other temporary directories.
       Fixed: was /opt/hadoop/hadoop-2.10.2/data/tmp, which did not match the
       /opt/hadoop-2.10.2/... prefix used by every other path in this setup. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop-2.10.2/data/tmp</value>
  </property>
</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- hdfs-site.xml: HDFS HA settings for nameservice "mycluster" -->
<configuration>
  <!-- Logical name of the HA nameservice -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <!-- NameNode IDs that make up the nameservice -->
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <!-- RPC address of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>vm01:9000</value>
  </property>
  <!-- RPC address of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>vm02:9000</value>
  </property>
  <!-- HTTP (web UI) address of nn1.
       NOTE(review): 50070 is the Hadoop 2.x default; Hadoop 3.x uses 9870. -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>vm01:50070</value>
  </property>
  <!-- HTTP (web UI) address of nn2 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>vm02:50070</value>
  </property>
  <!-- Where NameNode edits are written/read on the JournalNode quorum -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://vm01:8485;vm02:8485;vm03:8485/mycluster</value>
  </property>
  <!-- Fencing: ensure only one NameNode serves clients at a time.
       NOTE(review): sshfence alone blocks failover if the old active host is
       unreachable; consider adding a shell(/bin/true) fallback — confirm policy. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <!-- sshfence requires passwordless SSH; private key used for fencing -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <!-- JournalNode local storage directory -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop-2.10.2/data/journal</value>
  </property>
  <!-- NameNode metadata directory -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/opt/hadoop-2.10.2/data/name</value>
  </property>
  <!-- DataNode block storage directory -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/opt/hadoop-2.10.2/data/data</value>
  </property>
  <!-- Default block replication factor -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- Enable HDFS permission checking.
       Fixed: the property was misspelled "dfs.permissions.enable", which Hadoop
       silently ignores; the real name (hdfs-default.xml) is "dfs.permissions.enabled". -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>
  <!-- Enable POSIX-style ACL support on the NameNode -->
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
  </property>
  <!-- Client-side proxy provider: routes client calls to the current active NameNode -->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Enable ZKFC-driven automatic failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
</configuration>
<?xml version="1.0"?>
<!-- yarn-site.xml: YARN ResourceManager HA and log-aggregation settings -->
<configuration>
  <!-- Disable virtual-memory limit checking on NodeManagers -->
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <!-- Auxiliary service required for MapReduce shuffle -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Enable aggregation of container logs to HDFS -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- Retention of aggregated logs on HDFS, in seconds (604800 = 7 days) -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
  <!-- HDFS directory where aggregated logs are stored -->
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/logs</value>
  </property>
  <!-- URL of the log server (MapReduce JobHistory server) -->
  <property>
    <name>yarn.log.server.url</name>
    <value>http://vm01:19888/jobhistory/logs</value>
  </property>
  <!-- How long local container logs are kept after an application finishes (seconds) -->
  <property>
    <name>yarn.nodemanager.delete.debug-delay-sec</name>
    <value>86400</value>
  </property>
  <!-- Enable ResourceManager HA -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <!-- Automatic Active/Standby failover between RMs.
       Fixed: was "yarn.resourcemanager.ha.automatic-failover.recover.enabled",
       which is not a property defined in yarn-default.xml; the documented name
       is "yarn.resourcemanager.ha.automatic-failover.enabled". -->
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Recover application state after an RM restart/failover.
       NOTE(review): HA setups typically also set yarn.resourcemanager.store.class
       to ZKRMStateStore — confirm the intended state store. -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <!-- Cluster identifier (shared by both RMs, used as the ZK election scope) -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>rmha-1</value>
  </property>
  <!-- Logical IDs of the ResourceManagers, e.g. "rm1,rm2" -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <!-- Hostname of each RM -->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>vm01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>vm02</value>
  </property>
  <!-- host:port of each RM web application -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>vm01:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>vm02:8088</value>
  </property>
  <!-- Address administrators use to send management commands to the RM -->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>vm01:8033</value>
  </property>
  <!-- Maximum failover attempts by the client FailoverProxyProvider -->
  <property>
    <name>yarn.client.failover-max-attempts</name>
    <value>3</value>
  </property>
  <!-- ZooKeeper quorum used for RM state storage and leader election -->
  <property>
    <name>hadoop.zk.address</name>
    <value>vm01:2181,vm02:2181,vm03:2181</value>
  </property>
  <!-- Application classpath; value is the output of the `hadoop classpath` command -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/opt/hadoop-2.10.2/etc/hadoop:/opt/hadoop-2.10.2/share/hadoop/common/lib/*:/opt/hadoop-2.10.2/share/hadoop/common/*:/opt/hadoop-2.10.2/share/hadoop/hdfs:/opt/hadoop-2.10.2/share/hadoop/hdfs/lib/*:/opt/hadoop-2.10.2/share/hadoop/hdfs/*:/opt/hadoop-2.10.2/share/hadoop/yarn:/opt/hadoop-2.10.2/share/hadoop/yarn/lib/*:/opt/hadoop-2.10.2/share/hadoop/yarn/*:/opt/hadoop-2.10.2/share/hadoop/mapreduce/lib/*:/opt/hadoop-2.10.2/share/hadoop/mapreduce/*:/contrib/capacity-scheduler/*.jar</value>
  </property>
</configuration>
Hadoop HA configuration — source/reference:
https://www.hechunyu.com/archives/1698215548312