Only HDFS and YARN are configured
master-base
# Hadoop fully distributed
# Extract the archive
cd /opt/software
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/module
# Configure environment variables
vim /etc/profile.d/hadoop.sh
# hadoop
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin
source /etc/profile
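A quick sanity check that the environment variables took effect (assumes the tarball unpacked to the path above):
hadoop version
# should print the Hadoop 3.1.3 version banner and build info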
# Edit the configuration files
cd $HADOOP_HOME/etc/hadoop
# Configure environment variables for the Hadoop daemons
# Setting JAVA_HOME here is redundant in this environment, but it is a good habit
# Users for the namenode, datanode, secondarynamenode, resourcemanager, and nodemanager daemons
vim hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_171
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# Configure Hadoop global settings
# fs.defaultFS: the HDFS address (required)
# hadoop.tmp.dir: temporary directory (the default /tmp gets cleaned automatically, which causes problems)
# hadoop.http.staticuser.user: static user for the web UI (grants read/write permissions in the UI)
vim core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/var/hadoop/tmp</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
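Note that every <property> block in these notes is a snippet: in the actual file it must sit inside the <configuration> root element. As a sketch, the assembled core-site.xml looks like this (the remaining properties follow the same pattern):
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <!-- hadoop.tmp.dir and hadoop.http.staticuser.user go here as well -->
</configuration>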
# Configure HDFS
# dfs.namenode.http-address: the namenode's HTTP service socket
# dfs.namenode.secondary.http-address: the secondarynamenode's HTTP service socket
vim hdfs-site.xml
<property>
<name>dfs.namenode.http-address</name>
<value>master:9870</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>slave1:9868</value>
</property>
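To confirm a value was actually picked up, hdfs getconf can query the local configuration:
hdfs getconf -confKey dfs.namenode.http-address
# expected output: master:9870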
# Configure YARN
# yarn.resourcemanager.hostname: hostname of the resourcemanager
# yarn.nodemanager.vmem-check-enabled: nodemanager virtual-memory check (when enabled, other frameworks running on YARN are very likely to fail)
vim yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
# Configure the worker nodes
vim workers
master
slave1
slave2
# Sync the hadoop environment variables and the hadoop installation to the cluster (see the sketch below)
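The original notes give no sync commands; a minimal sketch, assuming root passwordless SSH and the hostnames used throughout:
for h in slave1 slave2; do
  rsync -a /opt/module/hadoop-3.1.3 $h:/opt/module/   # copy the hadoop install
  scp /etc/profile.d/hadoop.sh $h:/etc/profile.d/     # copy the env vars
done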
# Format the namenode
hdfs namenode -format
cd $HADOOP_HOME
# Start the whole hadoop cluster
sbin/start-dfs.sh
sbin/start-yarn.sh
mapred --daemon start historyserver
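With everything started, jps on each node should roughly match the configuration above (NameNode and ResourceManager on master, SecondaryNameNode on slave1, DataNode and NodeManager everywhere):
jps
# master: NameNode, DataNode, ResourceManager, NodeManager, JobHistoryServer, Jps
# slave1: SecondaryNameNode, DataNode, NodeManager, Jps
# slave2: DataNode, NodeManager, Jps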
master-hadoop
# Install ZooKeeper
cd /opt/software
tar -zxvf /opt/software/apache-zookeeper-3.5.7-bin.tar.gz -C /opt/module
vim /etc/profile.d/zookeeper.sh
# zookeeper
export ZOOKEEPER_HOME=/opt/module/apache-zookeeper-3.5.7-bin
export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile
cd $ZOOKEEPER_HOME/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
dataDir=/opt/module/apache-zookeeper-3.5.7-bin/data
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
cd $ZOOKEEPER_HOME
mkdir -p $ZOOKEEPER_HOME/data
cd $ZOOKEEPER_HOME/data
vim myid
1
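The myid file must contain nothing but the id matching this host's server.N line in zoo.cfg; writing it without an editor works just as well:
echo 1 > $ZOOKEEPER_HOME/data/myid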
# Sync the zookeeper environment variables and the zookeeper installation to the cluster (see the sketch below)
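As with hadoop, a minimal sync sketch assuming root passwordless SSH:
for h in slave1 slave2; do
  rsync -a /opt/module/apache-zookeeper-3.5.7-bin $h:/opt/module/   # copy the zookeeper install (myid is fixed per host below)
  scp /etc/profile.d/zookeeper.sh $h:/etc/profile.d/                # copy the env vars
done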
slave1-hadoop
source /etc/profile
vim $ZOOKEEPER_HOME/data/myid
2
slave2-hadoop
source /etc/profile
vim $ZOOKEEPER_HOME/data/myid
3
master-hadoop
zkServer.sh start
slave1-hadoop
zkServer.sh start
slave2-hadoop
zkServer.sh start
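Once all three servers are up, each node reports its role; a healthy three-node ensemble shows one leader and two followers:
zkServer.sh status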
Configure Hadoop HA
# Hadoop HA
# Extract the archive
cd /opt/software
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/module
# Configure environment variables
vim /etc/profile.d/hadoop.sh
# hadoop
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin
source /etc/profile
# Edit the configuration files
cd $HADOOP_HOME/etc/hadoop
# Configure environment variables for the Hadoop daemons
# Setting JAVA_HOME here is redundant in this environment, but it is a good habit
# Users for the namenode, datanode, secondarynamenode, resourcemanager, nodemanager, zkfc, and journalnode daemons
vim hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_171
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
# Configure Hadoop global settings
# fs.defaultFS: the HDFS address (required)
# hadoop.tmp.dir: temporary directory (the default /tmp gets cleaned automatically, which causes problems)
# hadoop.http.staticuser.user: static user for the web UI (grants read/write permissions in the UI)
# ha.zookeeper.quorum: addresses of the ZooKeeper ensemble
vim core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/var/hadoop/tmp</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
# Configure HDFS
# dfs.nameservices: logical name of the nameservice
# dfs.ha.namenodes.mycluster: list of NameNode IDs
# dfs.namenode.rpc-address.mycluster.nn?: RPC address of the given NameNode
# dfs.namenode.http-address.mycluster.nn?: HTTP address of the given NameNode
# dfs.namenode.shared.edits.dir: edit-log directory shared between the NameNodes
# dfs.journalnode.edits.dir: directory where the JournalNodes store the HDFS edit log
# dfs.ha.automatic-failover.enabled: automatic failover
# dfs.client.failover.proxy.provider.mycluster: class clients use to locate the active NameNode
# dfs.ha.fencing.methods: fencing method used during failover (shell(/bin/true) always reports success; the QJM already ensures only one NameNode can write the shared edit log)
# dfs.namenode.secondary.http-address: the secondarynamenode's HTTP service socket
vim hdfs-site.xml
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>master:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>master:9870</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>slave1:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>slave1:9870</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/var/hadoop/dfs/journalnode/</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>shell(/bin/true)</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>slave2:9868</value>
</property>
# Configure YARN
# yarn.resourcemanager.ha.enabled: enable/disable ResourceManager HA
# yarn.resourcemanager.cluster-id: unique identifier for the YARN cluster
# yarn.resourcemanager.ha.rm-ids: list of RM IDs
# yarn.resourcemanager.hostname.rm?: hostname of the given RM
# yarn.resourcemanager.zk-address: ZooKeeper ensemble address
# yarn.log-aggregation-enable: enable/disable log aggregation
# yarn.log-aggregation.retain-seconds: how long to retain aggregated logs
# yarn.resourcemanager.recovery.enabled: enable/disable ResourceManager recovery
# yarn.resourcemanager.store.class: class providing the ResourceManager state store
# yarn.nodemanager.vmem-check-enabled: nodemanager virtual-memory check (when enabled, other frameworks running on YARN are very likely to fail)
vim yarn-site.xml
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>slave1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>slave2</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
# Configure the worker nodes
vim workers
master
slave1
slave2
# Sync the hadoop environment variables and the hadoop installation to the cluster (same approach as the sync sketch in the fully distributed section)
# Format the namenode and zkfc
# Start a journalnode on every VM
master-hadoop
hdfs --daemon start journalnode
slave1-hadoop
hdfs --daemon start journalnode
slave2-hadoop
hdfs --daemon start journalnode
master-hadoop
# Format one of the namenodes
hdfs namenode -format
# Start just this namenode (as a daemon) so the standby can sync from it
hdfs --daemon start namenode
slave1-hadoop
# Sync from the already-formatted namenode
hdfs namenode -bootstrapStandby
master-hadoop
# Format zkfc
hdfs zkfc -formatZK
cd $HADOOP_HOME
# Start the hadoop cluster
sbin/start-dfs.sh
sbin/start-yarn.sh
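Once both scripts have run, the HA state can be verified; which node is active depends on the election, so treat the states below as one possible outcome:
hdfs haadmin -getServiceState nn1   # active or standby
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2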
Deprecated section
MapReduce configuration. The cluster uses Spark and Flink directly as its compute frameworks, so MapReduce is not needed at all; only HDFS and YARN need to be configured.
- hadoop fully distributed
# Configure MapReduce (MapReduce is never actually used; skip this)
# mapreduce.framework.name: MapReduce execution mode
vim mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
- hadoop ha
# Configure MapReduce
# mapreduce.framework.name: MapReduce execution mode
# mapreduce.jobhistory.address: socket of the JobHistory server
# mapreduce.jobhistory.webapp.address: socket of the JobHistory server's web application
vim mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
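Note that if MapReduce on YARN were ever actually used, yarn-site.xml would also need at least the shuffle auxiliary service, which neither section above configures:
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>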