Configure ZooKeeper first. NameNode (NN) HA depends on it.
Configuration
vi /opt/zookeeper/conf/zoo.cfg
dataDir=/home1/hadoop/zookeeper/data
dataLogDir=/home1/hadoop/zookeeper/zk_log
logDir=/home1/hadoop/zookeeper/logs
server.110=server110:2888:3888
server.111=server111:2888:3888
server.112=server112:2888:3888
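In server.N=host:port1:port2 the first port (2888) carries follower-to-leader traffic and the second (3888) is used for leader election; N must match the id written into that host's myid file below. The snippet above does not show the client port, but the ha.zookeeper.quorum setting later in this guide assumes ZooKeeper is listening on the default 2181, so zoo.cfg should also contain a line like:
clientPort=2181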
Create directories and myid files on all zk machines
mkdir -p /home1/hadoop/zookeeper/data
mkdir -p /home1/hadoop/zookeeper/zk_log
mkdir -p /home1/hadoop/zookeeper/logs
ifconfig | grep 192 | awk -F'.' '{print $4}' | awk '{print $1}' > /home1/hadoop/zookeeper/data/myid
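The pipeline above simply extracts the last octet of the machine's IP address, so on server110 the file should end up containing 110, matching server.110 in zoo.cfg. A quick sanity check (expected output shown for server110):
cat /home1/hadoop/zookeeper/data/myid
110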
Start on all zk machines
/opt/zookeeper/bin/zkServer.sh start
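Once all three have been started it is worth confirming that a quorum has formed; each node should report itself as either leader or follower:
/opt/zookeeper/bin/zkServer.sh status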
Configure Hadoop
1. Configure Environment Variables
vi /etc/profile
vi /opt/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_25
export HADOOP_PREFIX="/opt/hadoop"
export PATH=$PATH:$HADOOP_PREFIX/bin
export PATH=$PATH:$HADOOP_PREFIX/sbin
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
export YARN_HOME=${HADOOP_PREFIX}
export HADOOP_SSH_OPTS="-p 9922"
export HADOOP_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
export HDFS_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
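The exports added to /etc/profile only take effect in new shells; to pick them up in the current session and confirm the binaries are on PATH, something along these lines works:
source /etc/profile
echo $HADOOP_PREFIX
hadoop version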
2. Configure the files under /opt/hadoop/etc/hadoop
--- Configure core-site.xml
<property>
<name>hadoop.tmp.dir</name>
<value>/home1/hadoop/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://hadoop110:8020</value>
</property>
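Note that fs.default.name is the deprecated name for fs.defaultFS in Hadoop 2.x, and pointing it at a single host bypasses the HA nameservice defined below. If clients are expected to fail over transparently, it is usually set to the nameservice URI instead, roughly:
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>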
--- Create and configure slaves: vi slaves and add the following content
server110
server111
server112
server113
server114
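The slaves file only tells start-dfs.sh / start-yarn.sh which hosts to SSH into (on port 9922 here, because of HADOOP_SSH_OPTS) when launching DataNodes and NodeManagers; it does not decide which hosts run NameNodes. A passwordless login test such as the following should succeed for every host listed (server111 is just one of the entries above):
ssh -p 9922 server111 hostname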
--- Configure hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>ns1n1,ns1n2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.ns1n1</name>
<value>server110:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.ns1n2</name>
<value>server113:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.ns1.ns1n1</name>
<value>server110:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.ns1.ns1n2</name>
<value>server113:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://server110:8485;server111:8485;server112:8485/ns1</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(huang169:9922)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home1/hadoop/journal/node/local/data</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>server110:2181,server111:2181,server112:2181</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home1/hadoop/hdfs/name</value>
<final>true</final>
</property>
<property>
<name>dfs.federation.nameservice.id</name>
<value>ns1</value>
</property>
<property>
<name>dfs.federation.nameservices</name>
<value>ns1</value>
<!-- <value>ns1,ns2</value> -->
</property>
<property>
<name>dfs.namenode.rpc-address.ns1</name>
<value>server110:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ns2</name>
<value>server111:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.ns1</name>
<value>server110:23001</value>
</property>
<property>
<name>dfs.namenode.http-address.ns2</name>
<value>server111:13001</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home1/hadoop/hdfs/data</value>
<final>true</final>
</property>
<!--
If HA is used, the backup node and secondary namenode do not need to be configured.
<property>
<name>dfs.namenode.backup.address.ns1</name>
<value>server113:50100</value>
</property>
<property>
<name>dfs.namenode.backup.http-address.ns1</name>
<value>server113:50105</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address.ns1</name>
<value>server110:23002</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address.ns2</name>
<value>server111:23002</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address.ns1</name>
<value>server110:23003</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address.ns2</name>
<value>server111:23003</value>
</property>
-->
</configuration>
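One operational note on the fencing settings above: with sshfence(huang169:9922), the ZKFC that wins the election logs into the failed NameNode host as user huang169 on port 9922 using the key in /home/hadoop/.ssh/id_rsa and kills the stale NameNode process, so passphrase-less SSH with that key must work between server110 and server113 in both directions. A manual check along these lines (the command is illustrative):
ssh -p 9922 -i /home/hadoop/.ssh/id_rsa huang169@server113 "echo fencing login ok"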
--- Configure yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.address</name>
<value>server110:18040</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>server110:18030</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>server110:18088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>server110:18025</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>server110:18141</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce.shuffle</value>
</property>
</configuration>
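One version-dependent detail: the value mapreduce.shuffle is only accepted by older 2.x releases; from Hadoop 2.2 onward the service name must be mapreduce_shuffle, usually together with its handler class. If the NodeManagers refuse to start with an "invalid service name" error, the property pair below is the usual fix:
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>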
3. Initialize the NameNodes
hdfs zkfc -formatZK
Run on all NN machines
/opt/hadoop/bin/hdfs namenode -format ns1
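Two caveats about this step, taken from the standard QJM HA procedure rather than this write-up: the JournalNodes (server110-112 here) must be running before a NameNode can be formatted, and normally only the first NameNode is formatted while the second copies its metadata with -bootstrapStandby instead of formatting again. Roughly:
# on server110, server111, server112 (the JournalNode hosts)
/opt/hadoop/sbin/hadoop-daemon.sh start journalnode
# on server110 (first NameNode)
/opt/hadoop/bin/hdfs namenode -format
/opt/hadoop/sbin/hadoop-daemon.sh start namenode
# on server113 (second NameNode)
/opt/hadoop/bin/hdfs namenode -bootstrapStandby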
4. Start
sbin/start-dfs.sh
sbin/start-yarn.sh
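A quick way to confirm that automatic failover is wired up is to ask which NameNode the ZKFCs have elected active; one should report active and the other standby (ns1n1 and ns1n2 are the IDs defined in hdfs-site.xml above):
hdfs haadmin -getServiceState ns1n1
hdfs haadmin -getServiceState ns1n2
jps
Each NameNode host should also show a DFSZKFailoverController process in the jps output.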