//安裝SSH
[root@localhost /]# yum install openssh-server openssh-clients
(CentOS/RHEL 下並沒有名為 ssh 的套件,SSH 伺服端/用戶端的套件名稱為 openssh-server 與 openssh-clients;已以 root 登入時毋須 sudo)
//產生密鑰
[root@localhost /]# ssh-keygen (出現提示時可以一路按 Enter 接受預設值)
產生下面兩個檔案:/root/.ssh/id_rsa 和 /root/.ssh/id_rsa.pub
[root@localhost .ssh]# cd /root/.ssh/
//實際情況是把公開金鑰複製到另外一台機器上,並且寫入到另外一台機器上的authorized_keys檔案中
[root@localhost .ssh]# cat ./id_rsa.pub>>./authorized_keys
[root@localhost .ssh]# cd /home
//配置JDK環境變數
[root@localhost opt]# vi /etc/profile
export JAVA_HOME=/opt/jdk1.6.0_31
export PATH=$JAVA_HOME/bin:$PATH:.
//使配置生效
[root@localhost opt]# source /etc/profile
//安裝Hadoop 1.0.3
[root@localhost opt]# rpm -i hadoop-1.0.3-1.x86_64.rpm
//查看安裝後的Hadoop版本號碼資訊
[root@localhost opt]# hadoop version
(如果報錯,請檢查 hadoop-env.sh 中的java路徑配置是否正確)
修改hadoop設定檔(/etc/hadoop)
[root@localhost hadoop]# cd /etc/hadoop
[root@localhost hadoop]# vi hadoop-env.sh
export JAVA_HOME=/opt/jdk1.6.0_31
[root@localhost hadoop]# vi core-site.xml
<configuration><property><name>fs.default.name</name><value>hdfs://192.168.1.101:9000</value></property><property><name>hadoop.tmp.dir</name><value>/hadoop</value></property></configuration>
[root@localhost hadoop]# vi hdfs-site.xml
<configuration><property><name>dfs.replication</name><value>1</value></property></configuration>
[root@localhost hadoop]# vi mapred-site.xml
<configuration><property><name>mapred.job.tracker</name><value>192.168.1.101:9001</value></property></configuration>
//格式檔案系統
[root@localhost opt]# hadoop namenode -format
//啟動Hadoop相關的所有服務(腳本位於 /usr/sbin)
[root@localhost sbin]# start-all.sh
或
[root@localhost opt]# /usr/sbin/start-all.sh
(如果沒有執行許可權,需要將/usr/sbin目錄下的相關sh檔案設定執行許可權)
說明:相關腳本包括 start-all.sh、stop-all.sh、start-dfs.sh、stop-dfs.sh、start-mapred.sh、stop-mapred.sh、slaves.sh
//jps查看已經啟動的服務進程資訊
[root@localhost hadoop]# jps
5131 NameNode
5242 DataNode
5361 SecondaryNameNode
5583 TaskTracker
5463 JobTracker
6714 Jps
防火牆需要開放的連接埠:9000
9001
50010
50070(NameNode Web 介面)
50030(JobTracker Web 介面)
(訪問 http://192.168.1.101:50070 與 http://192.168.1.101:50030)
[root@localhost hadoop]# hadoop dfsadmin -report
//為運行例子 wordcount 作準備
[root@localhost opt]# hadoop fs -mkdir input
[root@localhost opt]# echo "Hello World Bye World" > file01
[root@localhost opt]# echo "Hello Hadoop Goodbye Hadoop" > file02
[root@localhost opt]# hadoop fs -copyFromLocal ./file0* input
//運行例子 wordcount
[root@localhost opt]# hadoop jar /usr/share/hadoop/hadoop-examples-1.0.3.jar wordcount input output
12/08/11 12:00:30 INFO input.FileInputFormat: Total input paths to process : 2
12/08/11 12:00:30 INFO util.NativeCodeLoader: Loaded the native-hadoop library
12/08/11 12:00:30 WARN snappy.LoadSnappy: Snappy native library not loaded
12/08/11 12:00:31 INFO mapred.JobClient: Running job: job_201208111137_0001
12/08/11 12:00:32 INFO mapred.JobClient: map 0% reduce 0%
12/08/11 12:01:05 INFO mapred.JobClient: map 100% reduce 0%
12/08/11 12:01:20 INFO mapred.JobClient: map 100% reduce 100%
12/08/11 12:01:25 INFO mapred.JobClient: Job complete: job_201208111137_0001
12/08/11 12:01:25 INFO mapred.JobClient: Counters: 29
12/08/11 12:01:25 INFO mapred.JobClient:   Job Counters
12/08/11 12:01:25 INFO mapred.JobClient:     Launched reduce tasks=1
12/08/11 12:01:25 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=49499
12/08/11 12:01:25 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
12/08/11 12:01:25 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
12/08/11 12:01:25 INFO mapred.JobClient:     Launched map tasks=2
12/08/11 12:01:25 INFO mapred.JobClient:     Data-local map tasks=2
12/08/11 12:01:25 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=12839
12/08/11 12:01:25 INFO mapred.JobClient:   File Output Format Counters
12/08/11 12:01:25 INFO mapred.JobClient:     Bytes Written=41
12/08/11 12:01:25 INFO mapred.JobClient:   FileSystemCounters
12/08/11 12:01:25 INFO mapred.JobClient:     FILE_BYTES_READ=79
12/08/11 12:01:25 INFO mapred.JobClient:     HDFS_BYTES_READ=276
12/08/11 12:01:25 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=64705
12/08/11 12:01:25 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=41
12/08/11 12:01:25 INFO mapred.JobClient:   File Input Format Counters
12/08/11 12:01:25 INFO mapred.JobClient:     Bytes Read=50
12/08/11 12:01:25 INFO mapred.JobClient:   Map-Reduce Framework
12/08/11 12:01:25 INFO mapred.JobClient:     Map output materialized bytes=85
12/08/11 12:01:25 INFO mapred.JobClient:     Map input records=2
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce shuffle bytes=85
12/08/11 12:01:25 INFO mapred.JobClient:     Spilled Records=12
12/08/11 12:01:25 INFO mapred.JobClient:     Map output bytes=82
12/08/11 12:01:25 INFO mapred.JobClient:     CPU time spent (ms)=4770
12/08/11 12:01:25 INFO mapred.JobClient:     Total committed heap usage (bytes)=246751232
12/08/11 12:01:25 INFO mapred.JobClient:     Combine input records=8
12/08/11 12:01:25 INFO mapred.JobClient:     SPLIT_RAW_BYTES=226
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce input records=6
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce input groups=5
12/08/11 12:01:25 INFO mapred.JobClient:     Combine output records=6
12/08/11 12:01:25 INFO mapred.JobClient:     Physical memory (bytes) snapshot=391634944
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce output records=5
12/08/11 12:01:25 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=3159781376
12/08/11 12:01:25 INFO mapred.JobClient:     Map output records=8
//查看統計結果
[root@localhost opt]# hadoop fs -cat output/part-r-00000
Bye	1
Goodbye	1
Hadoop	2
Hello	2
World	2
//---------------------------------------
作業日誌存放目錄:
/var/log/hadoop/root/userlogs/
//---------------------------------------
安裝 hadoop-1.0.3-1 後,存放的目錄有:
/etc/hadoop
/var/run/hadoop
/var/log/hadoop
/usr/share/hadoop
/usr/share/doc/hadoop
/usr/etc/hadoop
/usr/bin/hadoop(檔案)
/usr/include/hadoop