一、HDFS
1、初始化文件系统
#master上
[root@master hadoop-2.5.0]# pwd
/opt/app/hadoop-2.5.0
[root@master hadoop-2.5.0]# bin/hdfs namenode -format #最后几行看到successfully,代表成功
2、启动
#启动
[root@master hadoop-2.5.0]# sbin/start-dfs.sh
集群批量命令脚本,这个脚本也可以改用循环:
vim xcall.sh
#!/bin/bash
#
# xcall — run the same command on every node of the cluster over ssh.
# Usage: xcall <cmd> [args...]     e.g.  xcall jps
params="$*"
# Resolve the command to an absolute path locally: a non-interactive
# ssh shell may not share our PATH (jps lives under $JAVA_HOME/bin).
AP=$(which "$@")
# Loop over all cluster hosts instead of repeating three copies.
for host in master slave1 slave2; do
  echo "====== $host $params ======"
  # $AP intentionally unquoted: 'which' may resolve several words.
  ssh "$host" $AP
done
添加权限、软链接:
chmod +x xcall.sh
ln -s /usr/local/hadoop_shell/xcall.sh /usr/local/bin/xcall
查看启动情况:
[root@master hadoop-2.5.0]# xcall jps
====== master jps ======
6808 Jps
2549 DataNode
2425 NameNode
====== slave1 jps ======
5287 Jps
2324 DataNode
====== slave2 jps ======
2389 SecondaryNameNode
2327 DataNode
7120 Jps
3、目录、文件操作
创建目录:
#创建用户主目录
[root@master hadoop-2.5.0]# bin/hdfs dfs -mkdir -p /user/root/
#创建测试目录
[root@master hadoop-2.5.0]# bin/hdfs dfs -mkdir -p /user/root/tmp/conf
[root@master hadoop-2.5.0]# bin/hdfs dfs -ls -R /user
19/04/17 09:45:44 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
drwxr-xr-x - root supergroup 0 2019-04-17 09:45 /user/root
drwxr-xr-x - root supergroup 0 2019-04-17 09:45 /user/root/tmp
drwxr-xr-x - root supergroup 0 2019-04-17 09:45 /user/root/tmp/conf
上传测试文件:
[root@master hadoop-2.5.0]# bin/hdfs dfs -put etc/hadoop/*-site.xml /user/root/tmp/conf
[root@master hadoop-2.5.0]# bin/hdfs dfs -ls -R /user/root/tmp/conf
19/04/17 10:04:09 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
-rw-r--r-- 3 root supergroup 1083 2019-04-17 10:02 /user/root/tmp/conf/core-site.xml
-rw-r--r-- 3 root supergroup 883 2019-04-17 10:02 /user/root/tmp/conf/hdfs-site.xml
-rw-r--r-- 3 root supergroup 620 2019-04-17 10:02 /user/root/tmp/conf/httpfs-site.xml
-rw-r--r-- 3 root supergroup 1069 2019-04-17 10:02 /user/root/tmp/conf/mapred-site.xml
-rw-r--r-- 3 root supergroup 1372 2019-04-17 10:02 /user/root/tmp/conf/yarn-site.xml
二、yarn
1、启动
#slave1
[root@slave1 hadoop-2.5.0]# sbin/yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /opt/app/hadoop-2.5.0/logs/yarn-root-resourcemanager-slave1.out
[root@slave1 hadoop-2.5.0]# sbin/yarn-daemon.sh start nodemanager
starting nodemanager, logging to /opt/app/hadoop-2.5.0/logs/yarn-root-nodemanager-slave1.out
#master
[root@master hadoop-2.5.0]# sbin/yarn-daemon.sh start nodemanager
starting nodemanager, logging to /opt/app/hadoop-2.5.0/logs/yarn-root-nodemanager-master.out
#slave2
[root@slave2 hadoop-2.5.0]# sbin/yarn-daemon.sh start nodemanager
starting nodemanager, logging to /opt/app/hadoop-2.5.0/logs/yarn-root-nodemanager-slave2.out
#master上查看启动情况
[root@master hadoop-2.5.0]# xcall jps
====== master jps ======
2549 DataNode
2425 NameNode
7919 Jps
7750 NodeManager
====== slave1 jps ======
5644 ResourceManager
5899 NodeManager
2324 DataNode
6094 Jps
====== slave2 jps ======
7743 Jps
2389 SecondaryNameNode
7575 NodeManager
2327 DataNode
也可以在master上用start-yarn.sh启动,然后再去slave1上启动resourcemanager;
2、测试
#创建测试目录
[root@master hadoop-2.5.0]# bin/hdfs dfs -mkdir -p /user/root/mapreduce/wordcount/input
#上传测试文件
[root@master hadoop-2.5.0]# bin/hdfs dfs -put /opt/app/hadoop-2.5.0/wc.input /user/root/mapreduce/wordcount/input
#在yarn上运行MapReduce
[root@master hadoop-2.5.0]# bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount /user/root/mapreduce/wordcount/input /user/root/mapreduce/wordcount/output
#查看结果
[root@master hadoop-2.5.0]# bin/hdfs dfs -text /user/root/mapreduce/wordcount/output/part-r-00000
19/04/17 10:26:12 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
data 1
hadoop 1
hive 1
hue 1
node 2
yarn 2
3、yarn web
resourcemanager在slave1上;
在浏览器输入slave1的ip+port,应该就能打开web页面;
三、集群基准测试
基本测试上面已经测试完了;
基本测试:测试集群是否可用;
基准测试:测试集群性能;
HDFS:
读数据
写数据
网上有很多例子,可以看一下;
四、集群时间同步
1、
如果考虑到去外网同步时间不安全或者不方便;
可以在内网找一台作为时间服务器,所有的机器与这台机器时间进行定时的同步,比如每十分钟,同步一次时间;
2、
这里可以用master当作时间服务器;
#检查是否安装
[root@master hadoop-2.5.0]# rpm -qa |grep ntp
ntp-4.2.6p5-15.el6.centos.x86_64
fontpackages-filesystem-1.41-1.1.el6.noarch
ntpdate-4.2.6p5-15.el6.centos.x86_64
#配置成时间服务器
vim /etc/ntp.conf #改三处
#第一处
restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap #去掉此行的注释,并改为集群网段
#第二处,注释掉下面几行
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
#第三处,添加下面两行
server 127.127.1.0 # local clock
fudge 127.127.1.0 stratum 10
#设置同步更新本地hwclock
[root@master hadoop-2.5.0]# vim /etc/sysconfig/ntpd
# Drop root to id 'ntp:ntp' by default.
SYNC_HWCLOCK=yes #添加此行
OPTIONS="-u ntp:ntp -p /var/run/ntpd.pid -g"
#启动ntpd
[root@master hadoop-2.5.0]# service ntpd status
[root@master hadoop-2.5.0]# service ntpd start
[root@master hadoop-2.5.0]# chkconfig ntpd on
3、在slave上设置同步时间脚本
#salve1
[root@slave1 hadoop-2.5.0]# crontab -l
#去master同步时间,10分钟1次
0-59/10 * * * * /usr/sbin/ntpdate master
#slave2
[root@slave2 hadoop-2.5.0]# crontab -l
#去master同步时间,10分钟1次
0-59/10 * * * * /usr/sbin/ntpdate master