安装
centos:
yum -y install epel-release
yum -y install nginx keepalived
keepalived配置解析
- /etc/keepalived/keepalived.conf
! Configuration File for keepalived
# 全局变量
global_defs {router_id nginx_ha # 主从保持一致script_user root # 执行健康检查的用户enable_script_security
}# 健康检查脚本,名为check_nginx
vrrp_script check_nginx {script "/etc/keepalived/check_nginx.sh" interval 15 # 执行间隔weight -5 # 健康检查失败后,主节点减权重fall 3 # 失败多少次算失败rise 2 # 成功多少次算成功timeout 15 # 脚本执行超时时间
}# vrrp实例设置
vrrp_instance VI_1 {state MASTER # 主节点,从节点用BACKUPinterface ens33 # 绑定网卡,vip会添加到此网卡virtual_router_id 51 # 主从保持一致priority 150 # 权重,设置主节点权重大于从节点advert_int 1 # 主从节点认证方式和密码authentication {auth_type PASSauth_pass keepalive123#ABC}# 漂移的vipvirtual_ipaddress {192.168.10.65}# 指定使用的健康检查脚本的块名,和上面vrrp_script check_nginx{...}对应track_script {check_nginx }
}
网络拓扑
2个节点,都部署了nginx、keepalived, 模式为一主一从。
当主机点宕机,VIP 192.168.10.65 将漂移到从节点(IP: 192.168.10.64)
IP | 角色 | VIP |
---|---|---|
192.168.10.63 | 主 | 192.168.10.65 |
192.168.10.64 | 从 |
主节点配置
! Configuration File for keepalivedglobal_defs {router_id nginx_hascript_user rootenable_script_security
}vrrp_script check_nginx {script "/etc/keepalived/check_nginx.sh"interval 15weight -5fall 3rise 2timeout 15
}vrrp_instance VI_1 {state MASTERinterface ens33virtual_router_id 51priority 150advert_int 1authentication {auth_type PASSauth_pass keepalive123#ABC}virtual_ipaddress {192.168.10.65}track_script {check_nginx}
}
从节点配置
! Configuration File for keepalivedglobal_defs {router_id nginx_hascript_user rootenable_script_security
}vrrp_script check_nginx {script "/etc/keepalived/check_nginx.sh"interval 15weight -5fall 3rise 2timeout 15
}vrrp_instance VI_1 {state BACKUPinterface ens33virtual_router_id 51priority 100advert_int 1authentication {auth_type PASSauth_pass keepalive123#ABC}virtual_ipaddress {192.168.10.65}track_script {check_nginx}
}
健康检查脚本
- /etc/keepalived/check_nginx.sh
#!/bin/bash
counter=$(ps -C nginx --no-heading|wc -l)
if [ "${counter}" = "0" ]; thensystemctl restart nginxsleep 10counter=$(ps -C nginx --no-heading|wc -l)if [ "${counter}" = "0" ]; thensystemctl stop keepalivedfi
fi
故障测试
关闭keepalived模拟主节点宕机
systemctl stop keepalived
发现VIP漂移到了从节点:
- 主节点:
# ens33 VIP丢失了
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00inet 127.0.0.1/8 scope host lovalid_lft forever preferred_lft foreverinet6 ::1/128 scope host valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000link/ether 00:0c:29:ff:c8:c8 brd ff:ff:ff:ff:ff:ffinet 192.168.10.63/24 brd 192.168.10.255 scope global noprefixroute ens33valid_lft forever preferred_lft foreverinet6 fe80::42e4:89b:26ae:5311/64 scope link tentative noprefixroute dadfailed valid_lft forever preferred_lft foreverinet6 fe80::14ea:46c1:2170:47a6/64 scope link noprefixroute valid_lft forever preferred_lft forever
- 从节点
[root@localhost ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00inet 127.0.0.1/8 scope host lovalid_lft forever preferred_lft foreverinet6 ::1/128 scope host valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000link/ether 00:0c:29:39:fd:d4 brd ff:ff:ff:ff:ff:ffinet 192.168.10.64/24 brd 192.168.10.255 scope global noprefixroute ens33valid_lft forever preferred_lft foreverinet 192.168.10.65/32 scope global ens33valid_lft forever preferred_lft foreverinet6 fe80::42e4:89b:26ae:5311/64 scope link noprefixroute valid_lft forever preferred_lft forever
重启keepalived, 发现vip又回到主节点。
关闭主节点nginx, 模拟主节点宕机
发现nginx过一会儿,能够自动启动起来
关掉主节点,模拟主节点宕机
VIP漂移到从节点, 可以看到从节点的keepalived日志:
$ journalctl -xeu keepalived
Jul 02 05:08:05 localhost.localdomain Keepalived_vrrp[8416]: VRRP_Instance(VI_1) Received advert with higher priority 150, ours 100
Jul 02 05:08:05 localhost.localdomain Keepalived_vrrp[8416]: VRRP_Instance(VI_1) Entering BACKUP STATE
Jul 02 05:08:05 localhost.localdomain Keepalived_vrrp[8416]: VRRP_Instance(VI_1) removing protocol VIPs.
Jul 02 05:09:04 localhost.localdomain Keepalived_vrrp[8416]: VRRP_Instance(VI_1) Transition to MASTER STATE
Jul 02 05:09:05 localhost.localdomain Keepalived_vrrp[8416]: VRRP_Instance(VI_1) Entering MASTER STATE
Jul 02 05:09:05 localhost.localdomain Keepalived_vrrp[8416]: VRRP_Instance(VI_1) setting protocol VIPs.
Jul 02 05:09:05 localhost.localdomain Keepalived_vrrp[8416]: Sending gratuitous ARP on ens33 for 192.168.10.65
关闭keepalived模拟主节点宕机
常见问题
关闭防火墙
一定要关闭防火墙
systemctl stop firewalld
/etc/keepalived/check_nginx.sh exited with status 127
- 解决方案:
将SELinux状态更改为permissive模式,
setenforce 0
WARNING - script ‘/etc/keepalived/check_nginx.sh’ is not executable for uid:gid 0:0 - disabling.
- 解决方案:
脚本没有执行权限,添加执行权限:
chmod +x /etc/keepalived/check_nginx.sh
WARNING - default user ‘keepalived_script’ for script execution does not exist - please create.
- 解决方案:
指定root用户执行脚本:
global_defs {router_id nginx_hascript_user root
}
SECURITY VIOLATION - scripts are being executed but script_security not enabled.
- 解决方案:
enable_script_security
配置的作用是/etc/keepalived/check_nginx.sh
路径中,只要任何一个路径普通用户有写权限,执行脚本时,就不会以root权限执行。比如/etc
、/etc/keepalived
,普通用户有写权限,keepalived就不会以root权限执行脚本。
! Configuration File for keepalivedglobal_defs {router_id nginx_hascript_user rootenable_script_security
}
/etc/keepalived/check_nginx.sh exited due to signal 15
- 解决方案:
通常是脚本运行时长超过设定:
Jul 02 03:47:36 : /etc/keepalived/check_nginx.sh exited due to signal 15
Jul 02 03:47:46 : VRRP_Script(check_nginx) timed out
Jul 02 03:47:46 : /etc/keepalived/check_nginx.sh exited due to signal 15
Jul 02 03:47:47 : VRRP_Instance(VI_1) Changing effective priority from 150 to 145
Jul 02 03:47:56 : /etc/keepalived/check_nginx.sh exited due to signal 15
把interval
调大
vrrp_script check_nginx {script "/etc/keepalived/check_nginx.sh"interval 15weight -5fall 3rise 2timeout 15
}