Keepalived 高可用Nginx
环境准备
| 服务 | IP | 说明 |
|---|---|---|
| lb-master | 192.168.148.210 | Keepalived主服务器(Nginx主负载均衡器) |
| lb-backup | 192.168.148.211 | Keepalived备服务器(Nginx备负载均衡器) |
| web-01 | 192.168.148.212 | web01服务器 |
| web-02 | 192.168.148.213 | web02服务器 |
| VIP | 192.168.148.200 | 虚拟VIP地址 |
安装Nginx服务
所有服务器安装
bash
dnf -y install nginx
# Publish this host's first IP address as the index page, so a response shows
# which node served the request.
hostname -I | awk '{print $1}' > /usr/share/nginx/html/index.html
# Start nginx now and enable it at boot, so the node serves again after a reboot.
systemctl enable --now nginx
配置负载均衡
在lb-master和lb-backup修改/etc/nginx/nginx.conf,修改完重新加载nginx
nginx
http {
    upstream backend {
        # Default round-robin strategy.
        # The members must be the web servers (web-01 / web-02), not the load
        # balancers themselves; otherwise requests are proxied back to the
        # load balancers in a loop.
        server 192.168.148.212:80;
        server 192.168.148.213:80;
    }
    server {
        listen 80;
        location / {
            proxy_pass http://backend;
            # Forward the Host header sent by the client to the backend.
            proxy_set_header Host $host;
        }
    }
}
在nginx配置中添加健康检查接口
bash
# Write the health-check location; the heredoc delimiter is quoted, so the
# content is written literally.
cat > /etc/nginx/default.d/health.conf << "EOF"
# Lightweight endpoint polled by the keepalived health-check script.
location /health {
    access_log off;
    # Set the type through default_type: add_header would emit a second
    # Content-Type header next to the one nginx already sends for "return".
    default_type text/plain;
    return 200 "healthy\n";
}
EOF
# Validate the configuration and apply it without dropping connections.
nginx -t && systemctl reload nginx
部署keepalived
在lb-master和lb-backup安装
安装
bash
dnf -y install keepalived
# Dedicated unprivileged account (no home directory, no login shell) used to
# run the keepalived check scripts.
useradd -Ms /sbin/nologin keepalived_script
# Directories for the scripts and for their log files.
mkdir -p /opt/scripts /var/log/keepalived
chown keepalived_script:keepalived_script /var/log/keepalived
# sudo rules for the script user. A rule that lists arguments only matches
# that exact argument list, so the kill rule carries the --signal option the
# check script uses; '=' inside command arguments must be escaped in sudoers.
cat > /etc/sudoers.d/keepalived_script << 'EOF'
keepalived_script ALL=(root) NOPASSWD: /usr/bin/systemctl stop keepalived, /usr/bin/systemctl kill --signal\=SIGKILL keepalived, /usr/bin/systemctl restart nginx
EOF
chmod 440 /etc/sudoers.d/keepalived_script
# Syntax-check the drop-in file so a broken rule is noticed immediately.
visudo -cf /etc/sudoers.d/keepalived_script
lb-master
bash
# keepalived configuration for lb-master (written literally; quoted delimiter).
cat > /etc/keepalived/keepalived.conf << "EOF"
! Configuration File for keepalived
global_defs {
    router_id LB_01                 # Must differ from every other keepalived node (globally unique)
    enable_script_security
    script_user keepalived_script
}
vrrp_script chk_nginx {             # Custom health-check script
    script "/opt/scripts/check_nginx.sh"    # Script path (must be executable)
    interval 2                      # Check interval in seconds
    weight -20                      # Priority change on failure (-253~253); larger than the 100/90 gap so a failing node drops below its peer
    rise 2                          # Success threshold: 2 consecutive successes mark the check healthy
    fall 3                          # Failure threshold: 3 consecutive failures mark the check unhealthy
    user keepalived_script
}
vrrp_instance VI_1 {
    state BACKUP                    # Both nodes are configured as BACKUP
    interface ens32                 # Heartbeat interface; same value on the backup node
    virtual_router_id 51            # Instance ID 51; must match the backup node
    priority 100                    # Priority (1-255); the master node is higher than the backup node
    advert_int 1
    #nopreempt
    preempt_delay 60                # 1 minute delay
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.148.200/24 dev ens32 label ens32:1    # VIP
    }
    track_script {                  # Attach the health-check script
        chk_nginx
    }
    # Split-brain detection script
    notify_master "/opt/scripts/check_split_brain.sh"
}
EOF
lb-backup
bash
# keepalived configuration for lb-backup (written literally; quoted delimiter).
cat > /etc/keepalived/keepalived.conf << "EOF"
! Configuration File for keepalived
global_defs {
    router_id LB_02                 # Must differ from every other keepalived node (globally unique)
    enable_script_security
    script_user keepalived_script
}
vrrp_script chk_nginx {             # Custom health-check script
    script "/opt/scripts/check_nginx.sh"    # Script path (must be executable)
    interval 2                      # Check interval in seconds
    weight -20                      # Priority change on failure (-253~253); larger than the 100/90 gap so a failing node drops below its peer
    rise 2                          # Success threshold: 2 consecutive successes mark the check healthy
    fall 3                          # Failure threshold: 3 consecutive failures mark the check unhealthy
    user keepalived_script
}
vrrp_instance VI_1 {
    state BACKUP                    # Both nodes are configured as BACKUP
    interface ens32                 # Heartbeat interface; same value as on the master node
    virtual_router_id 51            # Instance ID 51; must match the master node
    priority 90                     # Priority (1-255); the master node is higher than the backup node
    advert_int 1
    #nopreempt
    preempt_delay 60                # 1 minute delay
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.148.200/24 dev ens32 label ens32:1    # VIP
    }
    track_script {                  # Attach the health-check script
        chk_nginx
    }
    # Split-brain detection script
    notify_master "/opt/scripts/check_split_brain.sh"
}
EOF
检测脚本
环境准备
bash
# sshpass supplies the SSH password non-interactively, so the split-brain
# script can query the peer without a prompt.
dnf -y install sshpass
仲裁脚本
bash
cat > /opt/scripts/check_split_brain.sh << "EOF"
#!/bin/bash
# Split-brain detection, invoked by keepalived through notify_master.
# If the peer load balancer is reachable and already holds the VIP, the local
# keepalived is stopped so that only one node keeps the VIP.
# Exit status: 0 = no split brain detected, 1 = split brain detected.

LB_NODES=("192.168.148.210" "192.168.148.211")   # both load balancers
VIP="192.168.148.200"
LOG_FILE="/var/log/keepalived/check_split_brain.log"
# Password for root on the peer. Handed to sshpass through the environment
# (-e) so it does not show up in the process list.
# NOTE(review): key-based SSH authentication would avoid storing it at all.
export SSHPASS="123456"

# log MESSAGE [LEVEL] - timestamped line to stdout and to LOG_FILE (default level INFO).
log() {
  local level="${2:-INFO}"
  echo "[$(date '+%F %T')] [$level] $1" | tee -a "$LOG_FILE"
}

# The peer is the load balancer whose address is NOT configured on this host,
# so the same script works unchanged on both nodes.
local_ips=" $(hostname -I) "
REMOTE_IP=""
for node in "${LB_NODES[@]}"; do
  if [[ "$local_ips" != *" $node "* ]]; then
    REMOTE_IP="$node"
    break
  fi
done
if [[ -z "$REMOTE_IP" ]]; then
  log "无法确定对端节点,跳过脑裂检测" "WARNING"
  exit 0
fi

# Only query the peer when it answers a ping.
if ping -c 1 -W 1 "$REMOTE_IP" >/dev/null 2>&1; then
  # Fixed-string match on "inet <VIP>/" so the dots are literal and a longer
  # address cannot match by prefix.
  if sshpass -e ssh -o ConnectTimeout=2 -o StrictHostKeyChecking=no \
      -o UserKnownHostsFile=/dev/null "root@$REMOTE_IP" \
      "ip -4 -o addr show | grep -qF 'inet $VIP/'" >/dev/null 2>&1; then
    log "发现脑裂!立即释放VIP" "ERROR"
    sudo -n /usr/bin/systemctl stop keepalived 2>&1
    # Send an alert notification
    # curl -X POST "your_alert_webhook" -d "Split brain detected on $(hostname)"
    exit 1
  fi
fi
log "未发现脑裂,集群正常"
exit 0
EOF
# The script embeds a password: restrict it to its owner, the script user.
chmod 700 /opt/scripts/check_split_brain.sh
chown keepalived_script:keepalived_script /opt/scripts/check_split_brain.sh
检测Nginx服务脚本
bash
cat > /opt/scripts/check_nginx.sh << "EOF"
#!/bin/bash
# Nginx health check, run by keepalived as the vrrp_script chk_nginx.
# Exit status: 0 = healthy, 1 = unhealthy.
# Hard failures (service down, port closed, HTTP probe failing after a
# restart) also stop the local keepalived. Resource warnings only return 1.

CHECK_URL="http://127.0.0.1/health"
LOG_FILE="/var/log/keepalived/check_nginx.log"
SERVER_PORT="80"
MAX_LOAD="10"          # 1-minute load average threshold
MAX_MEM_PERCENT="90"   # memory usage threshold in percent

# log MESSAGE [LEVEL] - timestamped line to stdout and to LOG_FILE (default level INFO).
log() {
  local level="${2:-INFO}"
  echo "[$(date '+%F %T')] [$level] $1" | tee -a "$LOG_FILE"
}

# notify [LEVEL] - for level ERROR stop keepalived, escalating to SIGKILL if it
# is still active after 2 seconds. Other levels do nothing.
notify() {
  local level=${1:-"INFO"}
  # Send an alert notification
  # curl -X POST "your_alert_webhook" -d "Split brain detected on $(hostname)"
  if [ "$level" = "ERROR" ]; then
    sudo -n /usr/bin/systemctl stop keepalived 2>&1
    sleep 2
    if systemctl is-active --quiet keepalived; then
      log "无法停止keepalived,尝试强制停止" "CRITICAL"
      sudo -n /usr/bin/systemctl kill --signal=SIGKILL keepalived
    fi
  fi
}

# probe_http - print the HTTP status code returned by CHECK_URL.
probe_http() {
  curl -s -o /dev/null -w "%{http_code}" --connect-timeout 2 --max-time 5 "$CHECK_URL"
}

# exceeds VALUE LIMIT - succeed when VALUE > LIMIT (floating point, via awk so
# that bc does not have to be installed).
exceeds() {
  awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value > limit) }'
}

# Is the service running?
if ! systemctl is-active --quiet nginx; then
  log "Nginx 服务未运行" "ERROR"
  notify "ERROR"
  exit 1
fi

# Is the port listening?
if ! ss -tln | grep -q ":$SERVER_PORT "; then
  log "Nginx port $SERVER_PORT 无法请求" "ERROR"
  notify "ERROR"
  exit 1
fi

# Real HTTP request (the decisive check)
HTTP_CODE=$(probe_http)
if [ "$HTTP_CODE" != "200" ]; then
  log "Nginx 健康检查失败, HTTP code: $HTTP_CODE" "ERROR"
  # Try one restart. The script runs unprivileged, so the restart goes through
  # sudo; the sudoers configuration must allow exactly this command.
  sudo -n /usr/bin/systemctl restart nginx
  sleep 2
  # Probe again
  HTTP_CODE=$(probe_http)
  if [ "$HTTP_CODE" != "200" ]; then
    log "Nginx 重启检查失败" "ERROR"
    notify "ERROR"
    exit 1
  fi
fi

# System load
LOAD=$(awk '{print $1}' /proc/loadavg)
if exceeds "$LOAD" "$MAX_LOAD"; then
  log "系统负载过高: $LOAD" "WARNING"
  notify "WARNING"
  exit 1
fi

# Memory usage
MEM_USAGE=$(awk '/MemTotal/{total=$2}/MemAvailable/{avail=$2}END{printf("%.2f", (total-avail)/total*100)}' /proc/meminfo)
if exceeds "$MEM_USAGE" "$MAX_MEM_PERCENT"; then
  log "内存使用过高: $MEM_USAGE%" "WARNING"
  notify "WARNING"
  exit 1
fi

log "Nginx 健康检查正常"
exit 0
EOF
chmod +x /opt/scripts/check_nginx.sh
chown keepalived_script:keepalived_script /opt/scripts/check_nginx.sh
启动服务
bash
# Start keepalived now and enable it at boot.
systemctl enable --now keepalived
高可用测试
bash
# Show the addresses on lb-master; the VIP should be listed
ip a
# Request the VIP to verify the service answers
curl 192.168.148.200
# Stop keepalived on lb-master to simulate a node failure
systemctl stop keepalived
# Request the VIP again to verify it is still served after failover
curl 192.168.148.200