Skip to content

Keepalived 高可用Nginx

环境准备

| 服务 | IP | 说明 |
| --- | --- | --- |
| lb-master | 192.168.148.210 | Keepalived主服务器(Nginx主负载均衡器) |
| lb-backup | 192.168.148.211 | Keepalived备服务器(Nginx备负载均衡器) |
| web-01 | 192.168.148.212 | web01服务器 |
| web-02 | 192.168.148.213 | web02服务器 |
| VIP | 192.168.148.200 | 虚拟VIP地址 |

安装Nginx服务

所有服务器安装

bash
# Install nginx, publish this host's first IP address as the index page,
# then start the service and enable it at boot.
dnf -y install nginx
# The document root is /usr/share/nginx/html ("shar" was a typo that made
# the redirect fail because the directory does not exist).
hostname -I | awk '{print $1}' > /usr/share/nginx/html/index.html
systemctl enable --now nginx

配置负载均衡

在lb-master和lb-backup修改/etc/nginx/nginx.conf,修改完重新加载nginx

nginx
http {
    upstream backend {
        # Default round-robin balancing across the real web servers.
        # The pool must list web-01/web-02; listing the load balancers'
        # own addresses makes nginx proxy requests back to itself in a loop.
        server 192.168.148.212:80;
        server 192.168.148.213:80;
    }

    server {
        listen 80;

        # Load location snippets such as /etc/nginx/default.d/health.conf so
        # that /health is answered by this nginx instead of being proxied.
        include /etc/nginx/default.d/*.conf;

        location / {
            proxy_pass http://backend;
            proxy_set_header Host $host;
        }
    }

}

在nginx配置中添加健康检查接口

bash
# Write a location snippet that answers /health locally with "healthy".
cat > /etc/nginx/default.d/health.conf << "EOF"
location /health {
    access_log off;
    # Set the response type through default_type: add_header would append a
    # second Content-Type header next to the one nginx already emits.
    default_type text/plain;
    return 200 "healthy\n";
}
EOF

部署keepalived

在lb-master和lb-backup安装

安装

bash
dnf -y install keepalived

# Dedicated unprivileged account for running the keepalived check scripts
# (no home directory, no login shell). Skipped when it already exists.
id keepalived_script >/dev/null 2>&1 || useradd -Ms /sbin/nologin keepalived_script
# Directories for the scripts and for their log files
mkdir -p /opt/scripts /var/log/keepalived
chown keepalived_script:keepalived_script /var/log/keepalived

# sudo rules for the script account. sudo matches command arguments exactly,
# so the forced "kill --signal=SIGKILL" form needs its own entry; "=" must be
# escaped inside sudoers command arguments. Restarting nginx is also allowed
# so the account can do it through sudo.
sudoers_tmp=$(mktemp)
cat > "$sudoers_tmp" << "EOF"
keepalived_script ALL=(root) NOPASSWD: /usr/bin/systemctl stop keepalived, /usr/bin/systemctl kill keepalived, /usr/bin/systemctl kill --signal\=SIGKILL keepalived, /usr/bin/systemctl restart nginx
EOF
# Install the rule only after visudo accepts it: a broken file under
# /etc/sudoers.d would make sudo unusable.
visudo -cf "$sudoers_tmp" \
  && install -m 0440 -o root -g root "$sudoers_tmp" /etc/sudoers.d/keepalived_script
rm -f -- "$sudoers_tmp"

lb-master

bash
# keepalived configuration for lb-master (priority 100).
cat > /etc/keepalived/keepalived.conf << "EOF"
! Configuration File for keepalived

global_defs {
   router_id LB_01                          # must differ from every other keepalived node
   enable_script_security
   script_user keepalived_script
}
vrrp_script chk_nginx {                     # custom health-check script
    script "/opt/scripts/check_nginx.sh"    # script path (must be executable)
    interval 2                              # seconds between checks
    # Priority change applied while the check is failing (-253~253).
    # The penalty must exceed the master/backup priority gap (100 - 90 = 10);
    # with a smaller value a failing master still outranks the backup.
    weight -20
    rise 2                                  # 2 consecutive successes mark the check healthy
    fall 3                                  # 3 consecutive failures mark the check unhealthy
    user keepalived_script
}
vrrp_instance VI_1 {
    state BACKUP                            # both nodes start as BACKUP
    interface ens32                         # heartbeat interface; same on both nodes
    virtual_router_id 51                    # instance ID; must match the backup node
    priority 100                            # priority (1-255); master is higher than backup
    advert_int 1
    #nopreempt
    preempt_delay 60                        # wait 1 minute before preempting

    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.148.200/24 dev ens32 label ens32:1    # the VIP
    }
    track_script {                          # attach the health-check script
        chk_nginx
    }
    # split-brain detection script, run on transition to MASTER
    notify_master "/opt/scripts/check_split_brain.sh"
}
EOF

lb-backup

bash
# keepalived configuration for lb-backup (priority 90).
cat > /etc/keepalived/keepalived.conf << "EOF"
! Configuration File for keepalived

global_defs {
   router_id LB_02                          # must differ from every other keepalived node
   enable_script_security
   script_user keepalived_script
}
vrrp_script chk_nginx {                     # custom health-check script
    script "/opt/scripts/check_nginx.sh"    # script path (must be executable)
    interval 2                              # seconds between checks
    # Priority change applied while the check is failing (-253~253).
    # The penalty must exceed the master/backup priority gap (100 - 90 = 10);
    # with a smaller value a failing node can still outrank a healthy peer.
    weight -20
    rise 2                                  # 2 consecutive successes mark the check healthy
    fall 3                                  # 3 consecutive failures mark the check unhealthy
    user keepalived_script
}
vrrp_instance VI_1 {
    state BACKUP                            # both nodes start as BACKUP
    interface ens32                         # heartbeat interface; same as on the master node
    virtual_router_id 51                    # instance ID; must match the master node
    priority 90                             # priority (1-255); master is higher than backup
    advert_int 1
    #nopreempt
    preempt_delay 60                        # wait 1 minute before preempting

    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.148.200/24 dev ens32 label ens32:1    # the VIP
    }
    track_script {                          # attach the health-check script
        chk_nginx
    }
    # split-brain detection script, run on transition to MASTER
    notify_master "/opt/scripts/check_split_brain.sh"
}
EOF

检测脚本

环境准备

bash
# sshpass is required so SSH can run without password or host-fingerprint prompts
dnf -y install sshpass

仲裁脚本

bash
cat > /opt/scripts/check_split_brain.sh << "EOF"
#!/bin/bash
#
# Split-brain check, configured as keepalived's notify_master script.
# If the peer load balancer is reachable and also has the VIP bound,
# keepalived is stopped on this node so the VIP is released here.
# Exit status: 0 = no split brain found, 1 = split brain found.

# Addresses of both load balancers. The peer is whichever one is not a local
# address, so the same script works unchanged on lb-master and lb-backup
# (a fixed peer IP equal to the node's own address would make the node
# inspect itself and report a false split brain).
LB_NODES=("192.168.148.210" "192.168.148.211")
VIP="192.168.148.200"
LOG_FILE="/var/log/keepalived/check_split_brain.log"
# NOTE(review): a plain-text root password in a script is a security risk;
# SSH key authentication would remove the need for it.
SSH_PASSWORD="123456"

# log MESSAGE [LEVEL] - print a timestamped line and append it to LOG_FILE.
# LEVEL defaults to INFO.
function log() {
    local level="${2:-INFO}"
    echo "[$(date '+%F %T')] [$level] $1" | tee -a "$LOG_FILE"
}

# Select the peer: the first load-balancer address not configured locally.
LOCAL_IPS=" $(hostname -I) "
REMOTE_IP=""
for node in "${LB_NODES[@]}"; do
    if [[ "$LOCAL_IPS" != *" $node "* ]]; then
        REMOTE_IP="$node"
        break
    fi
done
if [[ -z "$REMOTE_IP" ]]; then
    log "无法确定对端IP,跳过脑裂检测" "WARNING"
    exit 0
fi

# Only query the peer when it answers a ping.
if ping -c 1 -W 1 "$REMOTE_IP" >/dev/null 2>&1; then
    # Peer is reachable: ask it whether the VIP is bound there as well.
    # - sshpass -e takes the password from $SSHPASS, keeping it out of argv.
    # - UserKnownHostsFile=/dev/null: the script user has no home directory,
    #   so there is nowhere to store known_hosts.
    # - grep -wF matches the VIP as a literal whole word, not as a regex.
    if SSHPASS="$SSH_PASSWORD" sshpass -e ssh -o ConnectTimeout=2 \
            -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            "root@$REMOTE_IP" "ip -o -4 addr show | grep -qwF -- '$VIP'" \
            >/dev/null 2>&1; then
        log "发现脑裂!立即释放VIP" "ERROR"
        sudo -n /usr/bin/systemctl stop keepalived 2>&1
        # Send an alert notification
        # curl -X POST "your_alert_webhook" -d "Split brain detected on $(hostname)"
        exit 1
    fi
fi
log "未发现脑裂,集群正常"
exit 0
EOF

chmod +x /opt/scripts/check_split_brain.sh
chown keepalived_script:keepalived_script /opt/scripts/check_split_brain.sh

检测Nginx服务脚本

bash
cat > /opt/scripts/check_nginx.sh << "EOF"
#!/bin/bash
#
# nginx health check, referenced by keepalived's vrrp_script chk_nginx.
# Checks, in order: service state, listening port, HTTP health endpoint
# (with one restart attempt), system load and memory usage.
# Exit status: 0 = healthy, 1 = unhealthy.
# On ERROR-level failures keepalived is additionally stopped through sudo.

CHECK_URL="http://127.0.0.1/health"
LOG_FILE="/var/log/keepalived/check_nginx.log"
SERVER_PORT="80"

# log MESSAGE [LEVEL] - print a timestamped line and append it to LOG_FILE.
# LEVEL defaults to INFO.
function log() {
    local level="${2:-INFO}"
    echo "[$(date '+%F %T')] [$level] $1" | tee -a "$LOG_FILE"
}

# notify [LEVEL] - react to a failed check. For LEVEL=ERROR keepalived is
# stopped, and killed with SIGKILL if it is still active two seconds later.
# Other levels currently do nothing (the alert hook is a placeholder).
function notify() {
    local level="${1:-INFO}"
    # Send an alert notification
    # curl -X POST "your_alert_webhook" -d "nginx health check failed on $(hostname)"

    if [ "$level" = "ERROR" ]; then
        sudo -n /usr/bin/systemctl stop keepalived 2>&1
        sleep 2
        if systemctl is-active --quiet keepalived; then
            log "无法停止keepalived,尝试强制停止" "CRITICAL"
            sudo -n /usr/bin/systemctl kill --signal=SIGKILL keepalived
        fi
    fi
}

# http_code - print the HTTP status code returned by CHECK_URL.
function http_code() {
    curl -s -o /dev/null -w "%{http_code}" --connect-timeout 2 --max-time 5 "$CHECK_URL"
}

# exceeds VALUE LIMIT - succeed when VALUE is numerically greater than LIMIT.
# awk does the floating-point comparison, so bc does not need to be installed.
function exceeds() {
    awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}

# Is the nginx service running?
if ! systemctl is-active --quiet nginx; then
    log "Nginx 服务未运行" "ERROR"
    notify "ERROR"
    exit 1
fi

# Is the port being listened on?
if ! ss -tln | grep -q ":$SERVER_PORT "; then
    log "Nginx port $SERVER_PORT 无法请求" "ERROR"
    notify "ERROR"
    exit 1
fi

# Real HTTP request against the health endpoint (the decisive check)
HTTP_CODE=$(http_code)
if [ "$HTTP_CODE" != "200" ]; then
    log "Nginx 健康检查失败, HTTP code: $HTTP_CODE" "WARNING"
    # Try restarting nginx once. The script user is unprivileged, so this
    # goes through sudo and needs a sudoers rule for this exact command.
    sudo -n /usr/bin/systemctl restart nginx
    sleep 2
    # Check again after the restart
    HTTP_CODE=$(http_code)
    if [ "$HTTP_CODE" != "200" ]; then
        log "Nginx 重启检查失败" "ERROR"
        notify "ERROR"
        exit 1
    fi
fi

# System load: 1-minute load average from /proc/loadavg
LOAD=$(awk '{print $1}' /proc/loadavg)
if exceeds "$LOAD" 10; then
    log "系统负载过高: $LOAD" "WARNING"
    notify "WARNING"
    exit 1
fi

# Memory usage in percent: (MemTotal - MemAvailable) / MemTotal
MEM_USAGE=$(awk '/MemTotal/{total=$2}/MemAvailable/{avail=$2}END{printf("%.2f", (total-avail)/total*100)}' /proc/meminfo)
if exceeds "$MEM_USAGE" 90; then
    log "内存使用过高: $MEM_USAGE%" "WARNING"
    notify "WARNING"
    exit 1
fi

log "Nginx 健康检查正常"
exit 0

EOF

chmod +x /opt/scripts/check_nginx.sh
chown keepalived_script:keepalived_script /opt/scripts/check_nginx.sh

启动服务

bash
# Start keepalived and register it to start at boot, in one step.
systemctl enable --now keepalived

高可用测试

bash
# Show the VIP information on the lb-master server
ip a

# Request the VIP address to check high availability
curl 192.168.148.200

# Stop the keepalived service on lb-master to simulate an outage
# (the unit is named "keepalived"; "kepalived" does not exist)
systemctl stop keepalived

# Request the VIP address again to check that it is still available
curl 192.168.148.200