Kubernetes-高可用集群

此案例全部基于openEuler 24.03(LTS)操作系统操作

环境准备

主机准备

| 主机IP地址 | 主机名 | 主机配置(最少) | 存储(最少) | 主机角色 | 软件列表 |
| --- | --- | --- | --- | --- | --- |
| 192.168.148.191 | k8s-master01 | 2C4G | 40G | master | kube-apiserver、kube-controller-manager、kube-scheduler、etcd、kubelet、kube-proxy、docker-ce |
| 192.168.148.192 | k8s-master02 | 2C4G | 40G | master | kube-apiserver、kube-controller-manager、kube-scheduler、etcd、kubelet、kube-proxy、docker-ce |
| 192.168.148.193 | k8s-master03 | 2C4G | 40G | master | kube-apiserver、kube-controller-manager、kube-scheduler、etcd、kubelet、kube-proxy、docker-ce |
| 192.168.148.194 | k8s-worker01 | 1C2G | 40G | worker | kubelet、kube-proxy、docker-ce |
| 192.168.148.195 | k8s-worker02 | 1C2G | 40G | worker | kubelet、kube-proxy、docker-ce |
| 192.168.148.196 | k8s-ha-master | 1C1G | 20G | LB | haproxy、keepalived |
| 192.168.148.197 | k8s-ha-backup | 1C1G | 20G | LB | haproxy、keepalived |
| 192.168.148.190 | - | - | - | VIP | - |

Kubernetes 网络划分

| 网络类型 | 网段 | 用途说明 |
| --- | --- | --- |
| Node网络 | 192.168.148.0/24 | 物理主机通信网络,承载节点间SSH、API Server访问等基础流量 |
| Service网络 | 10.96.0.0/16 | ClusterIP服务虚拟IP段,通过kube-proxy实现内部服务发现和负载均衡 |
| Pod网络 | 10.244.0.0/16 | 容器运行时网络空间,由CNI插件(如Calico/Flannel)分配,每个Pod获得独立IP地址 |

磁盘扩容

bash
# 将新磁盘/dev/sdb加入卷组openeuler并扩展根逻辑卷
pvcreate /dev/sdb
vgextend openeuler /dev/sdb
lvextend -l +100%FREE /dev/mapper/openeuler-root
# ext4文件系统使用resize2fs;如果根文件系统是xfs,请改用 xfs_growfs /
resize2fs /dev/mapper/openeuler-root
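
扩容完成后,可以用下面的片段确认卷组和根文件系统是否已经变大(示意,假设卷组名为openeuler):

bash
# 查看卷组与逻辑卷的当前容量
vgs openeuler
lvs openeuler
# 确认根文件系统已扩展
df -h /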

配置主机名和hosts

bash
# 设置主机名:对应主机设置对应的主机名
hostnamectl set-hostname k8s-master01 && exec bash
hostnamectl set-hostname k8s-master02 && exec bash
hostnamectl set-hostname k8s-master03 && exec bash
hostnamectl set-hostname k8s-worker01 && exec bash
hostnamectl set-hostname k8s-worker02 && exec bash
hostnamectl set-hostname k8s-ha-master && exec bash
hostnamectl set-hostname k8s-ha-backup && exec bash

# 设置主机hosts:所有主机(含k8s和ha节点)都需配置,便于后续通过主机名互相访问
cat >> /etc/hosts << EOF
192.168.148.191 k8s-master01
192.168.148.192 k8s-master02
192.168.148.193 k8s-master03
192.168.148.194 k8s-worker01
192.168.148.195 k8s-worker02
192.168.148.196 k8s-ha-master
192.168.148.197 k8s-ha-backup
EOF
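
配置完成后,可以用下面的循环快速验证主机名解析与连通性(示意):

bash
# 每台主机都应能通过主机名ping通
for i in k8s-master01 k8s-master02 k8s-master03 k8s-worker01 k8s-worker02 k8s-ha-master k8s-ha-backup; do
    ping -c 1 -W 1 $i > /dev/null && echo "$i OK" || echo "$i FAILED"
done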

关闭防火墙和SELinux

在所有主机上执行(包括k8s和ha节点)

bash
# 临时关闭selinux
setenforce 0
# 永久关闭(需要重启生效)
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config

# 关闭防火墙
systemctl disable firewalld --now

关闭swap

所有k8s集群主机执行

bash
# 临时关闭
swapoff -a && sysctl -w vm.swappiness=0
# 永久关闭
sed -ri 's/.*swap.*/#&/' /etc/fstab

修改句柄数 ulimit

所有主机执行(包括k8s和ha节点)

bash
# 临时生效
ulimit -SHn 65535

# 永久生效
vi /etc/security/limits.conf
# 末尾添加如下内容
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited

ssh免密

所有主机执行(包括k8s和ha节点)。免密需要主机之间互信,因此每台主机都需要执行以下操作

bash
# 生成密钥对
ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N ""

# 安装sshpass
dnf -y install sshpass

# 将公钥上传到目标主机
for i in k8s-master01 k8s-master02 k8s-master03 k8s-worker01 k8s-worker02 k8s-ha-master k8s-ha-backup;do 
	sshpass -p "123456" ssh-copy-id -i ~/.ssh/id_rsa.pub -o StrictHostKeyChecking=no root@$i
done
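
公钥分发完成后,可以用下面的循环确认免密登录是否生效(示意):

bash
# 正常情况下会直接返回各主机的主机名,而不会提示输入密码
for i in k8s-master01 k8s-master02 k8s-master03 k8s-worker01 k8s-worker02 k8s-ha-master k8s-ha-backup; do
    ssh -o BatchMode=yes root@$i hostname
done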

安装ipvs

所有主机执行

bash
# 安装软件
dnf install ipvsadm ipset sysstat conntrack libseccomp -y

# 编写ipvs模块加载脚本(覆盖写入,避免重复执行时追加重复内容)
mkdir -p /etc/sysconfig/modules
cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
# br_netfilter是后面net.bridge.*内核参数生效的前提
modprobe -- br_netfilter
EOF

# 授权、运行、检查是否加载
chmod 755 /etc/sysconfig/modules/ipvs.modules
bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack -e br_netfilter
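
需要说明的是,/etc/sysconfig/modules/ 下的脚本在openEuler这类systemd发行版上不会像旧版RHEL那样开机自动执行;如果希望这些模块开机自动加载,一个可选做法是交给 systemd-modules-load 处理(示意):

bash
# 声明需要开机自动加载的内核模块,由systemd-modules-load.service负责加载
cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
br_netfilter
EOF

# 立即按新配置加载一次
systemctl restart systemd-modules-load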

内核优化

所有节点执行

bash
# k8s.conf
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
# fs.may_detach_mounts = 1    # 仅旧内核存在该参数,openEuler 24.03的新内核已移除,保留会使sysctl报错
vm.overcommit_memory=1
net.ipv4.conf.all.route_localnet = 1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16768
# net.ipv4.ip_conntrack_max = 65536    # 旧版ip_conntrack参数,新内核已不存在,上限由上面的net.netfilter.nf_conntrack_max控制
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16768
EOF

# 开启sysctl.conf的ipv4转发功能
sed -i.bak 's/net.ipv4.ip_forward=0/net.ipv4.ip_forward=1/' /etc/sysctl.conf 

# 内核生效
sysctl --system
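
参数加载后,可以抽查几个关键项确认已经生效(示意):

bash
# 转发、桥接与连接跟踪参数应与k8s.conf中的设置一致
sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables net.netfilter.nf_conntrack_max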

安装常用工具(可选)

所有节点执行

bash
dnf install wget jq psmisc vim net-tools telnet lvm2 git lrzsz -y

创建目录

所有节点执行

bash
mkdir -p /etc/kubernetes/pki

安装haproxy和keepalived

安装软件

只在k8s-ha-master和k8s-ha-backup上执行,配置VIP

bash
dnf -y install haproxy keepalived

# 创建程序用户
useradd -Ms /sbin/nologin keepalived

haproxy 配置

只在k8s-ha-master和k8s-ha-backup上执行

bash
cat > /etc/haproxy/haproxy.cfg << "EOF"
global
  maxconn 2000
  ulimit-n 16384
  log 127.0.0.1:514 local0 info
  stats timeout 30s
  # 本地管理socket,供后文高可用测试中的 show stat 命令查询后端状态
  stats socket /var/run/haproxy.sock mode 600 level admin
  tune.ssl.default-dh-param 2048

defaults
  log global
  mode http
  option httplog
  timeout connect 5000
  timeout client 50000
  timeout server 50000
  timeout http-request 15s
  timeout http-keep-alive 15s

frontend monitor-in
  bind *:33305
  mode http
  option httplog
  stats enable
  stats uri /monitor
  stats auth admin:SecurePassword123!

frontend k8s-master
  bind 0.0.0.0:6443
  bind 127.0.0.1:6443
  mode tcp
  option tcplog
  tcp-request inspect-delay 5s
  tcp-request content accept if { req.ssl_hello_type 1 }
  default_backend k8s-master

backend k8s-master
  mode tcp
  option tcplog
  option tcp-check
  option tcp-smart-accept
  balance roundrobin
  server k8s-master01 192.168.148.191:6443 check inter 10s rise 2 fall 2 maxconn 250
  server k8s-master02 192.168.148.192:6443 check inter 10s rise 2 fall 2 maxconn 250
  server k8s-master03 192.168.148.193:6443 check inter 10s rise 2 fall 2 maxconn 250
EOF
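
写完配置后,可以先用haproxy自带的检查模式验证语法是否正确(示意):

bash
# -c 只做配置检查,不实际启动进程
haproxy -c -f /etc/haproxy/haproxy.cfg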

keepalived 配置

k8s-ha-master

bash
cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived

global_defs {
  router_id KUBERNETES_HA				# 标识虚拟路由器组,主备相同
  script_user keepalived
  enable_script_security
}

vrrp_script chk_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 5
  weight -5
  fall 2
  rise 1
  user keepalived
}

vrrp_instance VI_1 {
  state MASTER							# 主节点名
  interface ens32
  mcast_src_ip 192.168.148.196			# 当前节点IP
  virtual_router_id 51					# 集群内统一
  priority 100							# 范围 1-255,分数高于backup
  advert_int 2
  authentication {
    auth_type PASS
    auth_pass K8SHA_KA_AUTH
  }
  virtual_ipaddress {
    192.168.148.190						# 设置VIP
  }
  track_script {
    chk_apiserver
  }
}
EOF

k8s-ha-backup

bash
cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived

global_defs {
  router_id KUBERNETES_HA				# 标识虚拟路由器组,主备相同
  script_user keepalived
  enable_script_security
}

vrrp_script chk_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 5
  weight -5
  fall 2
  rise 1
  user keepalived
}

vrrp_instance VI_1 {
  state BACKUP							# 备节点名
  interface ens32
  mcast_src_ip 192.168.148.197			# 当前节点IP
  virtual_router_id 51					# 集群内统一
  priority 99							# 范围 1-255,分数低于master
  advert_int 2
  authentication {
    auth_type PASS
    auth_pass K8SHA_KA_AUTH
  }
  virtual_ipaddress {
    192.168.148.190						# 设置VIP
  }
  track_script {
    chk_apiserver
  }
}
EOF

健康检测脚本

只在k8s-ha-master和k8s-ha-backup上执行

bash
cat > /etc/keepalived/check_apiserver.sh <<"EOF"
#!/bin/bash

err=0
for k in $(seq 1 3); do
    if ! systemctl is-active haproxy >/dev/null 2>&1; then
        err=$((err + 1))
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [ "$err" -ge 3 ]; then
    systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF

chown keepalived:keepalived /etc/keepalived/check_apiserver.sh
chmod +x /etc/keepalived/check_apiserver.sh

启动服务

只在k8s-ha-master和k8s-ha-backup上执行

bash
systemctl daemon-reload
systemctl enable --now haproxy
systemctl enable --now keepalived
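
两台LB节点的服务都启动后,可以确认VIP已经绑定在主节点上(示意,网卡名以实际环境为准,这里假设为ens32):

bash
# 在k8s-ha-master上查看VIP是否已绑定
ip addr show ens32 | grep 192.168.148.190
# 确认两个服务均处于运行状态
systemctl is-active haproxy keepalived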

K8s集群搭建

安装Docker

安装服务

所有k8s主机执行

bash
# 注册阿里云repo(如提示找不到yum-config-manager命令,可先安装dnf-plugins-core,改用 dnf config-manager --add-repo)
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo

# 替换centos识别版本号
sed -i 's+$releasever+9+' /etc/yum.repos.d/docker-ce.repo

# 安装docker-ce和依赖
dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

Docker-配置文件

bash
cat > /etc/docker/daemon.json << EOF
{
    "data-root":"/data/docker",
    "registry-mirrors":[
        "https://uynf11ba.mirror.aliyuncs.com",
        "https://docker.m.daocloud.io",
        "https://noohub.ru",
        "https://huecker.io",
        "https://dockerhub.timeweb.cloud"
    ],
    "bip": "172.19.0.1/16",
    "iptables": true,
    "ip-masq": true,
    "exec-opts":["native.cgroupdriver=systemd"],
    "live-restore":true,
    "ipv6": false
}
EOF

# 创建docker工作目录
mkdir /data/docker -p
# 启动docker
systemctl daemon-reload 
systemctl enable docker --now
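
Docker启动后,可以确认cgroup驱动和数据目录是否按daemon.json生效(示意):

bash
# 预期输出包含 Cgroup Driver: systemd 和 Docker Root Dir: /data/docker
docker info | grep -E "Cgroup Driver|Docker Root Dir"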

安装cri-dockerd

CAUTION

Kubernetes 1.24+ 版本已彻底移除 dockershim,即使安装了 Docker,kubelet 也必须通过 cri-dockerd 适配器才能继续使用它,因此这里在 Docker 之上部署 cri-dockerd 作为 CRI 运行时。

所有k8s主机都安装

bash
# 下载
wget https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.14/cri-dockerd-0.3.14-3.el7.x86_64.rpm
# 安装
dnf -y install ./cri-dockerd-0.3.14-3.el7.x86_64.rpm

# 修改ExecStart参数:指定CNI网络插件,并把pause基础镜像指向阿里云仓库
sed -i 's,^ExecStart.*,& --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.9,' /usr/lib/systemd/system/cri-docker.service

#启动cri-docker
systemctl daemon-reload
systemctl enable cri-docker --now
systemctl status cri-docker
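
启动后可以确认CRI socket已经就绪,后面kubelet的--container-runtime-endpoint将指向它(示意):

bash
# cri-docker应处于active状态,且socket文件存在
systemctl is-active cri-docker
ls -l /run/cri-dockerd.sock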

证书工具

证书工具的下载与各类证书的生成只需在k8s-master01节点执行即可

下载证书工具

bash
# 下载cfssl
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssl_1.6.4_linux_amd64
chmod +x cfssl_1.6.4_linux_amd64
mv cfssl_1.6.4_linux_amd64 /usr/local/bin/cfssl

# 下载cfssljson
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssljson_1.6.4_linux_amd64
chmod +x cfssljson_1.6.4_linux_amd64
mv cfssljson_1.6.4_linux_amd64 /usr/local/bin/cfssljson

ca根配置

bash
mkdir -p /etc/kubernetes/pki

cat > /etc/kubernetes/pki/ca-config.json <<"EOF"
{
  "signing": {
    "default": {
      "expiry": "87600h"
    },
    "profiles": {
      "kubernetes": {
        "usages": [
          "signing",
          "key encipherment",
          "server auth",
          "client auth"
        ],
        "expiry": "87600h"
      }
    }
  }
}
EOF

ca签名请求

CSR是Certificate Signing Request的英文缩写,即证书签名请求文件

bash
cat > /etc/kubernetes/pki/ca-csr.json <<"EOF"
{
  "CN": "kubernetes",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Beijing",
      "L": "Beijing",
      "O": "kubemsb",
      "OU": "CN"
    }
  ],
  "ca": {
     "expiry": "87600h"
  }
}
EOF
  • CN(Common Name):公用名,必须填写,一般是网站域名或组件名
  • O(Organization):组织名,必须填写。如果申请的是OV、EV型证书,组织名称必须与企业在政府登记的名称严格一致(一般与营业执照完全一致),不可使用缩写或商标;如需使用英文名称,需要有DUNS编码或律师信证明
  • OU(Organization Unit):单位部门,一般没有太多限制,可直接填写IT DEPT等
  • L(Locality):申请单位所在的城市
  • ST(State/Province):申请单位所在的省份
  • C(Country Name):国家名称,使用两位大写的国家代码,中国是CN

生成证书

bash
cd /etc/kubernetes/pki
cfssl gencert -initca ca-csr.json | cfssljson -bare ca
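
生成完成后,可以查看CA证书的主题和有效期是否符合预期(示意,有效期对应上面配置的87600h即10年):

bash
# 查看CA证书的Subject与起止时间
openssl x509 -in ca.pem -noout -subject -dates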

etcd高可用搭建

etcd文档

下载etcd

在k8s-master01、k8s-master02、k8s-master03三个master节点上安装

bash
# 下载etcd安装包,稍后分发到所有master节点部署etcd高可用
wget https://github.com/etcd-io/etcd/releases/download/v3.5.16/etcd-v3.5.16-linux-amd64.tar.gz

etcd证书

https://etcd.io/docs/next/op-guide/hardware/#small-cluster 安装参考

在k8s-master01节点生成后同步到其他master节点;如环境不同,需把hosts中的主机名和IP改为实际的master节点地址

bash
cat > /etc/kubernetes/pki/etcd-csr.json <<"EOF"
{
  "CN": "etcd",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "hosts": [
    "127.0.0.1",
    "k8s-master01",
    "k8s-master02",
    "k8s-master03",
    "192.168.148.191",
    "192.168.148.192",
    "192.168.148.193"
  ],
  "names": [
    {
        "C": "CN",
        "L": "beijing",
        "O": "etcd",
        "ST": "beijing",
        "OU": "System"
    }
  ]
}
EOF

# 生成证书

cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes etcd-csr.json | cfssljson -bare etcd

# 把生成的etcd证书复制给其他master节点
# 先在另外两个master节点上创建目录/etc/kubernetes/pki/
ssh k8s-master02 "mkdir -p /etc/kubernetes/pki"
ssh k8s-master03 "mkdir -p /etc/kubernetes/pki"

for i in k8s-master02 k8s-master03;
do 
	scp -r /etc/kubernetes/pki/* root@$i:/etc/kubernetes/pki
done
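
分发前后都可以确认etcd证书的SAN中已经包含全部master节点的主机名和IP(示意):

bash
# 查看etcd证书的Subject Alternative Name
openssl x509 -in /etc/kubernetes/pki/etcd.pem -noout -text | grep -A1 "Subject Alternative Name"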

安装etcd

bash
tar -xf etcd-v3.5.16-linux-amd64.tar.gz
cp -p etcd-v3.5.16-linux-amd64/etcd* /usr/local/bin/

# 向其它master节点分发etcd软件
scp etcd-v3.5.16-linux-amd64/etcd* k8s-master02:/usr/local/bin/
scp etcd-v3.5.16-linux-amd64/etcd* k8s-master03:/usr/local/bin/

etcd配置信息

k8s-master01

在k8s-master01节点上执行,需要修改IP地址

bash
mkdir -p /etc/etcd
mkdir -p /data/etcd

cat > /etc/etcd/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd1"
ETCD_DATA_DIR="/data/etcd"
ETCD_LISTEN_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_LISTEN_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379,http://127.0.0.1:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_ADVERTISE_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379"
ETCD_INITIAL_CLUSTER="etcd1=https://192.168.148.191:2380,etcd2=https://192.168.148.192:2380,etcd3=https://192.168.148.193:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
k8s-master02

在k8s-master02节点上执行

bash
mkdir -p /etc/etcd
mkdir -p /data/etcd

cat > /etc/etcd/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd2"
ETCD_DATA_DIR="/data/etcd"
ETCD_LISTEN_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_LISTEN_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379,http://127.0.0.1:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_ADVERTISE_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379"
ETCD_INITIAL_CLUSTER="etcd1=https://192.168.148.191:2380,etcd2=https://192.168.148.192:2380,etcd3=https://192.168.148.193:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
k8s-master03

在k8s-master03节点上执行

bash
mkdir -p /etc/etcd
mkdir -p /data/etcd

cat > /etc/etcd/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd3"
ETCD_DATA_DIR="/data/etcd"
ETCD_LISTEN_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_LISTEN_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379,http://127.0.0.1:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_ADVERTISE_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379"
ETCD_INITIAL_CLUSTER="etcd1=https://192.168.148.191:2380,etcd2=https://192.168.148.192:2380,etcd3=https://192.168.148.193:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
字段说明
  • ETCD_NAME:节点名称,集群中唯一
  • ETCD_DATA_DIR:数据目录
  • ETCD_LISTEN_PEER_URLS:集群通信监听地址
  • ETCD_LISTEN_CLIENT_URLS:客户端访问监听地址
  • ETCD_INITIAL_ADVERTISE_PEER_URLS:集群通告地址
  • ETCD_ADVERTISE_CLIENT_URLS:客户端通告地址
  • ETCD_INITIAL_CLUSTER:集群节点地址
  • ETCD_INITIAL_CLUSTER_TOKEN:集群Token
  • ETCD_INITIAL_CLUSTER_STATE:加入集群的当前状态,new是新集群,existing表示加入已有集群

配置service

在所有k8s-master节点执行

bash
cat > /usr/lib/systemd/system/etcd.service <<"EOF"
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
EnvironmentFile=-/etc/etcd/etcd.conf
WorkingDirectory=/data/etcd
ExecStart=/usr/local/bin/etcd \
  --cert-file=/etc/kubernetes/pki/etcd.pem \
  --key-file=/etc/kubernetes/pki/etcd-key.pem \
  --trusted-ca-file=/etc/kubernetes/pki/ca.pem \
  --peer-cert-file=/etc/kubernetes/pki/etcd.pem \
  --peer-key-file=/etc/kubernetes/pki/etcd-key.pem \
  --peer-trusted-ca-file=/etc/kubernetes/pki/ca.pem \
  --peer-client-cert-auth \
  --client-cert-auth
Restart=on-failure
RestartSec=5
StandardOutput=syslog
StandardError=syslog
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF

# 加载&开机启动
systemctl daemon-reload
systemctl enable --now etcd
# 启动有问题,使用 journalctl -u 服务名排查
journalctl -u etcd

测试etcd访问

bash
# 验证etcd集群状态
etcdctl member list

# 下面带证书参数的 endpoint health 命令输出中各列的含义:
#  ENDPOINT: etcd 集群中的节点地址
#  HEALTH: 节点的健康状况,true 表示节点正常,false 表示节点不健康
#  TOOK: 该健康检查请求所花的时间
#  ERROR: 如果某个节点不健康,这里会显示相关的错误信息


ETCDCTL_API=3 /usr/local/bin/etcdctl \
--write-out=table \
--cacert=/etc/kubernetes/pki/ca.pem \
--cert=/etc/kubernetes/pki/etcd.pem \
--key=/etc/kubernetes/pki/etcd-key.pem \
--endpoints=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
endpoint health
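
还可以写入并读取一个测试键,进一步确认集群读写正常(示意,/test/hello为临时演示键):

bash
# 公共的证书与端点参数
ETCD_OPTS="--cacert=/etc/kubernetes/pki/ca.pem --cert=/etc/kubernetes/pki/etcd.pem --key=/etc/kubernetes/pki/etcd-key.pem --endpoints=https://192.168.148.191:2379"
# 写入、读取并清理测试键
ETCDCTL_API=3 etcdctl $ETCD_OPTS put /test/hello world
ETCDCTL_API=3 etcdctl $ETCD_OPTS get /test/hello
ETCDCTL_API=3 etcdctl $ETCD_OPTS del /test/hello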

k8s安装包下载

K8s离线安装包

在 https://github.com/kubernetes/kubernetes 的 CHANGELOG 中找到对应版本的二进制包下载链接

可以在k8s-master01下载

bash
wget https://dl.k8s.io/v1.31.0/kubernetes-server-linux-amd64.tar.gz
for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do 
	scp kubernetes-server-linux-amd64.tar.gz root@$i:/root/
done

master节点准备

所有master节点解压kubelet,kubectl等到 /usr/local/bin。

bash
tar -xf kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}

worker节点准备

master需要全部组件,worker节点只需要kubelet、kube-proxy,同样解压到/usr/local/bin

bash
tar -xf kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,-proxy}

部署 apiserver

在k8s-master01节点执行然后同步到其它master节点

创建csr证书

bash
# 10.96.0.1 为service网段的第一个IP,可以自定义,如:66.66.0.1
# 192.168.148.190 是高可用VIP
# 说明:
# 如果 hosts 字段不为空则需要指定授权使用该证书的 IP(含VIP) 或域名列表。由于该证书被 集群使
# 用,需要将节点的IP都填上,为了方便后期扩容可以多写几个预留的IP。同时还需要填写 service 网络的
# 首个IP(一般是 kube-apiserver 指定的 service-cluster-ip-range 网段的第一个IP,如10.96.0.1)。

cat > /etc/kubernetes/pki/kube-apiserver-csr.json << "EOF"
{
  "CN": "kube-apiserver",
  "hosts": [
    "10.96.0.1",
    "127.0.0.1",
    "192.168.148.190",
    "192.168.148.191",
    "192.168.148.192",
    "192.168.148.193",
    "192.168.148.194",
    "192.168.148.195",
    "kubernetes",
    "kubernetes.default",
    "kubernetes.default.svc",
    "kubernetes.default.svc.cluster",
    "kubernetes.default.svc.cluster.local"
  ],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "L": "BeiJing",
      "ST": "BeiJing",
      "O": "Kubernetes",
      "OU": "Kubernetes"
    }
  ]
}
EOF

# 生成证书
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-apiserver-csr.json | cfssljson -bare kube-apiserver

# 同步到其它master节点
for i in k8s-master02 k8s-master03;do 
	scp -r /etc/kubernetes/pki/* root@$i:/etc/kubernetes/pki
done

生成token

在k8s-master01节点执行然后同步到其它master节点

bash
# 说明:
# 创建TLS机制所需TOKEN TLS Bootstraping:Master apiserver启用TLS认证后,Node节点kubelet和kube-proxy与kube-apiserver进行通信,必须使用CA签发的有效证书才可以,当Node节点很多时,这种客户端证书颁发需要大量工作,同样也会增加集群扩展复杂度。为了简化流程,Kubernetes引入了TLS bootstraping机制来自动颁发客户端证书,kubelet会以一个低权限用户自动向apiserver申请证书,kubelet的证书由apiserver动态签署。所以强烈建议在Node上使用这种方式,目前主要用于kubelet,kube-proxy还是由我们统一颁发一个证书。

cat > /etc/kubernetes/token.csv << EOF
$(head -c 16 /dev/urandom | od -An -t x | tr -d ' '),kubelet-bootstrap,10001,"system:kubelet-bootstrap"
EOF

# 同步到其它master节点
for i in k8s-master02 k8s-master03;
do
	scp -r /etc/kubernetes/token.csv root@$i:/etc/kubernetes
done

配置apiserver服务

  • 所有Master节点创建kube-apiserver.service
  • 以下文档使用的k8s service网段为10.96.0.0/16,该网段不能和宿主机网段、Pod网段重复
  • 特别注意:docker的网桥默认为172.17.0.1/16,不要使用这个网段(前文daemon.json已将bip改为172.19.0.1/16)

在所有k8s-master节点执行

bash
cat > /etc/kubernetes/kube-apiserver.conf << EOF
KUBE_APISERVER_OPTS="--enable-admission-plugins=NamespaceLifecycle,NodeRestriction,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota \
--anonymous-auth=false \
--bind-address=$(hostname -I | awk '{print $1}') \
--advertise-address=$(hostname -I | awk '{print $1}') \
--authorization-mode=Node,RBAC \
--runtime-config=api/all=true \
--enable-bootstrap-token-auth \
--service-cluster-ip-range=10.96.0.0/16 \
--token-auth-file=/etc/kubernetes/token.csv \
--service-node-port-range=30000-32767 \
--tls-cert-file=/etc/kubernetes/pki/kube-apiserver.pem \
--tls-private-key-file=/etc/kubernetes/pki/kube-apiserver-key.pem \
--client-ca-file=/etc/kubernetes/pki/ca.pem \
--kubelet-client-certificate=/etc/kubernetes/pki/kube-apiserver.pem \
--kubelet-client-key=/etc/kubernetes/pki/kube-apiserver-key.pem \
--service-account-key-file=/etc/kubernetes/pki/ca-key.pem \
--service-account-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
--service-account-issuer=api \
--etcd-cafile=/etc/kubernetes/pki/ca.pem \
--etcd-certfile=/etc/kubernetes/pki/etcd.pem \
--etcd-keyfile=/etc/kubernetes/pki/etcd-key.pem \
--etcd-servers=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
--allow-privileged=true \
--apiserver-count=3 \
--audit-log-maxage=30 \
--audit-log-maxbackup=3 \
--audit-log-maxsize=100 \
--audit-log-path=/var/log/kube-apiserver-audit.log \
--event-ttl=1h \
--v=4"
EOF

# 创建service管理文件
cat > /usr/lib/systemd/system/kube-apiserver.service << "EOF"
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=etcd.service
Wants=etcd.service
[Service]
EnvironmentFile=-/etc/kubernetes/kube-apiserver.conf
ExecStart=/usr/local/bin/kube-apiserver $KUBE_APISERVER_OPTS
Restart=on-failure
RestartSec=5
Type=notify
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF

# 启动apiserver服务
systemctl daemon-reload
systemctl enable --now kube-apiserver
# 查看状态
systemctl status kube-apiserver

# 验证访问(返回json格式code:401是正常的)
curl --insecure https://k8s-master01:6443
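
三台master的kube-apiserver都启动后,可以依次探测各节点及VIP的6443端口,确认直连和负载均衡链路都通畅(示意;由于开启了--anonymous-auth=false,返回401属于正常现象):

bash
# 逐一访问各master与VIP上的API Server
for ep in 192.168.148.191 192.168.148.192 192.168.148.193 192.168.148.190; do
    echo -n "$ep: "
    curl -k -s -o /dev/null -w "%{http_code}\n" https://$ep:6443/healthz
done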

部署 kubectl

这个操作在k8s-master01节点执行即可

创建csr证书

bash
cat > /etc/kubernetes/pki/admin-csr.json << "EOF"
{
  "CN": "admin",
  "hosts": [],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
        "C": "CN",
        "ST": "Beijing",
        "L": "Beijing",
        "O": "system:masters",
        "OU": "system"
    }
  ]
}
EOF

# 生成证书
cd /etc/kubernetes/pki/
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes admin-csr.json | cfssljson -bare admin

生成配置文件

这里需要修改VIP地址

bash
cd /etc/kubernetes/pki

kubectl config set-cluster kubernetes \
  --certificate-authority=ca.pem \
  --embed-certs=true \
  --server=https://192.168.148.190:6443 \
  --kubeconfig=kube.config

kubectl config set-credentials admin \
  --client-certificate=admin.pem \
  --client-key=admin-key.pem \
  --embed-certs=true \
  --kubeconfig=kube.config

kubectl config set-context kubernetes --cluster=kubernetes --user=admin --kubeconfig=kube.config

kubectl config use-context kubernetes --kubeconfig=kube.config

进行角色绑定

bash
mkdir ~/.kube
cp kube.config ~/.kube/config
kubectl create clusterrolebinding kube-apiserver:kubelet-apis --clusterrole=system:kubelet-api-admin --user kube-apiserver --kubeconfig=/root/.kube/config

查看集群状态

bash
export KUBECONFIG=$HOME/.kube/config
kubectl cluster-info

# 这里应该可以看到etcd-0的STATUS:Healthy
kubectl get componentstatuses
kubectl get all --all-namespaces

同步到其他master节点上

bash
# 在k8s-master02、k8s-master03节点上创建目录
ssh k8s-master02 "mkdir -p /root/.kube"
ssh k8s-master03 "mkdir -p /root/.kube"

scp /root/.kube/config k8s-master02:/root/.kube/config
scp /root/.kube/config k8s-master03:/root/.kube/config

# 查看集群状态
kubectl cluster-info

部署 kube-controller-manager

这个操作在k8s-master01节点执行即可

创建csr证书

bash
cat > /etc/kubernetes/pki/kube-controller-manager-csr.json << "EOF"
{
  "CN": "system:kube-controller-manager",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "hosts": [
    "127.0.0.1",
    "192.168.148.191",
    "192.168.148.192",
    "192.168.148.193"
  ],
  "names": [
    {
      "C": "CN",
      "ST": "Beijing",
      "L": "Beijing",
      "O": "system:kube-controller-manager",
      "OU": "system"
    }
  ]
}
EOF

#说明:hosts 列表包含所有 kube-controller-manager 节点 IP

签发证书

bash
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-controller-manager-csr.json | cfssljson -bare kube-controller-manager

创建kubeconfig

bash
cd /etc/kubernetes/pki

# 1. 设置集群配置
kubectl config set-cluster kubernetes \
  --certificate-authority=ca.pem \
  --embed-certs=true \
  --server=https://192.168.148.190:6443 \
  --kubeconfig=kube-controller-manager.kubeconfig

# 2. 设置用户凭证
kubectl config set-credentials system:kube-controller-manager \
  --client-certificate=kube-controller-manager.pem \
  --client-key=kube-controller-manager-key.pem \
  --embed-certs=true \
  --kubeconfig=kube-controller-manager.kubeconfig

# 3. 设置上下文
kubectl config set-context system:kube-controller-manager \
  --cluster=kubernetes \
  --user=system:kube-controller-manager \
  --kubeconfig=kube-controller-manager.kubeconfig

# 4. 使用上下文
kubectl config use-context system:kube-controller-manager \
  --kubeconfig=kube-controller-manager.kubeconfig

# 5. 移动配置文件
mv kube-controller-manager.kubeconfig /etc/kubernetes/

生成controller-manager配置文件

bash
cat > /etc/kubernetes/kube-controller-manager.conf << "EOF"
KUBE_CONTROLLER_MANAGER_OPTS="--secure-port=10257 \
  --bind-address=0.0.0.0 \
  --kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \
  --service-cluster-ip-range=10.96.0.0/16 \
  --cluster-name=kubernetes \
  --cluster-signing-cert-file=/etc/kubernetes/pki/ca.pem \
  --cluster-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
  --allocate-node-cidrs=true \
  --cluster-cidr=10.244.0.0/16 \
  --root-ca-file=/etc/kubernetes/pki/ca.pem \
  --service-account-private-key-file=/etc/kubernetes/pki/ca-key.pem \
  --leader-elect=true \
  --feature-gates=RotateKubeletServerCertificate=true \
  --controllers=*,bootstrapsigner,tokencleaner \
  --tls-cert-file=/etc/kubernetes/pki/kube-controller-manager.pem \
  --tls-private-key-file=/etc/kubernetes/pki/kube-controller-manager-key.pem \
  --use-service-account-credentials=true \
  --v=2"
EOF

创建service管理文件

所有master节点都执行

bash
cat > /usr/lib/systemd/system/kube-controller-manager.service << "EOF"
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=-/etc/kubernetes/kube-controller-manager.conf
ExecStart=/usr/local/bin/kube-controller-manager $KUBE_CONTROLLER_MANAGER_OPTS
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF

同步到其他master节点

bash
scp -pr /etc/kubernetes/pki/* k8s-master02:/etc/kubernetes/pki/
scp -pr /etc/kubernetes/pki/* k8s-master03:/etc/kubernetes/pki/

scp /etc/kubernetes/kube-controller-manager.kubeconfig /etc/kubernetes/kube-controller-manager.conf k8s-master02:/etc/kubernetes/
scp /etc/kubernetes/kube-controller-manager.kubeconfig /etc/kubernetes/kube-controller-manager.conf k8s-master03:/etc/kubernetes

启动

bash
systemctl daemon-reload
systemctl enable --now kube-controller-manager
systemctl status kube-controller-manager
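
启动后可以通过健康检查端口和leader选举的lease对象确认三个实例工作正常(示意;/healthz路径一般允许匿名访问):

bash
# 10257为kube-controller-manager的安全端口,返回ok即健康
curl -k https://127.0.0.1:10257/healthz
# 查看leader选举结果,HOLDER列为当前持有者
kubectl get lease -n kube-system kube-controller-manager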

部署kube-scheduler

这个操作在k8s-master01节点执行即可

创建csr证书

bash
cat > /etc/kubernetes/pki/kube-scheduler-csr.json << "EOF"
{
  "CN": "system:kube-scheduler",
  "hosts": [
    "127.0.0.1",
    "192.168.148.191",
    "192.168.148.192",
    "192.168.148.193"
  ],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Beijing",
      "L": "Beijing",
      "O": "system:kube-scheduler",
      "OU": "system"
    }
  ]
}
EOF

签发证书

bash
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-scheduler-csr.json | cfssljson -bare kube-scheduler

生成配置

bash
cd /etc/kubernetes/pki

# 设置集群配置(注意文件名统一为 kube-scheduler.kubeconfig)
kubectl config set-cluster kubernetes \
  --certificate-authority=ca.pem \
  --embed-certs=true \
  --server=https://192.168.148.190:6443 \
  --kubeconfig=kube-scheduler.kubeconfig

# 设置用户凭证
kubectl config set-credentials system:kube-scheduler \
  --client-certificate=/etc/kubernetes/pki/kube-scheduler.pem \
  --client-key=/etc/kubernetes/pki/kube-scheduler-key.pem \
  --embed-certs=true \
  --kubeconfig=kube-scheduler.kubeconfig

# 设置上下文
kubectl config set-context system:kube-scheduler \
  --cluster=kubernetes \
  --user=system:kube-scheduler \
  --kubeconfig=kube-scheduler.kubeconfig

# 使用上下文
kubectl config use-context system:kube-scheduler \
  --kubeconfig=kube-scheduler.kubeconfig
  
cat > /etc/kubernetes/kube-scheduler.conf << "EOF"
KUBE_SCHEDULER_OPTS="--kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig \
--leader-elect=true \
--v=2"
EOF

mv kube-scheduler.kubeconfig /etc/kubernetes/

创建service管理文件

所有master节点都执行

bash
cat > /usr/lib/systemd/system/kube-scheduler.service << "EOF"
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=-/etc/kubernetes/kube-scheduler.conf
ExecStart=/usr/local/bin/kube-scheduler $KUBE_SCHEDULER_OPTS
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF

同步文件至集群master节点

bash
scp /etc/kubernetes/pki/kube-scheduler*.pem k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/kube-scheduler*.pem k8s-master03:/etc/kubernetes/pki/

scp /etc/kubernetes/kube-scheduler.kubeconfig /etc/kubernetes/kube-scheduler.conf k8s-master02:/etc/kubernetes/
scp /etc/kubernetes/kube-scheduler.kubeconfig /etc/kubernetes/kube-scheduler.conf k8s-master03:/etc/kubernetes/

启动服务

bash
systemctl daemon-reload
systemctl enable --now kube-scheduler
systemctl status kube-scheduler
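
同样可以检查kube-scheduler的健康端口与leader选举情况(示意):

bash
# 10259为kube-scheduler的安全端口,返回ok即健康
curl -k https://127.0.0.1:10259/healthz
kubectl get lease -n kube-system kube-scheduler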

部署kubelet

这个操作在k8s-master01节点执行即可

创建kubeconfig

bash
cd /etc/kubernetes/pki

# 获取 bootstrap token
BOOTSTRAP_TOKEN=$(awk -F "," '{print $1}' /etc/kubernetes/token.csv)

# 设置集群配置(注意文件名统一为 kubelet-bootstrap.kubeconfig)
kubectl config set-cluster kubernetes \
  --certificate-authority=ca.pem \
  --embed-certs=true \
  --server=https://192.168.148.190:6443 \
  --kubeconfig=kubelet-bootstrap.kubeconfig


# 设置用户凭证(使用 bootstrap token)
kubectl config set-credentials kubelet-bootstrap \
  --token=${BOOTSTRAP_TOKEN} \
  --kubeconfig=kubelet-bootstrap.kubeconfig

# 设置上下文
kubectl config set-context default \
  --cluster=kubernetes \
  --user=kubelet-bootstrap \
  --kubeconfig=kubelet-bootstrap.kubeconfig

# 使用上下文
kubectl config use-context default \
  --kubeconfig=kubelet-bootstrap.kubeconfig

kubectl create clusterrolebinding cluster-system-anonymous \
  --clusterrole=cluster-admin \
  --user=kubelet-bootstrap

kubectl create clusterrolebinding kubelet-bootstrap \
  --clusterrole=system:node-bootstrapper \
  --user=kubelet-bootstrap \
  --kubeconfig=kubelet-bootstrap.kubeconfig

kubectl describe clusterrolebinding cluster-system-anonymous
kubectl describe clusterrolebinding kubelet-bootstrap

cp kubelet-bootstrap.kubeconfig /etc/kubernetes/

创建kubelet配置文件

在所有k8s节点执行

bash
cat > /etc/kubernetes/kubelet.json << EOF
{
  "kind": "KubeletConfiguration",
  "apiVersion": "kubelet.config.k8s.io/v1beta1",
  "authentication": {
    "x509": {
      "clientCAFile": "/etc/kubernetes/pki/ca.pem"
    },
    "webhook": {
      "enabled": true,
      "cacheTTL": "2m0s"
    },
    "anonymous": {
      "enabled": false
    }
  },
  "authorization": {
    "mode": "Webhook",
    "webhook": {
      "cacheAuthorizedTTL": "5m0s",
      "cacheUnauthorizedTTL": "30s"
    }
  },
  "address": "$(hostname -I | awk '{print $1}')",
  "port": 10250,
  "readOnlyPort": 10255,
  "cgroupDriver": "systemd",                    
  "hairpinMode": "promiscuous-bridge",
  "serializeImagePulls": false,
  "clusterDomain": "cluster.local.",
  "clusterDNS": ["10.96.0.2"]
}
EOF

# 说明:kubelet.json中的address已由$(hostname -I)自动填入当前主机IP;多网卡环境请确认取到的是正确地址。

创建service管理文件

bash
cat > /usr/lib/systemd/system/kubelet.service << "EOF"
[Unit]
Description=Kubernetes Kubelet
Documentation=https://github.com/kubernetes/kubernetes
After=docker.service
Requires=docker.service

[Service]
WorkingDirectory=/var/lib/kubelet
ExecStart=/usr/local/bin/kubelet \
  --bootstrap-kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig \
  --cert-dir=/etc/kubernetes/pki \
  --kubeconfig=/etc/kubernetes/kubelet.kubeconfig \
  --config=/etc/kubernetes/kubelet.json \
  --rotate-certificates \
  --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.9 \
  --container-runtime-endpoint=unix:///run/cri-dockerd.sock \
  --v=2
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF

同步到所有节点上

bash
for i in  k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do 
	scp /etc/kubernetes/kubelet-bootstrap.kubeconfig $i:/etc/kubernetes/
done

for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do
	scp -r /etc/kubernetes/pki/*  root@$i:/etc/kubernetes/pki
done

创建目录及启动服务

在所有k8s节点执行

bash
mkdir -p /var/lib/kubelet
mkdir -p /var/log/kubernetes

systemctl daemon-reload
systemctl enable --now kubelet

systemctl status kubelet

注:如果kubelet报错找不到节点,请删除kubelet旧的证书后重启kubelet。

bash
cd /etc/kubernetes/pki
rm -rf /etc/kubernetes/pki/kubelet-client-*.pem kubelet-client-current.pem kubelet.crt kubelet.key 
systemctl restart kubelet
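
kubelet启动后会使用bootstrap token向apiserver申请客户端证书;如果节点迟迟没有注册成功,可以在k8s-master01上检查是否有待批准的CSR(示意,<csr-name>需替换为实际名称):

bash
# 查看节点的证书签名请求
kubectl get csr
# 如有Pending状态的CSR,手动批准
kubectl certificate approve <csr-name>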

部署kube-proxy

创建 csr 证书

bash
cat > /etc/kubernetes/pki/kube-proxy-csr.json << "EOF"
{
  "CN": "system:kube-proxy",
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "Beijing",
      "L": "Beijing",
      "O": "kubemsb",
      "OU": "CN"
    }
  ]
}
EOF

签发证书

bash
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-proxy-csr.json | cfssljson -bare kube-proxy

创建kubeconfig文件

注意修改VIP地址

bash
cd /etc/kubernetes/pki

kubectl config set-cluster kubernetes --certificate-authority=ca.pem --embed-certs=true --server=https://192.168.148.190:6443 --kubeconfig=kube-proxy.kubeconfig

kubectl config set-credentials kube-proxy --client-certificate=kube-proxy.pem --client-key=kube-proxy-key.pem --embed-certs=true --kubeconfig=kube-proxy.kubeconfig

kubectl config set-context default --cluster=kubernetes --user=kube-proxy --kubeconfig=kube-proxy.kubeconfig

kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig

mv kube-proxy.kubeconfig /etc/kubernetes/

创建服务配置文件

在所有k8s节点执行

bash
cat > /etc/kubernetes/kube-proxy.yaml << EOF
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress:  $(hostname -I | awk '{print $1}')
clientConnection:
  kubeconfig: /etc/kubernetes/kube-proxy.kubeconfig
clusterCIDR: 10.244.0.0/16
healthzBindAddress: $(hostname -I | awk '{print $1}'):10256
kind: KubeProxyConfiguration
metricsBindAddress: $(hostname -I | awk '{print $1}'):10249
mode: "ipvs"
EOF

# 说明:kube-proxy.yaml中的IP地址已由$(hostname -I)自动填入当前主机IP,多网卡环境请自行确认。

创建service管理文件

在所有k8s节点执行

bash
cat >  /usr/lib/systemd/system/kube-proxy.service << "EOF"
[Unit]
Description=Kubernetes Kube-Proxy Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target

[Service]
WorkingDirectory=/var/lib/kube-proxy
ExecStart=/usr/local/bin/kube-proxy \
  --config=/etc/kubernetes/kube-proxy.yaml \
  --v=2
Restart=on-failure
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

同步文件到集群工作节点主机

bash
for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do
	scp /etc/kubernetes/kube-proxy.kubeconfig /etc/kubernetes/kube-proxy.yaml $i:/etc/kubernetes/
done

服务启动

bash
mkdir -p /var/lib/kube-proxy
systemctl daemon-reload
systemctl enable --now kube-proxy

systemctl status kube-proxy
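
kube-proxy以ipvs模式运行后,可以查看ipvs转发规则确认生效,此时至少应能看到kubernetes这个Service对应的10.96.0.1:443规则(示意;proxyMode端点如无输出,以ipvsadm结果为准):

bash
# 查看ipvs转发规则
ipvsadm -Ln
# 确认代理模式为ipvs
curl -s http://$(hostname -I | awk '{print $1}'):10249/proxyMode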

安装网络组件calico

在k8s-master01执行即可

下载

bash
# tigera-operator
wget https://raw.githubusercontent.com/projectcalico/calico/v3.28.1/manifests/tigera-operator.yaml

# custom-resources
wget https://raw.githubusercontent.com/projectcalico/calico/v3.28.1/manifests/custom-resources.yaml

修改文件

bash
# 修改文件第13行的cidr,改为前面kube-controller-manager中--cluster-cidr指定的Pod网段(10.244.0.0/16)
vim custom-resources.yaml
......
 11     ipPools:
 12     - blockSize: 26
 13       cidr: 10.244.0.0/16 
 14       encapsulation: VXLANCrossSubnet
......

应用文件

bash
# 应用资源清单文件
kubectl create -f tigera-operator.yaml
kubectl create -f custom-resources.yaml

# 监视calico-system命名空间中pod运行情况
watch -n 1 kubectl get pods -n calico-system
kubectl get pods -n calico-system

# 正常会出现以下结果
NAME                                      READY   STATUS    RESTARTS   AGE
calico-kube-controllers-7cc695686-pwz8w   1/1     Running   0          80s
calico-node-5vms8                         1/1     Running   0          80s
calico-node-7w2vc                         1/1     Running   0          80s
calico-typha-94dcb75c9-znrrq              1/1     Running   0          80s
csi-node-driver-9854p                     2/2     Running   0          80s
csi-node-driver-vfl9h                     2/2     Running   0          80s

部署CoreDNS

bash
cat > coredns.yaml << "EOF"
apiVersion: v1
kind: ServiceAccount
metadata:
  name: coredns
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
  name: system:coredns
rules:
  - apiGroups:
    - ""
    resources:
    - endpoints
    - services
    - pods
    - namespaces
    verbs:
    - list
    - watch
  - apiGroups:
    - discovery.k8s.io
    resources:
    - endpointslices
    verbs:
    - list
    - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
  labels:
    kubernetes.io/bootstrapping: rbac-defaults
  name: system:coredns
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:coredns
subjects:
- kind: ServiceAccount
  name: coredns
  namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: coredns
  namespace: kube-system
data:
  Corefile: |
    .:53 {
        errors
        health {
          lameduck 5s
        }
        ready
        kubernetes cluster.local  in-addr.arpa ip6.arpa {
          fallthrough in-addr.arpa ip6.arpa
        }
        prometheus :9153
        forward . /etc/resolv.conf {
          max_concurrent 1000
        }
        cache 30
        loop
        reload
        loadbalance
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: coredns
  namespace: kube-system
  labels:
    k8s-app: kube-dns
    kubernetes.io/name: "CoreDNS"
spec:
  # replicas: not specified here:
  # 1. Default is 1.
  # 2. Will be tuned in real time if DNS horizontal auto-scaling is turned on.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  selector:
    matchLabels:
      k8s-app: kube-dns
  template:
    metadata:
      labels:
        k8s-app: kube-dns
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: coredns
      tolerations:
        - key: "CriticalAddonsOnly"
          operator: "Exists"
      nodeSelector:
        kubernetes.io/os: linux
      affinity:
         podAntiAffinity:
           preferredDuringSchedulingIgnoredDuringExecution:
           - weight: 100
             podAffinityTerm:
               labelSelector:
                 matchExpressions:
                   - key: k8s-app
                     operator: In
                     values: ["kube-dns"]
               topologyKey: kubernetes.io/hostname
      containers:
      - name: coredns
        image: coredns/coredns:1.10.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            memory: 170Mi
          requests:
            cpu: 100m
            memory: 70Mi
        args: [ "-conf", "/etc/coredns/Corefile" ]
        volumeMounts:
        - name: config-volume
          mountPath: /etc/coredns
          readOnly: true
        ports:
        - containerPort: 53
          name: dns
          protocol: UDP
        - containerPort: 53
          name: dns-tcp
          protocol: TCP
        - containerPort: 9153
          name: metrics
          protocol: TCP
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            add:
            - NET_BIND_SERVICE
            drop:
            - all
          readOnlyRootFilesystem: true
        livenessProbe:
          httpGet:
            path: /health
            port: 8080
            scheme: HTTP
          initialDelaySeconds: 60
          timeoutSeconds: 5
          successThreshold: 1
          failureThreshold: 5
        readinessProbe:
          httpGet:
            path: /ready
            port: 8181
            scheme: HTTP
      dnsPolicy: Default
      volumes:
        - name: config-volume
          configMap:
            name: coredns
            items:
            - key: Corefile
              path: Corefile
---
apiVersion: v1
kind: Service
metadata:
  name: kube-dns
  namespace: kube-system
  annotations:
    prometheus.io/port: "9153"
    prometheus.io/scrape: "true"
  labels:
    k8s-app: kube-dns
    kubernetes.io/cluster-service: "true"
    kubernetes.io/name: "CoreDNS"
spec:
  selector:
    k8s-app: kube-dns
  clusterIP: 10.96.0.2
  ports:
  - name: dns
    port: 53
    protocol: UDP
  - name: dns-tcp
    port: 53
    protocol: TCP
  - name: metrics
    port: 9153
    protocol: TCP
 
EOF


kubectl apply -f coredns.yaml

验证集群可用性

查看各个节点

bash
[root@k8s-master01 ~]# kubectl get nodes
NAME           STATUS   ROLES    AGE   VERSION
k8s-master01   Ready    <none>   33m   v1.31.0
k8s-master02   Ready    <none>   33m   v1.31.0
k8s-master03   Ready    <none>   33m   v1.31.0
k8s-worker01   Ready    <none>   33m   v1.31.0
k8s-worker02   Ready    <none>   33m   v1.31.0

查看系统内各个资源运行情况

bash
[root@k8s-master01 ~]# kubectl get pod -A
NAMESPACE          NAME                                       READY   STATUS    RESTARTS   AGE
calico-apiserver   calico-apiserver-87b55f54c-r88g8           1/1     Running   0          12m
calico-apiserver   calico-apiserver-87b55f54c-vsq87           1/1     Running   0          12m
calico-system      calico-kube-controllers-569c6fc6c4-vv72m   1/1     Running   0          31m
calico-system      calico-node-c9ffb                          1/1     Running   0          31m
calico-system      calico-node-df57w                          1/1     Running   0          31m
calico-system      calico-node-fmxfw                          1/1     Running   0          31m
calico-system      calico-node-lh96h                          1/1     Running   0          31m
calico-system      calico-node-vhwbl                          1/1     Running   0          31m
calico-system      calico-typha-65d7bb9f55-4b22f              1/1     Running   0          31m
calico-system      calico-typha-65d7bb9f55-rt9wp              1/1     Running   0          31m
calico-system      calico-typha-65d7bb9f55-z959b              1/1     Running   0          31m
calico-system      csi-node-driver-bklrt                      2/2     Running   0          31m
calico-system      csi-node-driver-jc4nk                      2/2     Running   0          31m
calico-system      csi-node-driver-jnmj4                      2/2     Running   0          31m
calico-system      csi-node-driver-r865b                      2/2     Running   0          31m
calico-system      csi-node-driver-stjd7                      2/2     Running   0          31m
kube-system        coredns-5d44bf67b6-f8gqr                   1/1     Running   0          65s
tigera-operator    tigera-operator-55748b469f-x8fcl           1/1     Running   0          31m

查看网络是否可用

bash
[root@k8s-master01 ~]# dig -t -a www.baidu.com @10.96.0.2
;; Warning, ignoring invalid type -a

; <<>> DiG 9.18.21 <<>> -t -a www.baidu.com @10.96.0.2
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 16820
;; flags: qr rd ra; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 1232
; COOKIE: 548d065d6d7403e9 (echoed)
;; QUESTION SECTION:
;www.baidu.com.			IN	A

;; ANSWER SECTION:
www.baidu.com.		5	IN	A	39.156.70.239
www.baidu.com.		5	IN	A	39.156.70.46

;; Query time: 28 msec
;; SERVER: 10.96.0.2#53(10.96.0.2) (UDP)
;; WHEN: Sun Aug 10 17:10:00 CST 2025
;; MSG SIZE  rcvd: 112

启动一个资源

bash
cat > nginx.yaml  << "EOF"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-web
spec:
  replicas: 2
  selector:
    matchLabels:
      name: nginx
  template:
    metadata:
      labels:
        name: nginx
    spec:
      containers:
        - name: nginx
          image: nginx:1.19.6
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: nginx-service-nodeport
spec:
  ports:
    - port: 80
      targetPort: 80
      nodePort: 30001
      protocol: TCP
  type: NodePort
  selector:
    name: nginx

EOF
kubectl apply -f nginx.yaml
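
资源创建后,可以确认Pod运行情况,并通过任一节点IP的NodePort访问nginx(示意,这里以k8s-worker01为例):

bash
# 查看Pod与Service状态
kubectl get pods -l name=nginx -o wide
kubectl get svc nginx-service-nodeport
# 通过NodePort访问,应返回nginx欢迎页
curl -s http://192.168.148.194:30001 | grep -i "welcome to nginx"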

Kubernetes 高可用测试

API Server 故障转移

bash
# 在任意节点测试 VIP 的 API Server 可用性
curl -k https://192.168.148.190:6443/healthz

# 随机停止一个 master 节点的 API Server(如 master01)
systemctl stop kube-apiserver

# 再次测试 VIP(应自动切换到其他 master)
curl -k https://192.168.148.190:6443/healthz  # 观察响应时间变化

etcd 集群容错

bash
# 检查 etcd 集群状态(在任一 master 执行)
ETCDCTL_API=3 /usr/local/bin/etcdctl \
--write-out=table \
--cacert=/etc/kubernetes/pki/ca.pem \
--cert=/etc/kubernetes/pki/etcd.pem \
--key=/etc/kubernetes/pki/etcd-key.pem \
--endpoints=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
endpoint health

# 模拟 etcd 节点故障(如停止 master03 的 etcd)
systemctl stop etcd

# 验证集群操作(应仍可正常工作)
kubectl create deployment nginx --image=nginx

Keepalived 主备切换

bash
# 查看当前 VIP 绑定(应在 ha-master 或 ha-backup)
ssh k8s-ha-master "ip addr show | grep 192.168.148.190"

# 手动停止主节点 keepalived
ssh k8s-ha-master "sudo systemctl stop keepalived"

# 检查 VIP 是否漂移到备节点(应 10 秒内完成)
ssh k8s-ha-backup "ip addr show | grep 192.168.148.190"

HAProxy 流量分发

bash
# 检查 HAProxy 后端状态(需已安装socat,且haproxy.cfg的global段已配置stats socket)
ssh k8s-ha-master "echo 'show stat' | sudo socat /var/run/haproxy.sock stdio"

# 模拟后端故障(如关闭 master02 的 API Server)
ssh k8s-master02 "sudo systemctl stop kube-apiserver"

# 观察 HAProxy 自动剔除故障节点
watch -n 1 'ssh k8s-ha-master "echo show stat | sudo socat /var/run/haproxy.sock stdio | grep k8s-master02"'