Kubernetes High-Availability Cluster
This walkthrough is based entirely on the openEuler 24.03 (LTS) operating system.
Environment Preparation
Host preparation
| Host IP | Hostname | CPU/RAM (min.) | Storage (min.) | Role | Software |
|---|---|---|---|---|---|
| 192.168.148.191 | k8s-master01 | 2C4G | 40G | master | kube-apiserver、kube-controller-manager、kube-scheduler、etcd、kubelet、kube-proxy、docker-ce |
| 192.168.148.192 | k8s-master02 | 2C4G | 40G | master | kube-apiserver、kube-controller-manager、kube-scheduler、etcd、kubelet、kube-proxy、docker-ce |
| 192.168.148.193 | k8s-master03 | 2C4G | 40G | master | kube-apiserver、kube-controller-manager、kube-scheduler、etcd、kubelet、kube-proxy、docker-ce |
| 192.168.148.194 | k8s-worker01 | 1C2G | 40G | worker | kubelet、kube-proxy、docker-ce |
| 192.168.148.195 | k8s-worker02 | 1C2G | 40G | worker | kubelet、kube-proxy、docker-ce |
| 192.168.148.196 | k8s-ha-master | 1C1G | 20G | LB | haproxy、keepalived |
| 192.168.148.197 | k8s-ha-backup | 1C1G | 20G | LB | haproxy、keepalived |
| 192.168.148.190 | - | - | - | - | VIP |
Kubernetes network layout
| Network type | CIDR | Purpose |
|---|---|---|
| Node network | 192.168.148.0/24 | Physical host network carrying SSH, API Server access, and other node-to-node traffic |
| Service network | 10.96.0.0/16 | ClusterIP virtual IP range; kube-proxy uses it for in-cluster service discovery and load balancing |
| Pod network | 10.244.0.0/16 | Container network managed by the CNI plugin (e.g. Calico/Flannel); each Pod gets its own IP address |
Disk expansion
pvcreate /dev/sdb
vgextend openeuler /dev/sdb
lvextend -l +100%FREE /dev/mapper/openeuler-root
resize2fs /dev/mapper/openeuler-root   # for an ext4 root filesystem; use xfs_growfs if the root is XFS
Configure hostnames and hosts
# Set the hostname: run the matching command on each host
hostnamectl set-hostname k8s-master01 && exec bash
hostnamectl set-hostname k8s-master02 && exec bash
hostnamectl set-hostname k8s-master03 && exec bash
hostnamectl set-hostname k8s-worker01 && exec bash
hostnamectl set-hostname k8s-worker02 && exec bash
hostnamectl set-hostname k8s-ha-master && exec bash
hostnamectl set-hostname k8s-ha-backup && exec bash
# Populate /etc/hosts: run on all cluster hosts
cat >> /etc/hosts << EOF
192.168.148.191 k8s-master01
192.168.148.192 k8s-master02
192.168.148.193 k8s-master03
192.168.148.194 k8s-worker01
192.168.148.195 k8s-worker02
192.168.148.196 k8s-ha-master
192.168.148.197 k8s-ha-backup
EOF
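Before moving on it is worth confirming that every hostname in /etc/hosts resolves and answers. A minimal check, assuming all seven hosts are already online:
for h in k8s-master01 k8s-master02 k8s-master03 k8s-worker01 k8s-worker02 k8s-ha-master k8s-ha-backup; do
ping -c 1 -W 1 "$h" >/dev/null && echo "$h OK" || echo "$h FAILED"
done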
Disable the Firewall and SELinux
Run on all hosts, both the k8s and the HA nodes.
# Put SELinux into permissive mode for the current boot
setenforce 0
# Disable it permanently (takes effect after a reboot)
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
# Disable the firewall
systemctl disable firewalld --now
Disable Swap
Run on all k8s cluster hosts.
# Turn swap off for the current boot
swapoff -a && sysctl -w vm.swappiness=0
# Disable it permanently
sed -ri 's/.*swap.*/#&/' /etc/fstab
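A quick way to confirm swap is really off; both commands should report no active swap devices:
swapon --show
free -h | grep -i swap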
Raise the Open-File Limit (ulimit)
Run on all hosts, both the k8s and the HA nodes.
# Effective for the current session
ulimit -SHn 65535
# Permanent
vi /etc/security/limits.conf
# Append the following lines (the hard limit must be at least as large as the soft limit)
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
Passwordless SSH
Run on all hosts, both the k8s and the HA nodes; mutual trust is required, so repeat these steps on every host.
# Generate the key pair
ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N ""
# Install sshpass
dnf -y install sshpass
# Push the public key to every target host (replace "123456" with the real root password)
for i in k8s-master01 k8s-master02 k8s-master03 k8s-worker01 k8s-worker02 k8s-ha-master k8s-ha-backup;do
sshpass -p "123456" ssh-copy-id -i ~/.ssh/id_rsa.pub -o StrictHostKeyChecking=no root@$i
done
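Optionally verify the passwordless trust before continuing; this sketch simply runs hostname on every peer and should not prompt for a password:
for i in k8s-master01 k8s-master02 k8s-master03 k8s-worker01 k8s-worker02 k8s-ha-master k8s-ha-backup;do
ssh -o BatchMode=yes root@$i hostname
done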
Install IPVS
Run on all hosts.
# Install the packages
dnf install ipvsadm ipset sysstat conntrack libseccomp -y
# Write the IPVS module list so it can be loaded on demand
cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
# Make the script executable, run it, and check that the modules are loaded
chmod 755 /etc/sysconfig/modules/ipvs.modules
/etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack
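Note that on a systemd-based system the /etc/sysconfig/modules/ scripts are typically not executed automatically at boot. If you want the modules guaranteed to load after a reboot, a systemd-modules-load drop-in is an alternative to the script above (not part of the original flow); br_netfilter is included here because the net.bridge.* sysctls in the next step require it:
cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
br_netfilter
EOF
systemctl restart systemd-modules-load
lsmod | grep -e ip_vs -e nf_conntrack -e br_netfilter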
Kernel Tuning
Run on all nodes.
# /etc/sysctl.d/k8s.conf
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
net.ipv4.conf.all.route_localnet = 1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16768
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16768
EOF
# Also enable IPv4 forwarding in /etc/sysctl.conf (optional; k8s.conf above already sets it)
sed -i.bak 's/net.ipv4.ip_forward=0/net.ipv4.ip_forward=1/' /etc/sysctl.conf
# Apply the kernel parameters
sysctl --system
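If sysctl --system complains that the net.bridge.* keys are unknown, the br_netfilter module is not loaded yet; load it, re-apply, and spot-check the values that matter most for Kubernetes networking:
modprobe br_netfilter
sysctl --system
sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables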
Install Common Tools (optional)
Run on all nodes.
dnf install wget jq psmisc vim net-tools telnet lvm2 git lrzsz -y
Create Directories
Run on all nodes.
mkdir -p /etc/kubernetes/pki
Install HAProxy and Keepalived
Install the software
Run only on k8s-ha-master and k8s-ha-backup; these two hosts carry the VIP.
dnf -y install haproxy keepalived
# Create an unprivileged service user for the keepalived check script
useradd -Ms /sbin/nologin keepalived
HAProxy Configuration
Run only on k8s-ha-master and k8s-ha-backup.
cat > /etc/haproxy/haproxy.cfg << "EOF"
global
    maxconn 2000
    ulimit-n 16384
    log 127.0.0.1:514 local0 info
    stats timeout 30s
    # admin socket used by the "show stat" HA tests at the end of this guide
    stats socket /var/run/haproxy.sock mode 600 level admin
    tune.ssl.default-dh-param 2048
defaults
    log global
    mode http
    option httplog
    timeout connect 5000
    timeout client 50000
    timeout server 50000
    timeout http-request 15s
    timeout http-keep-alive 15s
frontend monitor-in
    bind *:33305
    mode http
    option httplog
    stats enable
    stats uri /monitor
    stats auth admin:SecurePassword123!
frontend k8s-master
    bind 0.0.0.0:6443
    bind 127.0.0.1:6443
    mode tcp
    option tcplog
    tcp-request inspect-delay 5s
    tcp-request content accept if { req.ssl_hello_type 1 }
    default_backend k8s-master
backend k8s-master
    mode tcp
    option tcplog
    option tcp-check
    option tcp-smart-accept
    balance roundrobin
    server k8s-master01 192.168.148.191:6443 check inter 10s rise 2 fall 2 maxconn 250
    server k8s-master02 192.168.148.192:6443 check inter 10s rise 2 fall 2 maxconn 250
    server k8s-master03 192.168.148.193:6443 check inter 10s rise 2 fall 2 maxconn 250
EOF
Keepalived Configuration
k8s-ha-master
cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived
global_defs {
    router_id KUBERNETES_HA        # identifies the virtual router group; same on master and backup
    script_user keepalived
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
    user keepalived
}
vrrp_instance VI_1 {
    state MASTER                   # primary node
    interface ens32
    mcast_src_ip 192.168.148.196   # IP of this node
    virtual_router_id 51           # must match across the cluster
    priority 100                   # range 1-255; higher than the backup
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        192.168.148.190            # the VIP
    }
    track_script {
        chk_apiserver
    }
}
EOF
k8s-ha-backup
cat >/etc/keepalived/keepalived.conf<<"EOF"
! Configuration File for keepalived
global_defs {
    router_id KUBERNETES_HA        # identifies the virtual router group; same on master and backup
    script_user keepalived
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
    user keepalived
}
vrrp_instance VI_1 {
    state BACKUP                   # backup node
    interface ens32
    mcast_src_ip 192.168.148.197   # IP of this node
    virtual_router_id 51           # must match across the cluster
    priority 99                    # range 1-255; lower than the master
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        192.168.148.190            # the VIP
    }
    track_script {
        chk_apiserver
    }
}
EOF
Health-Check Script
Run only on k8s-ha-master and k8s-ha-backup. Because the script runs as the unprivileged keepalived user, the systemctl stop below may be denied; failover then still happens through the weight -5 priority adjustment of chk_apiserver.
cat > /etc/keepalived/check_apiserver.sh <<"EOF"
#!/bin/bash
err=0
for k in $(seq 1 3); do
if ! systemctl is-active haproxy >/dev/null 2>&1; then
err=$((err + 1))
sleep 1
continue
else
err=0
break
fi
done
if [ "$err" -ge 3 ]; then
systemctl stop keepalived
exit 1
else
exit 0
fi
EOF
chown keepalived:keepalived /etc/keepalived/check_apiserver.sh
chmod +x /etc/keepalived/check_apiserver.sh
Start the Services
Run only on k8s-ha-master and k8s-ha-backup.
systemctl daemon-reload
systemctl enable --now haproxy
systemctl enable --now keepalived
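Once both services are up, a few quick checks (a sketch; ens32 and the VIP come from the keepalived configuration above) confirm that the HAProxy configuration is valid, the health-check script passes, and the VIP landed on the MASTER node:
haproxy -c -f /etc/haproxy/haproxy.cfg
sudo -u keepalived /etc/keepalived/check_apiserver.sh && echo "check_apiserver: OK"
systemctl is-active haproxy keepalived
ip addr show ens32 | grep 192.168.148.190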
Building the Kubernetes Cluster
Install Docker
Install the packages
Run on all k8s hosts.
# Add the Aliyun docker-ce repository
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Replace $releasever with a release number the CentOS repo layout recognises
sed -i 's+$releasever+9+' /etc/yum.repos.d/docker-ce.repo
# Install docker-ce and its dependencies
dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
Docker Configuration File
cat > /etc/docker/daemon.json << EOF
{
"data-root":"/data/docker",
"registry-mirrors":[
"https://uynf11ba.mirror.aliyuncs.com",
"https://docker.m.daocloud.io",
"https://noohub.ru",
"https://huecker.io",
"https://dockerhub.timeweb.cloud"
],
"bip": "172.19.0.1/16",
"iptables": true,
"ip-masq": true,
"exec-opts":["native.cgroupdriver=systemd"],
"live-restore":true,
"ipv6": false
}
EOF
# Create the Docker data directory
mkdir /data/docker -p
# Start Docker
systemctl daemon-reload
systemctl enable docker --now
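Before wiring the kubelet to Docker, confirm the daemon picked up the systemd cgroup driver and the custom data root from daemon.json:
docker info | grep -E "Cgroup Driver|Docker Root Dir"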
Install cri-dockerd
CAUTION
Kubernetes 1.24+ removed dockershim entirely, so even with Docker installed the kubelet can only reach it through the cri-dockerd adapter; that is what we install here so Docker can remain the container engine.
Install on all k8s hosts.
# Download
wget https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.14/cri-dockerd-0.3.14-3.el7.x86_64.rpm
# Install
dnf -y install ./cri-dockerd-0.3.14-3.el7.x86_64.rpm
# Extend ExecStart so the pause image is pulled from the Aliyun registry
sed -i 's,^ExecStart.*,& --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.9,' /usr/lib/systemd/system/cri-docker.service
# Start cri-docker
systemctl daemon-reload
systemctl enable cri-docker --now
systemctl status cri-docker
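The kubelet will later be pointed at the cri-dockerd socket, so it is worth confirming the service is active and the socket exists:
systemctl is-active cri-docker cri-docker.socket
ls -l /run/cri-dockerd.sock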
Certificate Tools
Certificates are generated on the k8s-master01 node only.
Download the certificate tools
# Download cfssl
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssl_1.6.4_linux_amd64
chmod +x cfssl_1.6.4_linux_amd64
mv cfssl_1.6.4_linux_amd64 /usr/local/bin/cfssl
# Download cfssljson
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssljson_1.6.4_linux_amd64
chmod +x cfssljson_1.6.4_linux_amd64
mv cfssljson_1.6.4_linux_amd64 /usr/local/bin/cfssljson
CA Root Configuration
mkdir -p /etc/kubernetes/pki
cat > /etc/kubernetes/pki/ca-config.json <<"EOF"
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"kubernetes": {
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
],
"expiry": "87600h"
}
}
}
}
EOF
CA Signing Request
CSR stands for Certificate Signing Request.
cat > /etc/kubernetes/pki/ca-csr.json <<"EOF"
{
"CN": "kubernetes",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Beijing",
"L": "Beijing",
"O": "kubemsb",
"OU": "CN"
}
],
"ca": {
"expiry": "87600h"
}
}
EOF
- CN (Common Name): required; typically the site domain name.
- O (Organization): required; for OV/EV certificates it must exactly match the legally registered company name, with no abbreviations or trademarks. An English name requires a DUNS number or an attorney letter as proof.
- OU (Organization Unit): the department; largely unrestricted, e.g. "IT DEPT".
- L (Locality): the city where the applicant is located.
- ST (State/Province): the province where the applicant is located.
- C (Country Name): the two-letter country code; CN for China.
Generate the certificate
cd /etc/kubernetes/pki
cfssl gencert -initca ca-csr.json | cfssljson -bare ca
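The resulting ca.pem and ca-key.pem sign every later certificate; a quick inspection of the subject and validity period:
openssl x509 -in /etc/kubernetes/pki/ca.pem -noout -subject -dates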
etcd High-Availability Setup
etcd documentation
- etcd demo: https://etcd.io/docs/v3.4/demo/ — walk through these examples to learn basic etcd usage
- etcd sizing: https://etcd.io/docs/v3.4/dl-build/ — size the cluster according to the etcd capacity guidelines
- etcd deployment: https://etcd.io/docs/v3.4/op-guide/ — reference for etcd configuration and cluster deployment
Download etcd
Install on all master nodes (k8s-master01/02/03).
# Download the etcd tarball that will be distributed to every master node
wget https://github.com/etcd-io/etcd/releases/download/v3.5.16/etcd-v3.5.16-linux-amd64.tar.gz
etcd Certificates
Hardware reference: https://etcd.io/docs/next/op-guide/hardware/#small-cluster
Generate on k8s-master01 and then sync to the other master nodes; adjust the master IP addresses to your environment.
cat > /etc/kubernetes/pki/etcd-csr.json <<"EOF"
{
"CN": "etcd",
"key": {
"algo": "rsa",
"size": 2048
},
"hosts": [
"127.0.0.1",
"k8s-master01",
"k8s-master02",
"k8s-master03",
"192.168.148.191",
"192.168.148.192",
"192.168.148.193"
],
"names": [
{
"C": "CN",
"L": "beijing",
"O": "etcd",
"ST": "beijing",
"OU": "System"
}
]
}
EOF
# Generate the certificate
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes etcd-csr.json | cfssljson -bare etcd
# Copy the generated etcd certificates to the other master nodes
# (the directory /etc/kubernetes/pki/ must already exist on the other two masters)
mkdir -p /etc/kubernetes/pki/
for i in k8s-master02 k8s-master03;
do
scp -r /etc/kubernetes/pki/* root@$i:/etc/kubernetes/pki
done
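Before starting etcd it is worth confirming that the certificate carries every master hostname and IP in its Subject Alternative Names:
openssl x509 -in /etc/kubernetes/pki/etcd.pem -noout -text | grep -A1 "Subject Alternative Name"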
Install etcd
tar -xf etcd-v3.5.16-linux-amd64.tar.gz
cp -p etcd-v3.5.16-linux-amd64/etcd* /usr/local/bin/
# Distribute the etcd binaries to the other master nodes
scp etcd-v3.5.16-linux-amd64/etcd* k8s-master02:/usr/local/bin/
scp etcd-v3.5.16-linux-amd64/etcd* k8s-master03:/usr/local/bin/
etcd configuration reference: https://etcd.io/docs/v3.4/op-guide/configuration/
etcd clustering guide: https://etcd.io/docs/v3.4/op-guide/clustering/
etcd configuration
k8s-master01
Run on the k8s-master01 node; adjust the IP addresses to your environment.
mkdir -p /etc/etcd
mkdir -p /data/etcd
cat > /etc/etcd/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd1"
ETCD_DATA_DIR="/data/etcd"
ETCD_LISTEN_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_LISTEN_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379,http://127.0.0.1:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_ADVERTISE_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379"
ETCD_INITIAL_CLUSTER="etcd1=https://192.168.148.191:2380,etcd2=https://192.168.148.192:2380,etcd3=https://192.168.148.193:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
k8s-master02
Run on the k8s-master02 node.
mkdir -p /etc/etcd
mkdir -p /data/etcd
cat > /etc/etcd/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd2"
ETCD_DATA_DIR="/data/etcd"
ETCD_LISTEN_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_LISTEN_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379,http://127.0.0.1:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_ADVERTISE_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379"
ETCD_INITIAL_CLUSTER="etcd1=https://192.168.148.191:2380,etcd2=https://192.168.148.192:2380,etcd3=https://192.168.148.193:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
k8s-master03
Run on the k8s-master03 node.
mkdir -p /etc/etcd
mkdir -p /data/etcd
cat > /etc/etcd/etcd.conf << EOF
#[Member]
ETCD_NAME="etcd3"
ETCD_DATA_DIR="/data/etcd"
ETCD_LISTEN_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_LISTEN_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379,http://127.0.0.1:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://$(hostname -I | awk '{print $1}'):2380"
ETCD_ADVERTISE_CLIENT_URLS="https://$(hostname -I | awk '{print $1}'):2379"
ETCD_INITIAL_CLUSTER="etcd1=https://192.168.148.191:2380,etcd2=https://192.168.148.192:2380,etcd3=https://192.168.148.193:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
Field descriptions
- ETCD_NAME: node name, unique within the cluster
- ETCD_DATA_DIR: data directory
- ETCD_LISTEN_PEER_URLS: listen address for cluster (peer) traffic
- ETCD_LISTEN_CLIENT_URLS: listen address for client traffic
- ETCD_INITIAL_ADVERTISE_PEER_URLS: peer address advertised to the cluster
- ETCD_ADVERTISE_CLIENT_URLS: client address advertised to clients
- ETCD_INITIAL_CLUSTER: the list of cluster members
- ETCD_INITIAL_CLUSTER_TOKEN: cluster token
- ETCD_INITIAL_CLUSTER_STATE: state when joining; "new" for a new cluster, "existing" to join an existing one
Configure the systemd service
Run on all k8s-master nodes.
cat > /usr/lib/systemd/system/etcd.service <<"EOF"
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
EnvironmentFile=-/etc/etcd/etcd.conf
WorkingDirectory=/data/etcd
ExecStart=/usr/local/bin/etcd \
--cert-file=/etc/kubernetes/pki/etcd.pem \
--key-file=/etc/kubernetes/pki/etcd-key.pem \
--trusted-ca-file=/etc/kubernetes/pki/ca.pem \
--peer-cert-file=/etc/kubernetes/pki/etcd.pem \
--peer-key-file=/etc/kubernetes/pki/etcd-key.pem \
--peer-trusted-ca-file=/etc/kubernetes/pki/ca.pem \
--peer-client-cert-auth \
--client-cert-auth
Restart=on-failure
RestartSec=5
StandardOutput=syslog
StandardError=syslog
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
# Reload systemd and enable etcd at boot
systemctl daemon-reload
systemctl enable --now etcd
# If startup fails, troubleshoot with journalctl -u <service>
journalctl -u etcd
Test etcd Access
# List the cluster members (works locally because etcd also listens on http://127.0.0.1:2379)
etcdctl member list
# Check cluster health; the columns of the table printed by the command below mean:
- ENDPOINT: the address of the etcd member.
- HEALTH: the member's health; true means healthy, false means unhealthy.
- TOOK: how long the health-check request took.
- ERROR: the error message if a member is unhealthy.
ETCDCTL_API=3 /usr/local/bin/etcdctl \
--write-out=table \
--cacert=/etc/kubernetes/pki/ca.pem \
--cert=/etc/kubernetes/pki/etcd.pem \
--key=/etc/kubernetes/pki/etcd-key.pem \
--endpoints=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
endpoint health
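The endpoint status subcommand is also useful here: it prints a per-member table that includes which member currently holds the leader role (same flags as above):
ETCDCTL_API=3 /usr/local/bin/etcdctl \
--write-out=table \
--cacert=/etc/kubernetes/pki/ca.pem \
--cert=/etc/kubernetes/pki/etcd.pem \
--key=/etc/kubernetes/pki/etcd-key.pem \
--endpoints=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
endpoint status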
Downloading the Kubernetes Packages
K8s offline packages
Find the download links in the CHANGELOG for the target version at https://github.com/kubernetes/kubernetes
Download on k8s-master01.
wget https://dl.k8s.io/v1.31.0/kubernetes-server-linux-amd64.tar.gz
for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do
scp kubernetes-server-linux-amd64.tar.gz root@$i:/root/
done
Master node preparation
Extract kubelet, kubectl, and the other control-plane binaries to /usr/local/bin on every master node.
tar -xf kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}
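A quick version check confirms the binaries extracted correctly and are on the PATH:
kubectl version --client
kubelet --version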
Worker node preparation
Masters need all components; worker nodes only need kubelet and kube-proxy in /usr/local/bin.
tar -xf kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,-proxy}
Deploy kube-apiserver
Run on k8s-master01, then sync to the other master nodes.
Create the CSR
# 10.96.0.1 is the first IP of the Service network; customise it if you use a different range (e.g. 66.66.0.1)
# 192.168.148.190 is the high-availability VIP
# Note:
# If the hosts field is not empty, it must list every IP (including the VIP) and domain name allowed to use this
# certificate. Because the whole cluster uses it, include all node IPs, plus a few spare IPs for future expansion,
# and the first IP of the Service network (the first address of the kube-apiserver service-cluster-ip-range, e.g. 10.96.0.1).
cat > /etc/kubernetes/pki/kube-apiserver-csr.json << "EOF"
{
"CN": "kube-apiserver",
"hosts": [
"10.96.0.1",
"127.0.0.1",
"192.168.148.190",
"192.168.148.191",
"192.168.148.192",
"192.168.148.193",
"192.168.148.194",
"192.168.148.195",
"kubernetes",
"kubernetes.default",
"kubernetes.default.svc",
"kubernetes.default.svc.cluster",
"kubernetes.default.svc.cluster.local"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "BeiJing",
"ST": "BeiJing",
"O": "Kubernetes",
"OU": "Kubernetes"
}
]
}
EOF
# Generate the certificate
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-apiserver-csr.json | cfssljson -bare kube-apiserver
# Sync to the other master nodes
for i in k8s-master02 k8s-master03;do
scp -r /etc/kubernetes/pki/* root@$i:/etc/kubernetes/pki
done
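Before syncing, you can confirm the apiserver certificate carries the VIP, every master IP, and the first Service IP in its SANs:
openssl x509 -in /etc/kubernetes/pki/kube-apiserver.pem -noout -text | grep -A1 "Subject Alternative Name"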
Generate the bootstrap token
Run on k8s-master01, then sync to the other master nodes.
# Note:
# This creates the token used for TLS bootstrapping. Once the apiserver enables TLS, kubelet and kube-proxy must
# present CA-signed client certificates to talk to it. Issuing those certificates by hand for every node is a lot
# of work and complicates scaling, so Kubernetes provides TLS bootstrapping: the kubelet starts with a low-privilege
# bootstrap token, requests a certificate from the apiserver, and the certificate is signed dynamically. This is
# strongly recommended for kubelet; kube-proxy still uses a certificate we issue ourselves.
cat > /etc/kubernetes/token.csv << EOF
$(head -c 16 /dev/urandom | od -An -t x | tr -d ' '),kubelet-bootstrap,10001,"system:kubelet-bootstrap"
EOF
# Sync to the other master nodes
for i in k8s-master02 k8s-master03;
do
scp -r /etc/kubernetes/token.csv root@$i:/etc/kubernetes
done
Configure the kube-apiserver service
- Create kube-apiserver.service on all master nodes.
- This document uses 10.96.0.0/16 as the Service network; it must not overlap with the host network or the Pod network.
- Note in particular that the default Docker bridge is 172.17.0.1/16; do not reuse that range.
Run on all k8s-master nodes.
cat > /etc/kubernetes/kube-apiserver.conf << EOF
KUBE_APISERVER_OPTS="--enable-admission-plugins=NamespaceLifecycle,NodeRestriction,LimitRanger,ServiceAccount,DefaultStorageClass,ResourceQuota \
--anonymous-auth=false \
--bind-address=$(hostname -I | awk '{print $1}') \
--advertise-address=$(hostname -I | awk '{print $1}') \
--authorization-mode=Node,RBAC \
--runtime-config=api/all=true \
--enable-bootstrap-token-auth \
--service-cluster-ip-range=10.96.0.0/16 \
--token-auth-file=/etc/kubernetes/token.csv \
--service-node-port-range=30000-32767 \
--tls-cert-file=/etc/kubernetes/pki/kube-apiserver.pem \
--tls-private-key-file=/etc/kubernetes/pki/kube-apiserver-key.pem \
--client-ca-file=/etc/kubernetes/pki/ca.pem \
--kubelet-client-certificate=/etc/kubernetes/pki/kube-apiserver.pem \
--kubelet-client-key=/etc/kubernetes/pki/kube-apiserver-key.pem \
--service-account-key-file=/etc/kubernetes/pki/ca-key.pem \
--service-account-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
--service-account-issuer=api \
--etcd-cafile=/etc/kubernetes/pki/ca.pem \
--etcd-certfile=/etc/kubernetes/pki/etcd.pem \
--etcd-keyfile=/etc/kubernetes/pki/etcd-key.pem \
--etcd-servers=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
--allow-privileged=true \
--apiserver-count=3 \
--audit-log-maxage=30 \
--audit-log-maxbackup=3 \
--audit-log-maxsize=100 \
--audit-log-path=/var/log/kube-apiserver-audit.log \
--event-ttl=1h \
--v=4"
EOF
# Create the systemd service unit
cat > /usr/lib/systemd/system/kube-apiserver.service << "EOF"
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=etcd.service
Wants=etcd.service
[Service]
EnvironmentFile=-/etc/kubernetes/kube-apiserver.conf
ExecStart=/usr/local/bin/kube-apiserver $KUBE_APISERVER_OPTS
Restart=on-failure
RestartSec=5
Type=notify
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
# Start the kube-apiserver service
systemctl daemon-reload
systemctl enable --now kube-apiserver
# Check the status
systemctl status kube-apiserver
# Verify access (a JSON response with code 401 is expected for an unauthenticated request)
curl --insecure https://k8s-master01:6443
Deploy kubectl
Run on the k8s-master01 node only.
Create the CSR
cat > /etc/kubernetes/pki/admin-csr.json << "EOF"
{
"CN": "admin",
"hosts": [],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Beijing",
"L": "Beijing",
"O": "system:masters",
"OU": "system"
}
]
}
EOF
# Generate the certificate
cd /etc/kubernetes/pki/
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes admin-csr.json | cfssljson -bare admin
Generate the kubeconfig
Change the VIP address here if yours differs.
cd /etc/kubernetes/pki
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=https://192.168.148.190:6443 \
--kubeconfig=kube.config
kubectl config set-credentials admin \
--client-certificate=admin.pem \
--client-key=admin-key.pem \
--embed-certs=true \
--kubeconfig=kube.config
kubectl config set-context kubernetes --cluster=kubernetes --user=admin --kubeconfig=kube.config
kubectl config use-context kubernetes --kubeconfig=kube.config
Create the role binding
mkdir ~/.kube
cp kube.config ~/.kube/config
# Grant the apiserver's kubelet client certificate (CN "kube-apiserver") access to the kubelet API; needed for kubectl logs/exec
kubectl create clusterrolebinding kube-apiserver:kubelet-apis --clusterrole=system:kubelet-api-admin --user kube-apiserver --kubeconfig=/root/.kube/config
Check cluster status
export KUBECONFIG=$HOME/.kube/config
kubectl cluster-info
# etcd-0 should report STATUS Healthy here
kubectl get componentstatuses
kubectl get all --all-namespaces
Sync to the other master nodes
# Create the directory on k8s-master02 and k8s-master03 first
mkdir ~/.kube
scp /root/.kube/config k8s-master02:/root/.kube/config
scp /root/.kube/config k8s-master03:/root/.kube/config
# Check cluster status from those nodes
kubectl cluster-info
Deploy kube-controller-manager
Run on the k8s-master01 node only.
Create the CSR
cat > /etc/kubernetes/pki/kube-controller-manager-csr.json << "EOF"
{
"CN": "system:kube-controller-manager",
"key": {
"algo": "rsa",
"size": 2048
},
"hosts": [
"127.0.0.1",
"192.168.148.191",
"192.168.148.192",
"192.168.148.193"
],
"names": [
{
"C": "CN",
"ST": "Beijing",
"L": "Beijing",
"O": "system:kube-controller-manager",
"OU": "system"
}
]
}
EOF
# Note: the hosts list contains the IPs of all kube-controller-manager nodes
Issue the certificate
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-controller-manager-csr.json | cfssljson -bare kube-controller-manager
Create the kubeconfig
cd /etc/kubernetes/pki
# 1. Set the cluster entry
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=https://192.168.148.190:6443 \
--kubeconfig=kube-controller-manager.kubeconfig
# 2. Set the user credentials
kubectl config set-credentials system:kube-controller-manager \
--client-certificate=kube-controller-manager.pem \
--client-key=kube-controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=kube-controller-manager.kubeconfig
# 3. Set the context
kubectl config set-context system:kube-controller-manager \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=kube-controller-manager.kubeconfig
# 4. Use the context
kubectl config use-context system:kube-controller-manager \
--kubeconfig=kube-controller-manager.kubeconfig
# 5. Move the kubeconfig into place
mv kube-controller-manager.kubeconfig /etc/kubernetes/
Generate the kube-controller-manager configuration
cat > /etc/kubernetes/kube-controller-manager.conf << "EOF"
KUBE_CONTROLLER_MANAGER_OPTS="--secure-port=10257 \
--bind-address=0.0.0.0 \
--kubeconfig=/etc/kubernetes/kube-controller-manager.kubeconfig \
--service-cluster-ip-range=10.96.0.0/16 \
--cluster-name=kubernetes \
--cluster-signing-cert-file=/etc/kubernetes/pki/ca.pem \
--cluster-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
--allocate-node-cidrs=true \
--cluster-cidr=10.244.0.0/16 \
--root-ca-file=/etc/kubernetes/pki/ca.pem \
--service-account-private-key-file=/etc/kubernetes/pki/ca-key.pem \
--leader-elect=true \
--feature-gates=RotateKubeletServerCertificate=true \
--controllers=*,bootstrapsigner,tokencleaner \
--tls-cert-file=/etc/kubernetes/pki/kube-controller-manager.pem \
--tls-private-key-file=/etc/kubernetes/pki/kube-controller-manager-key.pem \
--use-service-account-credentials=true \
--v=2"
EOF
Create the systemd service
Run on all master nodes.
cat > /usr/lib/systemd/system/kube-controller-manager.service << "EOF"
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=-/etc/kubernetes/kube-controller-manager.conf
ExecStart=/usr/local/bin/kube-controller-manager $KUBE_CONTROLLER_MANAGER_OPTS
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF
Sync to the other master nodes
scp -pr /etc/kubernetes/pki/* k8s-master02:/etc/kubernetes/pki/
scp -pr /etc/kubernetes/pki/* k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/kube-controller-manager.kubeconfig /etc/kubernetes/kube-controller-manager.conf k8s-master02:/etc/kubernetes/
scp /etc/kubernetes/kube-controller-manager.kubeconfig /etc/kubernetes/kube-controller-manager.conf k8s-master03:/etc/kubernetes/
Start the service
systemctl daemon-reload
systemctl enable --now kube-controller-manager
systemctl status kube-controller-manager
Deploy kube-scheduler
Run on the k8s-master01 node only.
Create the CSR
cat > /etc/kubernetes/pki/kube-scheduler-csr.json << "EOF"
{
"CN": "system:kube-scheduler",
"hosts": [
"127.0.0.1",
"192.168.148.191",
"192.168.148.192",
"192.168.148.193"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Beijing",
"L": "Beijing",
"O": "system:kube-scheduler",
"OU": "system"
}
]
}
EOF
Issue the certificate
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-scheduler-csr.json | cfssljson -bare kube-scheduler
Generate the configuration
cd /etc/kubernetes/pki
# Set the cluster entry (note: the kubeconfig file is consistently named kube-scheduler.kubeconfig)
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=https://192.168.148.190:6443 \
--kubeconfig=kube-scheduler.kubeconfig
# Set the user credentials (corrected file names and paths)
kubectl config set-credentials system:kube-scheduler \
--client-certificate=/etc/kubernetes/pki/kube-scheduler.pem \
--client-key=/etc/kubernetes/pki/kube-scheduler-key.pem \
--embed-certs=true \
--kubeconfig=kube-scheduler.kubeconfig
# Set the context
kubectl config set-context system:kube-scheduler \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=kube-scheduler.kubeconfig
# Use the context
kubectl config use-context system:kube-scheduler \
--kubeconfig=kube-scheduler.kubeconfig
cat > /etc/kubernetes/kube-scheduler.conf << "EOF"
KUBE_SCHEDULER_OPTS="--kubeconfig=/etc/kubernetes/kube-scheduler.kubeconfig \
--leader-elect=true \
--v=2"
EOF
mv kube-scheduler.kubeconfig /etc/kubernetes/
Create the systemd service
Run on all master nodes.
cat > /usr/lib/systemd/system/kube-scheduler.service << "EOF"
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes
[Service]
EnvironmentFile=-/etc/kubernetes/kube-scheduler.conf
ExecStart=/usr/local/bin/kube-scheduler $KUBE_SCHEDULER_OPTS
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF
Sync files to the other master nodes
scp /etc/kubernetes/pki/kube-scheduler*.pem k8s-master02:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/kube-scheduler*.pem k8s-master03:/etc/kubernetes/pki/
scp /etc/kubernetes/kube-scheduler.kubeconfig /etc/kubernetes/kube-scheduler.conf k8s-master02:/etc/kubernetes/
scp /etc/kubernetes/kube-scheduler.kubeconfig /etc/kubernetes/kube-scheduler.conf k8s-master03:/etc/kubernetes/
Start the service
systemctl daemon-reload
systemctl enable --now kube-scheduler
systemctl status kube-scheduler
Deploy kubelet
Run on the k8s-master01 node only.
Create the kubeconfig
cd /etc/kubernetes/pki
# Read the bootstrap token
BOOTSTRAP_TOKEN=$(awk -F "," '{print $1}' /etc/kubernetes/token.csv)
# Set the cluster entry (note: the kubeconfig file is consistently named kubelet-bootstrap.kubeconfig)
kubectl config set-cluster kubernetes \
--certificate-authority=ca.pem \
--embed-certs=true \
--server=https://192.168.148.190:6443 \
--kubeconfig=kubelet-bootstrap.kubeconfig
# Set the user credentials (using the bootstrap token)
kubectl config set-credentials kubelet-bootstrap \
--token=${BOOTSTRAP_TOKEN} \
--kubeconfig=kubelet-bootstrap.kubeconfig
# Set the context
kubectl config set-context default \
--cluster=kubernetes \
--user=kubelet-bootstrap \
--kubeconfig=kubelet-bootstrap.kubeconfig
# Use the context
kubectl config use-context default \
--kubeconfig=kubelet-bootstrap.kubeconfig
kubectl create clusterrolebinding cluster-system-anonymous \
--clusterrole=cluster-admin \
--user=kubelet-bootstrap
kubectl create clusterrolebinding kubelet-bootstrap \
--clusterrole=system:node-bootstrapper \
--user=kubelet-bootstrap \
--kubeconfig=kubelet-bootstrap.kubeconfig
kubectl describe clusterrolebinding cluster-system-anonymous
kubectl describe clusterrolebinding kubelet-bootstrap
cp kubelet-bootstrap.kubeconfig /etc/kubernetes/
Create the kubelet configuration file
Run on all k8s nodes.
cat > /etc/kubernetes/kubelet.json << EOF
{
"kind": "KubeletConfiguration",
"apiVersion": "kubelet.config.k8s.io/v1beta1",
"authentication": {
"x509": {
"clientCAFile": "/etc/kubernetes/pki/ca.pem"
},
"webhook": {
"enabled": true,
"cacheTTL": "2m0s"
},
"anonymous": {
"enabled": false
}
},
"authorization": {
"mode": "Webhook",
"webhook": {
"cacheAuthorizedTTL": "5m0s",
"cacheUnauthorizedTTL": "30s"
}
},
"address": "$(hostname -I | awk '{print $1}')",
"port": 10250,
"readOnlyPort": 10255,
"cgroupDriver": "systemd",
"hairpinMode": "promiscuous-bridge",
"serializeImagePulls": false,
"clusterDomain": "cluster.local.",
"clusterDNS": ["10.96.0.2"]
}
EOF
# Note: the address field in kubelet.json is filled with the current host IP by the command substitution above.
Create the systemd service
cat > /usr/lib/systemd/system/kubelet.service << "EOF"
[Unit]
Description=Kubernetes Kubelet
Documentation=https://github.com/kubernetes/kubernetes
After=docker.service
Requires=docker.service
[Service]
WorkingDirectory=/var/lib/kubelet
ExecStart=/usr/local/bin/kubelet \
--bootstrap-kubeconfig=/etc/kubernetes/kubelet-bootstrap.kubeconfig \
--cert-dir=/etc/kubernetes/pki \
--kubeconfig=/etc/kubernetes/kubelet.kubeconfig \
--config=/etc/kubernetes/kubelet.json \
--rotate-certificates \
--pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.9 \
--container-runtime-endpoint=unix:///run/cri-dockerd.sock \
--v=2
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF
Sync to all nodes
for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do
scp /etc/kubernetes/kubelet-bootstrap.kubeconfig $i:/etc/kubernetes/
done
for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do
scp -r /etc/kubernetes/pki/* root@$i:/etc/kubernetes/pki
done
Create directories and start the service
Run on all k8s nodes.
mkdir -p /var/lib/kubelet
mkdir -p /var/log/kubernetes
systemctl daemon-reload
systemctl enable --now kubelet
systemctl status kubelet
Note: if kubelet reports that it cannot find the node, delete the old kubelet certificates and restart kubelet.
cd /etc/kubernetes/pki
rm -rf /etc/kubernetes/pki/kubelet-client-*.pem kubelet-client-current.pem kubelet.crt kubelet.key
systemctl restart kubelet
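If nodes still do not register after kubelet starts, check for pending bootstrap CSRs on k8s-master01 and approve them; depending on your RBAC setup they may or may not be approved automatically (a sketch):
kubectl get csr
kubectl get csr -o name | xargs -r kubectl certificate approve
kubectl get nodes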
Deploy kube-proxy
Create the CSR
cat > /etc/kubernetes/pki/kube-proxy-csr.json << "EOF"
{
"CN": "system:kube-proxy",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "Beijing",
"L": "Beijing",
"O": "kubemsb",
"OU": "CN"
}
]
}
EOF
Issue the certificate
cd /etc/kubernetes/pki
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes kube-proxy-csr.json | cfssljson -bare kube-proxy
Create the kubeconfig
Change the VIP address if yours differs.
cd /etc/kubernetes/pki
kubectl config set-cluster kubernetes --certificate-authority=ca.pem --embed-certs=true --server=https://192.168.148.190:6443 --kubeconfig=kube-proxy.kubeconfig
kubectl config set-credentials kube-proxy --client-certificate=kube-proxy.pem --client-key=kube-proxy-key.pem --embed-certs=true --kubeconfig=kube-proxy.kubeconfig
kubectl config set-context default --cluster=kubernetes --user=kube-proxy --kubeconfig=kube-proxy.kubeconfig
kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig
mv kube-proxy.kubeconfig /etc/kubernetes/
Create the kube-proxy configuration file
Run on all k8s nodes.
cat > /etc/kubernetes/kube-proxy.yaml << EOF
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress: $(hostname -I | awk '{print $1}')
clientConnection:
kubeconfig: /etc/kubernetes/kube-proxy.kubeconfig
clusterCIDR: 10.244.0.0/16
healthzBindAddress: $(hostname -I | awk '{print $1}'):10256
kind: KubeProxyConfiguration
metricsBindAddress: $(hostname -I | awk '{print $1}'):10249
mode: "ipvs"
EOF
# Note: the IP addresses in kube-proxy.yaml are filled with the current host IP by the command substitution above.
Create the systemd service
Run on all k8s nodes.
cat > /usr/lib/systemd/system/kube-proxy.service << "EOF"
[Unit]
Description=Kubernetes Kube-Proxy Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
WorkingDirectory=/var/lib/kube-proxy
ExecStart=/usr/local/bin/kube-proxy \
--config=/etc/kubernetes/kube-proxy.yaml \
--v=2
Restart=on-failure
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
Sync files to the worker nodes
for i in k8s-master02 k8s-master03 k8s-worker01 k8s-worker02;do
scp /etc/kubernetes/kube-proxy.kubeconfig /etc/kubernetes/kube-proxy.yaml $i:/etc/kubernetes/
done
Start the service
mkdir -p /var/lib/kube-proxy
systemctl daemon-reload
systemctl enable --now kube-proxy
systemctl status kube-proxy
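With mode "ipvs" active, kube-proxy should program IPVS virtual servers; the kubernetes Service VIP (10.96.0.1:443) should appear in the output:
ipvsadm -Ln | head -n 20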
Install the Calico Network Add-on
Run on k8s-master01 only.
Download
# tigera-operator
wget https://raw.githubusercontent.com/projectcalico/calico/v3.28.1/manifests/tigera-operator.yaml
# custom-resources
wget https://raw.githubusercontent.com/projectcalico/calico/v3.28.1/manifests/custom-resources.yaml
Edit the file
# Change the cidr on line 13 to the Pod network CIDR used by this cluster (the value passed to --cluster-cidr)
vim custom-resources.yaml
......
11 ipPools:
12 - blockSize: 26
13 cidr: 10.244.0.0/16
14 encapsulation: VXLANCrossSubnet
......
Apply the manifests
# Apply the resource manifests
kubectl create -f tigera-operator.yaml
kubectl create -f custom-resources.yaml
# Watch the pods in the calico-system namespace
watch -n 1 kubectl get pods -n calico-system
kubectl get pods -n calico-system
# A healthy install looks like this:
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-7cc695686-pwz8w 1/1 Running 0 80s
calico-node-5vms8 1/1 Running 0 80s
calico-node-7w2vc 1/1 Running 0 80s
calico-typha-94dcb75c9-znrrq 1/1 Running 0 80s
csi-node-driver-9854p 2/2 Running 0 80s
csi-node-driver-vfl9h 2/2 Running 0 80s
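Once the Calico pods are running, every node should flip to Ready:
kubectl get nodes -o wide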
Deploy CoreDNS
cat > coredns.yaml << "EOF"
apiVersion: v1
kind: ServiceAccount
metadata:
name: coredns
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
kubernetes.io/bootstrapping: rbac-defaults
name: system:coredns
rules:
- apiGroups:
- ""
resources:
- endpoints
- services
- pods
- namespaces
verbs:
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- endpointslices
verbs:
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations:
rbac.authorization.kubernetes.io/autoupdate: "true"
labels:
kubernetes.io/bootstrapping: rbac-defaults
name: system:coredns
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:coredns
subjects:
- kind: ServiceAccount
name: coredns
namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
data:
Corefile: |
.:53 {
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
fallthrough in-addr.arpa ip6.arpa
}
prometheus :9153
forward . /etc/resolv.conf {
max_concurrent 1000
}
cache 30
loop
reload
loadbalance
}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: coredns
namespace: kube-system
labels:
k8s-app: kube-dns
kubernetes.io/name: "CoreDNS"
spec:
# replicas: not specified here:
# 1. Default is 1.
# 2. Will be tuned in real time if DNS horizontal auto-scaling is turned on.
strategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
selector:
matchLabels:
k8s-app: kube-dns
template:
metadata:
labels:
k8s-app: kube-dns
spec:
priorityClassName: system-cluster-critical
serviceAccountName: coredns
tolerations:
- key: "CriticalAddonsOnly"
operator: "Exists"
nodeSelector:
kubernetes.io/os: linux
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: k8s-app
operator: In
values: ["kube-dns"]
topologyKey: kubernetes.io/hostname
containers:
- name: coredns
image: coredns/coredns:1.10.1
imagePullPolicy: IfNotPresent
resources:
limits:
memory: 170Mi
requests:
cpu: 100m
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
- name: config-volume
mountPath: /etc/coredns
readOnly: true
ports:
- containerPort: 53
name: dns
protocol: UDP
- containerPort: 53
name: dns-tcp
protocol: TCP
- containerPort: 9153
name: metrics
protocol: TCP
securityContext:
allowPrivilegeEscalation: false
capabilities:
add:
- NET_BIND_SERVICE
drop:
- all
readOnlyRootFilesystem: true
livenessProbe:
httpGet:
path: /health
port: 8080
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 5
readinessProbe:
httpGet:
path: /ready
port: 8181
scheme: HTTP
dnsPolicy: Default
volumes:
- name: config-volume
configMap:
name: coredns
items:
- key: Corefile
path: Corefile
---
apiVersion: v1
kind: Service
metadata:
name: kube-dns
namespace: kube-system
annotations:
prometheus.io/port: "9153"
prometheus.io/scrape: "true"
labels:
k8s-app: kube-dns
kubernetes.io/cluster-service: "true"
kubernetes.io/name: "CoreDNS"
spec:
selector:
k8s-app: kube-dns
clusterIP: 10.96.0.2
ports:
- name: dns
port: 53
protocol: UDP
- name: dns-tcp
port: 53
protocol: TCP
- name: metrics
port: 9153
protocol: TCP
EOF
kubectl apply -f coredns.yaml
Verify Cluster Availability
Check the nodes
[root@k8s-master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master01 Ready <none> 33m v1.31.0
k8s-master02 Ready <none> 33m v1.31.0
k8s-master03 Ready <none> 33m v1.31.0
k8s-worker01 Ready <none> 33m v1.31.0
k8s-worker02 Ready <none> 33m v1.31.0
Check workloads across the cluster
[root@k8s-master01 ~]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
calico-apiserver calico-apiserver-87b55f54c-r88g8 1/1 Running 0 12m
calico-apiserver calico-apiserver-87b55f54c-vsq87 1/1 Running 0 12m
calico-system calico-kube-controllers-569c6fc6c4-vv72m 1/1 Running 0 31m
calico-system calico-node-c9ffb 1/1 Running 0 31m
calico-system calico-node-df57w 1/1 Running 0 31m
calico-system calico-node-fmxfw 1/1 Running 0 31m
calico-system calico-node-lh96h 1/1 Running 0 31m
calico-system calico-node-vhwbl 1/1 Running 0 31m
calico-system calico-typha-65d7bb9f55-4b22f 1/1 Running 0 31m
calico-system calico-typha-65d7bb9f55-rt9wp 1/1 Running 0 31m
calico-system calico-typha-65d7bb9f55-z959b 1/1 Running 0 31m
calico-system csi-node-driver-bklrt 2/2 Running 0 31m
calico-system csi-node-driver-jc4nk 2/2 Running 0 31m
calico-system csi-node-driver-jnmj4 2/2 Running 0 31m
calico-system csi-node-driver-r865b 2/2 Running 0 31m
calico-system csi-node-driver-stjd7 2/2 Running 0 31m
kube-system coredns-5d44bf67b6-f8gqr 1/1 Running 0 65s
tigera-operator tigera-operator-55748b469f-x8fcl 1/1 Running 0 31m
Check that cluster DNS works
[root@k8s-master01 ~]# dig -t a www.baidu.com @10.96.0.2
; <<>> DiG 9.18.21 <<>> -t a www.baidu.com @10.96.0.2
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 16820
;; flags: qr rd ra; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 1
;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 1232
; COOKIE: 548d065d6d7403e9 (echoed)
;; QUESTION SECTION:
;www.baidu.com. IN A
;; ANSWER SECTION:
www.baidu.com. 5 IN A 39.156.70.239
www.baidu.com. 5 IN A 39.156.70.46
;; Query time: 28 msec
;; SERVER: 10.96.0.2#53(10.96.0.2) (UDP)
;; WHEN: Sun Aug 10 17:10:00 CST 2025
;; MSG SIZE rcvd: 112
Launch a test workload
cat > nginx.yaml << "EOF"
apiVersion: apps/v1
kind: Deployment
metadata:
name: nginx-web
spec:
replicas: 2
selector:
matchLabels:
name: nginx
template:
metadata:
labels:
name: nginx
spec:
containers:
- name: nginx
image: nginx:1.19.6
imagePullPolicy: IfNotPresent
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: nginx-service-nodeport
spec:
ports:
- port: 80
targetPort: 80
nodePort: 30001
protocol: TCP
type: NodePort
selector:
name: nginx
EOF
kubectl apply -f nginx.yaml
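After applying the manifest, confirm the Deployment and Service exist and that the NodePort answers; 192.168.148.194 is k8s-worker01 here, but any node IP should work once the image has been pulled:
kubectl get deployment nginx-web
kubectl get svc nginx-service-nodeport
curl -I http://192.168.148.194:30001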
Kubernetes High-Availability Tests
API Server failover
# Test API Server availability through the VIP from any node
curl -k https://192.168.148.190:6443/healthz
# Stop the API Server on one master node (e.g. master01)
systemctl stop kube-apiserver
# Test the VIP again (traffic should automatically switch to another master)
curl -k https://192.168.148.190:6443/healthz # watch how the response time changes
etcd cluster fault tolerance
# Check etcd cluster status (run on any master)
ETCDCTL_API=3 /usr/local/bin/etcdctl \
--write-out=table \
--cacert=/etc/kubernetes/pki/ca.pem \
--cert=/etc/kubernetes/pki/etcd.pem \
--key=/etc/kubernetes/pki/etcd-key.pem \
--endpoints=https://192.168.148.191:2379,https://192.168.148.192:2379,https://192.168.148.193:2379 \
endpoint health
# Simulate an etcd member failure (e.g. stop etcd on master03)
systemctl stop etcd
# Verify that cluster operations still work
kubectl create deployment nginx --image=nginx
Keepalived master/backup failover
# See where the VIP is currently bound (should be ha-master or ha-backup)
ssh k8s-ha-master "ip addr show | grep 192.168.148.190"
# Manually stop keepalived on the primary node
ssh k8s-ha-master "sudo systemctl stop keepalived"
# Check that the VIP has floated to the backup node (should complete within about 10 seconds)
ssh k8s-ha-backup "ip addr show | grep 192.168.148.190"
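To fail back, start keepalived on the primary again; with its higher priority it should reclaim the VIP within a few advertisement intervals:
ssh k8s-ha-master "sudo systemctl start keepalived"
sleep 5
ssh k8s-ha-master "ip addr show ens32 | grep 192.168.148.190"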
HAProxy traffic distribution
# Check the HAProxy backend status (requires the stats socket configured in haproxy.cfg)
ssh k8s-ha-master "echo 'show stat' | sudo socat /var/run/haproxy.sock stdio"
# Simulate a backend failure (e.g. stop the API Server on master02)
ssh k8s-master02 "sudo systemctl stop kube-apiserver"
# Watch HAProxy mark the failed node as down
watch -n 1 'ssh k8s-ha-master "echo show stat | sudo socat /var/run/haproxy.sock stdio | grep k8s-master02"'