注:本文基于k8s集群v1.16.2版本。
k8s真是一个复杂的东西,每一次安装都可能出现不一样的问题,本文特记录下来彤哥安装过程中出现的一些问题及解决方案。
可以到阿里云、腾讯云上购买按需付费,用完释放
此版本验证通过,其它版本不确定
安装后的拓扑结构为一个master节点,两个worker节点。
# 在 master 节点和 worker 节点都要执行
cat /etc/redhat-release
# 此处 hostname 的输出将会是该机器在 Kubernetes 集群中的节点名字
# 不能使用 localhost 作为节点的名字
hostname
# 请使用 lscpu 命令,核对 CPU 信息
# Architecture: x86_64 本安装文档不支持 arm 架构
# CPU(s): 2 CPU 内核数量不能低于 2
lscpu
修改 hostname
如果您需要修改 hostname,可执行如下指令:
# 修改 hostname
hostnamectl set-hostname [your-new-host-name]
# 查看修改结果
hostnamectl status
# 设置 hostname 解析
echo "127.0.0.1 $(hostname)" >> /etc/hosts
使用 root 身份在所有节点执行如下代码,以安装软件:
手动执行以下代码,或保存为sh脚本执行:
#!/bin/bash
# 在 master 节点和 worker 节点都要执行
# 安装 docker
# 参考文档如下
# https://docs.docker.com/install/linux/docker-ce/centos/
# https://docs.docker.com/install/linux/linux-postinstall/
# 卸载旧版本
yum remove -y docker \
docker-client \
docker-client-latest \
docker-common \
docker-latest \
docker-latest-logrotate \
docker-logrotate \
docker-selinux \
docker-engine-selinux \
docker-engine
# 设置 yum repository
yum install -y yum-utils \
device-mapper-persistent-data \
lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 安装并启动 docker
yum install -y docker-ce-18.09.7 docker-ce-cli-18.09.7 containerd.io
systemctl enable docker
systemctl start docker
# 安装 nfs-utils
# 必须先安装 nfs-utils 才能挂载 nfs 网络存储
yum install -y nfs-utils
yum install -y wget
# 关闭 防火墙
systemctl stop firewalld
systemctl disable firewalld
# 关闭 SeLinux
setenforce 0
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
# 关闭 swap
swapoff -a
yes | cp /etc/fstab /etc/fstab_bak
cat /etc/fstab_bak |grep -v swap > /etc/fstab
# 修改 /etc/sysctl.conf
# 如果有配置,则修改
sed -i "s#^net.ipv4.ip_forward.*#net.ipv4.ip_forward=1#g" /etc/sysctl.conf
sed -i "s#^net.bridge.bridge-nf-call-ip6tables.*#net.bridge.bridge-nf-call-ip6tables=1#g" /etc/sysctl.conf
sed -i "s#^net.bridge.bridge-nf-call-iptables.*#net.bridge.bridge-nf-call-iptables=1#g" /etc/sysctl.conf
# 可能没有,追加
echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
echo "net.bridge.bridge-nf-call-ip6tables = 1" >> /etc/sysctl.conf
echo "net.bridge.bridge-nf-call-iptables = 1" >> /etc/sysctl.conf
# 执行命令以应用
sysctl -p
# 配置K8S的yum源
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# 卸载旧版本
yum remove -y kubelet kubeadm kubectl
# 安装kubelet、kubeadm、kubectl
yum install -y kubelet-1.16.2 kubeadm-1.16.2 kubectl-1.16.2
# 修改docker Cgroup Driver为systemd
# # 将/usr/lib/systemd/system/docker.service文件中的这一行 ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
# # 修改为 ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock --exec-opt native.cgroupdriver=systemd
# 如果不修改,在添加 worker 节点时可能会碰到如下错误
# [WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd".
# Please follow the guide at https://kubernetes.io/docs/setup/cri/
sed -i "s#^ExecStart=/usr/bin/dockerd.*#ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock --exec-opt native.cgroupdriver=systemd#g" /usr/lib/systemd/system/docker.service
# 设置 docker 镜像,提高 docker 镜像下载速度和稳定性
# 如果您访问 https://hub.docker.io 速度非常稳定,亦可以跳过这个步骤
curl -sSL https://get.daocloud.io/daotools/set_mirror.sh | sh -s http://f1361db2.m.daocloud.io
# 重启 docker,并启动 kubelet
systemctl daemon-reload
systemctl restart docker
systemctl enable kubelet && systemctl start kubelet
docker version
注意:上面的脚本需要在所有节点上执行。
关于初始化时用到的环境变量:
# 只在 master 节点执行
# 替换 x.x.x.x 为 master 节点实际 IP(请使用内网 IP)
# export 命令只在当前 shell 会话中有效,开启新的 shell 窗口后,如果要继续安装过程,请重新执行此处的 export 命令
export MASTER_IP=10.202.12.21
# 替换 apiserver.demo 为 您想要的 dnsName
export APISERVER_NAME=apiserver.demo
# Kubernetes 容器组所在的网段,该网段安装完成后,由 kubernetes 创建,事先并不存在于您的物理网络中
echo "${MASTER_IP} ${APISERVER_NAME}" >> /etc/hosts
使用kubeadm初始化master节点,执行以下脚本:
#!/bin/bash
# 只在 master 节点执行
# 脚本出错时终止执行
set -e
# 查看完整配置选项 https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2
rm -f ./kubeadm-config.yaml
cat <<EOF > ./kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
kubernetesVersion: v1.16.2
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
controlPlaneEndpoint: "${APISERVER_NAME}:6443"
networking:
serviceSubnet: "10.96.0.0/16"
podSubnet: "${POD_SUBNET}"
dnsDomain: "cluster.local"
EOF
# kubeadm init
# 根据您服务器网速的情况,您需要等候 3 - 10 分钟
kubeadm init --config=kubeadm-config.yaml --upload-certs
# 配置 kubectl
rm -rf /root/.kube/
mkdir /root/.kube/
cp -i /etc/kubernetes/admin.conf /root/.kube/config
# 安装 calico 网络插件
# 参考文档 https://docs.projectcalico.org/v3.9/getting-started/kubernetes/
rm -f calico-3.9.2.yaml
wget https://kuboard.cn/install-script/calico/calico-3.9.2.yaml
sed -i "s#192\.168\.0\.0/16#${POD_SUBNET}#" calico-3.9.2.yaml
kubectl apply -f calico-3.9.2.yaml
检查 master 初始化结果
# 只在 master 节点执行
# 执行如下命令,等待 3-10 分钟,直到所有的容器组处于 Running 状态
watch kubectl get pod -n kube-system -o wide
# 查看 master 节点初始化结果
kubectl get nodes -o wide
注意,初始化 master 节点时,如果因为中间某些步骤的配置出错,想要重新初始化 master 节点,请先执行 kubeadm reset 操作。
在 master 节点上执行
# 只在 master 节点执行
kubeadm token create --print-join-command
执行结果大致类似下面这样:
# 请不要直接拷贝这条命令,拷贝你的master节点上执行的结果
# 这里token的有效时间为两个小时,两小时内可以在worker节点上执行
kubeadm join apiserver.demo:6443 --token h345zw.em6ycnpcpevxz8as --discovery-token-ca-cert-hash sha256:84a8710bbfbb046915a90064132203ca453353b85ef5d29f961b03f129f7d5e3
针对所有的 worker 节点执行
# 只在 worker 节点执行
# 替换 x.x.x.x 为 master 节点实际 IP(请使用内网 IP)
export MASTER_IP=x.x.x.x
# 替换 apiserver.demo 为初始化 master 节点时所使用的 APISERVER_NAME
export APISERVER_NAME=apiserver.demo
echo "${MASTER_IP} ${APISERVER_NAME}" >> /etc/hosts
# 替换为 master 节点上 kubeadm token create 命令的输出
kubeadm join apiserver.demo:6443 --token h345zw.em6ycnpcpevxz8as --discovery-token-ca-cert-hash sha256:84a8710bbfbb046915a90064132203ca453353b85ef5d29f961b03f129f7d5e3
在 master 节点上执行
# 只在 master 节点执行
kubectl get nodes -o wide
输出结果如下:
[root@instance-ji0xk9uh-1 ~]# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master Ready master 19m v1.16.2 192.168.64.4 <none> CentOS Linux 7 (Core) 3.10.0-957.27.2.el7.x86_64 docker://18.9.7
node1 Ready <none> 2m43s v1.16.2 192.168.64.5 <none> CentOS Linux 7 (Core) 3.10.0-957.27.2.el7.x86_64 docker://18.9.7
node2 Ready <none> 2m17s v1.16.2 192.168.64.6 <none> CentOS Linux 7 (Core) 3.10.0-957.27.2.el7.x86_64 docker://18.9.7
注意,正常情况下,您无需移除 worker 节点,如果添加到集群出错,您可以移除 worker 节点,再重新尝试添加。
在准备移除的 worker 节点上执行
# 只在 worker 节点执行
kubeadm reset
在 master 节点上执行
# 只在 master 节点执行
# worker 节点的名字可以通过在节点 master 上执行 kubectl get nodes 命令获得
kubectl delete node [worker_node1]
在master上新建如下 nginx-ingress.yaml 文件:
# 如果打算用于生产环境,请参考 https://github.com/nginxinc/kubernetes-ingress/blob/v1.5.5/docs/installation.md 并根据您自己的情况做进一步定制
apiVersion: v1
kind: Namespace
metadata:
name: nginx-ingress
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: nginx-ingress
namespace: nginx-ingress
---
apiVersion: v1
kind: Secret
metadata:
name: default-server-secret
namespace: nginx-ingress
type: Opaque
data:
tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN2akNDQWFZQ0NRREFPRjl0THNhWFhEQU5CZ2txaGtpRzl3MEJBUXNGQURBaE1SOHdIUVlEVlFRRERCWk8KUjBsT1dFbHVaM0psYzNORGIyNTBjbTlzYkdWeU1CNFhEVEU0TURreE1qRTRNRE16TlZvWERUSXpNRGt4TVRFNApNRE16TlZvd0lURWZNQjBHQTFVRUF3d1dUa2RKVGxoSmJtZHlaWE56UTI5dWRISnZiR3hsY2pDQ0FTSXdEUVlKCktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCQUwvN2hIUEtFWGRMdjNyaUM3QlBrMTNpWkt5eTlyQ08KR2xZUXYyK2EzUDF0azIrS3YwVGF5aGRCbDRrcnNUcTZzZm8vWUk1Y2Vhbkw4WGM3U1pyQkVRYm9EN2REbWs1Qgo4eDZLS2xHWU5IWlg0Rm5UZ0VPaStlM2ptTFFxRlBSY1kzVnNPazFFeUZBL0JnWlJVbkNHZUtGeERSN0tQdGhyCmtqSXVuektURXUyaDU4Tlp0S21ScUJHdDEwcTNRYzhZT3ExM2FnbmovUWRjc0ZYYTJnMjB1K1lYZDdoZ3krZksKWk4vVUkxQUQ0YzZyM1lma1ZWUmVHd1lxQVp1WXN2V0RKbW1GNWRwdEMzN011cDBPRUxVTExSakZJOTZXNXIwSAo1TmdPc25NWFJNV1hYVlpiNWRxT3R0SmRtS3FhZ25TZ1JQQVpQN2MwQjFQU2FqYzZjNGZRVXpNQ0F3RUFBVEFOCkJna3Foa2lHOXcwQkFRc0ZBQU9DQVFFQWpLb2tRdGRPcEsrTzhibWVPc3lySmdJSXJycVFVY2ZOUitjb0hZVUoKdGhrYnhITFMzR3VBTWI5dm15VExPY2xxeC9aYzJPblEwMEJCLzlTb0swcitFZ1U2UlVrRWtWcitTTFA3NTdUWgozZWI4dmdPdEduMS9ienM3bzNBaS9kclkrcUI5Q2k1S3lPc3FHTG1US2xFaUtOYkcyR1ZyTWxjS0ZYQU80YTY3Cklnc1hzYktNbTQwV1U3cG9mcGltU1ZmaXFSdkV5YmN3N0NYODF6cFErUyt1eHRYK2VBZ3V0NHh3VlI5d2IyVXYKelhuZk9HbWhWNThDd1dIQnNKa0kxNXhaa2VUWXdSN0diaEFMSkZUUkk3dkhvQXprTWIzbjAxQjQyWjNrN3RXNQpJUDFmTlpIOFUvOWxiUHNoT21FRFZkdjF5ZytVRVJxbStGSis2R0oxeFJGcGZnPT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
tls.key: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBdi91RWM4b1JkMHUvZXVJTHNFK1RYZUprckxMMnNJNGFWaEMvYjVyYy9XMlRiNHEvClJOcktGMEdYaVN1eE9ycXgrajlnamx4NXFjdnhkenRKbXNFUkJ1Z1B0ME9hVGtIekhvb3FVWmcwZGxmZ1dkT0EKUTZMNTdlT1l0Q29VOUZ4amRXdzZUVVRJVUQ4R0JsRlNjSVo0b1hFTkhzbysyR3VTTWk2Zk1wTVM3YUhudzFtMApxWkdvRWEzWFNyZEJ6eGc2clhkcUNlUDlCMXl3VmRyYURiUzc1aGQzdUdETDU4cGszOVFqVUFQaHpxdmRoK1JWClZGNGJCaW9CbTVpeTlZTW1hWVhsMm0wTGZzeTZuUTRRdFFzdEdNVWozcGJtdlFmazJBNnljeGRFeFpkZFZsdmwKMm82MjBsMllxcHFDZEtCRThCay90elFIVTlKcU56cHpoOUJUTXdJREFRQUJBb0lCQVFDZklHbXowOHhRVmorNwpLZnZJUXQwQ0YzR2MxNld6eDhVNml4MHg4Mm15d1kxUUNlL3BzWE9LZlRxT1h1SENyUlp5TnUvZ2IvUUQ4bUFOCmxOMjRZTWl0TWRJODg5TEZoTkp3QU5OODJDeTczckM5bzVvUDlkazAvYzRIbjAzSkVYNzZ5QjgzQm9rR1FvYksKMjhMNk0rdHUzUmFqNjd6Vmc2d2szaEhrU0pXSzBwV1YrSjdrUkRWYmhDYUZhNk5nMUZNRWxhTlozVDhhUUtyQgpDUDNDeEFTdjYxWTk5TEI4KzNXWVFIK3NYaTVGM01pYVNBZ1BkQUk3WEh1dXFET1lvMU5PL0JoSGt1aVg2QnRtCnorNTZud2pZMy8yUytSRmNBc3JMTnIwMDJZZi9oY0IraVlDNzVWYmcydVd6WTY3TWdOTGQ5VW9RU3BDRkYrVm4KM0cyUnhybnhBb0dCQU40U3M0ZVlPU2huMVpQQjdhTUZsY0k2RHR2S2ErTGZTTXFyY2pOZjJlSEpZNnhubmxKdgpGenpGL2RiVWVTbWxSekR0WkdlcXZXaHFISy9iTjIyeWJhOU1WMDlRQ0JFTk5jNmtWajJTVHpUWkJVbEx4QzYrCk93Z0wyZHhKendWelU0VC84ajdHalRUN05BZVpFS2FvRHFyRG5BYWkyaW5oZU1JVWZHRXFGKzJyQW9HQkFOMVAKK0tZL0lsS3RWRzRKSklQNzBjUis3RmpyeXJpY05iWCtQVzUvOXFHaWxnY2grZ3l4b25BWlBpd2NpeDN3QVpGdwpaZC96ZFB2aTBkWEppc1BSZjRMazg5b2pCUmpiRmRmc2l5UmJYbyt3TFU4NUhRU2NGMnN5aUFPaTVBRHdVU0FkCm45YWFweUNweEFkREtERHdObit3ZFhtaTZ0OHRpSFRkK3RoVDhkaVpBb0dCQUt6Wis1bG9OOTBtYlF4VVh5YUwKMjFSUm9tMGJjcndsTmVCaWNFSmlzaEhYa2xpSVVxZ3hSZklNM2hhUVRUcklKZENFaHFsV01aV0xPb2I2NTNyZgo3aFlMSXM1ZUtka3o0aFRVdnpldm9TMHVXcm9CV2xOVHlGanIrSWhKZnZUc0hpOGdsU3FkbXgySkJhZUFVWUNXCndNdlQ4NmNLclNyNkQrZG8wS05FZzFsL0FvR0FlMkFVdHVFbFNqLzBmRzgrV3hHc1RFV1JqclRNUzRSUjhRWXQKeXdjdFA4aDZxTGxKUTRCWGxQU05rMXZLTmtOUkxIb2pZT2pCQTViYjhibXNVU1BlV09NNENoaFJ4QnlHbmR2eAphYkJDRkFwY0IvbEg4d1R0alVZYlN5T294ZGt5OEp0ek90ajJhS0FiZHd6NlArWDZDODhjZmxYVFo5MWpYL3RMCjF3TmRKS2tDZ1lCbyt0UzB5TzJ2SWFmK2UwSkN5TGhzVDQ5cTN3Zis2QWVqWGx2WDJ1VnRYejN5QTZnbXo5aCsKcDNlK2JMRUxwb3B0WFhNdUFRR0xhUkcrYlNNcjR5dERYbE5ZSndUeThXczNKY3dlSTdqZVp2b0ZpbmNvVlVIMwphdmxoTUVCRGYxSjltSDB5cDBwWUNaS2ROdHNvZEZtQktzVEtQMjJhTmtsVVhCS3gyZzR6cFE9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo=
---
kind: ConfigMap
apiVersion: v1
metadata:
name: nginx-config
namespace: nginx-ingress
data:
server-names-hash-bucket-size: "1024"
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
name: nginx-ingress
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- secrets
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- list
- watch
- update
- create
- apiGroups:
- ""
resources:
- pods
verbs:
- list
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- list
- watch
- get
- apiGroups:
- "extensions"
resources:
- ingresses/status
verbs:
- update
- apiGroups:
- k8s.nginx.org
resources:
- virtualservers
- virtualserverroutes
verbs:
- list
- watch
- get
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
name: nginx-ingress
subjects:
- kind: ServiceAccount
name: nginx-ingress
namespace: nginx-ingress
roleRef:
kind: ClusterRole
name: nginx-ingress
apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nginx-ingress
namespace: nginx-ingress
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9113"
spec:
selector:
matchLabels:
app: nginx-ingress
template:
metadata:
labels:
app: nginx-ingress
spec:
serviceAccountName: nginx-ingress
containers:
- image: nginx/nginx-ingress:1.5.5
name: nginx-ingress
ports:
- name: http
containerPort: 80
hostPort: 80
- name: https
containerPort: 443
hostPort: 443
- name: prometheus
containerPort: 9113
env:
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
args:
- -nginx-configmaps=$(POD_NAMESPACE)/nginx-config
- -default-server-tls-secret=$(POD_NAMESPACE)/default-server-secret
#- -v=3 # Enables extensive logging. Useful for troubleshooting.
#- -report-ingress-status
#- -external-service=nginx-ingress
#- -enable-leader-election
- -enable-prometheus-metrics
#- -enable-custom-resources
使用如下命令执行该文件:
kubectl apply -f nginx-ingress.yaml
查看是否安装成功:
[root@instance-ji0xk9uh-1 ~]# kubectl get all -n nginx-ingress
NAME READY STATUS RESTARTS AGE
pod/nginx-ingress-55dfk 1/1 Running 0 29s
pod/nginx-ingress-9847f 1/1 Running 0 29s
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE
daemonset.apps/nginx-ingress 2 2 2 2 2 <none> 29s
验证是否成功:
# 打开浏览器,输入任意worker节点的ip地址,回车,如果出现 404 NOT FOUND,表示nginx ingress安装成功
# 因为现在还没有安装任何服务,所以出现404是正常的,这个就相当于是一个nginx,用来做后端请求的转发。
本章我们就把一个完整的k8s集群搭好了,你可能还比较迷茫,不用着急,我们下一章安装Dashboard,更直观的感受下k8s集群的魅力。
本文参考自https://kuboard.cn/install/install-k8s.html,非常推荐这个网站,上面有很多k8s不错的教程,非常适合新手入门。