Prometheus(普罗米修斯)是一个最初在SoundCloud上构建的监控系统。自2012年成为社区开源项目,拥有非常活跃的开发人员和用户社区。为强调开源及独立维护,Prometheus于2016年加入云原生计算基金会(CNCF),成为继Kubernetes之后的第二个托管项目。
监控指标 | 具体实现 | 举例 |
---|---|---|
Pod性能 | cAdvisor | 容器CPU,内存使用率 |
Node性能 | node-exporter | 节点CPU,内存使用率 |
k8s资源对象 | kube-state-metrics | Pod/Deploy/Service |
./
├── prometheus-configmap.yaml #主配置文件
├── prometheus-rbac.yaml #权限认证
├── prometheus-rules.yaml #告警策略
├── prometheus-service.yaml #配置svc服务暴露
└── prometheus-statefulset.yaml #配置prometheus
$ kubectl create ns ops
$ ls prometheus-* | xargs -i kubectl apply -f {}
# 访问http://192.168.56.17:30090/graph
$ kubectl get pv,pvc -n ops
$ kubectl get pod -n ops
$ vim grafana.yml
# Grafana for the "ops" namespace: a single-replica Deployment, the
# PersistentVolumeClaim that backs its data directory, and a NodePort Service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: ops
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:7.1.0
          ports:
            - containerPort: 3000
              protocol: TCP
          # requests == limits for both CPU and memory.
          resources:
            limits:
              cpu: 100m
              memory: 256Mi
            requests:
              cpu: 100m
              memory: 256Mi
          volumeMounts:
            # Only the "grafana" sub-directory of the volume is mounted.
            - name: grafana-data
              mountPath: /var/lib/grafana
              subPath: grafana
      # Pod-level security context (fsGroup is only valid at pod level).
      # NOTE(review): 472 is presumably the grafana UID/GID baked into the
      # image; confirm against the image tag in use.
      securityContext:
        fsGroup: 472
        runAsUser: 472
      volumes:
        - name: grafana-data
          persistentVolumeClaim:
            claimName: grafana
---
# Storage for /var/lib/grafana.
# Assumes a StorageClass named "managed-nfs-storage" already exists in the
# cluster; verify before applying.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana
  namespace: ops
spec:
  storageClassName: "managed-nfs-storage"
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 5Gi
---
# Exposes Grafana on every node at port 30030; service port 80 forwards to
# container port 3000.
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: ops
spec:
  type: NodePort
  ports:
    - port: 80
      targetPort: 3000
      nodePort: 30030
  selector:
    app: grafana
$ kubectl apply -f grafana.yml
$ kubectl get pod,svc -n ops
$ ls -l kube-*
-rw-r--r-- 1 root root 2362 Jul 24 18:16 kube-state-metrics-deployment.yaml
-rw-r--r-- 1 root root 2536 Jul 24 18:16 kube-state-metrics-rbac.yaml
-rw-r--r-- 1 root root 498 Jul 24 18:16 kube-state-metrics-service.yaml
$ ls kube-state-metrics-* | xargs -i kubectl apply -f {}
$ ls -l node-exporter-*
-rw-r--r-- 1 root root 1633 Jul 25 21:17 node-exporter-ds.yml
-rw-r--r-- 1 root root 417 Jul 24 18:16 node-exporter-service.yaml
$ kubectl apply -f node-exporter-ds.yml
$ kubectl apply -f node-exporter-service.yaml
[root@centos7-node4 dashboard]# ls -l
total 180
-rw-r--r-- 1 root root 59467 Dec 23 2019 K8S工作节点监控-20191219.json
-rw-r--r-- 1 root root 59483 Dec 23 2019 K8S资源对象状态监控-20191219.json
-rw-r--r-- 1 root root 58945 Dec 23 2019 K8S集群资源监控-20191219.json
从此处即可导入模板
$ ls -l alertmanager-*
alertmanager-configmap.yaml #配置文件,这个需要修改对应的告警邮箱配置
alertmanager-deployment.yaml #deploy控制器,用于产生alertmanager副本
alertmanager-pvc.yaml #存储配置
alertmanager-service.yaml #服务暴露配置
# 告警邮箱配置:
$ ls alertmanager-* | xargs -i kubectl apply -f {} #修改完成邮箱配置之后再apply