有奖:语音产品征文挑战赛火热进行中> HOT

集群监控概览

图表名称
查询语句
使用的指标
配置文件
CPU Requests Commitment
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"})
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
kube_node_status_allocatable_cpu_cores
kube-state-metrics
CPU Limits Commitment
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) / sum(kube_node_status_allocatable_cpu_cores{cluster="$cluster"})
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
kube_node_status_allocatable_cpu_cores
kube-state-metrics
Memory Utilisation
1 - sum(:node_memory_MemAvailable_bytes:sum{cluster="$cluster"}) / sum(node_memory_MemTotal_bytes{cluster="$cluster"})
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
Memory Requests Commitment
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"})
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
kube_node_status_allocatable_memory_bytes
kube-state-metrics
Memory Limits Commitment
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) / sum(kube_node_status_allocatable_memory_bytes{cluster="$cluster"})
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
kube_node_status_allocatable_memory_bytes
kube-state-metrics
Node Count
count(kube_node_info{cluster="$cluster"})
kube_node_info
kube-state-metrics
Pod Count
count(kube_pod_info{cluster="$cluster"})
kube_pod_info
kube-state-metrics
Node Request CPU Average Percent
avg(sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_cpu_cores{cluster="$cluster"})by(node))
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
kube_node_status_capacity_cpu_cores
kube-state-metrics
Node Request Memory Average Percent
avg(sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"})by (node)/sum(kube_node_status_capacity_memory_bytes{cluster="$cluster"})by(node))
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
kube_node_status_capacity_memory_bytes
kube-state-metrics
API Server Success Request Percent
sum(irate(apiserver_request_total{cluster="$cluster",code=~"20.*",verb=~"GET|LIST"}[5m]))/sum(irate(apiserver_request_total{cluster="$cluster",verb=~"GET|LIST"}[5m]))
apiserver_request_total
kube-apiserver
apiserver_request_total
kube-apiserver
Namespace Overview
count(kube_pod_info{cluster="$cluster"}) by (namespace)
kube_pod_info
kube-state-metrics
count(kube_service_info{cluster="$cluster"}) by(namespace)
kube_service_info
kube-state-metrics
count(kube_pod_container_info{cluster="$cluster"}) by(namespace)
kube_pod_container_info
kube-state-metrics
count(kube_configmap_info{cluster="$cluster"}) by(namespace)
kube_configmap_info
kube-state-metrics
count(kube_secret_info{cluster="$cluster"}) by(namespace)
kube_secret_info
kube-state-metrics
count(kube_deployment_created{cluster="$cluster"}) by (namespace)
kube_deployment_created
kube-state-metrics
count(kube_statefulset_created{cluster="$cluster"}) by (namespace)
kube_statefulset_created
kube-state-metrics
count(kube_job_created{cluster="$cluster"}) by (namespace)
kube_job_created
kube-state-metrics
count(kube_cronjob_created{cluster="$cluster"}) by (namespace)
kube_cronjob_created
kube-state-metrics
count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace) - (count(kube_pod_status_phase{cluster="$cluster",phase="Succeeded"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{cluster="$cluster",condition="false"}==1) by(namespace)
kube_pod_status_ready
kube-state-metrics
kube_pod_status_phase
kube-state-metrics
kube_pod_status_ready
kube-state-metrics
count(kube_deployment_status_replicas_ready{cluster="$cluster"}<kube_deployment_spec_replicas{cluster="$cluster"}) by (namespace)
kube_deployment_status_replicas_ready
kube-state-metrics
kube_deployment_spec_replicas
kube-state-metrics
count(kube_statefulset_status_replicas_ready{cluster="$cluster"}<kube_statefulset_replicas{cluster="$cluster"}) by (namespace)
kube_statefulset_status_replicas_ready
kube-state-metrics
kube_statefulset_replicas
kube-state-metrics
count(kube_daemonset_status_number_unavailable{cluster="$cluster"}>0)by(namespace)
kube_daemonset_status_number_unavailable
kube-state-metrics
count(kube_job_status_failed{cluster="$cluster"} == 1) by (namespace)
kube_job_status_failed
kube-state-metrics
count(kube_daemonset_created{cluster="$cluster"}) by (namespace)
kube_daemonset_created
kube-state-metrics
count(kube_persistentvolumeclaim_info{cluster="$cluster"}) by (namespace)
kube_persistentvolumeclaim_info
kube-state-metrics
CPU Usage
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
CPU Quota
sum(kube_pod_owner{cluster="$cluster"}) by (namespace)
kube_pod_owner
kube-state-metrics
count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster"}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", container!="POD", container!=""}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster"}) by (namespace)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage (working_set)
sum(container_memory_working_set_bytes{cluster="$cluster", container!="", container!="POD"}) by (namespace)
container_memory_working_set_bytes
cadvisor
Memory Requests
sum(kube_pod_owner{cluster="$cluster"}) by (namespace)
kube_pod_owner
kube-state-metrics
count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"}) by (workload, namespace)) by (namespace)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace)
container_memory_rss
cadvisor
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster"}) by (namespace)
container_memory_rss
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_rss{cluster="$cluster", container!="", container!="POD"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster"}) by (namespace)
container_memory_rss
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Node Memory Usage (Top 10)
sum(label_replace(topk(10, 1-(node_memory_MemAvailable_bytes{cluster="$cluster"} / node_memory_MemTotal_bytes{cluster="$cluster"})), "node_ip", "$1", "instance", "(.*)"))by(node_ip)
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
Node CPU Usage (Top 10)
topk(10, sum(label_replace(1 - sum(rate(node_cpu_seconds_total{cluster="$cluster",mode="idle"}[1m])) by (instance) / sum(rate(node_cpu_seconds_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_cpu_seconds_total
node-exporter
node_cpu_seconds_total
node-exporter
Node Disk Usage (Top 10)
topk(10, sum(label_replace(1-node_filesystem_free_bytes{cluster="$cluster",mountpoint="/"}/node_filesystem_size_bytes{cluster="$cluster",mountpoint="/",fstype!="rootfs"},"host_ip","$1","instance","(.*)"))by(host_ip))
node_filesystem_free_bytes
node-exporter
node_filesystem_size_bytes
node-exporter
Node Network In (Top 10)
topk(10, sum(label_replace(max(irate(node_network_receive_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_network_receive_bytes_total
node-exporter
Node Network Out (Top 10)
topk(10, sum(label_replace(max(irate(node_network_transmit_bytes_total{cluster="$cluster"}[1m])) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_network_transmit_bytes_total
node-exporter
Node Sockets Count(Top 10)
topk(10, sum(label_replace(max(node_sockstat_TCP_alloc{cluster="$cluster"}) by (instance),"host_ip","$1","instance","(.*)"))by(host_ip))
node_sockstat_TCP_alloc
node-exporter
Container Memory Usage(Top10)
topk(10, sum (container_memory_working_set_bytes{cluster="$cluster",container !="",container!="POD"}) by (container))
container_memory_working_set_bytes
cadvisor
Container Memory Usage/Limit(Top10)
topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace))
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Container CPU Usage(Top10)
topk(10, sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",container !="",container!="POD"}[2m])) by (container))
container_cpu_usage_seconds_total
cadvisor
Container Network
topk(10, sum(irate(container_network_receive_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod))
container_network_receive_bytes_total
cadvisor
-topk(10, sum(irate(container_network_transmit_bytes_total{cluster="$cluster",image!="",container!="",container!="POD"}[2m])) by (pod))
container_network_transmit_bytes_total
cadvisor
Container Memory Usage/Limit (Top 10)
topk(10, avg(container_memory_working_set_bytes{cluster="$cluster",container!=""}/(container_spec_memory_limit_bytes{cluster="$cluster"}!=0)) by (container, pod, namespace))
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Container CPU Usage (Top 10)
topk(10, sum(irate(container_cpu_usage_seconds_total{cluster="$cluster",container!="",container!="POD"}[1m])) by (container,pod,namespace)or on() vector(0))
container_cpu_usage_seconds_total
cadvisor
Container Socket Count(Top 10)
topk(10, sum(container_sockets{cluster="$cluster",container!=""}) by (container,pod,namespace)or on() vector(0))
container_sockets
cadvisor

集群 Namespace 大盘

图表名称
查询语句
使用的指标
配置文件
CPU Usage
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))
container_cpu_usage_seconds_total
cadvisor
CPU Usage/Request(%)
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Usage/Limit(%)
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m]))/sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"}) or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Request
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Limit
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace=~"$namespace", unit="core", resource="cpu"})
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Cluster Available
sum(sum(kube_node_status_capacity{resource="cpu",cluster="$cluster",namespace=~"$namespace"}) by (node) + sum(kube_node_spec_unschedulable{cluster="$cluster",namespace=~"$namespace"}==0) by(node))
kube_node_status_capacity
kube-state-metrics
kube_node_spec_unschedulable
kube-state-metrics
StatefulSet Created
count(kube_statefulset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_statefulset_created
kube-state-metrics
Pod Created
count(kube_pod_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_pod_info
kube-state-metrics
Containers
count(kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_pod_container_info
kube-state-metrics
DaemonSet Created
count(kube_daemonset_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_daemonset_created
kube-state-metrics
Job Created
count(kube_job_info{cluster="$cluster",namespace="$namespace"})or on() vector(0)
kube_job_info
kube-state-metrics
Job Active
count(kube_job_status_active{cluster="$cluster",namespace="$namespace"}==1)or on() vector(0)
kube_job_status_active
kube-state-metrics
Cron Job Created
count(kube_cronjob_created{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_cronjob_created
kube-state-metrics
Cron Job Active
count(kube_cronjob_status_active{cluster="$cluster",namespace="$namespace"}==1) or on() vector(0)
kube_cronjob_status_active
kube-state-metrics
Unbound PVC
count(kube_persistentvolumeclaim_status_phase{phase!="Bound", cluster="$cluster",namespace="$namespace"}==1) or on() vector(0)
kube_persistentvolumeclaim_status_phase
kube-state-metrics
PersistentVolumeClaim Created
count(kube_persistentvolumeclaim_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_persistentvolumeclaim_info
kube-state-metrics
Service Created
count(kube_service_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_service_info
kube-state-metrics
LoadBalancer Created
count(kube_service_spec_type{type="LoadBalancer", cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_service_spec_type
kube-state-metrics
Ingress Created
count(kube_ingress_info{cluster="$cluster",namespace="$namespace"})or on() vector(0)
kube_ingress_info
kube-state-metrics
ConfigMap Created
count(kube_configmap_info{cluster="$cluster",namespace="$namespace"})
kube_configmap_info
kube-state-metrics
Secret Created
count(kube_secret_info{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_secret_info
kube-state-metrics
PVC Storage Requests Total
sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_persistentvolumeclaim_resource_requests_storage_bytes
kube-state-metrics
Pod NotReady
count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace) - (count(kube_pod_status_phase{phase="Succeeded", cluster="$cluster",namespace="$namespace"}==1) by(namespace) or vector(0)) or count(kube_pod_status_ready{condition="false", cluster="$cluster",namespace="$namespace"}==1) by(namespace)
kube_pod_status_ready
kube-state-metrics
kube_pod_status_phase
kube-state-metrics
kube_pod_status_ready
kube-state-metrics
Pod UnSchedulable
count(kube_pod_status_unschedulable{cluster="$cluster",namespace="$namespace"}) or on() vector(0)
kube_pod_status_unschedulable
kube-state-metrics
Deployment NotReady
count(sum(kube_deployment_status_replicas_ready{cluster="$cluster",namespace="$namespace"}) by (deployment)<sum(kube_deployment_spec_replicas{cluster="$cluster",namespace="$namespace"}) by (deployment)) or on() vector(0)
kube_deployment_status_replicas_ready
kube-state-metrics
kube_deployment_spec_replicas
kube-state-metrics
Daemonset NotReady
count(kube_daemonset_status_number_unavailable{cluster="$cluster",namespace="$namespace"}>0) or on() vector(0)
kube_daemonset_status_number_unavailable
kube-state-metrics
Job Failed
count(kube_job_status_failed{cluster="$cluster",namespace="$namespace"} == 1)
kube_job_status_failed
kube-state-metrics
CPU Usage
sum(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}[2m])) or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
CPU Quota
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"}) or on() vector(0)
container_memory_working_set_bytes
cadvisor
Memory Usage/Request(%)
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Usage/Limit(%)
sum(container_memory_working_set_bytes{cluster="$cluster",namespace=~"$namespace",container!="",container!="POD"})/sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"}) or on() vector(0)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Request
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"})
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Limit
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace=~"$namespace", unit="byte", resource="memory"})
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Cluster Available
sum(sum(kube_node_status_capacity{resource="memory"}) by (node) + sum(kube_node_spec_unschedulable==0) by(node)) or on() vector(0)
kube_node_status_capacity
kube-state-metrics
kube_node_spec_unschedulable
kube-state-metrics
Memory Usage (w/o cache)
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"}) by (pod)
container_memory_working_set_bytes
cadvisor
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})
kube_resourcequota
kube-state-metrics
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})
kube_resourcequota
kube-state-metrics
Memory Quota
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_working_set_bytes
cadvisor
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}) by (pod)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"}) by (pod)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_rss{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_rss
cadvisor
sum(container_memory_cache{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_cache
cadvisor
sum(container_memory_swap{cluster="$cluster", namespace="$namespace",container!="", container!="POD"}) by (pod)
container_memory_swap
cadvisor
Containers
group by (image, container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_status_running
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_status_restarts_total
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(irate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[1m])) by (pod,container)
kube_pod_container_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max(container_spec_cpu_quota{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000 > 0) by (container,pod)))
kube_pod_container_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
container_spec_cpu_quota
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(irate(container_cpu_usage_seconds_total{container!="",container!="POD",cluster="$cluster",namespace="$namespace"}[1m])) by (container,pod) / (max by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"})))
kube_pod_container_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_resource_limits
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container)
kube_pod_container_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max(container_spec_memory_limit_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod, container) < 1)
kube_pod_container_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"})
kube_pod_container_info
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
group by (container, container_id, pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace"}) * on(container,pod) group_left() (max(container_memory_working_set_bytes{container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,container) / max by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))
kube_pod_container_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics

API Server(独立集群)

图表名称
查询语句
使用的指标
配置文件
Availability > 99.000%
1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m]))
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
ErrorBudget > 99.000%
100 * (1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",cluster="$cluster"}[5m])) -0.990000)
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
Read Availability
1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET", cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET", cluster="$cluster"}[5m]))
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
Read SLI - Requests
sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Read SLI - Errors
sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5..",cluster="$cluster"}[5m]))/ sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Read SLI - Duration
histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",cluster="$cluster"}[5m]))) > 0
apiserver_request_duration_seconds_bucket
kube-apiserver
Write Availability
1 - ( ( sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m])) - sum by (cluster,cluster_type) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1", cluster="$cluster"}[5m])) ) + sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..", cluster="$cluster"}[5m]) or vector(0)) ) / sum by (cluster,cluster_type) (increase(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE", cluster="$cluster"}[5m]))
apiserver_request_duration_seconds_count
kube-apiserver
apiserver_request_duration_seconds_bucket
kube-apiserver
apiserver_request_total
kube-apiserver
Write SLI - Requests
sum by (code) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Write SLI - Errors
sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5..",cluster="$cluster"}[5m]))/ sum by (resource) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))
apiserver_request_total
kube-apiserver
Write SLI - Duration
histogram_quantile(0.99, sum by (le, resource,cluster,cluster_type) (rate(apiserver_request_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",cluster="$cluster"}[5m]))) > 0
apiserver_request_duration_seconds_bucket
kube-apiserver
Work Queue Add Rate
sum(rate(workqueue_adds_total{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name)
workqueue_adds_total
kubelet
Work Queue Depth
sum(rate(workqueue_depth{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name)
workqueue_depth
kubelet
Work Queue Latency
histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job="kube-apiserver", instance=~"$instance", cluster=~"$cluster"}[5m])) by (instance, name, le))
workqueue_queue_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{job="kube-apiserver",instance=~"$instance", cluster=~"$cluster"}[5m])
process_cpu_seconds_total
node-exporter

Controller Manager(独立集群)

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{cluster=~"$cluster",job="kube-controller-manager"})
up
kubelet
Work Queue Add Rate
sum(rate(workqueue_adds_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name)
workqueue_adds_total
kubelet
Work Queue Depth
sum(rate(workqueue_depth{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name)
workqueue_depth
kubelet
Work Queue Latency
histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance"}[5m])) by (instance, name, le))
workqueue_queue_duration_seconds_bucket
kubelet
Kube API Request Rate
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Get Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-controller-manager", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-controller-manager",instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter

Kubelet

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{cluster="$cluster", job="kubelet"})
up
kubelet
Running Pods
sum(kubelet_running_pods{cluster="$cluster", job="kubelet", instance=~"$instance"})
kubelet_running_pods
kubelet
Running Container
sum(kubelet_running_containers{cluster="$cluster", job="kubelet", instance=~"$instance"})
kubelet_running_containers
kubelet
Actual Volume Count
sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance", state="actual_state_of_world"})
volume_manager_total_volumes
kubelet
Desired Volume Count
sum(volume_manager_total_volumes{cluster="$cluster", job="kubelet", instance=~"$instance",state="desired_state_of_world"})
volume_manager_total_volumes
kubelet
Config Error Count
sum(rate(kubelet_node_config_error{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m]))
kubelet_node_config_error
kubelet
Operation Rate
sum(rate(kubelet_runtime_operations_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (operation_type, instance)
kubelet_runtime_operations_total
kubelet
Operation Error Rate
sum(rate(kubelet_runtime_operations_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type)
kubelet_runtime_operations_errors_total
kubelet
Operation duration 99th quantile
histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_type, le))
kubelet_runtime_operations_duration_seconds_bucket
kubelet
Pod Start Rate
sum(rate(kubelet_pod_start_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance)
kubelet_pod_start_duration_seconds_count
kubelet
sum(rate(kubelet_pod_worker_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance)
kubelet_pod_worker_duration_seconds_count
kubelet
Pod Start Duration
histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pod_start_duration_seconds_bucket
kubelet
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pod_worker_duration_seconds_bucket
kubelet
Storage Operation Rate
sum(rate(storage_operation_duration_seconds_count{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin)
storage_operation_duration_seconds_count
kubelet
Storage Operation Error Rate
sum(rate(storage_operation_errors_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin)
storage_operation_errors_total
kubelet
Storage Operation Duration 99th quantile
histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_name, volume_plugin, le))
storage_operation_duration_seconds_bucket
kubelet
Cgroup manager operation rate
sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type)
kubelet_cgroup_manager_duration_seconds_count
kubelet
Cgroup manager 99th quantile
histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance, operation_type, le))
kubelet_cgroup_manager_duration_seconds_bucket
kubelet
PLEG relist rate
sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster="$cluster", job="kubelet", instance=~"$instance"}[5m])) by (instance)
kubelet_pleg_relist_duration_seconds_count
kubelet
PLEG relist interval
histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pleg_relist_interval_seconds_bucket
kubelet
PLEG relist duration
histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])) by (instance, le))
kubelet_pleg_relist_duration_seconds_bucket
kubelet
RPC Rate
sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster="$cluster",job="kubelet", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
Request duration 99th quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster="$cluster",job="kubelet", instance=~"$instance"}[5m])) by (instance, verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{cluster="$cluster",job="kubelet",instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{cluster="$cluster",job="kubelet",instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter
Goroutines
go_goroutines{cluster="$cluster",job="kubelet",instance=~"$instance"}
go_goroutines
node-exporter

Proxy(非默认安装组件)

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{job="kube-proxy"})
up
kubelet
Rules Sync Rate
sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m]))
kubeproxy_sync_proxy_rules_duration_seconds_count
kube-proxy
Rule Sync Latency 99th Quantile
histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m]))
kubeproxy_sync_proxy_rules_duration_seconds_bucket
kube-proxy
Network Programming Rate
sum(rate(kubeproxy_network_programming_duration_seconds_count{job="kube-proxy", instance=~"$instance"}[5m]))
kubeproxy_network_programming_duration_seconds_count
kube-proxy
Network Programming Latency 99th Quantile
histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job="kube-proxy", instance=~"$instance"}[5m])) by (instance, le))
kubeproxy_network_programming_duration_seconds_bucket
kube-proxy
Kube API Request Rate
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Kube API Request Rate
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{job="kube-proxy", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy",instance=~"$instance",verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Get Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job="kube-proxy", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{job="kube-proxy",instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{job="kube-proxy",instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter

Scheduler(独立集群)

图表名称
查询语句
使用的指标
配置文件
Up
sum(up{cluster=~"$cluster", job="kube-scheduler"})
up
kubelet
Kube API Request Rate
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"2.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"3.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"4.."}[5m]))
rest_client_requests_total
kubelet
sum(rate(rest_client_requests_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance",code=~"5.."}[5m]))
rest_client_requests_total
kubelet
Post Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="POST"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Get Request Latency 99th Quantile
histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance", verb="GET"}[5m])) by (verb, url, le))
rest_client_request_duration_seconds_bucket
kubelet
Memory
process_resident_memory_bytes{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}
process_resident_memory_bytes
node-exporter
CPU usage
rate(process_cpu_seconds_total{cluster=~"$cluster",job="kube-scheduler", instance=~"$instance"}[5m])
process_cpu_seconds_total
node-exporter

集群节点监控详情

图表名称
查询语句
使用的指标
配置文件
服务器资源总览表
node_uname_info{job=~"$job", cluster=~"$cluster"} - 0
node_uname_info
node-exporter
node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - 0
node_memory_MemTotal_bytes
node-exporter
count(node_cpu_seconds_total{job=~"$job",mode='system',cluster=~"$cluster"}) by (instance)
node_cpu_seconds_total
node-exporter
sum(time() - node_boot_time_seconds{job=~"$job",cluster=~"$cluster"})by(instance)
node_boot_time_seconds
node-exporter
max((node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}) *100/(node_filesystem_avail_bytes {job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}+(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"}-node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"ext.?|xfs"})))by(instance)
node_filesystem_size_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
(1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100
node_cpu_seconds_total
node-exporter
(1 - (node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})))* 100
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
node_load5{job=~"$job",cluster=~"$cluster"}
node_load5
node-exporter
max(irate(node_disk_written_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])) by (instance)
node_disk_written_bytes_total
node-exporter
max(irate(node_network_receive_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance)
node_network_receive_bytes_total
node-exporter
max(irate(node_network_transmit_bytes_total{job=~"$job",cluster=~"$cluster"}[5m])*8) by (instance)
node_network_transmit_bytes_total
node-exporter
node_load5{job=~"$job",cluster=~"$cluster"}
node_load5
node-exporter
整体总负载与整体平均 CPU 使用率
count(node_cpu_seconds_total{job=~"$job",cluster=~"$cluster", mode='system'})
node_cpu_seconds_total
node-exporter
sum(node_load5{job=~"$job",cluster=~"$cluster"})
node_load5
node-exporter
avg(1 - avg(irate(node_cpu_seconds_total{job=~"$job",mode="idle",cluster=~"$cluster"}[5m])) by (instance)) * 100
node_cpu_seconds_total
node-exporter
整体总内存与整体平均内存使用率
sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"})
node_memory_MemTotal_bytes
node-exporter
sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"})
node_memory_MemTotal_bytes
node-exporter
node_memory_MemAvailable_bytes
node-exporter
(sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"} - node_memory_MemAvailable_bytes{job=~"$job",cluster=~"$cluster"}) / sum(node_memory_MemTotal_bytes{job=~"$job",cluster=~"$cluster"}))*100
node_memory_MemTotal_bytes
node-exporter
node_memory_MemAvailable_bytes
node-exporter
整体总磁盘与整体平均磁盘使用率
sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))
node_filesystem_size_bytes
node-exporter
sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
(sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~"$job",cluster=~"$cluster",fstype=~"xfs|ext.*"})by(device,instance))))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
运行时间
avg(time() - node_boot_time_seconds{instance=~"$node",cluster=~"$cluster"})
node_boot_time_seconds
node-exporter
CPU 核数
count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'})
node_cpu_seconds_total
node-exporter
总内存
sum(node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})
node_memory_MemTotal_bytes
node-exporter
总 CPU 使用率
100 - (avg(irate(node_cpu_seconds_total{instance=~"$node",mode="idle",cluster=~"$cluster"}[5m])) * 100)
node_cpu_seconds_total
node-exporter
内存使用率
(1 - (node_memory_MemAvailable_bytes{instance=~"$node",cluster=~"$cluster"} / (node_memory_MemTotal_bytes{instance=~"$node",cluster=~"$cluster"})))* 100
node_memory_MemAvailable_bytes
node-exporter
最大分区使用率
(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"})*100 /(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint="$maxmount"}))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
CPU iowait
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) * 100
node_cpu_seconds_total
node-exporter
各分区可用空间
node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0
node_filesystem_size_bytes
node-exporter
node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-0
node_filesystem_avail_bytes
node-exporter
(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
CPU 使用率
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="system"}[5m])) by (instance) *100
node_cpu_seconds_total
node-exporter
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="user"}[5m])) by (instance) *100
node_cpu_seconds_total
node-exporter
avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="iowait"}[5m])) by (instance) *100
node_cpu_seconds_total
node-exporter
(1 - avg(irate(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node",mode="idle"}[5m])) by (instance))*100
node_cpu_seconds_total
node-exporter
内存信息
node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"}
node_memory_MemTotal_bytes
node-exporter
node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"} - node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"}
node_memory_MemTotal_bytes
node-exporter
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"}
node_memory_MemAvailable_bytes
node-exporter
(1 - (node_memory_MemAvailable_bytes{cluster=~"$cluster",instance=~"$node"} / (node_memory_MemTotal_bytes{cluster=~"$cluster",instance=~"$node"})))* 100
node_memory_MemAvailable_bytes
node-exporter
node_memory_MemTotal_bytes
node-exporter
每秒网络带宽使用
irate(node_network_receive_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8
node_network_receive_bytes_total
node-exporter
irate(node_network_transmit_bytes_total{cluster=~"$cluster",instance=~'$node',device=~"$device"}[5m])*8
node_network_transmit_bytes_total
node-exporter
系统平均负载
node_load1{cluster=~"$cluster",instance=~"$node"}
node_load1
node-exporter
node_load5{cluster=~"$cluster",instance=~"$node"}
node_load5
node-exporter
node_load15{cluster=~"$cluster",instance=~"$node"}
node_load15
node-exporter
sum(count(node_cpu_seconds_total{cluster=~"$cluster",instance=~"$node", mode='system'}) by (cpu,instance)) by(instance)
node_cpu_seconds_total
node-exporter
每秒磁盘读写容量
irate(node_disk_read_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_read_bytes_total
node-exporter
irate(node_disk_written_bytes_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_written_bytes_total
node-exporter
磁盘使用率
(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{cluster=~"$cluster",instance=~'$node',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}))
node_filesystem_size_bytes
node-exporter
node_filesystem_free_bytes
node-exporter
node_filesystem_avail_bytes
node-exporter
node_filesystem_files_free{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"} / node_filesystem_files{cluster=~"$cluster",instance=~'$node',fstype=~"ext.?|xfs"}
node_filesystem_files_free
node-exporter
磁盘读写速率(IOPS)
irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_reads_completed_total
node-exporter
irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_writes_completed_total
node-exporter
node_disk_io_now{cluster=~"$cluster",instance=~"$node"}
node_disk_io_now
node-exporter
每1秒内 I/O 操作耗时占比
irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_io_time_seconds_total
node-exporter
每次 IO 读写的耗时
irate(node_disk_read_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_reads_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_read_time_seconds_total
node-exporter
node_disk_reads_completed_total
node-exporter
irate(node_disk_write_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m]) / irate(node_disk_writes_completed_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_write_time_seconds_total
node-exporter
node_disk_writes_completed_total
node-exporter
irate(node_disk_io_time_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_io_time_seconds_total
node-exporter
irate(node_disk_io_time_weighted_seconds_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_disk_io_time_weighted_seconds_total
node-exporter
网络 Socket 连接信息
node_netstat_Tcp_CurrEstab{cluster=~"$cluster",instance=~'$node'}
node_netstat_Tcp_CurrEstab
node-exporter
node_sockstat_TCP_tw{cluster=~"$cluster",instance=~'$node'}
node_sockstat_TCP_tw
node-exporter
node_sockstat_sockets_used{cluster=~"$cluster",instance=~'$node'}
node_sockstat_sockets_used
node-exporter
node_sockstat_UDP_inuse{cluster=~"$cluster",instance=~'$node'}
node_sockstat_UDP_inuse
node-exporter
node_sockstat_TCP_alloc{cluster=~"$cluster",instance=~'$node'}
node_sockstat_TCP_alloc
node-exporter
irate(node_netstat_Tcp_PassiveOpens{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_PassiveOpens
node-exporter
irate(node_netstat_Tcp_ActiveOpens{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_ActiveOpens
node-exporter
irate(node_netstat_Tcp_InSegs{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_InSegs
node-exporter
irate(node_netstat_Tcp_OutSegs{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_OutSegs
node-exporter
irate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster",instance=~'$node'}[5m])
node_netstat_Tcp_RetransSegs
node-exporter
打开的文件描述符(左)/每秒上下文切换次数(右)
node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}
node_filefd_allocated
node-exporter
irate(node_context_switches_total{cluster=~"$cluster",instance=~"$node"}[5m])
node_context_switches_total
node-exporter
(node_filefd_allocated{cluster=~"$cluster",instance=~"$node"}/node_filefd_maximum{cluster=~"$cluster",instance=~"$node"}) *100
node_filefd_allocated
node-exporter
node_filefd_maximum
node-exporter

节点 Pod 监控

图表名称
查询语句
使用的指标
配置文件
Pods
count(kube_pod_info{node=~"$node"})
kube_pod_info
kube-state-metrics
Pod Request Memory
sum(kube_pod_container_resource_requests_memory_bytes{node=~"$node"})by(node)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Pod Request CPU Cores
sum(kube_pod_container_resource_requests_cpu_cores{node=~"$node"})by(node)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Usage
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
CPU Quota
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", node=~"$node", container!="POD", container!=""}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node", container!="", container!="POD"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
Memory Quota
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster="$cluster", node=~"$node",container!="", container!="POD"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", node=~"$node"}) by (pod)
node_namespace_pod_container:container_memory_working_set_bytes
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Pod List
group (kube_pod_info{host_ip="$node"})by(created_by_kind, created_by_name,host_network,pod_ip,pod,priority_class,namespace)
kube_pod_info
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(kube_pod_status_phase{}==1) by (pod, phase)
kube_pod_info
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(container_memory_working_set_bytes) by (pod)
kube_pod_info
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() sum(rate(container_cpu_usage_seconds_total{image!=""}[5m])) by (pod)
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(time()-kube_pod_start_time) by (pod)
kube_pod_info
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) max(kube_pod_status_ready{condition="true"}) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
kube_pod_status_ready
kube-state-metrics
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{image!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_network_receive_bytes_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{image!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_fs_reads_bytes_total
cadvisor
min(kube_pod_info{host_ip="$node"})by(pod) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{container!="POD", container!=""}[5m])) by (pod) or on() vector(0)
kube_pod_info
kube-state-metrics
container_fs_writes_bytes_total
cadvisor

工作负载监控概览

图表名称
查询语句
使用的指标
配置文件
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.cpu"})
kube_resourcequota
kube-state-metrics
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.cpu"})
kube_resourcequota
kube-state-metrics
CPU Quota
count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="requests.memory"})
kube_resourcequota
kube-state-metrics
scalar(kube_resourcequota{cluster="$cluster", namespace="$namespace", type="hard",resource="limits.memory"})
kube_resourcequota
kube-state-metrics
Memory Quota
count(namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"}) by (workload, workload_type)
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

Deployment

图表名称
查询语句
使用的指标
配置文件
Age
time() - max(kube_deployment_created{cluster="$cluster",namespace="$namespace",deployment="$workload"})
kube_deployment_created
kube-state-metrics
Replicas(Pods)-Request
max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"})
kube_deployment_spec_replicas
kube-state-metrics
Replicas(Pods)-Ready
max(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"})
kube_deployment_status_replicas_ready
kube-state-metrics
Replica Trend
max(kube_deployment_spec_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_spec_replicas
kube-state-metrics
max(kube_deployment_status_replicas{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas
kube-state-metrics
min(kube_deployment_status_replicas_ready{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_ready
kube-state-metrics
min(kube_deployment_status_replicas_available{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_available
kube-state-metrics
min(kube_deployment_status_replicas_updated{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_updated
kube-state-metrics
min(kube_deployment_status_replicas_unavailable{deployment="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_deployment_status_replicas_unavailable
kube-state-metrics
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Quota
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Limit-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Request-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Info
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod, container))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Usage/Limit (%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Usage/Request(%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU User Time(%)
avg(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container))) by (pod,container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_cpu_user_seconds_total
cadvisor
container_cpu_user_seconds_total
cadvisor
container_cpu_system_seconds_total
cadvisor
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Quota
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="deployment"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Limit-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_spec_memory_limit_bytes
cadvisor
Memory Request-Total
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Info
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_memory_working_set_bytes
cadvisor
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
max(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by(container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Limit(%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Request(%)
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Sockets
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_sockets
cadvisor
Network In
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_receive_bytes_total
cadvisor
Network Out
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
Network Errors
sum(label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod)))
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_receive_errors_total
cadvisor
container_network_transmit_errors_total
cadvisor
Network IO
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_receive_bytes_total
cadvisor
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
File System Read
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_fs_reads_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_fs_reads_bytes_total
cadvisor
File System Write
label_replace( max(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="ReplicaSet", pod_ip!=""}) by (created_by_name, uid, pod, pod_ip, node), "replicaset", "$1", "created_by_name", "(.+)" ) * on(replicaset) group_left() max(kube_replicaset_owner{cluster="$cluster",namespace="$namespace",owner_kind="Deployment",owner_name="$workload"}) by (replicaset) * on(pod) group_right() max(rate(container_fs_writes_bytes_total{cluster="$cluster",namespace="$namespace", container!="POD", container!=""}[5m])) by (pod,container)
kube_pod_info
kube-state-metrics
kube_replicaset_owner
kube-state-metrics
container_fs_writes_bytes_total
cadvisor

StatefulSet

图表名称
查询语句
使用的指标
配置文件
Generation
max(kube_statefulset_metadata_generation{cluster="$cluster",namespace="$namespace", statefulset="$workload"})
kube_statefulset_metadata_generation
kube-state-metrics
Replicas(Pods)-Request
max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"})
kube_statefulset_replicas
kube-state-metrics
Replicas(Pods)-Ready
max(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"})
kube_statefulset_status_replicas_ready
kube-state-metrics
Age
time() - max(kube_statefulset_created{cluster="$cluster",namespace="$namespace",statefulset="$workload"})
kube_statefulset_created
kube-state-metrics
Replica Trend
max(kube_statefulset_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_replicas
kube-state-metrics
max(kube_statefulset_status_replicas{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas
kube-state-metrics
min(kube_statefulset_status_replicas_ready{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas_ready
kube-state-metrics
min(kube_statefulset_status_replicas_available{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas_available
kube-state-metrics
min(kube_statefulset_status_replicas_updated{statefulset="$workload",cluster="$cluster",namespace="$namespace"}) without (instance, pod)
kube_statefulset_status_replicas_updated
kube-state-metrics
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Quota
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Limit-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Request-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Info
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image)
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace"}) by (pod, container,image)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU Usage/Limit (%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_limits_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
CPU Usage/Request(%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_cpu_usage_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container) / max by(container, pod) (kube_pod_container_resource_requests_cpu_cores{resource="cpu", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_cpu_usage_seconds_total
cadvisor
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
CPU User Time(%)
avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() (max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod, container,image) / max(rate(container_cpu_user_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])+rate(container_cpu_system_seconds_total{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}[5m])) by (pod,container,image))) by (pod,container,image)
kube_pod_info
kube-state-metrics
container_cpu_user_seconds_total
cadvisor
container_cpu_user_seconds_total
cadvisor
container_cpu_system_seconds_total
cadvisor
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Quota
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="statefulset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Limit-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(container_spec_memory_limit_bytes{cluster="$cluster",namespace="$namespace",container!="",container!="POD"}) by (pod))
kube_pod_info
kube-state-metrics
container_spec_memory_limit_bytes
cadvisor
Memory Request-Total
sum(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"}) by (pod))
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Memory Info
avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod, image) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"}))by (container, pod, image)
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod, image) (kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
max(avg(group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{cluster="$cluster",namespace="$namespace"}))by (container, pod))by(container)
kube_pod_info
kube-state-metrics
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Limit(%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_limits_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Memory Usage/Request(%)
group(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max by(container, pod) (container_memory_working_set_bytes{cluster="$cluster",namespace="$namespace", container!="", image!="", container!="POD"})/max by(container, pod) (kube_pod_container_resource_requests_memory_bytes{resource="memory", cluster="$cluster",namespace="$namespace"})
kube_pod_info
kube-state-metrics
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Sockets
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(container_sockets{cluster="$cluster",namespace="$namespace",container!=""}) by (pod))
kube_pod_info
kube-state-metrics
container_sockets
cadvisor
Network In
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
container_network_receive_bytes_total
cadvisor
Network Out
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() sum(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod))
kube_pod_info
kube-state-metrics
container_network_transmit_bytes_total
cadvisor
Network Errors
sum(sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() (sum(container_network_receive_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod) + sum(container_network_transmit_errors_total{cluster="$cluster",namespace="$namespace"}) by (pod)))
kube_pod_info
kube-state-metrics
container_network_receive_errors_total
cadvisor
container_network_transmit_errors_total
cadvisor
Network IO
sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_network_receive_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
container_network_receive_bytes_total
cadvisor
-sum(kube_pod_info{cluster="$cluster",namespace="$namespace",created_by_kind="StatefulSet",pod_ip!="", created_by_name="$workload"}) by (pod) * on(pod) group_right() max(rate(container_network_transmit_bytes_total{cluster="$cluster",namespace="$namespace"}[5m])) by (pod)
kube_pod_info
kube-state-metrics
container_network_transmit_bytes_total
cadvisor

DaemonSet

图表名称
查询语句
使用的指标
配置文件
CPU Usage
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
CPU Quota
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", container!="POD", container!=""} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Usage
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Memory Quota
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sum( container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", container!="", container!="POD"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod) /sum( kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster", namespace="$namespace", workload="$workload", workload_type="daemonset"} ) by (pod)
container_memory_working_set_bytes
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

集群 Pod 监控

图表名称
查询语句
使用的指标
配置文件
Age
time() - max(kube_pod_created{pod=~"$pod",cluster="$cluster",namespace="$namespace"})
kube_pod_created
kube-state-metrics
Restart Count-Last 1 Hour
ceil(sum (increase(kube_pod_container_status_restarts_total{pod=~"$pod",cluster="$cluster",namespace="$namespace"}[1h])))
kube_pod_container_status_restarts_total
kube-state-metrics
Requests-CPU
sum(kube_pod_container_resource_requests_cpu_cores{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
Requests-Memory
sum(kube_pod_container_resource_requests_memory_bytes{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
Limits-CPU
sum(kube_pod_container_resource_limits_cpu_cores{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Limits-Memory
sum(kube_pod_container_resource_limits_memory_bytes{pod=~"$pod"}) or vector(0)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Containers
group by (image, container,pod) (kube_pod_container_info{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_info
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
max by (container,pod)(kube_pod_container_status_running{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_status_running
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_limits{resource="cpu",cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_limits
kube-state-metrics
sum by (container,pod)(kube_pod_container_resource_limits{resource="memory",cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_resource_limits
kube-state-metrics
max by (container,pod)(kube_pod_container_status_restarts_total{cluster="$cluster",namespace="$namespace", pod=~"$pod"})
kube_pod_container_status_restarts_total
kube-state-metrics
CPU Usage (%)
max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (container,namespace,pod) / max(container_spec_cpu_quota{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}/100000) by (container,namespace,pod) or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
container_spec_cpu_quota
cadvisor
CPU Usage By Cores
max(irate(container_cpu_usage_seconds_total{pod=~"$pod",container!="",container!="POD",cluster="$cluster",namespace=~"$namespace"}[1m])) by (pod,container,namespace)or on() vector(0)
container_cpu_usage_seconds_total
cadvisor
CPU Load (10s)
max(container_cpu_load_average_10s{namespace=~"$namespace", pod=~"$pod", container!="", container!="POD"} / 1000)by(pod,container)
container_cpu_load_average_10s
cadvisor
CPU Throttled Percent
max (rate (container_cpu_cfs_throttled_seconds_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) / max (rate (container_cpu_cfs_periods_total{image!="", container!="", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (container,pod) or on() vector(0)
container_cpu_cfs_throttled_seconds_total
cadvisor
container_cpu_cfs_periods_total
cadvisor
CPU Quota
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_requests_cpu_cores
kube-state-metrics
sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
预聚合指标
kube_pod_container_resource_limits_cpu_cores
kube-state-metrics
Memory Usage (WSS)
max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
container_memory_working_set_bytes
cadvisor
Memory Usage
max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
container_memory_usage_bytes
cadvisor
Memory Usage (RSS)
max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container) or on() vector(0)
container_memory_rss
cadvisor
Memory Cache
max(container_memory_cache{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)
container_memory_cache
cadvisor
Usage WSS/Limit (%)
(max(container_memory_working_set_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
container_memory_working_set_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Usage/Limit (%)
(max(container_memory_usage_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
container_memory_usage_bytes
cadvisor
container_spec_memory_limit_bytes
cadvisor
Usage RSS/Limit (%)
(max(container_memory_rss{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace,container)/ max(container_spec_memory_limit_bytes{pod=~"$pod",container !="",container!="POD",cluster="$cluster",namespace=~"$namespace"}) by (pod,namespace, container) * 100) <= 100 or on() vector(0)
container_memory_rss
cadvisor
container_spec_memory_limit_bytes
cadvisor
Memory Failcnt
max (increase(container_memory_failcnt{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (pod,container)
container_memory_failcnt
cadvisor
Memory Quota
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="POD", container!=""}) by (container)
container_memory_working_set_bytes
cadvisor
sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_requests_memory_bytes
kube-state-metrics
sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
sum(container_memory_working_set_bytes{cluster="$cluster", namespace="$namespace", pod="$pod", container!="", container!="POD"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)
container_memory_working_set_bytes
cadvisor
kube_pod_container_resource_limits_memory_bytes
kube-state-metrics
Network Input
max (rate (container_network_receive_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by(pod)
container_network_receive_bytes_total
cadvisor
Network Output
max (rate (container_network_transmit_bytes_total{image!="",cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m]))by(pod)
container_network_transmit_bytes_total
cadvisor
Network Input Error (%)
max (increase (container_network_receive_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_receive_packets_dropped_total
cadvisor
container_network_receive_packets_total
cadvisor
max (increase (container_network_receive_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_receive_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_receive_errors_total
cadvisor
container_network_receive_packets_total
cadvisor
Network Output Error (%)
max (increase (container_network_transmit_packets_dropped_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_transmit_packets_dropped_total
cadvisor
container_network_transmit_packets_total
cadvisor
max (increase (container_network_transmit_errors_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface) / max (increase (container_network_transmit_packets_total{id!="/", cluster="$cluster",namespace=~"$namespace", pod=~"$pod"}[1m])) by (pod,interface)
container_network_transmit_errors_total
cadvisor
container_network_transmit_packets_total
cadvisor
File System Read
max (rate(container_fs_reads_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m]))by (container,pod)
container_fs_reads_bytes_total
cadvisor
File System Write
max (rate(container_fs_writes_bytes_total{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}[1m])) by (container,pod)
container_fs_writes_bytes_total
cadvisor
Network Socket
max(container_sockets{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod)
container_sockets
cadvisor
Process Number
count(container_processes{cluster="$cluster",namespace=~"$namespace", pod=~"$pod", container!=""}) by (container,pod)
container_processes
cadvisor

集群网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
Current Rate of Bytes Transmitted
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
Current Status
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_total
cadvisor
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_total
cadvisor
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_dropped_total
cadvisor
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_dropped_total
cadvisor
Average Rate of Bytes Received
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
Average Rate of Bytes Transmitted
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
Receive Bandwidth
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_bytes_total
cadvisor
Transmit Bandwidth
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_bytes_total
cadvisor
Rate of Received Packets
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_total
cadvisor
Rate of Transmitted Packets
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_total
cadvisor
Rate of Received Packets Dropped
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_receive_packets_dropped_total
cadvisor
Rate of Transmitted Packets Dropped
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~".+"}[5m])) by (namespace))
container_network_transmit_packets_dropped_total
cadvisor
Rate of TCP Retransmits out of all sent segments
sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance))
node_netstat_Tcp_RetransSegs
node-exporter
node_netstat_Tcp_OutSegs
node-exporter
Rate of TCP SYN Retransmits out of all retransmits
sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=~"$cluster"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=~"$cluster"}[$interval:$resolution])) by (instance))
node_netstat_TcpExt_TCPSynRetrans
node-exporter
node_netstat_Tcp_RetransSegs
node-exporter

命名空间 Pods 网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]))
container_network_receive_bytes_total
cadvisor
Current Rate of Bytes Transmitted
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]))
container_network_transmit_bytes_total
cadvisor
Current Status
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_bytes_total
cadvisor
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_bytes_total
cadvisor
sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_total
cadvisor
sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_total
cadvisor
sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_dropped_total
cadvisor
sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_dropped_total
cadvisor
Receive Bandwidth
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_bytes_total
cadvisor
Transmit Bandwidth
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_bytes_total
cadvisor
Rate of Received Packets
sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_total
cadvisor
Rate of Transmitted Packets
sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_total
cadvisor
Rate of Received Packets Dropped
sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])) by (pod)
container_network_receive_packets_dropped_total
cadvisor
Rate of Transmitted Packets Dropped
sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster", namespace=~"$namespace"}[5m])) by (pod)
container_network_transmit_packets_dropped_total
cadvisor

命名空间工作负载网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Current Rate of Bytes Transmitted
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Current Status
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Received
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Transmitted
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Receive Bandwidth
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Transmit Bandwidth
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Received Packets
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Received Packets Dropped
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_receive_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets Dropped
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~".+", workload_type="$type"}) by (workload))
container_network_transmit_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

Pod 网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m]))
container_network_receive_bytes_total
cadvisor
Current Rate of Bytes Transmitted
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m]))
container_network_transmit_bytes_total
cadvisor
Receive Bandwidth
sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_receive_bytes_total
cadvisor
Transmit Bandwidth
sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_transmit_bytes_total
cadvisor
Rate of Received Packets
sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_receive_packets_total
cadvisor
Rate of Transmitted Packets
sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_transmit_packets_total
cadvisor
Rate of Received Packets Dropped
sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_receive_packets_dropped_total
cadvisor
Rate of Transmitted Packets Dropped
sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace", pod=~"$pod"}[5m])) by (pod)
container_network_transmit_packets_dropped_total
cadvisor

工作负载网络监控

图表名称
查询语句
使用的指标
配置文件
Current Rate of Bytes Received
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Current Rate of Bytes Transmitted
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Received
sort_desc(avg(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Average Rate of Bytes Transmitted
sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Receive Bandwidth
sort_desc(sum(irate(container_network_receive_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Transmit Bandwidth
sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_bytes_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标


Rate of Received Packets
sort_desc(sum(irate(container_network_receive_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets
sort_desc(sum(irate(container_network_transmit_packets_total{cluster=~"$cluster",namespace=~"$namespace"}[5m]) * on (namespace,pod) group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_packets_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Received Packets Dropped
sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_receive_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标
Rate of Transmitted Packets Dropped
sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=~"$cluster",namespace=~"$namespace"}[5m])
* on (namespace,pod)
group_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=~"$cluster",namespace=~"$namespace", workload=~"$workload", workload_type="$type"}) by (pod))
container_network_transmit_packets_dropped_total
cadvisor
namespace_workload_pod:kube_pod_owner:relabel
预聚合指标

PVC 存储监控

图表名称
查询语句
使用的指标
配置文件
Volume Space Usage
( sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) - sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) )
kubelet_volume_stats_capacity_bytes
kubelet
kubelet_volume_stats_available_bytes
kubelet
sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
kubelet_volume_stats_available_bytes
kubelet
Volume Space Usage
( kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} - kubelet_volume_stats_available_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} ) / kubelet_volume_stats_capacity_bytes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} * 100
kubelet_volume_stats_capacity_bytes
kubelet
kubelet_volume_stats_available_bytes
kubelet
kubelet_volume_stats_capacity_bytes
kubelet
Volume inodes Usage
sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"})
kubelet_volume_stats_inodes_used
kubelet
( sum without(instance, node) (kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) - sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"}) )
kubelet_volume_stats_inodes
kubelet
kubelet_volume_stats_inodes_used
kubelet
Volume inodes Usage
kubelet_volume_stats_inodes_used{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} / kubelet_volume_stats_inodes{cluster="$cluster", job="kubelet", namespace="$namespace", persistentvolumeclaim="$volume"} * 100
kubelet_volume_stats_inodes_used
kubelet
kubelet_volume_stats_inodes
kubelet