kops create cluster \
--node-count 6 \
--zones us-east-2a,us-east-2b,us-east-2c \
--master-zones us-east-2a,us-east-2b,us-east-2c \
--node-size t2.micro \
--topology private \
--networking kopeio-vxlan \
--api-loadbalancer-type=public \
--bastion \
tornado.quicknuke.com
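
Everything that follows is deployed into a monitoring namespace, so create it before applying any of the manifests below. A minimal sketch (you could equally run kubectl create namespace monitoring):

apiVersion: v1
kind: Namespace
metadata:
  name: monitoring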
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
    component: core
  annotations:
    prometheus.io/scrape: 'true'
    external-dns.alpha.kubernetes.io/hostname: prometheus.quicknuke.com.
spec:
  type: LoadBalancer
  ports:
    - port: 9090
      targetPort: webui
      name: webui
  selector:
    app: prometheus
    component: core
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      containers:
        - name: prometheus
          image: prom/prometheus:v2.1.0
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus/"
            # Enables runtime configuration reloads via HTTP POST to /-/reload.
            - "--web.enable-lifecycle"
          ports:
            - containerPort: 9090
              name: webui
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus
            - name: prometheus-rules-volume
              mountPath: /var/prometheus/rules
            - name: prometheus-storage-volume
              mountPath: /prometheus/
        # Sidecar that watches the mounted config and rules directories and
        # POSTs to Prometheus's reload endpoint whenever they change.
        - name: watch
          image: weaveworks/watch:master-5b2a6e5
          imagePullPolicy: IfNotPresent
          args: ["-v", "-t", "-p=/etc/prometheus", "-p=/var/prometheus", "curl", "-X", "POST", "--fail", "-o", "-", "-sS", "http://localhost:9090/-/reload"]
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus
            - name: prometheus-rules-volume
              mountPath: /var/prometheus/rules
      volumes:
        - name: prometheus-config-volume
          configMap:
            defaultMode: 420
            name: prometheus-server-conf
        - name: prometheus-rules-volume
          configMap:
            name: prometheus-rules-conf
        - name: prometheus-storage-volume
          emptyDir: {}
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server-conf
  labels:
    name: prometheus-server-conf
  namespace: monitoring
data:
  prometheus.yml: |-
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    alerting:
      alertmanagers:
        - kubernetes_sd_configs:
            - role: endpoints
          relabel_configs:
            - source_labels: [__meta_kubernetes_service_name]
              regex: alertmanager
              action: keep
            - source_labels: [__meta_kubernetes_namespace]
              regex: monitoring
              action: keep
            - source_labels: [__meta_kubernetes_endpoint_port_name]
              action: keep
              regex: cluster
    rule_files:
      - "/var/prometheus/rules/*_rules.yml"
      - "/var/prometheus/rules/*_alerts.yml"
    scrape_configs:
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
      - job_name: 'kubernetes-nodes'
        scheme: https
        tls_config:
          insecure_skip_verify: true
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics
      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          insecure_skip_verify: true
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      - job_name: 'kubernetes-apiservers'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
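
The Deployment above also mounts a prometheus-rules-conf ConfigMap into /var/prometheus/rules, and the rule_files globs load anything matching *_rules.yml or *_alerts.yml from it. That ConfigMap isn't shown in this section; a minimal sketch of its shape, with an illustrative file name and a placeholder alert of our own choosing, would be:

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules-conf
  namespace: monitoring
data:
  # The file name must match one of the rule_files globs above.
  example_alerts.yml: |-
    groups:
    - name: example_alerts
      rules:
      - alert: PrometheusTargetDown
        expr: up == 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Target {{ $labels.instance }} is down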
apiVersion: v1
kind: Service
metadata:
  name: alertmanager-webui
  namespace: monitoring
  labels:
    app: alertmanager
    component: core
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9093"
    external-dns.alpha.kubernetes.io/hostname: alertmanager.quicknuke.com.
spec:
  type: LoadBalancer
  ports:
    - port: 9093
      name: metrics
  selector:
    app: alertmanager
    component: core
---
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
  namespace: monitoring
  labels:
    app: alertmanager
    component: core
spec:
  ports:
    - port: 9093
      name: cluster
  type: ClusterIP
  clusterIP: None
  selector:
    app: alertmanager
    component: core
---
apiVersion: apps/v1beta2
kind: StatefulSet
metadata:
  name: alertmanager
  namespace: monitoring
  labels:
    app: alertmanager
    component: core
spec:
  updateStrategy:
    type: RollingUpdate
  replicas: 3
  selector:
    matchLabels:
      app: alertmanager
      component: core
  serviceName: alertmanager
  template:
    metadata:
      labels:
        app: alertmanager
        component: core
    spec:
      containers:
        - name: alertmanager
          image: quay.io/prometheus/alertmanager:master
          imagePullPolicy: IfNotPresent
          command:
            - "sh"
            - "-c"
          # With sh -c, the flags below are passed as a single command string.
          # Each replica peers with the stable per-pod DNS names that the
          # headless alertmanager service gives the StatefulSet's pods.
          args:
            - >
              /bin/alertmanager
              --config.file=/etc/alertmanager/config.yml
              --web.listen-address=0.0.0.0:9093
              --cluster.listen-address=0.0.0.0:8001
              --storage.path=/alertmanager
              --cluster.peer="alertmanager-0.alertmanager.monitoring.svc:8001"
              --cluster.peer="alertmanager-1.alertmanager.monitoring.svc:8001"
              --cluster.peer="alertmanager-2.alertmanager.monitoring.svc:8001"
              --log.level=debug
          ports:
            - containerPort: 9093
              name: web
              protocol: TCP
            - containerPort: 8001
              name: cluster
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /api/v1/status
              port: web
              scheme: HTTP
            failureThreshold: 10
          readinessProbe:
            failureThreshold: 10
            httpGet:
              path: /api/v1/status
              port: web
              scheme: HTTP
            initialDelaySeconds: 3
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 3
          volumeMounts:
            - name: alertmanager-config-volume
              mountPath: /etc/alertmanager/
            - name: alertmanager-data-volume
              mountPath: /alertmanager/
      volumes:
        - name: alertmanager-config-volume
          configMap:
            name: alertmanager-server-conf
        - name: alertmanager-data-volume
          emptyDir: {}
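
The StatefulSet mounts an alertmanager-server-conf ConfigMap that isn't shown in this section; the --config.file flag expects it to provide /etc/alertmanager/config.yml. A minimal sketch, with a placeholder email receiver standing in for whatever routing you actually use (addresses and smarthost are assumptions):

apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-server-conf
  namespace: monitoring
data:
  config.yml: |-
    route:
      receiver: default
    receivers:
    - name: default
      email_configs:
      - to: 'alerts@example.com'
        from: 'alertmanager@example.com'
        smarthost: 'smtp.example.com:25'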
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
#
# If you are using Kubernetes 1.7.2 or earlier, please take note of the comments
# for the kubernetes-cadvisor job; you will need to edit or remove this job.

# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
scrape_configs:
- job_name: 'kubernetes-apiservers'

  kubernetes_sd_configs:
  - role: endpoints

  # Default to scraping over https. If required, just disable this or change to
  # `http`.
  scheme: https

  # This TLS & bearer token file config is used to connect to the actual scrape
  # endpoints for cluster components. This is separate to discovery auth
  # configuration because discovery & scraping are two separate concerns in
  # Prometheus. The discovery auth config is automatic if Prometheus runs inside
  # the cluster. Otherwise, more config options have to be provided within the
  # <kubernetes_sd_config>.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # If your node certificates are self-signed or use a different CA to the
    # master CA, then disable certificate verification below. Note that
    # certificate verification is an integral part of a secure infrastructure
    # so this should only be disabled in a controlled environment. You can
    # disable certificate verification by uncommenting the line below.
    #
    # insecure_skip_verify: true
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

  # Keep only the default/kubernetes service endpoints for the https port. This
  # will add targets for each API server which Kubernetes adds an endpoint to
  # the default/kubernetes service.
  relabel_configs:
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
    action: keep
    regex: default;kubernetes;https

# Scrape config for nodes (kubelet).
#
# Rather than connecting directly to the node, the scrape is proxied through the
# Kubernetes apiserver. This means it will work if Prometheus is running out of
# cluster, or can't connect to nodes for some other reason (e.g. because of
# firewalling).
- job_name: 'kubernetes-nodes'

  # Default to scraping over https. If required, just disable this or change to
  # `http`.
  scheme: https

  # This TLS & bearer token file config is used to connect to the actual scrape
  # endpoints for cluster components. This is separate to discovery auth
  # configuration because discovery & scraping are two separate concerns in
  # Prometheus. The discovery auth config is automatic if Prometheus runs inside
  # the cluster. Otherwise, more config options have to be provided within the
  # <kubernetes_sd_config>.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

  kubernetes_sd_configs:
  - role: node

  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics

# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
# the --cadvisor-port=0 Kubelet flag).
#
# This job is not necessary and should be removed in Kubernetes 1.6 and
# earlier versions, or it will cause the metrics to be scraped twice.
- job_name: 'kubernetes-cadvisor'

  # Default to scraping over https. If required, just disable this or change to
  # `http`.
  scheme: https

  # This TLS & bearer token file config is used to connect to the actual scrape
  # endpoints for cluster components. This is separate to discovery auth
  # configuration because discovery & scraping are two separate concerns in
  # Prometheus. The discovery auth config is automatic if Prometheus runs inside
  # the cluster. Otherwise, more config options have to be provided within the
  # <kubernetes_sd_config>.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

  kubernetes_sd_configs:
  - role: node

  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

# Example scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# for all or only some endpoints.
- job_name: 'kubernetes-service-endpoints'

  kubernetes_sd_configs:
  - role: endpoints

  relabel_configs:
  # Example relabel to scrape only endpoints that have
  # "example.io/should_be_scraped = true" annotation.
  # - source_labels: [__meta_kubernetes_service_annotation_example_io_should_be_scraped]
  #   action: keep
  #   regex: true
  #
  # Example relabel to customize metric path based on endpoints
  # "example.io/metric_path = <metric path>" annotation.
  # - source_labels: [__meta_kubernetes_service_annotation_example_io_metric_path]
  #   action: replace
  #   target_label: __metrics_path__
  #   regex: (.+)
  #
  # Example relabel to scrape only single, desired port for the service based
  # on endpoints "example.io/scrape_port = <port>" annotation.
  # - source_labels: [__address__, __meta_kubernetes_service_annotation_example_io_scrape_port]
  #   action: replace
  #   regex: ([^:]+)(?::\d+)?;(\d+)
  #   replacement: $1:$2
  #   target_label: __address__
  #
  # Example relabel to configure scrape scheme for all service scrape targets
  # based on endpoints "example.io/scrape_scheme = <scheme>" annotation.
  # - source_labels: [__meta_kubernetes_service_annotation_example_io_scrape_scheme]
  #   action: replace
  #   target_label: __scheme__
  #   regex: (https?)
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    action: replace
    target_label: kubernetes_name

# Example scrape config for probing services via the Blackbox Exporter.
#
# The relabeling allows the actual service scrape endpoint to be configured
# for all or only some services.
- job_name: 'kubernetes-services'

  metrics_path: /probe
  params:
    module: [http_2xx]

  kubernetes_sd_configs:
  - role: service

  relabel_configs:
  # Example relabel to probe only some services that have "example.io/should_be_probed = true" annotation
  # - source_labels: [__meta_kubernetes_service_annotation_example_io_should_be_probed]
  #   action: keep
  #   regex: true
  - source_labels: [__address__]
    target_label: __param_target
  - target_label: __address__
    replacement: blackbox-exporter.example.com:9115
  - source_labels: [__param_target]
    target_label: instance
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    target_label: kubernetes_name

# Example scrape config for probing ingresses via the Blackbox Exporter.
#
# The relabeling allows the actual ingress scrape endpoint to be configured
# for all or only some services.
- job_name: 'kubernetes-ingresses'

  metrics_path: /probe
  params:
    module: [http_2xx]

  kubernetes_sd_configs:
  - role: ingress

  relabel_configs:
  # Example relabel to probe only some ingresses that have "example.io/should_be_probed = true" annotation
  # - source_labels: [__meta_kubernetes_ingress_annotation_example_io_should_be_probed]
  #   action: keep
  #   regex: true
  - source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
    regex: (.+);(.+);(.+)
    replacement: ${1}://${2}${3}
    target_label: __param_target
  - target_label: __address__
    replacement: blackbox-exporter.example.com:9115
  - source_labels: [__param_target]
    target_label: instance
  - action: labelmap
    regex: __meta_kubernetes_ingress_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_ingress_name]
    target_label: kubernetes_name

# Example scrape config for pods
#
# The relabeling allows the actual pod scrape to be configured
# for all the declared ports (or port-free target if none is declared)
# or only some ports.
- job_name: 'kubernetes-pods'

  kubernetes_sd_configs:
  - role: pod

  relabel_configs:
  # Example relabel to scrape only pods that have
  # "example.io/should_be_scraped = true" annotation.
  # - source_labels: [__meta_kubernetes_pod_annotation_example_io_should_be_scraped]
  #   action: keep
  #   regex: true
  #
  # Example relabel to customize metric path based on pod
  # "example.io/metric_path = <metric path>" annotation.
  # - source_labels: [__meta_kubernetes_pod_annotation_example_io_metric_path]
  #   action: replace
  #   target_label: __metrics_path__
  #   regex: (.+)
  #
  # Example relabel to scrape only single, desired port for the pod
  # based on pod "example.io/scrape_port = <port>" annotation.
  # Note that __address__ is modified here, so if pod containers' ports
  # are declared, they all will be ignored.
  # - source_labels: [__address__, __meta_kubernetes_pod_annotation_example_io_scrape_port]
  #   action: replace
  #   regex: ([^:]+)(?::\d+)?;(\d+)
  #   replacement: $1:$2
  #   target_label: __address__
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
  labels:
    app: node-exporter
  name: node-exporter
  namespace: monitoring
spec:
  clusterIP: None
  ports:
    - name: scrape
      port: 9100
      protocol: TCP
  selector:
    app: node-exporter
  type: ClusterIP
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
spec:
  template:
    metadata:
      labels:
        app: node-exporter
      name: node-exporter
    spec:
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      hostNetwork: true
      hostPID: true
      hostIPC: true
      securityContext:
        runAsUser: 0
      containers:
        - image: prom/node-exporter:latest
          name: node-exporter
          volumeMounts:
            - mountPath: /run/systemd/private
              name: systemd-socket
              readOnly: true
          args:
            - "--collector.systemd"
            - "--collector.systemd.unit-whitelist=(docker|ssh|rsyslog|kubelet).service"
          ports:
            - containerPort: 9100
              hostPort: 9100
              name: scrape
          livenessProbe:
            httpGet:
              path: /metrics
              port: 9100
            initialDelaySeconds: 30
            timeoutSeconds: 10
            periodSeconds: 1
          readinessProbe:
            failureThreshold: 5
            httpGet:
              path: /metrics
              port: 9100
            initialDelaySeconds: 10
            timeoutSeconds: 10
            periodSeconds: 2
      volumes:
        - hostPath:
            path: /run/systemd/private
          name: systemd-socket
The container uses the prom/node-exporter image (https://hub.docker.com/r/prom/node-exporter), pulling the latest version. We also mount a volume for the host directory /run/systemd/private, which allows the Node Exporter to access systemd and collect the service state of systemd-managed services on the instance.
The prometheus.io/scrape: 'true' annotation (https://kubernetes.io/zh/docs/concepts/overview/working-with-objects/annotations/), set as metadata on the service, tells Prometheus that it should scrape this service.

kubectl create -f ./node-exporter.yml -n monitoring
daemonset "node-exporter" created
service "node-exporter" created

To make monitoring the default namespace for subsequent commands:

kubectl config set-context $(kubectl config current-context) --namespace=monitoring

The kubernetes-service-endpoints job scrapes only endpoints whose service carries the prometheus.io/scrape annotation (set to true). We then use the built-in Kubernetes service discovery to find those endpoints and return them as potential targets for Prometheus.
- job_name: 'kubernetes-service-endpoints'
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
    action: replace
    target_label: __scheme__
    regex: (https?)
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
    action: replace
    target_label: __address__
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    action: replace
    target_label: kubernetes_name
endpoints
The endpoints role discovers targets from listed endpoints of a service. For each endpoint address one target is discovered per port. If the endpoint is backed by a pod, all additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well.
Available meta labels:
__meta_kubernetes_namespace: The namespace of the endpoints object.
__meta_kubernetes_endpoints_name: The names of the endpoints object.
For all targets discovered directly from the endpoints list (those not additionally inferred from underlying pods), the following labels are attached:
__meta_kubernetes_endpoint_hostname: Hostname of the endpoint.
__meta_kubernetes_endpoint_node_name: Name of the node hosting the endpoint.
__meta_kubernetes_endpoint_ready: Set to true or false for the endpoint's ready state.
__meta_kubernetes_endpoint_port_name: Name of the endpoint port.
__meta_kubernetes_endpoint_port_protocol: Protocol of the endpoint port.
__meta_kubernetes_endpoint_address_target_kind: Kind of the endpoint address target.
__meta_kubernetes_endpoint_address_target_name: Name of the endpoint address target.
If the endpoints belong to a service, all labels of the role: service discovery are attached.
For all targets backed by a pod, all labels of the role: pod discovery are attached.
Recall the prometheus.io/scrape: 'true' annotation. During service discovery, the prometheus.io/scrape annotation is converted to prometheus_io_scrape to create a valid label name, because dots and slashes are not legal characters in Prometheus metric labels. And since it is an annotation on a Kubernetes service, the Prometheus server also adds the __meta_kubernetes_service_annotation_ prefix to the label. The first rule keeps only targets whose __meta_kubernetes_service_annotation_prometheus_io_scrape label is set to true; all other targets are dropped, so you scrape only the endpoints you need. The job honors three further annotations: prometheus.io/scheme, prometheus.io/path, and prometheus.io/port. If these are present, their contents are used as the scheme, path, and port to scrape. This lets us control precisely what is scraped from a service endpoint, which makes the job considerably more flexible. The labelmap rule then maps metadata labels like __meta_kubernetes_service_label_app to a simple app label. The next rules copy the __meta_kubernetes_namespace label to kubernetes_namespace and the __meta_kubernetes_service_name metadata label to kubernetes_name.
kubectl replace -f ./prom-config-map-v1.yml -n monitoring
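
With the updated configuration loaded, any service annotated this way is discovered and scraped automatically. A hypothetical example (the service name, namespace, and port are illustrative, not from this chapter):

apiVersion: v1
kind: Service
metadata:
  name: my-app
  namespace: default
  labels:
    app: my-app
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/path: '/metrics'
    prometheus.io/port: '8080'
spec:
  ports:
    - name: metrics
      port: 8080
  selector:
    app: my-app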
- alert: KubernetesServiceDown
  expr: up{job="kubernetes-service-endpoints"} == 0
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: Pod {{ $labels.instance }} is down!
- alert: KubernetesServicesGone
  expr: absent(up{job="kubernetes-service-endpoints"})
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: No Kubernetes services are reporting!
    description: Werner Heisenberg says - OMG Where are my servicez?
- alert: CriticalServiceDown
  expr: node_systemd_unit_state{state="active"} != 1
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: Service {{ $labels.name }} failed to start.
    description: Service {{ $labels.instance }} failed to (re)start service {{ $labels.name }}.
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
spec:
  ports:
    - name: metrics
      port: 8080
      targetPort: metrics
      protocol: TCP
  selector:
    app: kube-state-metrics
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  replicas: 1
  template:
    metadata:
      name: kube-state-metrics-main
      labels:
        app: kube-state-metrics
    spec:
      containers:
        - name: kube-state-metrics
          image: quay.io/coreos/kube-state-metrics:latest
          ports:
            - containerPort: 8080
              name: metrics
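
Note that kube-state-metrics builds its metrics by querying the Kubernetes API, so on clusters with RBAC enabled it typically also needs a ServiceAccount bound to a read-only ClusterRole; the deployment above omits this. A rough sketch of the minimum shape (the resource list is trimmed and is our assumption; adjust it to your cluster, and set serviceAccountName: kube-state-metrics in the pod spec):

apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  - apiGroups: [""]
    resources: ["nodes", "pods", "services", "namespaces"]
    verbs: ["list", "watch"]
  - apiGroups: ["extensions", "apps"]
    resources: ["deployments", "daemonsets", "replicasets"]
    verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: monitoring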
- alert: TornadoRedisCacheMissesHigh
  expr: redis_keyspace_hits_total / (redis_keyspace_hits_total + redis_keyspace_misses_total) < 0.8
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: Redis Server {{ $labels.instance }} Cache Misses are high.
- alert: DeploymentReplicasNotUpdated
  expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas)
    or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas))
    unless (kube_deployment_spec_paused == 1)
  for: 5m
  labels:
    severity: warning
  annotations:
    description: Replicas are not updated and available for deployment {{ $labels.namespace }}/{{ $labels.deployment }}
    summary: Deployment replicas are outdated
- alert: PodzFrequentlyRestarting
  expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
  for: 10m
  labels:
    severity: warning
  annotations:
    description: Pod {{ $labels.namespace }}/{{ $labels.pod }} was restarted {{ $value }} times within the last hour
    summary: Pod is restarting frequently
increase

increase(v range-vector) calculates the increase in the time series in the range vector. Breaks in monotonicity (such as counter resets due to target restarts) are automatically adjusted for. The increase is extrapolated to cover the full time range as specified in the range vector selector, so that it is possible to get a non-integer result even if a counter increases only by integer increments. The following example expression returns the number of HTTP requests as measured over the last 5 minutes, per time series in the range vector:

increase(http_requests_total{job="api-server"}[5m])

increase should only be used with counters. It is syntactic sugar for rate(v) multiplied by the number of seconds under the specified time range window, and should be used primarily for human readability. Use rate in recording rules so that increases are tracked consistently on a per-second basis.
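
In line with that guidance, restart tracking could also be kept on a per-second basis as a recording rule for dashboards; a sketch of our own (the rule name is an assumption, the metric is the same kube-state-metrics counter used in the alert above):

groups:
- name: kubernetes_rules
  rules:
  - record: pod:container_restarts:rate5m
    expr: rate(kube_pod_container_status_restarts_total[5m])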
groups:
- name: kubernetes_alerts
  rules:
  - alert: DeploymentGenerationOff
    expr: kube_deployment_status_observed_generation != kube_deployment_metadata_generation
    for: 5m
    labels:
      severity: warning
    annotations:
      description: Deployment generation does not match expected generation {{ $labels.namespace }}/{{ $labels.deployment }}
      summary: Deployment is outdated
  - alert: DeploymentReplicasNotUpdated
    expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas)
      or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas))
      unless (kube_deployment_spec_paused == 1)
    for: 5m
    labels:
      severity: warning
    annotations:
      description: Replicas are not updated and available for deployment {{ $labels.namespace }}/{{ $labels.deployment }}
      summary: Deployment replicas are outdated
  - alert: PodzFrequentlyRestarting
    expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
    for: 10m
    labels:
      severity: warning
    annotations:
      description: Pod {{ $labels.namespace }}/{{ $labels.pod }} was restarted {{ $value }} times within the last hour
      summary: Pod is restarting frequently
  - alert: KubeNodeNotReady
    expr: kube_node_status_condition{condition="Ready",status="true"} == 0
    for: 1h
    labels:
      severity: warning
    annotations:
      description: The Kubelet on {{ $labels.node }} has not checked in with the API,
        or has set itself to NotReady, for more than an hour
      summary: Node status is NotReady
  - alert: KubeManyNodezNotReady
    expr: count(kube_node_status_condition{condition="Ready",status="true"} == 0)
      > 1 and (count(kube_node_status_condition{condition="Ready",status="true"} ==
      0) / count(kube_node_status_condition{condition="Ready",status="true"})) > 0.2
    for: 1m
    labels:
      severity: critical
    annotations:
      description: '{{ $value }}% of Kubernetes nodes are not ready'
  - alert: APIHighLatency
    expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
    for: 10m
    labels:
      severity: critical
    annotations:
      description: the API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}
  - alert: APIServerErrorsHigh
    expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m]) * 100 > 5
    for: 10m
    labels:
      severity: critical
    annotations:
      description: API server returns errors for {{ $value }}% of requests
  - alert: KubernetesAPIServerDown
    expr: up{job="kubernetes-apiservers"} == 0
    for: 10m
    labels:
      severity: critical
    annotations:
      summary: Apiserver {{ $labels.instance }} is down!
  - alert: KubernetesAPIServersGone
    expr: absent(up{job="kubernetes-apiservers"})
    for: 10m
    labels:
      severity: critical
    annotations:
      summary: No Kubernetes apiservers are reporting!
      description: Werner Heisenberg says - OMG Where are my apiserverz?
- job_name: 'kubernetes-apiservers'
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    insecure_skip_verify: true
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
    action: keep
    regex: default;kubernetes;https
- record: apiserver_latency_seconds:quantile
  expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
  labels:
    quantile: "0.99"
- record: apiserver_latency_seconds:quantile
  expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
  labels:
    quantile: "0.9"
- record: apiserver_latency_seconds:quantile
  expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) / 1e+06
  labels:
    quantile: "0.5"
histogram_quantile(φ float, b instant-vector) calculates the φ-quantile (0 ≤ φ ≤ 1) from the buckets b of a histogram. (See histograms and summaries for a detailed explanation of φ-quantiles and the usage of the histogram metric type in general.) The samples in b are the counts of observations in each bucket. Each sample must have a label le where the label value denotes the inclusive upper bound of the bucket. (Samples without such a label are silently ignored.) The histogram metric type automatically provides time series with the _bucket suffix and the appropriate labels. Use the rate() function to specify the time window for the quantile calculation.

Example: A histogram metric is called http_request_duration_seconds. To calculate the 90th percentile of request durations over the last 10m, use the following expression:

histogram_quantile(0.9, rate(http_request_duration_seconds_bucket[10m]))

The quantile is calculated for each label combination in http_request_duration_seconds. To aggregate, use the sum() aggregator around the rate() function. Since the le label is required by histogram_quantile(), it has to be included in the by clause. The following expression aggregates the 90th percentile by job:

histogram_quantile(0.9, sum(rate(http_request_duration_seconds_bucket[10m])) by (job, le))

To aggregate everything, specify only the le label:

histogram_quantile(0.9, sum(rate(http_request_duration_seconds_bucket[10m])) by (le))

The histogram_quantile() function interpolates quantile values by assuming a linear distribution within a bucket. The highest bucket must have an upper bound of +Inf. (Otherwise, NaN is returned.) If a quantile is located in the highest bucket, the upper bound of the second highest bucket is returned. A lower limit of the lowest bucket is assumed to be 0 if the upper bound of that bucket is greater than 0. In that case, the usual linear interpolation is applied within that bucket. Otherwise, the upper bound of the lowest bucket is returned for quantiles located in the lowest bucket. If b contains fewer than two buckets, NaN is returned. For φ < 0, -Inf is returned. For φ > 1, +Inf is returned.
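
The recording rules above compute the quantile per label combination. If you instead wanted a single latency figure per verb, the docs' sum ... by (le) pattern applies directly to the same bucket metric; a sketch of our own (the rule name is an assumption, and the / 1e+06 converts the API server's microsecond buckets to seconds as above):

- record: apiserver_latency_seconds:verb:quantile
  expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket[5m])) by (verb, le)) / 1e+06
  labels:
    quantile: "0.99"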
- alert: APIHighLatency
  expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
  for: 10m
  labels:
    severity: critical
  annotations:
    description: the API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}
- alert: APIServerErrorsHigh
  expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m]) * 100 > 5
  for: 10m
  labels:
    severity: critical
  annotations:
    description: API server returns errors for {{ $value }}% of requests
- alert: KubernetesAPIServerDown
  expr: up{job="kubernetes-apiservers"} == 0
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: Apiserver {{ $labels.instance }} is down!
- job_name: 'kubernetes-cadvisor'
  scheme: https
  tls_config:
    insecure_skip_verify: true
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
The next rule replaces the __address__ label with the default DNS name of the Kubernetes API server. We then use one of the metadata labels, the label holding the node name, to create a new __metrics_path__ label that passes the node name into the API path /api/v1/nodes/${1}/proxy/metrics/cadvisor.
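
With the cAdvisor metrics flowing, container-level alerts become possible. As one hedged example (the threshold, rule name, and label name are our own assumptions, not from this chapter), CPU throttling per container could be alerted on like this:

- alert: ContainerCPUThrottlingHigh
  expr: rate(container_cpu_cfs_throttled_seconds_total[5m]) > 1
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: Container {{ $labels.container_name }} is being heavily CPU throttled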