公司有一台Mac笔记本用来跑自动化脚本,但是机器上部署了很多服务,导致可用的内存和CPU资源比较少,有一个jenkins的agent经常被kill掉.
所以想监控一下这台电脑的性能情况,以便做一些调整.因为平时基本上是通过ssh登录来使用这台电脑,而且希望能实时关注性能数据.
大概能想到两个方案, 1、获取当前机器的内存和CPU数据,定时发送消息 2、搭建一个简单的性能监控平台
#!/usr/bin/python
import re
import requests
import subprocess
def get_per(ps_output=None, vm_output=None):
    """Build, print and return a memory usage report for this macOS host.

    The report combines four page counters printed by ``vm_stat`` with the
    summed resident set size (RSS) of every process listed by ``ps``.

    :param ps_output: optional pre-captured text of ``ps -caxm -orss,comm``;
        when ``None`` the command is executed.
    :param vm_output: optional pre-captured text of ``vm_stat``; when ``None``
        the command is executed.
    :return: the five-line report, identical to the text that is printed.
    :raises KeyError: if one of the required ``vm_stat`` counters is missing.
    """
    if ps_output is None:
        ps_output = subprocess.Popen(
            ['ps', '-caxm', '-orss,comm'], stdout=subprocess.PIPE
        ).communicate()[0].decode()
    if vm_output is None:
        vm_output = subprocess.Popen(
            ['vm_stat'], stdout=subprocess.PIPE
        ).communicate()[0].decode()

    rss_total = _sum_rss_bytes(ps_output)
    vm_stats = _parse_vm_stat(vm_output)

    mib = 1024 * 1024
    # Format the report once and reuse it for both printing and returning.
    report = '\n'.join([
        'Wired Memory:\t\t%d MB' % (vm_stats["Pages wired down"] // mib),
        'Active Memory:\t\t%d MB' % (vm_stats["Pages active"] // mib),
        'Inactive Memory:\t%d MB' % (vm_stats["Pages inactive"] // mib),
        'Free Memory:\t\t%d MB' % (vm_stats["Pages free"] // mib),
        'Real Mem Total (ps):\t%.3f MB' % (rss_total / mib),
    ])
    print(report)
    return report


def _sum_rss_bytes(ps_text):
    """Return the total RSS, in bytes, of the rows in ``ps -orss,comm`` text."""
    total = 0.0
    for line in ps_text.split('\n')[1:]:  # the first line is the column header
        fields = line.split()
        if not fields:
            continue
        try:
            total += float(fields[0]) * 1024  # the RSS column is in 1024-byte units
        except ValueError:
            continue  # row does not start with a number; ignore it
    return total


def _parse_vm_stat(vm_text):
    """Return a ``{counter name: size in bytes}`` dict parsed from ``vm_stat`` text."""
    # vm_stat states its page size in the header line. The size is not always
    # 4096 (Apple Silicon uses 16384-byte pages), so read it instead of
    # assuming it; fall back to 4096 only when the header is absent.
    header = re.search(r'page size of (\d+) bytes', vm_text)
    page_size = int(header.group(1)) if header else 4096

    stats = {}
    for line in vm_text.split('\n'):
        name, _, value = line.strip().partition(':')
        value = value.strip().rstrip('.')  # counters are printed as "12345."
        if value.isdigit():  # skips the header line and blank lines
            stats[name] = int(value) * page_size
    return stats
def get_jenkins_java_status():
    """Alert on missing Jenkins processes, then send the memory report.

    Scans the output of ``ps -ef`` for the Jenkins master (``jenkins.war``)
    and agent (``agent.jar``) command lines. One notification is sent through
    ``send_message_to_group`` for each marker that is not found, followed by
    a message carrying the report returned by ``get_per()``.

    :return: None
    """
    ps_output = subprocess.Popen(
        ['ps', '-ef'], stdout=subprocess.PIPE
    ).communicate()[0].decode()

    # Search the whole listing, including its last row. Neither marker
    # contains a newline, so a substring test on the full output is the same
    # as checking every line individually.
    master_running = 'jenkins.war' in ps_output
    agent_running = 'agent.jar' in ps_output

    if not master_running:
        send_message_to_group({'content': 'Mac M1电脑的jenkins Master节点下线'})
    if not agent_running:
        send_message_to_group({'content': 'Mac M1电脑的jenkins Agent节点下线'})

    # The performance report is sent on every run, whatever the process state.
    send_message_to_group(
        {'content': 'Mac M1电脑的性能: \n{}'.format(get_per())}
    )
def send_message_to_group(params, timeout=10):
    """Send a markdown message through a WeCom (Enterprise WeChat) robot webhook.

    Delivery is best effort: any error raised while building or sending the
    request is printed instead of propagated.

    :param params: dict whose ``'content'`` value is the markdown text to send.
    :param timeout: seconds to wait for the webhook before giving up, so a
        stalled connection cannot hang the calling job indefinitely.
    :return: None
    """
    try:
        # Presumably a redacted placeholder; set to the real webhook URL.
        webhook_api = "xxxxxxxx"
        data = {
            "msgtype": "markdown",
            "markdown": {
                "content": params['content']
            }
        }
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as in the original; confirm whether it is really required.
        r = requests.post(webhook_api, json=data, verify=False, timeout=timeout)
        print(r.json())
        print('send to message')
    except Exception as e:
        print(e)
# Script entry point: run one status check and report each time the file is executed.
get_jenkins_java_status()
上面这个脚本放到crontab的定时任务下即可.
架构: prometheus + grafana + node-exporter
主要用于存储数据
docker pull prom/prometheus
prometheus配置
# Prometheus server configuration (prometheus.yml).
global:
  scrape_interval: 60s      # how often targets are scraped
  evaluation_interval: 60s  # how often rules are evaluated
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus
docker run -d \
-p 9090:9090 \
-v /Users/xinxi/Documents/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \
prom/prometheus
前端数据展示
docker run -d -p 3000:3000 grafana/grafana
采集机器的性能数据
docker run -d -p 9100:9100 \
-v "/proc:/host/proc:ro" \
-v "/sys:/host/sys:ro" \
-v "/:/rootfs:ro" \
prom/node-exporter