作者:余淼
1.0 背景介绍
在实际生产环境(production environment)中,很多系统出于安全考虑,只允许通过指定的堡垒服务器(bastion server)登录到虚拟机上执行操作。这样做
虽然大大提高了安全性,但也给DevOps带来了不小的障碍。这篇文章介绍一个很简单但很常见的场景:从上百台服务器上收集某些文件,比如messages
系统日志文件。
有几个前提问题:
2.0 解决方案
3.0 目前发现可以改善的功能
备注:由于赶进度,优先解决了用户的问题,脚本还不够完善,后续可以重构并进一步优化。
4.0 python脚本和配置文件
4.1 collect_messages.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
NAME
collect_messages.py
DESCRIPTION
Collect messages log files for all dom0s and domUs inside a POD to /tmp directory
NOTES
This shall be executed on a Linux bastion server
Example of contents of HOST_LIST_FILE:
(copied from FleetManager)
POD Dom0 DomU Identifier
EEHO 111.usdc2.oraclecloud.com 111_1.usdc2.oraclecloud.com AUXVM_SCALE12
EEHO 222.usdc2.oraclecloud.com 222_4.usdc2.oraclecloud.com AUXVM_SCALE32
Usage: collect_messages.py [options]
Copy messages log files for a POD from a Linux bastion server to /tmp
directory
Options:
-h, --help show this help message and exit
-d, --debug Turn on debug information
-l HOST_LIST_FILE, --pod_dom_list=HOST_LIST_FILE
POD, Dom0 and DomU mappings
MODIFIED (MM/DD/YY)
Kevin Yu 07/04/19 - Initial version
"""
# pylint: disable=line-too-long,missing-docstring
import os
import sys
import optparse
import pexpect
import signal
import pprint
from subprocess import Popen, PIPE
# Password sent at the ssh/scp "<user>@<host>'s password:" prompt and again at
# the pbrun 'Password:' prompt.
# NOTE(review): the value is masked in this listing; a real credential should
# not be committed inline -- TODO confirm how it is meant to be provisioned.
G_CONF_PASSWORD_OPC = '****'
# Login name used in the ssh/scp "<user>@<host>" target and in the expected
# shell prompt pattern (masked in this listing).
G_CONF_GUID = '***'
# When True, dprint() prints progress messages; set by parse_opts() from -d/--debug.
DEBUG = False
# Path of the POD/Dom0/DomU host list file; set by parse_opts() from -l/--pod_dom_list.
HOST_LIST_FN = ''
class DOCMD(object):
    """Run a shell command with a watchdog timeout and capture its results.

    The command is executed (through the shell) as soon as the object is
    constructed. Afterwards the results are available as plain attributes:

        out  -- data captured from the command's standard output
        err  -- data captured from the command's standard error
        code -- the process return code (Popen.returncode)

    The original accessor methods named out/err/code were removed: the
    same-named instance attributes assigned in __init__ always shadowed them,
    so they could never be called.

    Args:
        command: shell command line to execute.
        timeout_sec: seconds after which the child process is sent SIGKILL.
    """

    def __init__(self, command, timeout_sec=180):
        from threading import Timer
        self.command = command
        # Defaults so the attributes exist even if communicate() raises.
        self.out = None
        self.err = None
        self.code = None
        p = Popen(self.command, stdin=PIPE, stdout=PIPE,
                  stderr=PIPE, shell=True)
        # Watchdog: kill the child if it is still running after timeout_sec.
        timer = Timer(timeout_sec, os.kill, args=[p.pid, signal.SIGKILL])
        try:
            timer.start()
            self.out, self.err = p.communicate()
            self.code = p.returncode
        finally:
            # Always disarm the watchdog so it cannot fire after we return.
            timer.cancel()
def dprint(msg):
    """Print *msg* only when the module-level DEBUG flag is set."""
    if not DEBUG:
        return
    print(msg)
def parse_opts():
    """Parse program options and publish them as module globals.

    Sets DEBUG from -d/--debug and HOST_LIST_FN from -l/--pod_dom_list.

    Returns:
        The (opts, args) tuple returned by OptionParser.parse_args().
    """
    # The docstring must be the first statement; it used to follow the
    # global declarations and was therefore not a docstring at all.
    global DEBUG
    global HOST_LIST_FN
    parser = optparse.OptionParser(
        description='Copy messages log files for a POD from a Linux bastion server to /tmp directory')
    parser.add_option('-d', '--debug', help='Turn on debug information',
                      action='store_true', dest='debug', default=False)
    parser.add_option('-l', '--pod_dom_list', help='POD, Dom0 and DomU mappings',
                      dest='host_list_file', default='')
    (opts, args) = parser.parse_args()
    DEBUG = opts.debug
    HOST_LIST_FN = opts.host_list_file
    return (opts, args)
def perform_one_host(host, pod_name, domuv):
    """Collect the messages log of a single host.

    Runs three steps in order and stops at the first one that fails:
      1. perform_ssh_copy          -- make the remote /tmp copy of the log
      2. perform_scp               -- scp that copy to the local /tmp
      3. perform_ssh_rm_tmp_backup -- remove the remote /tmp copy

    Args:
        host: fully qualified host name to collect from.
        pod_name: POD name, used in the backup file name.
        domuv: 'dom0' or 'domu', used in the backup file name.

    Returns:
        True if all three steps succeeded, False otherwise.
    """
    steps = (
        ('perform_ssh_copy', perform_ssh_copy),
        ('perform_scp', perform_scp),
        ('perform_ssh_rm_tmp_backup', perform_ssh_rm_tmp_backup),
    )
    for step_name, step in steps:
        if not step(host, pod_name, domuv):
            # The original format string ended in a bare '%', which raised
            # ValueError instead of printing the failure.
            print('Failure in %s for host %s' % (step_name, host))
            return False
    return True
def perform_all():
    """Collect the messages logs of every host listed in HOST_LIST_FN.

    DomU hosts are processed first, then Dom0 hosts. Processing stops at the
    first host that fails. When every host succeeded, the local backup files
    are made world readable/writable via perform_chmod_local_backup().

    Returns:
        True if the host list was parsed, every host was collected and the
        final chmod succeeded; False otherwise.
    """
    pod = PODInfo(HOST_LIST_FN)
    # The parse result used to be ignored, so a missing -l option went unnoticed.
    if not pod.parse_pod_dom0_domu_conf():
        print('Failure in parse_pod_dom0_domu_conf for host list file %r' % HOST_LIST_FN)
        return False
    pod_name = pod.pod_name
    host_groups = ((pod.domu_list, 'domu'), (pod.dom0_list, 'dom0'))
    for hosts, domuv in host_groups:
        for host in hosts:
            # Return immediately: previously a domU failure was overwritten
            # by the result of the dom0 loop that ran afterwards.
            if not perform_one_host(host, pod_name, domuv):
                return False
    ret = perform_chmod_local_backup(pod_name)
    if not ret:
        # Report the chmod failure only when the chmod itself failed.
        print('Failure in perform_chmod_local_backup')
    return ret
def _remote_messages_path(host, pod_name, domuv):
    """Return the /tmp path used for the copy of the messages log of *host*."""
    host_head = host.split('.')[0]
    return '/tmp/messages_%s_%s_%s' % (pod_name, domuv, host_head)


def _run_root_commands(host, root_commands):
    """Open an ssh session to *host*, become root via pbrun and run commands.

    The session logs in as G_CONF_GUID, runs 'pbrun cloud-root -u root',
    sends each entry of *root_commands* (waiting for the root prompt after
    each one), then exits both shells.

    Args:
        host: fully qualified host name to connect to.
        root_commands: shell command lines to run at the root prompt, in order.

    Returns:
        True if every expected prompt was seen; False if any step raised
        (the exception is printed).
    """
    ret = True
    pchild = None
    try:
        host_head = host.split('.')[0]
        ptn_vm_pwd = "%s@%s's password:" % (G_CONF_GUID, host)
        cmd_ssh = 'ssh -o StrictHostKeyChecking=no %s@%s' % (G_CONF_GUID, host)
        # Raw strings: '\[', '\$' and '\#' are invalid escapes in normal literals.
        ptn_ssh_guid_prompt = r'\[%s@%s.*~\]\$' % (G_CONF_GUID, host_head)
        cmd_pbrun_root = 'pbrun cloud-root -u root'
        ptn_pbrun_root_pwd = 'Password:'
        # The prompt user is root or sdiroot sometimes, hence the leading '.*'.
        ptn_pbrun_root_prompt = r'\[.*root@%s.*~\]\#' % (host_head)
        cmd_exit_root = 'exit'
        cmd_exit_guid = 'exit'

        dprint('Done: %s' % cmd_ssh)
        pchild = pexpect.spawn(cmd_ssh)
        pchild.expect(ptn_vm_pwd)
        pchild.sendline(G_CONF_PASSWORD_OPC)
        dprint('Done: sent ssh password')
        pchild.expect(ptn_ssh_guid_prompt)
        pchild.sendline(cmd_pbrun_root)
        dprint('Done: %s' % cmd_pbrun_root)
        pchild.expect(ptn_pbrun_root_pwd)
        pchild.sendline(G_CONF_PASSWORD_OPC)
        dprint('Done: sent OPC password')
        pchild.expect(ptn_pbrun_root_prompt)
        for cmd in root_commands:
            pchild.sendline(cmd)
            dprint('Done: %s' % cmd)
            # Wait for the prompt so the command has finished before the next one.
            pchild.expect(ptn_pbrun_root_prompt)
        pchild.sendline(cmd_exit_root)
        dprint('Done: %s' % cmd_exit_root)
        pchild.sendline(cmd_exit_guid)
        dprint('Done: %s' % cmd_exit_guid)
    except Exception as e:
        # Deliberately broad: any failure in the session is reported to the
        # caller as False rather than aborting the whole run.
        print('Oops Something went wrong')
        print(e)
        ret = False
    finally:
        if pchild is not None:
            pchild.close()
    return ret


def perform_ssh_copy(host, pod_name, domuv):
    """Copy /var/log/messages on *host* to a world-readable file under /tmp.

    The copy is named /tmp/messages_<pod_name>_<domuv>_<short host name> and
    is chmod'ed to 666.

    Args:
        host: fully qualified host name to connect to.
        pod_name: POD name, used in the copy's file name.
        domuv: 'dom0' or 'domu', used in the copy's file name.

    Returns:
        True on success, False if the ssh session failed.
    """
    dprint('= Perform ssh and copy for host %s =' % host)
    new_message_fn = _remote_messages_path(host, pod_name, domuv)
    return _run_root_commands(host, [
        'yes | cp -uf /var/log/messages %s' % (new_message_fn),
        'chmod 666 %s' % (new_message_fn),
    ])


def perform_ssh_rm_tmp_backup(host, pod_name, domuv):
    """Remove the /tmp copy of the messages log on *host*.

    Removes /tmp/messages_<pod_name>_<domuv>_<short host name> with 'rm -f'.

    Args:
        host: fully qualified host name to connect to.
        pod_name: POD name, used in the copy's file name.
        domuv: 'dom0' or 'domu', used in the copy's file name.

    Returns:
        True on success, False if the ssh session failed.
    """
    dprint('= Perform ssh and rm for host %s =' % host)
    new_message_fn = _remote_messages_path(host, pod_name, domuv)
    return _run_root_commands(host, [
        'rm -f %s' % (new_message_fn),
    ])
def perform_scp(host, pod_name, domuv):
    """Copy the /tmp messages backup of *host* to the same path locally via scp.

    Args:
        host: fully qualified host name to copy from.
        pod_name: POD name, used in the backup file name.
        domuv: 'dom0' or 'domu', used in the backup file name.

    Returns:
        True if scp exited with status 0; False if the password was rejected,
        scp exited with a non-zero status, or the session raised. (The
        original always returned True, so failed transfers went unnoticed.)
    """
    dprint('= Perform scp for host %s =' % host)
    host_head = host.split('.')[0]
    ptn_vm_pwd = "%s@%s's password:" % (G_CONF_GUID, host)
    new_message_fn = '/tmp/messages_%s_%s_%s' % (pod_name, domuv, host_head)
    cmd_scp = "scp -o StrictHostKeyChecking=no %s@%s:%s %s" % (G_CONF_GUID, host, new_message_fn, new_message_fn)
    ret = True
    pchild = None
    try:
        dprint('Done: %s' % cmd_scp)
        pchild = pexpect.spawn(cmd_scp)
        i = pchild.expect([ptn_vm_pwd, pexpect.EOF])
        if i == 0:
            pchild.sendline(G_CONF_PASSWORD_OPC)
            dprint('Done: sent scp password')
            # Read until scp exits instead of calling wait(), which does not
            # read child output. timeout=None keeps the original "wait as
            # long as the transfer takes" behaviour.
            again = pchild.expect([pexpect.EOF, ptn_vm_pwd], timeout=None)
            if again == 1:
                # A second password prompt means the first one was rejected.
                print('Password rejected by host %s' % host)
                ret = False
        elif i == 1:
            # scp ended before asking for a password.
            dprint('Timeout or EOF')
        pchild.close()
        if ret and pchild.exitstatus != 0:
            print('scp exited with status %s (signal %s)' % (pchild.exitstatus, pchild.signalstatus))
            ret = False
    except Exception as e:
        # Same reporting style as the ssh helpers in this file.
        print('Oops Something went wrong')
        print(e)
        ret = False
    finally:
        if pchild is not None:
            pchild.close()
    return ret
def perform_chmod_local_backup(pod_name):
    """Run 'chmod 666' on every local /tmp/messages_<pod_name>_* file.

    Returns:
        True if the chmod command's return code was falsy, False otherwise.
    """
    dprint('= Perform chmod to 666 for all local messages backup files')
    oscmd = 'chmod 666 /tmp/messages_%s_*' % (pod_name)
    succeeded = not DOCMD(oscmd).code
    outcome = 'Success: %s' if succeeded else 'Failure: %s'
    dprint(outcome % oscmd)
    return succeeded
class PODInfo(object):
    """POD name plus the de-duplicated Dom0 and DomU host names of a host list file.

    Each data line of the file holds whitespace-separated columns:
    POD, Dom0, DomU and (optionally) further fields, which are ignored.
    """

    # Lower-cased titles of the first three columns of the header row shown
    # in the module's HOST_LIST_FILE example ("POD Dom0 DomU Identifier").
    _HEADER_FIELDS = ('pod', 'dom0', 'domu')

    def __init__(self, host_list_file):
        self.host_list_file = host_list_file
        self.pod_name = ''
        self.dom0_list = []
        self.domu_list = []

    def parse_pod_dom0_domu_conf(self):
        """Read the host list file and fill pod_name, dom0_list and domu_list.

        Blank lines and lines starting with '#' are ignored. The column title
        row is skipped so it is not mistaken for a host, and lines with fewer
        than three fields are skipped with a warning instead of raising
        IndexError. pod_name ends up as the POD column of the last data line.

        Returns:
            False if no host list file name was given, True otherwise.

        Raises:
            IOError/OSError: if the file cannot be opened.
        """
        if self.host_list_file == '':
            return False
        # 'with' closes the file even if parsing raises.
        with open(self.host_list_file) as fo:
            for line_no, line in enumerate(fo, 1):
                line = line.strip()
                if line == '' or line[0] == '#':
                    continue
                line_items = line.split()
                if len(line_items) < 3:
                    print('Skipping malformed line %d in %s: %r'
                          % (line_no, self.host_list_file, line))
                    continue
                pod, dom0, domu = line_items[:3]
                if (pod.lower(), dom0.lower(), domu.lower()) == self._HEADER_FIELDS:
                    continue
                self.pod_name = pod
                if dom0 not in self.dom0_list:
                    self.dom0_list.append(dom0)
                if domu not in self.domu_list:
                    self.domu_list.append(domu)
        return True

    def dprint(self):
        """Print the POD name and both host lists unconditionally."""
        print('POD Name: %s' % self.pod_name)
        print('Dom0 Names:')
        pprint.pprint(self.dom0_list)
        print('DomU Names:')
        pprint.pprint(self.domu_list)
def main():
    """Script entry point: parse options, collect logs, exit 0 on success else 1."""
    parse_opts()
    succeeded = perform_all()
    sys.exit(0 if succeeded else 1)


if __name__ == "__main__":
    main()
4.2 host_list配置文件
POD Dom0 DomU Identifier
XYZ dom0_111.abcdefg.oraclecloud.com domu_111_1.abcdefg.oraclecloud.com XYZ_1
XYZ dom0_222.abcdefg.oraclecloud.com domu_222_1.abcdefg.oraclecloud.com XYZ_2
XYZ dom0_222.abcdefg.oraclecloud.com domu_222_2.abcdefg.oraclecloud.com XYZ_3
个人简介:
作者之前一直在Sun做Solaris底层存储相关协议驱动开发,后来加入Oracle Cloud开发运维团队,有了一些新的心得体会。有的时候Ops发现了问题却无法用自动化的方式简化工作流程,Dev参与到Ops的运维工作可以在发现问题的同时加快批量自动化解决问题,同时加快迭代速度。
本文分享自 云服务与SRE架构师社区 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!