A production multisite environment reported HEALTH_WARN 32 large omap objects. Bucket auto reshard was already set to false, so an oversized omap on a bucket index shard was ruled out as the cause. The official warning message does not point to the specific object, hence the troubleshooting walkthrough below.
[root@demo supdev]# ceph health detail
HEALTH_WARN 32 large omap objects
LARGE_OMAP_OBJECTS 32 large omap objects
32 large objects found in pool 'cn-bj-test1.rgw.log' # the pool containing the large omap objects
Search the cluster log for 'Large omap object found' for more details.
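As the message itself hints, the same "Large omap object found" lines that we grep out of an OSD log later on are also mirrored in the monitor's cluster log, so a quick grep there is a possible shortcut (a sketch, assuming the default log location on a mon host):
zgrep "Large omap object found" /var/log/ceph/ceph.log*   # run on a mon host; path may differ on your deployment
The rest of this walkthrough instead narrows the problem down PG by PG.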
[root@demo supdev]# ceph pg ls-by-pool cn-bj-test1.rgw.log |awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
ceph pg 11.0 query|grep num_large_omap_objects
ceph pg 11.1 query|grep num_large_omap_objects
ceph pg 11.2 query|grep num_large_omap_objects
......
+ ceph pg 11.1e6 query
+ grep num_large_omap_objects
"num_large_omap_objects": 1 #有large omap的objcet数量
"num_large_omap_objects": 0
"num_large_omap_objects": 0
[root@demo supdev]# ceph pg 11.1e6 query # query detailed PG information
{
"state": "active+clean",
.....
"info": {
"pgid": "11.1e6",
"last_update": "10075'3051746",
"last_complete": "10075'3051746",
"log_tail": "10075'3050200",
"last_user_version": 3051746,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
.....
"acting": [
46, # primary OSD, id=46
63, # replica OSD
23 # replica OSD
],
"stat_sum": {
"num_bytes": 40,
"num_objects": 2,
"num_object_clones": 0,
"num_object_copies": 6,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 2,
"num_whiteouts": 0,
"num_read": 3055759,
"num_read_kb": 3056162,
"num_write": 5986011,
"num_write_kb": 53,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 1,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 1 #large omap的object数量
},
...
"agent_state": {}
}
[root@demo supdev]# ceph osd find 46 # find the host for this OSD id
{
"osd": 46,
"ip": "100.1.1.40:6812/3691515",
"crush_location": {
"host": "TX-100-1-40-sata",
"media": "site1-rack2-sata",
"mediagroup": "site1-sata",
"root": "default"
}
}
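If you want to script this hop as well, the host can be pulled straight out of the JSON and the OSD log searched remotely (a sketch, assuming jq, passwordless ssh, and that the crush host bucket name matches the actual hostname):
host=$(ceph osd find 46 | jq -r '.crush_location.host')
ssh "$host" 'zgrep "Large omap object found" /var/log/ceph/ceph-osd.46.log*'   # zgrep covers both current and rotated .gz logs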
[root@demo supdev]# zcat /var/log/ceph/ceph-osd.46.log-20181210.gz |grep omap # find the specific object name from the OSD's log
2018-12-09 23:03:18.803799 7f90e9b46700 0 log_channel(cluster) log [WRN] : Large omap object found. Object: 11:67885262:::sync.error-log.3:head Key count: 2934286 Size (bytes): 657040594
# the omap of object sync.error-log.3 on OSD 46 exceeds the warning threshold
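For reference, the warning is driven by two OSD config options; on this Luminous release the key-count threshold defaults to 2000000, which the 2934286 keys above clearly exceed. You can check the values in effect via the admin socket (a sketch, run on the OSD's host):
ceph daemon osd.46 config get osd_deep_scrub_large_omap_object_key_threshold
ceph daemon osd.46 config get osd_deep_scrub_large_omap_object_value_size_threshold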
[root@demo supdev]# rados ls -p cn-bj-test1.rgw.log|grep "sync.error-log.3$" #确定objects存在
sync.error-log.3
# Note: error log entries generated during multisite sync are stored as omap keys on the sync.error-log.* objects.
# A gripe: the error log is split across 32 shards, hard-coded, and unlike the other logs it cannot be trimmed automatically; it can only be cleaned up by hand. The error log kept piling up until it triggered today's problem.
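Since the shard count is fixed at 32, it is easy to see how much each error-log shard has accumulated. A sketch, assuming the shards are named sync.error-log.0 through sync.error-log.31, as the object found above suggests:
# count omap keys per error-log shard; expect sync.error-log.3 to stand out
for i in $(seq 0 31); do
    printf "sync.error-log.%s " "$i"
    rados -p cn-bj-test1.rgw.log listomapkeys sync.error-log.$i 2>/dev/null | wc -l
done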
[root@demo supdev]# radosgw-admin sync error list|more # list the sync error log
[
{
"shard_id": 0,
"entries": [
{
"id": "1_1540890427.972991_36.1",
"section": "data",
"name": "demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1:3",
"timestamp": "2018-10-30 09:07:07.972991Z",
"info": {
"source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
"error_code": 5,
"message": "failed to sync bucket instance: (5) Input/output error"
}
},
......
{
"id": "1_1543395420.626552_32014.1",
"section": "data",
"name": "demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1:0/file1205085",
"timestamp": "2018-11-28 08:57:00.626552Z",
"info": {
"source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
"error_code": 5,
"message": "failed to sync object(5) Input/output error"
}
}
[root@TX-97-140-6 supdev]# radosgw-admin sync error trim --start-date=2018-11-14 --end-date=2018-11-28 # trim error log entries within a date range
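After the trim it is worth confirming that the omap actually shrank before expecting the health warning to move; a quick recheck of the shard found earlier:
rados -p cn-bj-test1.rgw.log listomapkeys sync.error-log.3 | wc -l   # key count should now be noticeably lower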
I wrote a quick script that first finds the pool from the warning message and then lists the PGs in that pool that contain large omap objects. It is rough and not guaranteed bug-free; tested on 12.2.10.
[root@demo cephuser]# cat large_obj.py
import json

import rados

ceph_conf_path = '/etc/ceph/ceph.conf'
rados_connect_timeout = 5


class RADOSClient(object):
    # context manager that opens/closes a rados connection via the driver
    def __init__(self, driver, pool=None):
        self.driver = driver
        self.client, self.ioctx = driver._connect_to_rados(pool)

    def __enter__(self):
        return self

    def __exit__(self, type_, value, traceback):
        self.driver._disconnect_from_rados(self.client, self.ioctx)


class RBDDriver(object):
    def __init__(self, ceph_conf_path, rados_connect_timeout, pool=None):
        self.ceph_conf_path = ceph_conf_path
        self.rados_connect_timeout = rados_connect_timeout
        self.pool = pool

    def _connect_to_rados(self, pool=None):
        client = rados.Rados(conffile=self.ceph_conf_path)
        try:
            if self.rados_connect_timeout >= 0:
                client.connect(timeout=self.rados_connect_timeout)
            else:
                client.connect()
            if self.pool is None:
                ioctx = None
            else:
                ioctx = client.open_ioctx(self.pool)
            return client, ioctx
        except rados.Error:
            client.shutdown()
            raise

    def _disconnect_from_rados(self, client, ioctx=None):
        if ioctx is None:
            client.shutdown()
        else:
            ioctx.close()
            client.shutdown()


class cmd_manager(object):
    def get_large_omap_obj_poolname(self):
        # parse `ceph health detail` (json) and pull the pool name out of the
        # LARGE_OMAP_OBJECTS detail message
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = '{"prefix": "health", "detail": "detail", "format": "json"}'
            result = dr.client.mon_command(cmd, '')
            if result[0] == 0:
                res_ = json.loads(result[1])
                if 'LARGE_OMAP_OBJECTS' in res_["checks"]:
                    return res_["checks"]['LARGE_OMAP_OBJECTS']['detail'][0]['message'].split("'")[1]
            return False

    def get_pg_list_by_pool(self, poolname):
        # equivalent of `ceph pg ls-by-pool <poolname> -f json`
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = '{"prefix": "pg ls-by-pool", "poolstr": "' + poolname + '", "format": "json"}'
            result = dr.client.mon_command(cmd, '')
            if result[0] == 0:
                return json.loads(result[1])
            return False


cmd_ = cmd_manager()
poolname = cmd_.get_large_omap_obj_poolname()
print("Large omap objects poolname = {0}".format(poolname))
res = cmd_.get_pg_list_by_pool(poolname)
for i in res:
    if i["stat_sum"]["num_large_omap_objects"] != 0:
        print("pgid={0} OSDs={1} num_large_omap_objects={2}".format(
            i["pgid"], i["acting"], i["stat_sum"]["num_large_omap_objects"]))
If you expect the cluster to recover the moment the omap is trimmed as above, think again: the warning "HEALTH_WARN 32 large omap objects" keeps hanging there awkwardly. The omap has been cleaned up, but the corresponding PG stats have not been refreshed, so the warning remains until you trigger a PG stats update manually or by some other means. I triggered it with ceph pg deep-scrub {pg}; note that a plain scrub does not help, it has to be a deep-scrub, which is yet another upstream design decision worth griping about. Of course you can also leave it alone and wait for the periodic background deep-scrub to clear it eventually.
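In this case that means repeating the following for every PG the script reported; the warning disappears once the deep-scrub finishes and the updated PG stats are reported back:
ceph pg deep-scrub 11.1e6   # a plain `ceph pg scrub` will not clear the warning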