Large omap objects: hands-on troubleshooting


Our production multisite environment started reporting HEALTH_WARN 32 large omap objects. Bucket auto reshard was already set to false, so an oversized omap on a bucket index shard could be ruled out as the cause. The official warning message does not point to the specific object, which led to the troubleshooting process below.
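As background, an OSD raises this warning during deep scrub when an object's omap key count or total omap size crosses the configured thresholds. A minimal check sketch, assuming the Luminous option names below and local access to the OSD admin socket (osd.<id> is a placeholder):

# thresholds that make a deep scrub flag an object as a "large omap object"
ceph daemon osd.<id> config get osd_deep_scrub_large_omap_object_key_threshold
ceph daemon osd.<id> config get osd_deep_scrub_large_omap_object_value_sum_threshold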

Troubleshooting process

[root@demo supdev]# ceph health detail
HEALTH_WARN 32 large omap objects
LARGE_OMAP_OBJECTS 32 large omap objects
    32 large objects found in pool 'cn-bj-test1.rgw.log'  # the pool containing the large omap objects
    Search the cluster log for 'Large omap object found' for more details.


[root@demo supdev]# ceph pg ls-by-pool cn-bj-test1.rgw.log |awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
ceph pg 11.0 query|grep num_large_omap_objects
ceph pg 11.1 query|grep num_large_omap_objects
ceph pg 11.2 query|grep num_large_omap_objects
......
+ ceph pg 11.1e6 query
+ grep num_large_omap_objects
                "num_large_omap_objects": 1 #有large omap的objcet数量
                    "num_large_omap_objects": 0
                    "num_large_omap_objects": 0


[root@demo supdev]# ceph pg 11.1e6 query  # query the detailed PG information
{
    "state": "active+clean",
.....
    "info": {
        "pgid": "11.1e6",
        "last_update": "10075'3051746",
        "last_complete": "10075'3051746",
        "log_tail": "10075'3050200",
        "last_user_version": 3051746,
        "last_backfill": "MAX",
        "last_backfill_bitwise": 0,
        "purged_snaps": [],
.....

              "acting": [
                    46, # primary OSD (id=46)
                    63, # replica OSD
                    23  # replica OSD
                ],
            "stat_sum": {
                "num_bytes": 40,
                "num_objects": 2,
                "num_object_clones": 0,
                "num_object_copies": 6,
                "num_objects_missing_on_primary": 0,
                "num_objects_missing": 0,
                "num_objects_degraded": 0,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 0,
                "num_objects_dirty": 2,
                "num_whiteouts": 0,
                "num_read": 3055759,
                "num_read_kb": 3056162,
                "num_write": 5986011,
                "num_write_kb": 53,
                "num_scrub_errors": 0,
                "num_shallow_scrub_errors": 0,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 0,
                "num_bytes_recovered": 0,
                "num_keys_recovered": 0,
                "num_objects_omap": 1,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0,
                "num_flush": 0,
                "num_flush_kb": 0,
                "num_evict": 0,
                "num_evict_kb": 0,
                "num_promote": 0,
                "num_flush_mode_high": 0,
                "num_flush_mode_low": 0,
                "num_evict_mode_some": 0,
                "num_evict_mode_full": 0,
                "num_objects_pinned": 0,
                "num_legacy_snapsets": 0,
                "num_large_omap_objects": 1 #large omap的object数量
            },
            ...
                "agent_state": {}
}


[root@demo supdev]# ceph osd find 46  # locate the host that carries this OSD id
{
    "osd": 46,
    "ip": "100.1.1.40:6812/3691515",
    "crush_location": {
        "host": "TX-100-1-40-sata",
        "media": "site1-rack2-sata",
        "mediagroup": "site1-sata",
        "root": "default"
    }
}


[root@demo supdev]# zcat /var/log/ceph/ceph-osd.46.log-20181210.gz |grep omap  # find the exact object name in the OSD log
2018-12-09 23:03:18.803799 7f90e9b46700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 11:67885262:::sync.error-log.3:head Key count: 2934286 Size (bytes): 657040594 
# the omap of the object named sync.error-log.3 on OSD 46 exceeds the threshold



[root@demo supdev]# rados ls -p cn-bj-test1.rgw.log|grep "sync.error-log.3$"  # confirm the object really exists
sync.error-log.3

# Note: the error log produced during multisite sync is stored as omap on the sync.error-log.* objects.
# A gripe: the error log is hard-coded to 32 shards, and unlike the other logs it cannot be trimmed
# automatically, only by hand. The error entries kept piling up until they triggered today's warning.
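# (a cross-check I would add here, not part of the original session): count the omap keys on the
# shard object directly with rados listomapkeys; the number should match the "Key count"
# reported in the OSD log above
rados -p cn-bj-test1.rgw.log listomapkeys sync.error-log.3 | wc -l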

[root@demo supdev]# radosgw-admin sync error list|more  # inspect the sync error log
[
    {
        "shard_id": 0,
        "entries": [
            {
                "id": "1_1540890427.972991_36.1",
                "section": "data",
                "name": "demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1:3",
                "timestamp": "2018-10-30 09:07:07.972991Z",
                "info": {
                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
                    "error_code": 5,
                    "message": "failed to sync bucket instance: (5) Input/output error"
                }
            },
......
            {
                "id": "1_1543395420.626552_32014.1",
                "section": "data",
                "name": "demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1:0/file1205085",
                "timestamp": "2018-11-28 08:57:00.626552Z",
                "info": {
                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
                    "error_code": 5,
                    "message": "failed to sync object(5) Input/output error"
                }
            }


[root@TX-97-140-6 supdev]# radosgw-admin sync error trim --start-date=2018-11-14 --end-date=2018-11-28  # trim error log entries within a date range

Speeding up the diagnosis

I wrote a quick script that first extracts the pool name from the warning and then walks that pool's PGs to find the ones holding large omap objects. It does the job, though I cannot promise it is bug-free; it passed testing on 12.2.10. A sample run is shown after the script.

[root@demo cephuser]# cat large_obj.py
import json
import rados

ceph_conf_path = '/etc/ceph/ceph.conf'
rados_connect_timeout = 5

class RADOSClient(object):
    # context manager that opens a cluster connection and closes it on exit
    def __init__(self, driver, pool=None):
        self.driver = driver
        self.client, self.ioctx = driver._connect_to_rados(pool)
    def __enter__(self):
        return self
    def __exit__(self, type_, value, traceback):
        self.driver._disconnect_from_rados(self.client, self.ioctx)

class RBDDriver(object):
    def __init__(self, ceph_conf_path, rados_connect_timeout, pool=None):
        self.ceph_conf_path = ceph_conf_path
        self.rados_connect_timeout = rados_connect_timeout
        self.pool = pool

    def _connect_to_rados(self, pool=None):
        client = rados.Rados(conffile=self.ceph_conf_path)
        try:
            if self.rados_connect_timeout >= 0:
                client.connect(timeout=self.rados_connect_timeout)
            else:
                client.connect()
            ioctx = None if self.pool is None else client.open_ioctx(self.pool)
            return client, ioctx
        except rados.Error:
            client.shutdown()
            raise Exception("Error connecting to ceph cluster.")

    def _disconnect_from_rados(self, client, ioctx=None):
        if ioctx is not None:
            ioctx.close()
        client.shutdown()

class cmd_manager(object):
    def get_large_omap_obj_poolname(self):
        # parse the pool name out of the LARGE_OMAP_OBJECTS health detail message
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = '{"prefix": "health", "detail": "detail", "format": "json"}'
            ret, outbuf, outs = dr.client.mon_command(cmd, '')
            if ret == 0:
                res_ = json.loads(outbuf)
                if 'LARGE_OMAP_OBJECTS' in res_["checks"]:
                    # message looks like: "32 large objects found in pool 'cn-bj-test1.rgw.log'"
                    return res_["checks"]['LARGE_OMAP_OBJECTS']['detail'][0]['message'].split("'")[1]
            return False

    def get_pg_list_by_pool(self, poolname):
        # list every PG of the pool together with its stat_sum counters
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = '{"prefix": "pg ls-by-pool", "poolstr": "' + poolname + '", "format": "json"}'
            ret, outbuf, outs = dr.client.mon_command(cmd, '')
            if ret == 0:
                return json.loads(outbuf)
            return False

cmd_ = cmd_manager()
poolname = cmd_.get_large_omap_obj_poolname()
print "Large omap objects poolname = {0}".format(poolname)
res = cmd_.get_pg_list_by_pool(poolname)
for i in res:
    if i["stat_sum"]["num_large_omap_objects"] != 0:
        print "pgid={0} OSDs={1} num_large_omap_objects={2}".format(i["pgid"], i["acting"], i["stat_sum"]["num_large_omap_objects"])

One more landmine

If you think the cluster will recover on its own once the omap has been cleaned up this way, you are being too optimistic: the warning "HEALTH_WARN 32 large omap objects" keeps hanging there awkwardly. The omap entries are gone, but the statistics of the corresponding PGs have not been refreshed, so the warning persists. You have to trigger a PG stat update yourself by one means or another; I did it with ceph pg deep-scrub {pg}. Note that a plain scrub does nothing here, it has to be a deep-scrub, which is another piece of upstream design logic worth a rant. Of course, you can also leave it alone and wait for the periodic background deep-scrub to clear it eventually.
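A minimal sketch of that last step, assuming the large_obj.py script above is available (the loop and the awk parsing are my own addition, not from the original session):

# deep-scrub every PG that still reports large omap objects, then re-check cluster health
for pg in $(python large_obj.py | awk -F'[= ]' '/^pgid=/ {print $2}'); do
    ceph pg deep-scrub ${pg}
done
ceph health detail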
