背景
核心数据结构
解说客户端
都会在struct obd_import
数据结构(客户端通过obd_import
向lustre的服务端发送请求)中使用struct ptlrpc_connection
结构来维护与lustre服务端的连接。其中obd_import->imp_state
保存了enum lustre_imp_state
定义的链接状态.struct obd_import {
// Abridged excerpt of the client-side import structure: members referenced
// elsewhere in this text (imp_state, imp_lock) are not shown here.
// Reference count of this import.
refcount_t imp_refcount;
struct lustre_handle imp_dlm_handle; /* client's ldlm export */
// Currently active connection.
struct ptlrpc_connection *imp_connection;
// Client RPC structure used with the currently active connection.
struct ptlrpc_client *imp_client;
/** List element for linking into pinger chain */
struct list_head imp_pinger_chain;
/** work struct for destruction of import */
struct work_struct imp_zombie_work;
// Article's note: requests that have been sent but have not yet received
// a reply.
// NOTE(review): the field name suggests requests retained for replay -
// confirm against the upstream header.
struct list_head imp_replay_list;
// List of requests being sent (obd_import->imp_sending_list).
struct list_head imp_sending_list;
// Delayed-request queue (obd_import->imp_delayed_list).
struct list_head imp_delayed_list;
struct list_head imp_committed_list;
struct list_head *imp_replay_cursor;
/** @} */
/** List of not replied requests */
struct list_head imp_unreplied_list;
/** Known maximal replied XID */
__u64 imp_known_replied_xid;
/** obd device for this import */
struct obd_device *imp_obd;
struct ptlrpc_sec *imp_sec;
rwlock_t imp_sec_lock;
time64_t imp_sec_expire;
pid_t imp_sec_refpid;
/** @} */
/** Wait queue for those who need to wait for recovery completion */
wait_queue_head_t imp_recovery_waitq;
/** Number of requests allocated */
atomic_t imp_reqs;
};
imp_state
维护的状态目前有11中,具体如下enum lustre_imp_state {
// The numeric values double as indices into the name table used by
// ptlrpc_import_state_name(), so they must stay in step with that table.
// Connection is closed.
LUSTRE_IMP_CLOSED = 1,
// Article's note: a request is sent to the server to open a new connection.
LUSTRE_IMP_NEW = 2,
// Disconnected.
LUSTRE_IMP_DISCON = 3,
// Connection attempt in progress.
LUSTRE_IMP_CONNECTING = 4,
// Article's note: "reply" state of the connection.
// NOTE(review): the name suggests request replay rather than reply - confirm.
LUSTRE_IMP_REPLAY = 5,
// Replay / lock state of the connection.
LUSTRE_IMP_REPLAY_LOCKS = 6,
// Article's note: waiting-for-reply state.
// NOTE(review): the name suggests waiting during replay - confirm.
LUSTRE_IMP_REPLAY_WAIT = 7,
// Connection is in recovery.
LUSTRE_IMP_RECOVER = 8,
// Connection is active.
LUSTRE_IMP_FULL = 9,
// Connection has been evicted.
LUSTRE_IMP_EVICTED = 10,
// No description in the original text; presumably an idle connection - TODO confirm.
LUSTRE_IMP_IDLE = 11,
// Sentinel: one past the last valid state.
LUSTRE_IMP_LAST
};
/*
 * Translate an import state into its printable name.
 *
 * state: value of enum lustre_imp_state; asserted to be below
 *        LUSTRE_IMP_LAST before it is used as a table index.
 *
 * Returns a pointer to a static string; the caller must not free it.
 */
static inline const char *ptlrpc_import_state_name(enum lustre_imp_state state)
{
	/* Indexed by the numeric state value; slot 0 is the placeholder name. */
	static const char * const names[] = {
		[0]  = "<UNKNOWN>",
		[1]  = "CLOSED",
		[2]  = "NEW",
		[3]  = "DISCONN",
		[4]  = "CONNECTING",
		[5]  = "REPLAY",
		[6]  = "REPLAY_LOCKS",
		[7]  = "REPLAY_WAIT",
		[8]  = "RECOVER",
		[9]  = "FULL",
		[10] = "EVICTED",
		[11] = "IDLE",
	};
	const char *name;

	LASSERT(state < LUSTRE_IMP_LAST);
	name = names[state];
	return name;
}
核心Recovery API
解说ptlrpc_fail_import
ptlrpc_fail_import
在客户端处理锁请求时被调用,这个函数主要做两件事:第一是把连接设置为断开状态;第二是在设置断开状态的调用返回非零时,强制触发一次通信的ping
行为。static DECLARE_DELAYED_WORK(ping_work, ptlrpc_pinger_main);
/*
 * Mark an import as disconnected and, when that call returns non-zero,
 * force the pinger to act on it.
 *
 * imp:      import to fail
 * conn_cnt: passed through unchanged to ptlrpc_set_import_discon()
 *           (presumably the connection count the failure was observed
 *           on - TODO confirm)
 */
void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
{
// Put the import into the disconnected state; the pinger is only forced
// when ptlrpc_set_import_discon() returns non-zero.
if (ptlrpc_set_import_discon(imp, conn_cnt, true))
ptlrpc_pinger_force(imp);
// EXIT is defined outside this excerpt (presumably a function-exit trace macro).
EXIT;
}
ptlrpc_set_import_discon
函数的实现中最终调用的是obd_import_event
所指向的函数指针。lustre中的mgc/mdc/lwp/osc/osp
都会为这个函数指针设置各自的实现函数。
ptlrpc_recover_import
ptlrpc_recover_import
函数是rpc层发送端的重连的实现,其中最核心的是设置断开连接状态然后再去连接。int ptlrpc_recover_import(struct obd_import *imp...)
{
// Abridged call-chain sketch, not compilable code: each nested brace block
// lists what the call above it goes on to invoke.
// Force the import into the disconnected state.
ptlrpc_set_import_discon(imp, 0, false)
// Then start a new connection attempt on the same import.
rc = ptlrpc_connect_import(imp)
{
ptlrpc_connect_import_locked(imp)
{
// Attempt the (re)connection.
obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
&obd->obd_uuid, &ocd, NULL)
}
}
}
ptlrpc_reconnect_import
ptlrpc_reconnect_import
重新连接import
端用来发送请求,这个执行链路如下。int ptlrpc_reconnect_import(struct obd_import *imp)
{
// Abridged call-chain sketch, not compilable code: the calls are listed in
// the order the article presents them.
// Set the import's connection state to disconnected.
ptlrpc_set_import_discon(imp, 0, false)
// Invalidate the import.
// NOTE(review): the article's original comment said "validate"; the callee
// name says invalidate.
ptlrpc_invalidate_import(imp)
// Disconnect the import.
ptlrpc_disconnect_import(imp, 1)
// Activate the import (sending side).
ptlrpc_activate_import(imp, false)
// Recover the import (sending side).
ptlrpc_recover_import(imp, NULL, 0);
}
ptlrpc_resend
ptlrpc_resend
核心功能是遍历obd_import->imp_sending_list
然后把这个列表的所有请求发送出去。int ptlrpc_resend(struct obd_import *imp)
{
// Abridged excerpt: the declaration of req and the return statement are
// not shown.
// Walk every request linked on the import's sending list.
list_for_each_entry(req, &imp->imp_sending_list, rq_list) {
// Not every request is resent: requests for which ptlrpc_no_resend()
// is true are skipped, and of the rest only those that timed out or
// do not allow replay are passed to ptlrpc_resend_req().
if (!ptlrpc_no_resend(req) &&
(req->rq_timedout || !req->rq_allow_replay))
ptlrpc_resend_req(req);
}
}
ptlrpc_wake_delayed
ptlrpc_wake_delayed
遍历obd_import->imp_delayed_list
然后唤醒线程重新发送请求void ptlrpc_wake_delayed(struct obd_import *imp)
{
struct ptlrpc_request *req;
// imp_lock is held for the whole traversal of the delayed list.
spin_lock(&imp->imp_lock);
// Visit every request linked on imp_delayed_list through rq_list.
list_for_each_entry(req, &imp->imp_delayed_list, rq_list) {
// Wake the waiter for this request; what the woken side does next is
// decided by ptlrpc_client_wake_req() and its callers, outside this excerpt.
ptlrpc_client_wake_req(req);
}
spin_unlock(&imp->imp_lock);
}