当系统内存不足时，Linux 内核会触发 OOM killer，选择一些进程杀掉，以便回收内存，尽量保持系统继续运行。具体选择哪个进程，有一套打分策略：参考因子包括进程占用的物理内存（rss）、进程页表占用的内存（nr_ptes）以及 swap 使用量（swapents）等；oom_score_adj 的值越小，进程得分越低，也就越难被杀掉。oom_score_adj 的取值范围为 [-1000, 1000]。
/*
 * out_of_memory - pick a victim process and kill it.
 *
 * This is an excerpt: the setup code is elided by the "..." line below.
 *
 * Asks select_bad_process() for a victim and then acts on the result:
 *   - NULL: nothing is killable, so the header is dumped and the kernel
 *     panics;
 *   - ERR_PTR(-1UL): the value select_bad_process() returns when the
 *     scan hits OOM_SCAN_ABORT; nothing is killed here;
 *   - anything else: the task is handed to oom_kill_process().
 *
 * NOTE(review): p, points, totalpages, mpol_mask and killed are not
 * declared in this excerpt; presumably they are set up in the elided
 * part, which is also where any "goto out" would come from.
 */
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
int order, nodemask_t *nodemask, bool force_kill)
{
...
// Get the process with the highest score
p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) {
dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
panic("Out of memory and no killable processes...\n");
}
if (PTR_ERR(p) != -1UL) {// Kill the selected process (the one with the highest score)
oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
nodemask, "Out of memory");
killed = 1;
}
out:
/*
* Give the killed threads a good chance of exiting before trying to
* allocate memory again.
*/
if (killed)
schedule_timeout_killable(1);
}
/*
 * select_bad_process - scan every thread and pick the OOM victim.
 *
 * Walks all threads under rcu_read_lock() and remembers the one with
 * the largest score reported by oom_badness().  The verdict returned by
 * oom_scan_process_thread() can override plain scoring:
 *   OOM_SCAN_SELECT   - take this thread and pin the best score to
 *                       ULONG_MAX so no scored thread can displace it;
 *   OOM_SCAN_CONTINUE - skip this thread;
 *   OOM_SCAN_ABORT    - stop scanning and return ERR_PTR(-1UL);
 *   OOM_SCAN_OK       - score the thread normally.
 *
 * @ppoints:    out: best score scaled as best * 1000 / totalpages
 * @totalpages: forwarded to the scan/score helpers; divisor for *ppoints
 * @nodemask:   forwarded to the scan/score helpers
 * @force_kill: forwarded to oom_scan_process_thread()
 *
 * Returns the chosen task with a reference taken via get_task_struct(),
 * NULL when no thread qualified, or ERR_PTR(-1UL) when the scan was
 * aborted (in which case *ppoints is left unwritten).
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 */
static struct task_struct *select_bad_process(unsigned int *ppoints,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	struct task_struct *victim = NULL;
	struct task_struct *proc, *thread;
	unsigned long top_score = 0;

	rcu_read_lock();
	for_each_process_thread(proc, thread) {
		unsigned int score;

		switch (oom_scan_process_thread(thread, totalpages, nodemask,
						force_kill)) {
		case OOM_SCAN_ABORT:
			/* Drop the RCU read lock before bailing out. */
			rcu_read_unlock();
			return ERR_PTR(-1UL);
		case OOM_SCAN_SELECT:
			victim = thread;
			top_score = ULONG_MAX;
			continue;
		case OOM_SCAN_CONTINUE:
			continue;
		case OOM_SCAN_OK:
			break;
		}

		/* Compute this thread's badness score. */
		score = oom_badness(thread, NULL, nodemask, totalpages);
		if (score <= top_score)
			continue;

		/* Strictly higher than anything seen so far: new candidate. */
		victim = thread;
		top_score = score;
	}

	/* Take the reference while still inside the RCU read section. */
	if (victim)
		get_task_struct(victim);
	rcu_read_unlock();

	/*
	 * For a small top_score the integer division yields 0.  That does
	 * not change which task was selected; the scaled value is only
	 * handed back to the caller (the original note says it is used as
	 * the score printed in the OOM report).
	 */
	*ppoints = top_score * 1000 / totalpages;
	return victim;
}
根据内核代码的算法，可以推导出 oom score 的计算方式：
points = rss + nr_ptes + swapents
points = points - (points * 3) / 100 //仅对 root 进程：得分减少 3%
//若 points 很小，而 oom_score_adj 为负数（比如 -100），则算出来的 points 可能是负值
adj = oom_score_adj * (totalpages / 1000)
points = points + adj
//即：points = points + (oom_score_adj * (totalpages / 1000))
//若 points 不大于 0，则此处返回 1
points = points > 0 ? points : 1;
//若 points 值很小，此处整数除法得到的 score 将为 0；这不影响选中被 kill 掉的进程，
//只是作为 OOM 时输出信息中的 score 值
score = points * 1000 / totalpages;
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。