在上一篇文章中我们分析了CFS的主要代码,涉及的内容有:
本节将围绕一个进程的生命周期,继续分析一个进程是如何被抢占、如何睡眠、如何被调度出去的。
周期性调度就是Linux内核会在每一个tick的时候去更新当前进程的运行时间,以及判断当前进程是否需要被调度出去等。
在时钟中断的处理函数中会调用update_process_times,最终会调用到调度器相关的scheduler_tick函数中
void scheduler_tick(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
struct rq_flags rf;
sched_clock_tick();
rq_lock(rq, &rf);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
cpu_load_update_active(rq);
calc_global_load_tick(rq);
psi_task_tick(rq);
rq_unlock(rq, &rf);
perf_event_task_tick();
#ifdef CONFIG_SMP
rq->idle_balance = idle_cpu(cpu);
trigger_load_balance(rq);
#endif
}
获取当前CPU上的运行队列rq,再根据调度类sched_class去调用该进程调度类中的task_tick函数,此处我们只描述CFS调度类
static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &curr->se;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
entity_tick(cfs_rq, se, queued);
}
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
update_misfit_status(curr, rq);
update_overutilized_status(task_rq(curr));
}
通过当前的task_struct,获取调度实体se,然后根据调度实体se获取CFS运行队列,通过entity_tick函数做进一步操作
static void
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
/*
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
/*
* Ensure that runnable average is periodically updated.
*/
update_load_avg(cfs_rq, curr, UPDATE_TG);
update_cfs_group(curr);
if (cfs_rq->nr_running > 1)
check_preempt_tick(cfs_rq, curr);
}
static void
check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
unsigned long ideal_runtime, delta_exec;
struct sched_entity *se;
s64 delta;
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
resched_curr(rq_of(cfs_rq));
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
*/
clear_buddies(cfs_rq, curr);
return;
}
/*
* Ensure that a task that missed wakeup preemption by a
* narrow margin doesn't have to wait for a full slice.
* This also mitigates buddy induced latencies under load.
*/
if (delta_exec < sysctl_sched_min_granularity)
return;
se = __pick_first_entity(cfs_rq);
delta = curr->vruntime - se->vruntime;
if (delta < 0)
return;
if (delta > ideal_runtime)
resched_curr(rq_of(cfs_rq));
}
当一个进程由于要等待资源而不得不放弃CPU时,它会选择将自己调度出去。比如串口在等待有数据发送过来时,就不得不让出CPU,让别的进程来占用CPU,从而最大限度地利用CPU资源。通常需要睡眠的进程都会使用schedule函数来让出CPU
asmlinkage __visible void __sched schedule(void)
{
struct task_struct *tsk = current;
sched_submit_work(tsk);
do {
preempt_disable();
__schedule(false);
sched_preempt_enable_no_resched();
} while (need_resched());
}
static void __sched notrace __schedule(bool preempt)
{
cpu = smp_processor_id();
rq = cpu_rq(cpu);
prev = rq->curr;
if (!preempt && prev->state) {
if (signal_pending_state(prev->state, prev)) {
prev->state = TASK_RUNNING;
} else {
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
prev->on_rq = 0;
........
}
当一个进程调用schedule函数时,传递的参数是false。false的意思是当前不是发生抢占。之前在进程的基本概念中描述了进程的状态:进程处于running状态时state等于0,其余状态是非0的。于是就通过deactivate_task函数,将当前进程从rq中移除掉。
void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
dequeue_task(rq, p, flags);
}
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
p->sched_class->dequeue_task(rq, p, flags);
}
最终调用到属于该进程的调度类中的dequeue_task函数中,这里还是以CFS调度类为例子
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
int task_sleep = flags & DEQUEUE_SLEEP;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);
cfs_rq->h_nr_running--;
}
}
获取该进程的调度实体,再获取调度实体属于的CFS运行队列,通过dequeue_entity函数将调度实体从CFS运行队列删除
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
/*
* When dequeuing a sched_entity, we must:
* - Update loads to have both entity and cfs_rq synced with now.
* - Subtract its load from the cfs_rq->runnable_avg.
* - Subtract its previous weight from cfs_rq->load.weight.
* - For group entity, update its weight to reflect the new share
* of its group cfs_rq.
*/
update_load_avg(cfs_rq, se, UPDATE_TG);
dequeue_runnable_load_avg(cfs_rq, se);
update_stats_dequeue(cfs_rq, se, flags);
clear_buddies(cfs_rq, se);
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
之前在fork一个新进程之后,最后会通过wake_up_new_task来唤醒一个进程,这个函数在上篇中讲过如何将一个进程添加到CFS就绪队列
void wake_up_new_task(struct task_struct *p)
{
p->state = TASK_RUNNING;
activate_task(rq, p, ENQUEUE_NOCLOCK);
p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, WF_FORK);
}
通过activate_task会将此进程添加到就绪队列中,同时check_preempt_curr函数用来检查唤醒的进程是否可以抢占当前进程。因为唤醒的进程可能是更高优先级的实时进程,而当前进程只是个普通进程等,这都是有可能发生的。
void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
const struct sched_class *class;
if (p->sched_class == rq->curr->sched_class) {
rq->curr->sched_class->check_preempt_curr(rq, p, flags);
} else {
for_each_class(class) {
if (class == rq->curr->sched_class)
break;
if (class == p->sched_class) {
resched_curr(rq);
break;
}
}
}
}
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
{
if (wakeup_preempt_entity(se, pse) == 1)
resched_curr(rq);
}
通过wakeup_preempt_entity来判断唤醒的进程是否可以抢占当前进程,如果可以则设置need_resched标志位
/*
* Should 'se' preempt 'curr'.
*
* |s1
* |s2
* |s3
* g
* |<--->|c
*
* w(c, s1) = -1
* w(c, s2) = 0
* w(c, s3) = 1
*
*/
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
s64 gran, vdiff = curr->vruntime - se->vruntime;
if (vdiff <= 0)
return -1;
gran = wakeup_gran(se);
if (vdiff > gran)
return 1;
return 0;
}