上面讲述了如何通过 fork、vfork、pthread_create 去创建一个进程或者一个线程。通过分析可知,fork、vfork、pthread_create 最终都会调用内核函数 _do_fork 去创建进程(线程)。
/*
 * _do_fork() - excerpt of the common task-creation entry point.
 *
 * This is an abridged listing: `......` marks code elided from the excerpt
 * and the `------(N)` tags appear to be the article's step markers.
 *
 * Steps visible here:
 *  (1) copy_process() is called with the clone flags, the stack arguments,
 *      child_tidptr and tls; NULL and NUMA_NO_NODE are passed in the
 *      positions of its `pid` and `node` parameters. The result is kept in p.
 *  (2) get_task_pid() is called on p with PIDTYPE_PID and the result is
 *      kept in pid.
 *  (3) wake_up_new_task() is called on p.
 *
 * parent_tidptr is not referenced in the lines shown.
 */
long _do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr,
unsigned long tls)
{
......
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE); ------(1)
......
pid = get_task_pid(p, PIDTYPE_PID); ------(2)
......
wake_up_new_task(p); ------(3)
}
/*
 * copy_process() - excerpt of the routine that builds a new task_struct.
 *
 * This is an abridged listing: `......` marks code elided from the excerpt
 * and the `------(N)` tags appear to be the article's step markers.
 *
 * Calls visible here, in order:
 *  (1) dup_task_struct(current, node)  - result stored in p
 *  (2) sched_fork(clone_flags, p)
 *  (3) copy_files(clone_flags, p)
 *  (4) copy_fs(clone_flags, p)
 *  (5) copy_mm(clone_flags, p)
 *  (6) copy_thread_tls(clone_flags, stack_start, stack_size, p, tls)
 *  (7) alloc_pid(), only when the caller did not pass &init_struct_pid
 *
 * The handling of each retval between these calls is elided.
 */
static __latent_entropy struct task_struct *copy_process(
unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
......
p = dup_task_struct(current, node); ------(1)
......
retval = sched_fork(clone_flags, p); ------(2)
......
retval = copy_files(clone_flags, p); ------(3)
......
retval = copy_fs(clone_flags, p); ------(4)
......
retval = copy_mm(clone_flags, p); ------(5)
......
retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); ------(6)
......
/*
 * Unless the caller supplied &init_struct_pid, allocate a pid from the
 * pid namespace recorded in p->nsproxy->pid_ns_for_children.
 */
if (pid != &init_struct_pid) {
pid = alloc_pid(p->nsproxy->pid_ns_for_children); ------(7)
/* alloc_pid() failure is reported as an error pointer; unwind via the cleanup label. */
if (IS_ERR(pid)) {
retval = PTR_ERR(pid);
goto bad_fork_cleanup_thread;
}
}
......
}
接下来我们一起看下 copy_process 中调用的几个函数。
/*
 * dup_task_struct() - excerpt: allocate a task_struct and a stack for a new
 * task and initialise them from @orig.
 *
 * @orig: the task to duplicate from.
 * @node: node argument forwarded to both allocators.
 *
 * Returns NULL if the task_struct allocation fails; the remaining return
 * paths (including the free_tsk label) are elided from this excerpt.
 *
 * This is an abridged listing: `......` marks code elided from the excerpt
 * and the `------(N)` tags appear to be the article's step markers.
 */
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
unsigned long *stack;
struct vm_struct *stack_vm_area;
int err;
......
/* (1) Allocate the new task_struct. */
tsk = alloc_task_struct_node(node); ------(1)
if (!tsk)
return NULL;
/* (2) Allocate the stack for tsk; on failure jump to free_tsk (label not shown). */
stack = alloc_thread_stack_node(tsk, node); ------(2)
if (!stack)
goto free_tsk;
stack_vm_area = task_stack_vm_area(tsk);
/* (3) Arch hook given both tasks; presumably copies *orig into *tsk - confirm. */
err = arch_dup_task_struct(tsk, orig); ------(3)
/* (4) Point tsk at the stack allocated above, after the arch hook has run. */
tsk->stack = stack; ------(4)
......
setup_thread_stack(tsk, orig); ------(5)
clear_user_return_notifier(tsk);
/* (6) Clear the need-resched indication on the new task. */
clear_tsk_need_resched(tsk); ------(6)
......
}
/*
 * sched_fork() - excerpt: scheduler-side setup of a newly created task @p.
 *
 * Visible behaviour:
 *  (1) __sched_fork() is called on p.
 *  (2) p->state is set to TASK_NEW.
 *  (3) p->prio is taken from current->normal_prio.
 *  (4) A scheduling class is chosen from p->prio: deadline priority is
 *      rejected with -EAGAIN, RT priority selects rt_sched_class, anything
 *      else selects fair_sched_class.
 *  (5) init_task_preempt_count() is called on p.
 *
 * get_cpu() is called on entry; the only put_cpu() visible here is on the
 * -EAGAIN path. `flags` and `cpu` are not used in the lines shown.
 *
 * This is an abridged listing: `......` marks code elided from the excerpt
 * and the `------(N)` tags appear to be the article's step markers.
 */
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
int cpu = get_cpu();
__sched_fork(clone_flags, p); ------(1)
/*
 * We mark the process as NEW here. This guarantees that
 * nobody will actually run it, and a signal or other external
 * event cannot wake it up and insert it on the runqueue either.
 */
p->state = TASK_NEW; ------(2)
/*
 * Make sure we do not leak PI boosting priority to the child.
 */
p->prio = current->normal_prio; ------(3)
......
/* Deadline priority is refused here: balance get_cpu() and fail with -EAGAIN. */
if (dl_prio(p->prio)) {
put_cpu();
return -EAGAIN;
} else if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
} else {
p->sched_class = &fair_sched_class; ------(4)
}
......
init_task_preempt_count(p); ------(5)
......
}
/*
 * copy_mm() - excerpt: give the new task @tsk an mm, either shared or
 * duplicated depending on @clone_flags.
 *
 * Visible behaviour:
 *  (1) If oldmm is NULL, return 0 without doing anything further.
 *  (2) With CLONE_VM set, call mmget() on oldmm, use oldmm itself as mm and
 *      jump to good_mm (label not shown).
 *  (3) Otherwise preset retval to -ENOMEM and obtain mm from dup_mm(tsk).
 *
 * The assignment of oldmm and everything after dup_mm() are elided.
 * `......` marks elided code; the `------(N)` tags appear to be the article's
 * step markers.
 */
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm;
int retval;
......
if (!oldmm) ------(1)
return 0;
/* initialize the new vmacache entries */
vmacache_flush(tsk);
if (clone_flags & CLONE_VM) { ------(2)
mmget(oldmm);
mm = oldmm;
goto good_mm;
}
/* Default error code, set before the duplication attempt. */
retval = -ENOMEM;
mm = dup_mm(tsk); ------(3)
......
}
/*
 * dup_mm() - excerpt: create a new mm_struct for @tsk based on current->mm.
 *
 * Visible steps:
 *  (1) allocate_mm() provides the new mm_struct.
 *  (2) The whole of *oldmm is memcpy()'d into it.
 *  (3) mm_init() is run on the copy; a false/NULL result is treated as
 *      failure.
 *  (4) dup_mmap(mm, oldmm) is called; a non-zero result jumps to free_pt.
 *
 * The fail_nomem and free_pt labels and the success return are elided.
 * `......` marks elided code; the `------(N)` tags appear to be the article's
 * step markers.
 */
static struct mm_struct *dup_mm(struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm = current->mm;
int err;
mm = allocate_mm(); ------(1)
if (!mm)
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm)); ------(2)
/* mm->user_ns is read from the copy made just above. */
if (!mm_init(mm, tsk, mm->user_ns)) ------(3)
goto fail_nomem;
err = dup_mmap(mm, oldmm); ------(4)
if (err)
goto free_pt;
......
}
在讲解 copy_thread 函数之前,先看下几个重要的结构体,具体的用法会在进程调度章节中详细描述。
/*
 * struct task_struct - abridged view showing only two members:
 * thread_info as the first member and the CPU-specific `thread` state.
 * All members in between are elided (`......`).
 */
struct task_struct {
struct thread_info thread_info;
......
/* CPU-specific state of this task: */
struct thread_struct thread;
}
/*
 * struct cpu_context - saved register set embedded in struct thread_struct.
 *
 * Holds x19..x28 plus fp, sp and pc.
 * NOTE(review): these look like the AArch64 callee-saved registers kept
 * across a context switch - confirm against the AAPCS64 and the switch code.
 */
struct cpu_context {
unsigned long x19;
unsigned long x20;
unsigned long x21;
unsigned long x22;
unsigned long x23;
unsigned long x24;
unsigned long x25;
unsigned long x26;
unsigned long x27;
unsigned long x28;
unsigned long fp;
unsigned long sp;
unsigned long pc;
};
/*
 * struct thread_struct - per-task CPU-specific state (the `thread` member of
 * struct task_struct). Groups the saved cpu_context with FP/SIMD and SVE
 * bookkeeping, the last fault information and debug state.
 */
struct thread_struct {
struct cpu_context cpu_context; /* cpu context */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
unsigned int sve_vl; /* SVE vector length */
unsigned int sve_vl_onexec; /* SVE vl after next exec */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
};
/*
 * struct user_pt_regs - register record made of 31 general registers
 * (regs[0..30]) followed by sp, pc and pstate, all 64-bit. It is embedded
 * at the start of struct pt_regs through a union.
 */
struct user_pt_regs {
__u64 regs[31];
__u64 sp;
__u64 pc;
__u64 pstate;
};
/*
 * struct pt_regs - register frame for a task.
 *
 * The leading union overlays struct user_pt_regs with an anonymous struct of
 * the same layout, so the registers can be reached either as
 * user_regs.<field> or directly as regs[]/sp/pc/pstate. The members after the
 * union are extra bookkeeping fields.
 */
struct pt_regs {
union {
struct user_pt_regs user_regs;
struct {
u64 regs[31];
u64 sp;
u64 pc;
u64 pstate;
};
};
u64 orig_x0;
/* syscallno and its padding word swap places on big-endian (__AARCH64EB__) builds. */
#ifdef __AARCH64EB__
u32 unused2;
s32 syscallno;
#else
s32 syscallno;
u32 unused2;
#endif
u64 orig_addr_limit;
u64 unused; // maintain 16 byte alignment
u64 stackframe[2];
};
/*
 * copy_thread() - excerpt: set up the register frame and saved CPU context
 * of the new task @p.
 *
 * @clone_flags: clone flags (not referenced in the lines shown).
 * @stack_start: stored in x19 of the saved context for kernel threads.
 * @stk_sz:      stored in x20 of the saved context for kernel threads.
 * @p:           the task being set up.
 *
 * Returns 0 on the path shown.
 *
 * `......` marks code elided from this excerpt and the `------(N)` tags appear
 * to be the article's step markers.
 */
int copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p)
{
/* (1) Locate the pt_regs frame belonging to p. */
struct pt_regs *childregs = task_pt_regs(p); ------(1)
/* (2) Start from an all-zero saved cpu_context. */
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); ------(2)
......
/* (3) Tasks without PF_KTHREAD: start from a copy of the current task's pt_regs. */
if (likely(!(p->flags & PF_KTHREAD))) { ------(3)
*childregs = *current_pt_regs(); ------(4)
/* (5) Zero regs[0] in the child's frame; presumably the child's return value - confirm. */
childregs->regs[0] = 0; ------(5)
......
/* (6) PF_KTHREAD tasks: zeroed frame, EL1h mode, arguments parked in x19/x20. */
} else { ------(6)
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h; ------(7)
/* Set the UAO bit only when it is both configured and reported as a CPU capability. */
if (IS_ENABLED(CONFIG_ARM64_UAO) &&
cpus_have_const_cap(ARM64_HAS_UAO))
childregs->pstate |= PSR_UAO_BIT;
p->thread.cpu_context.x19 = stack_start; ------(8)
p->thread.cpu_context.x20 = stk_sz; ------(9)
}
/* (10)/(11) Saved pc is ret_from_fork and saved sp is the pt_regs frame, on both paths. */
p->thread.cpu_context.pc = (unsigned long)ret_from_fork; ------(10)
p->thread.cpu_context.sp = (unsigned long)childregs; ------(11)
ptrace_hw_copy_thread(p);
return 0;
}
我们用一张图简单地总结一下: