根据向勇老师的指示,在这里分享一下我跟踪do_fork()及相关创建线程代码的过程。主要内容分为do_fork()主要涉及的子函数功能分析,以及完成do_fork()之后如何启动新的线程init。
Breakpoint 2, do_fork (clone_flags=256, stack=0, tf=0xc0126f54) at kern/process/proc.c:279
279 do_fork(uint32_t clone_flags, uintptr_t stack, struct trapframe *tf) {
(gdb) l
274 * @clone_flags: used to guide how to clone the child process
275 * @stack: the parent's user stack pointer. if stack==0, It means to fork a kernel thread.
276 * @tf: the trapframe info, which will be copied to child process's proc->tf
277 */
278 int
279 do_fork(uint32_t clone_flags, uintptr_t stack, struct trapframe *tf) {
280 int ret = -E_NO_FREE_PROC;
281 struct proc_struct *proc;
282 if (nr_process >= MAX_PROCESS) {
283 goto fork_out;
(gdb) n
这是初始界面:用list命令查看代码上下文,用next执行下一条代码(不进入函数),用step执行下一条代码(会进入函数)。
(gdb) n
280 int ret = -E_NO_FREE_PROC;
(gdb) p ret
$1 = 1211072
struct proc_struct *proc = kmalloc(sizeof(struct proc_struct));
if (proc != NULL) {
proc->state = PROC_UNINIT;
proc->pid = -1;
proc->runs = 0;
proc->kstack = 0;
proc->need_resched = 0;
proc->parent = NULL;
proc->mm = NULL;
memset(&(proc->context), 0, sizeof(struct context));
proc->tf = NULL;
proc->cr3 = boot_cr3;
proc->flags = 0;
memset(proc->name, 0, PROC_NAME_LEN);
}
return proc;
可以看出,这段代码是对TCB(即proc_struct)的各个成员变量进行初始设置。
struct Page *page = alloc_pages(KSTACKPAGE);
if (page != NULL) {
proc->kstack = (uintptr_t)page2kva(page);
return 0;
}
return -E_NO_MEM;
这里很重要的一点是调用了之前我们实现的alloc_pages,并把proc->kstack指向新分配的页面:page2kva会算出page指针相对于pages这个起始地址的偏移,进而得到该页对应的内核虚拟地址。如果内存不足、分配不到页面,则返回-E_NO_MEM表示失败。
static void
copy_thread(struct proc_struct *proc, uintptr_t esp, struct trapframe *tf) {
proc->tf = (struct trapframe *)(proc->kstack + KSTACKSIZE) - 1;
*(proc->tf) = *tf;
proc->tf->tf_regs.reg_eax = 0;
proc->tf->tf_esp = esp;
proc->tf->tf_eflags |= FL_IF;
proc->context.eip = (uintptr_t)forkret;
proc->context.esp = (uintptr_t)(proc->tf);
}
proc->context.eip = (uintptr_t)forkret;
proc->context.esp = (uintptr_t)(proc->tf);
这里十分重要:会把上下文中的指令指针eip设置为forkret函数的地址,把esp设置为proc->tf,即位于子进程自己内核栈顶部的中断栈帧(其内容是从传入的tf拷贝而来,并不是父进程的那一份)。这样切换到子进程后就会从forkret函数开始执行,具体等我们后面跟踪到子进程启动再说。
ide_init(); // init ide devices
swap_init(); // init swap
clock_init(); // init clock interrupt
intr_enable(); // enable irq interrupt
cpu_idle(); // run idle process
这一系列初始化,而cpu_idle()看名字就知道是我们所关心的了,进去之后,
void
cpu_idle(void) {
while (1) {
if (current->need_resched) {
schedule();
}
}
}
我们发现它会循环判断当前进程/线程的current->need_resched是否为真。在之前跟踪的过程中,在alloc_proc()函数里,新建线程的need_resched被初始化为0,而在proc_init()函数里:
idleproc->pid = 0;
idleproc->state = PROC_RUNNABLE;
idleproc->kstack = (uintptr_t)bootstack;
idleproc->need_resched = 1;
set_proc_name(idleproc, "idle");
nr_process ++;
current = idleproc;
我们可以看到idleproc(即空闲进程)的need_resched初始值为1,而current此时就是idleproc,所以current->need_resched为1,在cpu_idle()里判断为真,进入schedule()函数。
load_esp0(next->kstack + KSTACKSIZE); lcr3(next->cr3); switch_to(&(prev->context), &(next->context));
这三条语句就是proc_run()的核心。第一条语句修改TSS(任务状态段),将TSS的ts_esp0(stack pointers and segment selectors)指向下一个进程内核栈的栈顶(不知这里和copy_thread()里设置proc->tf = (struct trapframe *)(proc->kstack + KSTACKSIZE) - 1 有什么关联?暂时没有理解)。第二条语句修改cr3,即页目录表基址。第三条语句进行上下文切换,这里便是IDE无法继续查看调用的地方了,而用gdb仍可以很方便地跟踪。
switch_to () at kern/process/switch.S:6
6 movl 4(%esp), %eax # eax points to from
(gdb) s
7 popl 0(%eax) # save eip !popl
switch_to () at kern/process/switch.S:8
8 movl %esp, 4(%eax)
9 movl %ebx, 8(%eax)
10 movl %ecx, 12(%eax)
11 movl %edx, 16(%eax)
12 movl %esi, 20(%eax)
13 movl %edi, 24(%eax)
14 movl %ebp, 28(%eax)
17 movl 4(%esp), %eax # not 8(%esp): popped return address already
19 movl 28(%eax), %ebp
switch_to () at kern/process/switch.S:20
20 movl 24(%eax), %edi
21 movl 20(%eax), %esi
22 movl 16(%eax), %edx
23 movl 12(%eax), %ecx
24 movl 8(%eax), %ebx
25 movl 4(%eax), %esp
27 pushl 0(%eax) # push eip
switch_to () at kern/process/switch.S:29
29 ret
forkret () at kern/process/proc.c:193
193 forkret(void) {
forkret () at kern/process/proc.c:194
194 forkrets(current->tf);
forkrets () at kern/trap/trapentry.S:48
48 movl 4(%esp), %esp
(gdb) l
43 iret
44
45 .globl forkrets
46 forkrets:
47 # set stack to this new process's trapframe
48 movl 4(%esp), %esp
49 jmp __trapret
(gdb) s
forkrets () at kern/trap/trapentry.S:49
49 jmp __trapret
(gdb)
__trapret () at kern/trap/trapentry.S:33
33 popal
(gdb)
__trapret () at kern/trap/trapentry.S:36
36 popl %gs
37 popl %fs
38 popl %es
39 popl %ds
42 addl $0x8, %esp
43 iret
5 pushl %edx # push arg
6 call *%ebx # call fn
(gdb) s
init_main (arg=<error reading variable: Unknown argument list address for `arg'.>)
at kern/process/proc.c:359
359 init_main(void *arg) {
可以看到首先是我们讨论很多的两段代码:先用movl 4(%esp), %eax取出位于esp+4处的第一个参数,然后保存现场,把当前的一系列寄存器存到第一个参数所指向的内存空间里;接着取第二个参数,它原本位于esp+8处,但由于前面的popl已经把返回地址弹出、esp增加了4,所以这里仍然写作movl 4(%esp), %eax;最后把第二个参数所指向的内存空间里的值依次赋给一系列寄存器。事实上这两个参数就是&(prev->context)和&(next->context),分别保存了各自的上下文。
42 addl $0x8, %esp
(gdb)
43 iret
(gdb)
5 pushl %edx # push arg
(gdb)
6 call *%ebx # call fn
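这里将esp加8,执行中断返回iret,之后gdb显示跳转到了某个文件的第5、6行,但并不是当前文件(trapentry.S)的5、6行……后来我发现是entry.S的5、6行,这里如何自动跳转的我当时并不明了。(补充:iret会从栈上的trapframe中弹出eip、cs和eflags,所以会跳到trapframe里保存的tf_eip处;从后面的跟踪结果看,这个地址应当就是entry.S中的kernel_thread_entry,推测是在创建内核线程时写入trapframe的,有待对照kernel_thread()的代码确认。)总之call *%ebx就进入了init_main(),开始执行新线程的代码了。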
(gdb) s
kernel_thread_entry () at kern/process/entry.S:8
8 pushl %eax # save the return value of fn(arg)
(gdb)
9 call do_exit # call do_exit to terminate current thread
(gdb)
do_exit (error_code=<error reading variable: Unknown argument list address for `error_code'.>)
at kern/process/proc.c:353
353 do_exit(int error_code) {
(gdb)
do_exit (error_code=0) at kern/process/proc.c:354
354 panic("process exit!!.\n");
(gdb)