使用版本: linux-2.6.22.9
Fork的系統調用代碼在linux/arch/i386/kernel/process.c中:
/*
 * fork() system call entry point.
 *
 * Delegates to do_fork() with SIGCHLD as the only clone flag, regs.esp as
 * the stack start, the address of the register frame, a stack size of 0,
 * and no parent/child TID pointers.
 *
 * Returns the value returned by do_fork().
 */
asmlinkage int sys_fork(struct pt_regs regs)
{
	return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
}
sys_fork系統調用通過do_fork()函數實現;對do_fork()函數傳遞不同的clone_flags,即可分別實現fork、clone和vfork。
sys_clone和sys_vfork的系統調用代碼如下:
asmlinkage int sys_clone(struct pt_regs regs) { unsigned long clone_flags; unsigned long newsp; int __user *parent_tidptr, *child_tidptr; clone_flags = regs.ebx; newsp = regs.ecx; parent_tidptr = (int __user *)regs.edx; child_tidptr = (int __user *)regs.edi; if (!newsp) newsp = regs.esp; return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); } asmlinkage int sys_vfork(struct pt_regs regs) { return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); } |
其中clone_flags在include/linux/sched.h中定義:
/*
 * cloning flags:
 *
 * One directive per line: a #define extends to the end of its line, so
 * every flag needs a line of its own to be defined.
 */
#define CSIGNAL			0x000000ff	/* signal to deliver when the process exits */
#define CLONE_VM		0x00000100	/* parent and child share the address space */
#define CLONE_FS		0x00000200	/* parent and child share filesystem information */
#define CLONE_FILES		0x00000400	/* parent and child share open files */
#define CLONE_SIGHAND		0x00000800	/* parent and child share signal handling */
#define CLONE_PTRACE		0x00002000	/* keep tracing the child */
#define CLONE_VFORK		0x00004000	/* vfork() was called; the parent sleeps */
#define CLONE_PARENT		0x00008000	/* set up a common parent process */
#define CLONE_THREAD		0x00010000	/* parent and child are in the same thread group */
#define CLONE_NEWNS		0x00020000	/* create a new namespace for the child */
#define CLONE_SYSVSEM		0x00040000	/* parent and child share System V SEM_UNDO */
#define CLONE_SETTLS		0x00080000	/* create a new TLS for the child */
#define CLONE_PARENT_SETTID	0x00100000	/* set the TID in the parent */
#define CLONE_CHILD_CLEARTID	0x00200000	/* clear the TID in the child */
#define CLONE_DETACHED		0x00400000	/* Unused, ignored */
#define CLONE_UNTRACED		0x00800000	/* do not allow tracing of the child */
#define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
#define CLONE_STOPPED		0x02000000	/* put the process in the stopped state */
#define CLONE_NEWUTS		0x04000000	/* create a new utsname group */
#define CLONE_NEWIPC		0x08000000	/* create a new IPC */
do_fork()在kernel/fork.c中定義,代碼如下:
/*
 * Main fork routine.
 *
 * Allocates a pid, duplicates the current process through copy_process()
 * and, when that succeeds, starts the child (or leaves it stopped) and,
 * for CLONE_VFORK, waits on the vfork completion before returning.
 *
 * Returns the new pid number on success, -EAGAIN when no pid could be
 * allocated, or the error encoded in copy_process()'s return value.
 */
long do_fork(unsigned long clone_flags,
	     unsigned long stack_start,
	     struct pt_regs *regs,
	     unsigned long stack_size,
	     int __user *parent_tidptr,
	     int __user *child_tidptr)
{
	struct completion vfork_done;
	struct task_struct *child;
	struct pid *pid;
	long child_nr;
	int trace_event = 0;

	pid = alloc_pid();
	if (pid == NULL)
		return -EAGAIN;
	child_nr = pid->nr;

	/* A traced parent may require the child to be traced as well. */
	if (unlikely(current->ptrace)) {
		trace_event = fork_traceflag(clone_flags);
		if (trace_event != 0)
			clone_flags |= CLONE_PTRACE;
	}

	child = copy_process(clone_flags, stack_start, regs, stack_size,
			     parent_tidptr, child_tidptr, pid);
	if (IS_ERR(child)) {
		/* Copy failed: give the pid back and report the error. */
		free_pid(pid);
		return PTR_ERR(child);
	}

	/*
	 * Everything that touches the child must happen before it is woken
	 * up - the task pointer may become invalid afterwards if the new
	 * thread exits quickly.
	 */
	if (clone_flags & CLONE_VFORK) {
		child->vfork_done = &vfork_done;
		init_completion(&vfork_done);
	}

	if ((child->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
		/* The child starts up with an immediate SIGSTOP. */
		sigaddset(&child->pending.signal, SIGSTOP);
		set_tsk_thread_flag(child, TIF_SIGPENDING);
	}

	if (clone_flags & CLONE_STOPPED)
		child->state = TASK_STOPPED;
	else
		wake_up_new_task(child, clone_flags);

	if (unlikely(trace_event)) {
		current->ptrace_message = child_nr;
		ptrace_notify((trace_event << 8) | SIGTRAP);
	}

	if (!(clone_flags & CLONE_VFORK))
		return child_nr;

	/* vfork: block on the completion, bracketed by the freezer calls. */
	freezer_do_not_count();
	wait_for_completion(&vfork_done);
	freezer_count();

	if (unlikely(current->ptrace & PT_TRACE_VFORK_DONE)) {
		current->ptrace_message = child_nr;
		ptrace_notify((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
	}

	return child_nr;
}
do_fork()函數的核心是copy_process()函數,該函數完成了進程建立的絕大部分工作,同樣定義在kernel/fork.c中。copy_process()函數較長,以下逐段往下看:
static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, struct pid *pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_TRACE_IRQFLAGS DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif |
這段代碼首先對傳入的clone_flags進行檢查,接著調用了dup_task_struct()函數。該函數的主要作用是:為子進程建立一個新的核心棧,並複製task_struct結構和thread_info結構。這裡只是對結構做完整的複製,所以此時子進程的進程描述符跟父進程完全一樣。跟進dup_task_struct()函數看代碼:
static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; prepare_to_copy(orig); tsk = alloc_task_struct(); if (!tsk) return NULL; ti = alloc_thread_info(tsk); if (!ti) { free_task_struct(tsk); return NULL; } *tsk = *orig; tsk->stack = ti; setup_thread_stack(tsk, orig); #ifdef CONFIG_CC_STACKPROTECTOR tsk->stack_canary = get_random_int(); #endif /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); atomic_set(&tsk->fs_excl, 0); #ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; return tsk; } |