fs/binfmt_elf.c代碼如下:
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
struct file *interpreter = NULL; /* to shut gcc up */
unsigned long load_addr = 0, load_bias = 0;
int load_addr_set = 0;
char * elf_interpreter = NULL;
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata;
unsigned long elf_bss, elf_brk;
int retval, i;
unsigned int size;
unsigned long elf_entry;
unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc = 0;
int executable_stack = EXSTACK_DEFAULT;
unsigned long def_flags = 0;
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
} *loc;
loc = kmalloc(sizeof(*loc), GFP_KERNEL);
if (!loc) {
retval = -ENOMEM;
goto out_ret;
}
loc->elf_ex = *((struct elfhdr *)bprm->buf);
retval = -ENOEXEC;
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
goto out;
/ *判斷是否是可執行程式或者是共用庫,不是的話返回 * /
if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
if (!bprm->file->f_op||!bprm->file->f_op->mmap)
goto out;
/* 讀取的程式頭大小若與ELF標準的程式頭大小不一致,則返回 */
if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
goto out;
/* 程式表頭數目不合法,返回 */
if (loc->elf_ex.e_phnum < 1 ||
loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
goto out;
size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
retval = -ENOMEM;
/* 為得到的程式表頭分配空間 */
elf_phdata = kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out;
/* 讀取可執行檔程式表頭的內容,最終調用vfs的read函數完成 */
retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
(char *)elf_phdata, size);
if (retval != size) {
if (retval >= 0)
retval = -EIO;
goto out_free_ph;
}
/* bss段,brk段先初始化為0 */
elf_ppnt = elf_phdata;
elf_bss = 0;
elf_brk = 0;
start_code = ~0UL;
end_code = 0;
start_data = 0;
end_data = 0;
for (i = 0; i < loc->elf_ex.e_phnum; i++) {
if (elf_ppnt->p_type == PT_INTERP) {
/* 從PT_INTERP的段中找到所對應的動態連結器名稱,並載入動態連結器。通常是/lib/ld-linux.so.2. */
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX ||
elf_ppnt->p_filesz < 2)
goto out_free_ph;
retval = -ENOMEM;
/* 為動態連接器分配空間並讀取載入 */
elf_interpreter = kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
goto out_free_ph;
retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
elf_ppnt->p_filesz);
if (retval != elf_ppnt->p_filesz) {
if (retval >= 0)
retval = -EIO;
goto out_free_interp;
}
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
goto out_free_interp;
/* 設定elf可執行檔的特性 */
SET_PERSONALITY(loc->elf_ex);
/* 核心把新進程的堆棧中設定一些標記對,以指示動態連結器的相關操作,詳見open_exec實現 */
interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp;
/*
* If the binary is not readable then enforce
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
if (file_permission(interpreter, MAY_READ) < 0)
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
retval = kernel_read(interpreter, 0, bprm->buf,
BINPRM_BUF_SIZE);
if (retval != BINPRM_BUF_SIZE) {
if (retval >= 0)
retval = -EIO;
goto out_free_dentry;
}
loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
break;
}
/* 迴圈檢查所有的程式頭看是否有動態連接器 */
elf_ppnt++;
}
elf_ppnt = elf_phdata;
for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
if (elf_ppnt->p_type == PT_GNU_STACK) {
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
}
/* 檢查是否由動態連接器,無論是否有動態連接器都會執行elf檔案 */
if (elf_interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
goto out_free_dentry;
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(&loc->interp_elf_ex))
goto out_free_dentry;
} else {
/* Executables without an interpreter also need a personality */
SET_PERSONALITY(loc->elf_ex);
}
/* 在此清除掉了父進程的所有相關代碼 */
retval = flush_old_exec(bprm);
if (retval)
goto out_free_dentry;
/* OK, This is the point of no return */
current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags;
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
SET_PERSONALITY(loc->elf_ex);
if (elf_read_implies_exec(loc->elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
current->flags |= PF_RANDOMIZE;
arch_pick_mmap_layout(current->mm);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = 0;
/* 為下面的動態連接器執行擷取核心空間page */
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < 0) {
send_sig(SIGKILL, current, 0);
goto out_free_dentry;
}
current->mm->start_stack = bprm->p;
/* 按照先前擷取的程式頭表,迴圈將所有的可執行檔載入到記憶體中 */
for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
if (elf_ppnt->p_type != PT_LOAD)
continue;
if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk (elf_bss + load_bias,
elf_brk + load_bias);
if (retval) {
send_sig(SIGKILL, current, 0);
goto out_free_dentry;
}
nbyte = ELF_PAGEOFFSET(elf_bss);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
if (nbyte > elf_brk - elf_bss)
nbyte = elf_brk - elf_bss;
if (clear_user((void __user *)elf_bss +
load_bias, nbyte)) {
/*
* This bss-zeroing can fail if the ELF
* file specifies odd protections. So
* we don't check the return value
*/
}
}
}
if (elf_ppnt->p_flags & PF_R)
elf_prot |= PROT_READ;
if (elf_ppnt->p_flags & PF_W)
elf_prot |= PROT_WRITE;
if (elf_ppnt->p_flags & PF_X)
elf_prot |= PROT_EXEC;
elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
vaddr = elf_ppnt->p_vaddr;
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED;
} else if (loc->elf_ex.e_type == ET_DYN) {
/* Try and get dynamic programs out of the way of the
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
#ifdef CONFIG_X86
load_bias = 0;
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, 0);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, 0);
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
if (!load_addr_set) {
load_addr_set = 1;
load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
if (loc->elf_ex.e_type == ET_DYN) {
load_bias += error -
ELF_PAGESTART(load_bias + vaddr);
load_addr += load_bias;
reloc_func_desc = load_bias;
}
}
k = elf_ppnt->p_vaddr;
if (k < start_code)
start_code = k;
if (start_data < k)
start_data = k;
/*
* Check to see if the section's size will overflow the
* allowed task size. Note that p_filesz must always be
* <= p_memsz so it is only necessary to check p_memsz.
*/
if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
elf_ppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - elf_ppnt->p_memsz < k) {
/* set_brk can never work. Avoid overflows. */
send_sig(SIGKILL, current, 0);
retval = -EINVAL;
goto out_free_dentry;
}
k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
if (k > elf_bss)
elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
if (k > elf_brk)
elf_brk = k;
}
/* 更新讀入記憶體中相關資訊的記錄 */
loc->elf_ex.e_entry += load_bias;
elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;
/* 使用set_brk調整bss段的大小 */
retval = set_brk(elf_bss, elf_brk);
if (retval) {
send_sig(SIGKILL, current, 0);
goto out_free_dentry;
}
if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
send_sig(SIGSEGV, current, 0);
retval = -EFAULT; /* Nobody gets to see this, but.. */
goto out_free_dentry;
}
/* 核心把控制權傳遞給動態連結器。
動態連結器檢查程式對共用庫的依賴性,
並在需要時對其進行載入,由load_elf_interp完成 */
if (elf_interpreter) {
unsigned long uninitialized_var(interp_map_addr);
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
&interp_map_addr,
load_bias);
if (!IS_ERR((void *)elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
*/
interp_load_addr = elf_entry;
elf_entry += loc->interp_elf_ex.e_entry;
}
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = IS_ERR((void *)elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr;
allow_write_access(interpreter);
fput(interpreter);
kfree(elf_interpreter);
} else {
elf_entry = loc->elf_ex.e_entry;
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = -EINVAL;
goto out_free_dentry;
}
}
kfree(elf_phdata);
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
if (retval < 0) {
send_sig(SIGKILL, current, 0);
goto out;
}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
/* 在記憶體中產生elf映射表 */
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < 0) {
send_sig(SIGKILL, current, 0);
goto out;
}
/* 調整記憶體映射內容 */
current->mm->end_code = end_code;
current->mm->start_code = start_code;
current->mm->start_data = start_data;
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
#ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#endif
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
down_write(¤t->mm->mmap_sem);
error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
up_write(¤t->mm->mmap_sem);
}
#ifdef ELF_PLAT_INIT
/*
* The ABI may specify that certain registers be set up in special
* ways (on i386 %edx is the address of a DT_FINI function, for
* example. In addition, it may also specify (eg, PowerPC64 ELF)
* that the e_entry field is the address of the function descriptor
* for the startup routine, rather than the address of the startup
* routine itself. This macro performs whatever initialization to
* the regs structure is required as well as any relocations to the
* function descriptor entries when executing dynamically links apps.
*/
ELF_PLAT_INIT(regs, reloc_func_desc);
#endif
/* 開始執行程式,這時已經是子進程了 */
start_thread(regs, elf_entry, bprm->p);
retval = 0;
out:
kfree(loc);
out_ret:
return retval;
/* error cleanup */
out_free_dentry:
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_interp:
kfree(elf_interpreter);
out_free_ph:
kfree(elf_phdata);
goto out;
}
針對以上核心中載入ELF的原始碼,可以分析ELF檔案載入過程如下:
1)核心首先讀取ELF檔案頭部,再讀如各種資料結構,從這些資料結構中可知各段或節的地址及標識,然後調用mmap()把找到的可載入段的內容載入到記憶體中。同時讀取區段標記,以標識該段在記憶體中是否可讀、可寫、可執行。其中,文本段是程式碼,唯讀且可執行,而資料區段是可讀且可寫。
2)從PT_INTERP的段中找到所對應的動態連結器名稱,並載入動態連結器。通常是/lib/ld-linux.so.2.
3)核心把新進程的堆棧中設定一些標記對,以指示動態連結器的相關操作。
4)核心把控制權傳遞給動態連結器。
5)動態連結器檢查程式對共用庫的依賴性,並在需要時對其進行載入。
6)動態連結器對程式的外部參考進行重定位,並告訴程式其引用的外部變數/函數的地址,此地址位於共用庫被載入在記憶體的區間內。動態連結還有一個延遲定位的特性,即只有在“真正”需要引用符號時才重定位,這對提高程式運行效率有極大協助。
7)動態連結器執行在ELF檔案中標記為.init的節的代碼,進行程式啟動並執行初始化。
8)動態連結器把控制傳遞給程式,從ELF檔案頭部中定義的程式進入點(main)開始執行。在a.out格式和ELF格式中,程式進入點的值是顯式存在的,而在COFF格式中則是由規範隱含定義。
9)程式開始執行.
其它相關資料:
Intel平台下Linux中ELF檔案動態連結的載入、解析及執行個體分析
http://www.ibm.com/developerworks/cn/linux/l-elf/part1/