Object orientation, inheritance, polymorphism and so on exist only at the language level; the machine knows nothing about them when it executes a program. The purpose of this post is to discuss how the compiler implements C++ virtual functions.
First, we will discuss the simplest situation:
The code is as follows (vtable.cpp):
#include <stdio.h>

// Demo class: three virtual functions (fun1..fun3), one ordinary member
// function (fun4), and a constructor/destructor that announce themselves.
class Base
{
public:
    Base() { puts("Base()"); }
    virtual void fun1() { puts("fun1()"); }
    virtual void fun2() { puts("fun2()"); }
    virtual void fun3() { puts("fun3()"); }
    void fun4() { puts("Normal fun4()"); }
    ~Base() { puts("~Base()"); }
};

// Plain function pointer type used to call the virtual functions directly
// through the addresses read out of the object's virtual function table.
typedef void (*FUN)(void);

// Reads the first three entries of b's virtual function table by hand and
// calls each of them, then calls the non-virtual fun4() the normal way.
//
// NOTE: this is a deliberate, non-portable experiment. It treats the first
// word of the object as a pointer to an array of int-sized function
// addresses, so it assumes int and pointers have the same size, and it
// calls member functions without passing a this pointer.
int main()
{
    FUN pfun1 = NULL;
    FUN pfun2 = NULL;
    FUN pfun3 = NULL;
    Base b;
    //printf("sizeof b = %d\n",sizeof b);
    printf("&b = %p\n",(&b));
    // Prints the address of the object itself, which is where the
    // virtual-table pointer is read from below.
    printf("vtable = %p\n",(int *)(&b));
    //printf("b = 0x%x\n",*(int*)(&b));

    // First word of b -> table; table[0] -> fun1.
    pfun1 = (FUN)**(int**)(&b);
    printf("fun1 = %p\n",pfun1);
    pfun1();

    // table[1] -> fun2.
    pfun2 = (FUN)*(*(int**)(&b)+1);
    printf("fun2 = %p\n",pfun2);
    pfun2();

    // table[2] -> fun3.
    pfun3 = (FUN)*(*(int**)(&b)+2);
    printf("fun3 = %p\n",pfun3);
    pfun3();

    b.fun4();
    return 0;
}
Running gcc -S vtable.cpp generates vtable.s.
The annotations are very detailed. You only need to know a few simple assembly instructions (mov, call, ret, push, pop) to understand them.
Text after # is an annotation.
    .file   "vtable.cpp"
    .section    .rodata  # read-only data segment
.LC0:
    .string "Base()"  # defines a string; equivalent to char LC0[] = "Base()";
    .section    .text._ZN4BaseC1Ev,"axG",@progbits,_ZN4BaseC1Ev,comdat
    .align 2
    .weak   _ZN4BaseC1Ev
    .type   _ZN4BaseC1Ev, @function
_ZN4BaseC1Ev:  # Base constructor
.LFB2:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    8(%ebp), %eax
    # A few more words are needed here, because the offset is easy to get confused by.
    # 8(%ebp) is the memory at %esp + 8. Why skip 8 bytes?
    # 1. When a function is called, the %eip register is saved on the stack so that the function can return to the correct location (a buffer overflow attack overwrites this saved %eip so that the function returns to wherever the attacker wants).
    # 2. Add the four bytes pushed by pushl %ebp.
    # So 8(%ebp) actually holds the this pointer.
    movl    $_ZTV4Base+8, (%eax)
    # This is the key line; it is equivalent to *this = $_ZTV4Base+8;
    # $_ZTV4Base is the address of the _ZTV4Base label, and adding 8 lands exactly on the entry for the virtual function fun1()
    movl    $.LC0, (%esp)  # set up the stack
    call    puts  # function call
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE2:
    .size   _ZN4BaseC1Ev, .-_ZN4BaseC1Ev
    .section    .rodata
.LC1:
    .string "fun1()"
    .section    .text._ZN4Base4fun1Ev,"axG",@progbits,_ZN4Base4fun1Ev,comdat
    .align 2
    .weak   _ZN4Base4fun1Ev
    .type   _ZN4Base4fun1Ev, @function
_ZN4Base4fun1Ev:
.LFB3:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    $.LC1, (%esp)
    call    puts
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE3:
    .size   _ZN4Base4fun1Ev, .-_ZN4Base4fun1Ev
    .section    .rodata
.LC2:
    .string "fun2()"
    .section    .text._ZN4Base4fun2Ev,"axG",@progbits,_ZN4Base4fun2Ev,comdat
    .align 2
    .weak   _ZN4Base4fun2Ev
    .type   _ZN4Base4fun2Ev, @function
_ZN4Base4fun2Ev:
.LFB4:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    $.LC2, (%esp)
    call    puts
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE4:
    .size   _ZN4Base4fun2Ev, .-_ZN4Base4fun2Ev
    .section    .rodata
.LC3:
    .string "fun3()"
    .section    .text._ZN4Base4fun3Ev,"axG",@progbits,_ZN4Base4fun3Ev,comdat
    .align 2
    .weak   _ZN4Base4fun3Ev
    .type   _ZN4Base4fun3Ev, @function
_ZN4Base4fun3Ev:
.LFB5:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    $.LC3, (%esp)
    call    puts
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE5:
    .size   _ZN4Base4fun3Ev, .-_ZN4Base4fun3Ev
    .section    .rodata
.LC4:
    .string "Normal fun4()"
    .section    .text._ZN4Base4fun4Ev,"axG",@progbits,_ZN4Base4fun4Ev,comdat
    .align 2
    .weak   _ZN4Base4fun4Ev
    .type   _ZN4Base4fun4Ev, @function
_ZN4Base4fun4Ev:
.LFB6:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    $.LC4, (%esp)
    call    puts
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE6:
    .size   _ZN4Base4fun4Ev, .-_ZN4Base4fun4Ev
    .section    .rodata
.LC5:
    .string "~Base()"
    .section    .text._ZN4BaseD1Ev,"axG",@progbits,_ZN4BaseD1Ev,comdat
    .align 2
    .weak   _ZN4BaseD1Ev
    .type   _ZN4BaseD1Ev, @function
_ZN4BaseD1Ev:
.LFB9:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    8(%ebp), %eax
    movl    $_ZTV4Base+8, (%eax)
    movl    $.LC5, (%esp)
    call    puts
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE9:
    .size   _ZN4BaseD1Ev, .-_ZN4BaseD1Ev
    .section    .rodata
.LC6:
    .string "&b = %p\n"
.LC7:
    .string "vtable = %p\n"
.LC8:
    .string "fun1 = %p\n"
.LC9:
    .string "fun2 = %p\n"
.LC10:
    .string "fun3 = %p\n"
    .globl  _Unwind_Resume
    .text
.globl main
    .type   main, @function
main:  # the main function; tidied up a little for ease of reading
.LFB10:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    pushl   %esi
    pushl   %ebx
    subl    $40, %esp
    movl    $0, 20(%esp)  # pfun1
    movl    $0, 24(%esp)  # pfun2
    movl    $0, 28(%esp)  # pfun3
    leal    16(%esp), %eax  # %esp holds the stack pointer; this instruction puts %esp + 16 into %eax, so %eax now holds the address of b
    movl    %eax, (%esp)  # store the address of b in the memory %esp points to, for the function call below; this is the hidden this pointer
.LEHB0:
    .cfi_escape 0x10,0x3,0x8,0x75,0x0,0x9,0xf0,0x1a,0x9,0xf8,0x22
    .cfi_escape 0x10,0x6,0x8,0x75,0x0,0x9,0xf0,0x1a,0x9,0xfc,0x22
    # These 22 bytes are there to prevent stack overflow. See "Computer Systems: A Programmer's Perspective"
    call    _ZN4BaseC1Ev
    # Call the Base constructor, which stores the value $_ZTV4Base+8 through %eax. .long declares a 4-byte integer, so the value actually found at that location is _ZN4Base4fun1Ev, that is, the address of the first virtual function in class Base
    # In C terms: after the constructor runs, the value of b is the address of the virtual function table ($_ZTV4Base+8); the virtual function table is an array that stores function addresses, which is why int** is used.
.LEHE0:
    leal    16(%esp), %eax  # address of b
    movl    %eax, 4(%esp)  # set up the stack for the function call
    movl    $.LC6, (%esp)  # set up the stack for the function call
.LEHB1:
    call    printf  # print &b
    leal    16(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.LC7, (%esp)
    call    printf  # print vtable
    leal    16(%esp), %eax
    movl    (%eax), %eax
    movl    (%eax), %eax  # load the vtable address into %eax
    movl    %eax, 20(%esp)  # pfun1 = (FUN)**(int**)(&b);
    movl    20(%esp), %eax
    movl    %eax, 4(%esp)  # set up the stack for the function call
    movl    $.LC8, (%esp)  # set up the stack for the function call
    call    printf  # printf("fun1 = %p\n", pfun1);
    movl    20(%esp), %eax  # %eax changes across a function call, so fetch the address again
    call    *%eax  # pfun1();
    leal    16(%esp), %eax
    movl    (%eax), %eax
    addl    $4, %eax  # *(int**)(&b)+1
    movl    (%eax), %eax
    movl    %eax, 24(%esp)  # pfun2 = (FUN)*(*(int**)(&b)+1);
    movl    24(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.LC9, (%esp)
    call    printf  # printf("fun2 = %p\n", pfun2);
    movl    24(%esp), %eax
    call    *%eax  # pfun2();
    leal    16(%esp), %eax
    movl    (%eax), %eax
    addl    $8, %eax  # *(int**)(&b)+2;
    movl    (%eax), %eax
    movl    %eax, 28(%esp)  # pfun3 = (FUN)*(*(int**)(&b)+2);
    movl    28(%esp), %eax
    movl    %eax, 4(%esp)
    movl    $.LC10, (%esp)
    call    printf  # printf("fun3 = %p\n", pfun3);
    movl    28(%esp), %eax
    call    *%eax  # pfun3();
    leal    16(%esp), %eax
    movl    %eax, (%esp)
    call    _ZN4Base4fun4Ev  # b.fun4();
.LEHE1:
    movl    $0, %ebx
    leal    16(%esp), %eax
    movl    %eax, (%esp)
.LEHB2:
    call    _ZN4BaseD1Ev  # call the destructor
.LEHE2:
    movl    %ebx, %eax  # what follows restores the stack
    addl    $40, %esp
    popl    %ebx
    .cfi_remember_state
    .cfi_restore 3
    popl    %esi
    .cfi_restore 6
    movl    %ebp, %esp
    .cfi_def_cfa_register 4
    popl    %ebp
    .cfi_restore 5
    .cfi_def_cfa_offset 4
    ret
.L17:
    .cfi_restore_state
.L15:
    movl    %edx, %ebx
    movl    %eax, %esi
    leal    16(%esp), %eax
    movl    %eax, (%esp)
    movl    %esi, %eax
    movl    %ebx, %edx
    movl    %eax, (%esp)
.LEHB3:
    call    _Unwind_Resume
.LEHE3:
    .cfi_endproc
.LFE10:
    .size   main, .-main
    .globl  __gxx_personality_v0
    .section    .gcc_except_table,"a",@progbits
.LLSDA10:
    .byte   0xff
    .byte   0xff
    .byte   0x1
    .uleb128 .LLSDACSE10-.LLSDACSB10
.LLSDACSB10:
    .uleb128 .LEHB0-.LFB10
    .uleb128 .LEHE0-.LEHB0
    .uleb128 0x0
    .uleb128 0x0
    .uleb128 .LEHB1-.LFB10
    .uleb128 .LEHE1-.LEHB1
    .uleb128 .L17-.LFB10
    .uleb128 0x0
    .uleb128 .LEHB2-.LFB10
    .uleb128 .LEHE2-.LEHB2
    .uleb128 0x0
    .uleb128 0x0
    .uleb128 .LEHB3-.LFB10
    .uleb128 .LEHE3-.LEHB3
    .uleb128 0x0
    .uleb128 0x0
.LLSDACSE10:
    .text
    .weak   _ZTV4Base
    .section    .rodata._ZTV4Base,"aG",@progbits,_ZTV4Base,comdat
    .align 8
    .type   _ZTV4Base, @object
    .size   _ZTV4Base, 20
_ZTV4Base:
    .long   0
    .long   _ZTI4Base
    .long   _ZN4Base4fun1Ev  # $_ZTV4Base+8: the virtual function table address
    .long   _ZN4Base4fun2Ev
    .long   _ZN4Base4fun3Ev
    .weak   _ZTS4Base
    .section    .rodata._ZTS4Base,"aG",@progbits,_ZTS4Base,comdat
    .type   _ZTS4Base, @object
    .size   _ZTS4Base, 6
_ZTS4Base:
    .string "4Base"
    .weak   _ZTI4Base
    .section    .rodata._ZTI4Base,"aG",@progbits,_ZTI4Base,comdat
    .align 4
    .type   _ZTI4Base, @object
    .size   _ZTI4Base, 8
_ZTI4Base:
    .long   _ZTVN10__cxxabiv117__class_type_infoE+8
    .long   _ZTS4Base
    .ident  "GCC: (GNU) 4.4.2 20091027 (Red Hat 4.4.2-7)"
    .section    .note.GNU-stack,"",@progbits

To better understand the implicit parameter this, change fun2 to:

    virtual void fun2() { fun1(); puts("fun2()"); }

The corresponding assembly code:

    .section    .rodata
.LC2:
    .string "fun2()"
    .section    .text._ZN4Base4fun2Ev,"axG",@progbits,_ZN4Base4fun2Ev,comdat
    .align 2
    .weak   _ZN4Base4fun2Ev
    .type   _ZN4Base4fun2Ev, @function
_ZN4Base4fun2Ev:
.LFB4:
    .cfi_startproc
    .cfi_personality 0x0,__gxx_personality_v0
    pushl   %ebp
    .cfi_def_cfa_offset 8
    movl    %esp, %ebp
    .cfi_offset 5, -8
    .cfi_def_cfa_register 5
    subl    $24, %esp
    movl    8(%ebp), %eax  # the implicit parameter this
    movl    (%eax), %eax
    movl    (%eax), %edx
    movl    8(%ebp), %eax
    movl    %eax, (%esp)  # the implicit this argument
    call    *%edx  # fun1(); this holds the vtable address, and the first element of the vtable is the address of fun1
    movl    $.LC2, (%esp)
    call    puts
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE4:
    .size   _ZN4Base4fun2Ev, .-_ZN4Base4fun2Ev

If main still calls fun2 the original way (through pfun2) at this point, this is not passed as an argument, so the program goes wrong at call *%edx. Change the calling code to:

    //pfun2();
    b.fun2();

The corresponding assembly code becomes:

    leal    16(%esp), %eax  # put this into %eax
    movl    %eax, (%esp)  # put this on the stack
    call    _ZN4Base4fun2Ev

If you change fun1 to fun4:

    virtual void fun2() { fun4(); puts("fun2()"); }

then the original way of calling (with no this pointer passed as the implicit argument) works without any problem, because fun4 calls no virtual function and uses no member variable, so the this pointer is never used.
The memory layout of Base is as follows: