Extract the ASM code for CPU detection from x264 and put it in the VC project.

Source: Internet
Author: User

Reference: http://blog.csdn.net/eagler_hzh/article/details/6550841

The function to extract is `uint32_t x264_cpu_detect( void )` in x264\common\cpu.c. Its source, together with the prototypes of the three helper functions it calls, is:

int x264_cpu_cpuid_test( void );void x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );void x264_cpu_xgetbv( uint32_t op, uint32_t *eax, uint32_t *edx );uint32_t x264_cpu_detect( void ){    uint32_t cpu = 0;    uint32_t eax, ebx, ecx, edx;    uint32_t vendor[4] = {0};    uint32_t max_extended_cap;    int cache;#if !ARCH_X86_64    if( !x264_cpu_cpuid_test() )        return 0;#endif    x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );    if( eax == 0 )        return 0;    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );    if( edx&0x00800000 )        cpu |= X264_CPU_MMX;    else        return 0;    if( edx&0x02000000 )        cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;    if( edx&0x04000000 )        cpu |= X264_CPU_SSE2;    if( ecx&0x00000001 )        cpu |= X264_CPU_SSE3;    if( ecx&0x00000200 )        cpu |= X264_CPU_SSSE3;    if( ecx&0x00080000 )        cpu |= X264_CPU_SSE4;    if( ecx&0x00100000 )        cpu |= X264_CPU_SSE42;    /* Check OXSAVE and AVX bits */    if( (ecx&0x18000000) == 0x18000000 )    {        /* Check for OS support */        x264_cpu_xgetbv( 0, &eax, &edx );        if( (eax&0x6) == 0x6 )            cpu |= X264_CPU_AVX;    }    if( cpu & X264_CPU_SSSE3 )        cpu |= X264_CPU_SSE2_IS_FAST;    if( cpu & X264_CPU_SSE4 )        cpu |= X264_CPU_SHUFFLE_IS_FAST;    x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );    max_extended_cap = eax;    if( !strcmp((char*)vendor, "AuthenticAMD") && max_extended_cap >= 0x80000001 )    {        cpu |= X264_CPU_SLOW_CTZ;        x264_cpu_cpuid( 0x80000001, &eax, &ebx, &ecx, &edx );        if( edx&0x00400000 )            cpu |= X264_CPU_MMXEXT;        if( cpu & X264_CPU_SSE2 )        {            if( ecx&0x00000040 ) /* SSE4a */            {                cpu |= X264_CPU_SSE2_IS_FAST;                cpu |= X264_CPU_LZCNT;                cpu |= X264_CPU_SHUFFLE_IS_FAST;                cpu &= ~X264_CPU_SLOW_CTZ;            }            else                cpu |= 
X264_CPU_SSE2_IS_SLOW;            if( ecx&0x00000080 ) /* Misalign SSE */            {                cpu |= X264_CPU_SSE_MISALIGN;                x264_cpu_mask_misalign_sse();            }        }    }    if( !strcmp((char*)vendor, "GenuineIntel") )    {        x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );        int family = ((eax>>8)&0xf) + ((eax>>20)&0xff);        int model  = ((eax>>4)&0xf) + ((eax>>12)&0xf0);        if( family == 6 )        {            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")             * theoretically support sse2, but it's significantly slower than mmx for             * almost all of x264's functions, so let's just pretend they don't. */            if( model == 9 || model == 13 || model == 14 )            {                cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3);                assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4)));            }            /* Detect Atom CPU */            else if( model == 28 )            {                cpu |= X264_CPU_SLOW_ATOM;                cpu |= X264_CPU_SLOW_CTZ;            }            /* Some Penryns and Nehalems are pointlessly crippled (SSE4 disabled), so             * detect them here. 
*/            else if( model >= 23 )                cpu |= X264_CPU_SHUFFLE_IS_FAST;        }    }    if( (!strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead")) && !(cpu&X264_CPU_SSE42))    {        /* cacheline size is specified in 3 places, any of which may be missing */        x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );        cache = (ebx&0xff00)>>5; // cflush size        if( !cache && max_extended_cap >= 0x80000006 )        {            x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );            cache = ecx&0xff; // cacheline size        }        if( !cache )        {            // Cache and TLB Information            static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };            static const char cache64_ids[] = { 0x22, 0x23, 0x25, 0x29, 0x2c, 0x46, 0x47, 0x49, 0x60, 0x66, 0x67,                                                0x68, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7c, 0x7f, 0x86, 0x87, 0 };            uint32_t buf[4];            int max, i = 0;            do {                x264_cpu_cpuid( 2, buf+0, buf+1, buf+2, buf+3 );                max = buf[0]&0xff;                buf[0] &= ~0xff;                for( int j = 0; j < 4; j++ )                    if( !(buf[j]>>31) )                        while( buf[j] )                        {                            if( strchr( cache32_ids, buf[j]&0xff ) )                                cache = 32;                            if( strchr( cache64_ids, buf[j]&0xff ) )                                cache = 64;                            buf[j] >>= 8;                        }            } while( ++i < max );        }        if( cache == 32 )            cpu |= X264_CPU_CACHELINE_32;        else if( cache == 64 )            cpu |= X264_CPU_CACHELINE_64;        else            x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );    }#if BROKEN_STACK_ALIGNMENT    cpu |= X264_CPU_STACK_MOD4;#endif    return cpu;}

int x264_cpu_cpuid_test( void );
void x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
void x264_cpu_xgetbv( uint32_t op, uint32_t *eax, uint32_t *edx );

These three functions are implemented in assembly, in x264\common\x86\cpu-a.asm.

cpu-a.asm includes x264\common\x86\x86inc.asm; the simplest approach is to copy the entire contents of x86inc.asm directly into cpu-a.asm.

Then add cpu-a.asm to the VC project and install NASM (http://www.nasm.us/). Right-click cpu-a.asm in the VC solution's file list, choose Properties, and apply the following settings:

My project is a VC dialog-based (C++) project, while the x264 functions are C functions, so they must be declared with extern "C" before they are used. One of the functions in cpu-a.asm is defined as follows:

;-----------------------------------------------------------------------------
; void cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;-----------------------------------------------------------------------------
; Executes CPUID with `op` in eax and stores the resulting eax/ebx/ecx/edx
; through the four pointer arguments.
; r0..r4 are the x86inc aliases for the five arguments; "5,7" is the
; argument/register count handed to cglobal (see x86inc.asm for the exact
; semantics). NOTE(review): the rbx/rsi names presumably resolve to the
; 32-bit registers when assembled with -f win32 via x86inc - confirm.
cglobal cpu_cpuid, 5,7
    push rbx            ; cpuid overwrites ebx, so save it and restore before returning
    push  r4            ; push the four output pointers, last argument first ...
    push  r3
    push  r2
    push  r1            ; ... so that they pop back in eax, ebx, ecx, edx order
    mov  eax, r0d       ; requested CPUID leaf
    cpuid
    pop  rsi            ; rsi = eax output pointer (r1)
    mov [rsi], eax
    pop  rsi            ; rsi = ebx output pointer (r2)
    mov [rsi], ebx
    pop  rsi            ; rsi = ecx output pointer (r3)
    mov [rsi], ecx
    pop  rsi            ; rsi = edx output pointer (r4)
    mov [rsi], edx
    pop  rbx            ; restore the saved rbx
    RET

However, when called from outside, the function is named `void x264_cpu_cpuid(...)`: the cglobal macro renames the symbol using the prefix set by `%define program_name x264` at the start of the file. To see what the final symbol name is, run `dumpbin /ALL xxx.obj`.

Call:

// Check the CPU code extern "C" Void x264_cpu_cpuid (INT op, int * eax, int * EBX, int * ECx, int * EDX); void cdialogdlg: onbnclickedbuttoncpudec () {// todo: add the control notification handler code int CPU = 0; int eax, EBX, ECx, EDX; int vendor [4] = {0}; int max_extended_cap; int cache; x1__cpu_cpuid (0, & eax, vendor + 0, vendor + 2, vendor + 1); If (eax = 0) return; // continue operation}

Alternatively, skip the project integration and assemble the file directly from the command line:

nasm -f win32 -DPREFIX cpu-a.asm

This produces cpu-a.obj. Then use the lib tool provided with VC:

lib cpu-a.obj

This produces cpu-a.lib. Add that library to the VC project's linker inputs; the functions can then be used in the same way, through an extern "C" declaration.

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.