Integer square root of an unsigned 32-bit integer

Source: Internet
Author: User

I went through the uint32sqrt test program packaged by liangbch and found that the FPU-based versions are hard to improve much further. Still, I made a few minor changes and provide four versions (fast_sqrt1 to fast_sqrt4) below, benchmarked together with isqrt_fpu2_yaos and isqrt_fpu1_lbc.

 

Code:# Include <stdio. h> <br/> # include <time. h> <br/> typedef unsigned int DWORD; <br/> double B32 [] = {0.0, 4294967296.0}; </P> <p >__ declspec (naked) <br/> DWORD _ fastcall isqrt_fpu2_yaos (dword n) <br/>{< br/>__ ASM <br/>{< br/> push ECx <br/> mov eax, ECx <br/> and eax, 0x80000000 <br/> SHR eax, 31 <br/> define qword PTR [B32 + eax * 8] <br/> fild dword ptr [esp] <br/> faddp ST (1 ), st <br/> fsqrt <br/> sub ESP, 8 <br/> fstp qword PTR [esp] <br/> mov edX, dword ptr [esp + 4] <br/> mov eax, EDX <br/> and EDX, 0x7ff00000 <br/> and eax, 0 xfffff <br/> SHR edX, 20 <br/> or eax, 0x100000 <br/> xchg ECx, EDX <br/> sub ECx, 1043 <br/> neg ECx <br/> SHR eax, CL <br/> xchg edX, ECx <br/> Add ESP, 12 <br/> or ECX, ECx <br/> cmove eax, ECX <br/> RET <br/>}</P> <p> _ declspec (naked) <br/> DWORD fast_sqrt1 (dword x) <br/>{< br/> _ ASM <br/> {<br/> sub ESP, 4 </P> <p> mov dword ptr [esp + 12], 0 <br/> fild qword PTR [esp + 8] <br/> fsqrt <br/> fisttp dword ptr [esp] <br/> mov eax, [esp] <br/> Add ESP, 4 <br/> RET <br/>}</P> <p> _ declspec (naked) <br/> DWORD fast_sqrt2 (dword x) <br/>{< br/> _ ASM <br/>{</P> <p> Add ESP, 12 </P> <p> mov dword ptr [esp-4], 0 <br/> fild qword PTR [esp-8] </P> <p> fsqrt <br/> fstp qword PTR [esp] </P> <p> mov ECx, [esp + 4]; // exponential processing <br/> mov eax, ECx; // processing the ending number <br/> SHR ECx, 20 <br/> and eax, 0 xfffff <br/> sub ESP, 12 <br/> or eax, 0x100000 <br/> test ECx, ECx; // processing 0 <br/> cmove eax, ECx <br/> sub ECx, 1075 <br/> neg ECx <br/> SHR eax, CL </P> <p> RET <br/>}</P> <p> _ declspec (naked) <br/> DWORD fast_sqrt3 (dword x) <br/>{< br/> _ ASM <br/>{</P> <p> Add ESP, 4 </P> <p> mov eax, dword ptr [esp] <br/> and eax, 0x80000000 <br/> SHR eax, 28 <br/> define qword PTR [B32 + eax] <br/> fild dword ptr [esp] <br/> faddp ST (1 ), st </P> <p> fsqrt </P> <p> fstp qword PTR [esp + 4] </P> <p> mov ECx, [esp + 8]; // exponential processing <br/> mov eax, ECx; // processing the ending number <br/> 
SHR ECx, 20 <br/> and eax, 0 xfffff <br/> sub ESP, 4 <br/> or eax, 0x100000 <br/> test ECx, ECx; // processing 0 <br/> cmove eax, ECx <br/> sub ECx, 1075 <br/> neg ECx <br/> SHR eax, CL </P> <p> RET <br/>}< br/>__ declspec (naked) <br/> DWORD _ fastcall fast_sqrt4 (dword n) <br/>{< br/>__ ASM <br/>{< br/> push ECx <br/> SHR ECx, 31 <br/> define qword PTR [B32 + ECx * 8] <br/> fild dword ptr [esp] <br/> faddp ST (1 ), st <br/> fsqrt </P> <p> fisttp dword ptr [esp] <br/> pop eax <br/> RET <br/>}< br/>} </P> <p> double zero5 = 0.49999999999636 ;; </P> <p >__ declspec (naked) <br/> DWORD _ fastcall isqrt_fpu1_lbc (dword n) <br/>{< br/>__ ASM <br/>{< br/> push ECx </P> <p> SHR ECx, 31 <br/> define qword PTR [B32 + ECx * 8] <br/> fild dword ptr [esp] <br/> faddp ST (1 ), st <br/> fsqrt <br/> fsub qword PTR [zero5] <br/> fistp dword ptr [esp] <br/> pop eax <br/> RET <br/ >}< br/>}</P> <p> int main () <br/>{< br/> double T0, T1; <br/> dword I; </P> <p> printf ("elapsed time:/N "); <br/> // =============================< br/> fast_sqrt1 (0 ); <br/> t0 = clock (); </P> <p> for (I = 0; I <= 0 xfffffff; I ++) // test <br/>{< br/> fast_sqrt1 (I); <br/>}< br/> printf ("fast_sqrt1: % F S/N ", (Clock ()-T0)/clocks_per_sec ); <br/> // =============================< br/> fast_sqrt2 (0 ); <br/> t0 = clock (); </P> <p> for (I = 0; I <= 0 xfffffff; I ++) // test <br/>{< br/> fast_sqrt2 (I); <br/>}< br/> printf ("fast_sqrt2: % F S/N ", (Clock ()-T0)/clocks_per_sec ); <br/> // =============================< br/> isqrt_fpu2_yaos (0 ); <br/> t0 = clock (); <br/> for (I = 0; I <= 0 xfffffff; I ++) // test <br/>{< br/> isqrt_fpu2_yaos (I); <br/>}< br/> printf ("isqrt_fpu2_yaos: % F S/N ", (Clock ()-T0)/clocks_per_sec ); <br/> // =============================< br/> fast_sqrt3 (0 ); <br/> t0 = clock (); <br/> for (I = 0; I <= 0 xfffffff; I ++) // test <br/>{< br/> fast_sqrt3 (I); <br/>}< br/> printf ("fast_sqrt3: % F S/N ", (Clock ()-T0)/clocks_per_sec ); <br/> // 
=============================< br/> fast_sqrt4 (0 ); <br/> t0 = clock (); <br/> for (I = 0; I <= 0 xfffffff; I ++) // test <br/>{< br/> fast_sqrt4 (I); <br/>}< br/> printf ("fast_sqrt4: % F S/N ", (Clock ()-T0)/clocks_per_sec ); <br/> // =============================< br/> isqrt_fpu1_lbc (0 ); <br/> t0 = clock (); <br/> for (I = 0; I <= 0 xfffffff; I ++) // test <br/>{< br/> isqrt_fpu1_lbc (I); <br/>}< br/> printf ("isqrt_fpu1_lbc: % F S/N ", (Clock ()-T0)/clocks_per_sec ); </P> <p> // ===============================< br /> printf ("/n/nboundary test. /n "); </P> <p> printf (" fast_sqrt1 (0) = % 10u/N ", fast_sqrt1 (0 )); <br/> printf ("fast_sqrt1 (0 xffffffff) = % u/n", fast_sqrt1 (0 xffffffffff )); </P> <p> printf ("fast_sqrt2 (0) = % 10u/N", fast_sqrt2 (0); <br/> printf ("fast_sqrt2 (0 xffffffffff) = % u/n ", fast_sqrt2 (0 xffffffff); </P> <p> printf (" fast_sqrt3 (0) = % 10u/N ", fast_sqrt3 (0); <br/> printf ("fast_sqrt3 (0 xffffffff) = % u/n", fast_sqrt3 (0 xffffffff )); </P> <p> printf ("fast_sqrt4 (0) = % 10u/N", fast_sqrt4 (0); <br/> printf ("fast_sqrt4 (0 xffffffffff) = % u/N ", fast_sqrt4 (0 xffffffff); </P> <p> printf (" isqrt_fpu1_lbc (0) = % 10u/N ", isqrt_fpu1_lbc (0); <br/> printf ("isqrt_fpu1_lbc (0 xffffffff) = % u/N", isqrt_fpu1_lbc (0 xffffffffff )); </P> <p> return 0; </P> <p >}< br/>

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If you find any content on this page confusing, please email us; we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.