Express unsigned integer

Last Update:2018-12-05 Source: Internet

Author: User

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more >

I went through the uint32 sqrt test program packaged by liangbch and found that the FPU-based variants are hard to improve much further. Still, I made a few minor changes and provide four versions here (fast_sqrt1 through fast_sqrt4), shown alongside isqrt_fpu2_yaos and isqrt_fpu1_lbc for comparison.

Code:# Include <stdio. h> # include <time. h> typedef unsigned int DWORD; double B32 [] = {0.0, 4294967296.0}; __ declspec (naked) DWORD _ fastcall isqrt_fpu2_yaos (dword n) { __ ASM { push ECx mov eax, ECx and eax, 0x80000000 SHR eax, 31 define qword PTR [B32 + eax * 8] fild dword ptr [esp] faddp ST (1 ), st fsqrt sub ESP, 8 fstp qword PTR [esp] mov edX, dword ptr [esp + 4] mov eax, EDX and EDX, 0x7ff00000 and eax, 0 xfffff SHR edX, 20 or eax, 0x100000 xchg ECx, EDX sub ECx, 1043 neg ECx SHR eax, CL xchg edX, ECx Add ESP, 12 or ECX, ECx cmove eax, ECX RET } _ declspec (naked) DWORD fast_sqrt1 (dword x) { _ ASM { sub ESP, 4 mov dword ptr [esp + 12], 0 fild qword PTR [esp + 8] fsqrt fisttp dword ptr [esp] mov eax, [esp] Add ESP, 4 RET } _ declspec (naked) DWORD fast_sqrt2 (dword x) { _ ASM { Add ESP, 12 mov dword ptr [esp-4], 0 fild qword PTR [esp-8] fsqrt fstp qword PTR [esp] mov ECx, [esp + 4]; // exponential processing mov eax, ECx; // processing the ending number SHR ECx, 20 and eax, 0 xfffff sub ESP, 12 or eax, 0x100000 test ECx, ECx; // processing 0 cmove eax, ECx sub ECx, 1075 neg ECx SHR eax, CL RET } _ declspec (naked) DWORD fast_sqrt3 (dword x) { _ ASM { Add ESP, 4 mov eax, dword ptr [esp] and eax, 0x80000000 SHR eax, 28 define qword PTR [B32 + eax] fild dword ptr [esp] faddp ST (1 ), st fsqrt fstp qword PTR [esp + 4] mov ECx, [esp + 8]; // exponential processing mov eax, ECx; // processing the ending number SHR ECx, 20 and eax, 0 xfffff sub ESP, 4 or eax, 0x100000 test ECx, ECx; // processing 0 cmove eax, ECx sub ECx, 1075 neg ECx SHR eax, CL RET } __ declspec (naked) DWORD _ fastcall fast_sqrt4 (dword n) { __ ASM { push ECx SHR ECx, 31 define qword PTR [B32 + ECx * 8] fild dword ptr [esp] faddp ST (1 ), st fsqrt fisttp dword ptr [esp] pop eax RET } } double zero5 = 0.49999999999636 ;; __ declspec (naked) DWORD _ fastcall isqrt_fpu1_lbc (dword n) { __ ASM { push ECx SHR ECx, 31 define qword PTR [B32 + ECx * 8] fild dword ptr [esp] faddp ST (1 ), st fsqrt 
fsub qword PTR [zero5] fistp dword ptr [esp] pop eax RET } } int main () { double T0, T1; dword I; printf ("elapsed time:/N "); // ============================= fast_sqrt1 (0 ); t0 = clock (); for (I = 0; I <= 0 xfffffff; I ++) // test { fast_sqrt1 (I); } printf ("fast_sqrt1: % F S/N ", (Clock ()-T0)/clocks_per_sec ); // ============================= fast_sqrt2 (0 ); t0 = clock (); for (I = 0; I <= 0 xfffffff; I ++) // test { fast_sqrt2 (I); } printf ("fast_sqrt2: % F S/N ", (Clock ()-T0)/clocks_per_sec ); // ============================= isqrt_fpu2_yaos (0 ); t0 = clock (); for (I = 0; I <= 0 xfffffff; I ++) // test { isqrt_fpu2_yaos (I); } printf ("isqrt_fpu2_yaos: % F S/N ", (Clock ()-T0)/clocks_per_sec ); // ============================= fast_sqrt3 (0 ); t0 = clock (); for (I = 0; I <= 0 xfffffff; I ++) // test { fast_sqrt3 (I); } printf ("fast_sqrt3: % F S/N ", (Clock ()-T0)/clocks_per_sec ); // ============================= fast_sqrt4 (0 ); t0 = clock (); for (I = 0; I <= 0 xfffffff; I ++) // test { fast_sqrt4 (I); } printf ("fast_sqrt4: % F S/N ", (Clock ()-T0)/clocks_per_sec ); // ============================= isqrt_fpu1_lbc (0 ); t0 = clock (); for (I = 0; I <= 0 xfffffff; I ++) // test { isqrt_fpu1_lbc (I); } printf ("isqrt_fpu1_lbc: % F S/N ", (Clock ()-T0)/clocks_per_sec ); // =============================== printf ("/n/nboundary test. 
/n "); printf (" fast_sqrt1 (0) = % 10u/N ", fast_sqrt1 (0 )); printf ("fast_sqrt1 (0 xffffffff) = % u/n", fast_sqrt1 (0 xffffffffff )); printf ("fast_sqrt2 (0) = % 10u/N", fast_sqrt2 (0); printf ("fast_sqrt2 (0 xffffffffff) = % u/n ", fast_sqrt2 (0 xffffffff); printf (" fast_sqrt3 (0) = % 10u/N ", fast_sqrt3 (0); printf ("fast_sqrt3 (0 xffffffff) = % u/n", fast_sqrt3 (0 xffffffff )); printf ("fast_sqrt4 (0) = % 10u/N", fast_sqrt4 (0); printf ("fast_sqrt4 (0 xffffffffff) = % u/N ", fast_sqrt4 (0 xffffffff); printf (" isqrt_fpu1_lbc (0) = % 10u/N ", isqrt_fpu1_lbc (0); printf ("isqrt_fpu1_lbc (0 xffffffff) = % u/N", isqrt_fpu1_lbc (0 xffffffffff )); return 0; }

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership, or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

Express unsigned integer

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support

Express unsigned integer

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

Trending Topic

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support