[Original] Base-10^9 high-precision big-integer subtraction (MMX version)

Source: Internet
Author: User

// When this code is called from a divide-and-conquer routine it does not need to handle data alignment (doing so would be complicated), and Len is always even there, so there is no need to check for an odd Len. It is not as fast as the general method, though the difference is very small. It is offered here only as another approach, which I wrote myself; if you have a better method, please let us know.

// Dest = A - B. Dest, A, and B are all stored most-significant limb first, that is, Dest[0] holds the highest digit of the number, and the same applies to A and B.

Const unsigned int base = 1000000000; // 10 ^ 9

Const unsigned _ int64 sign64 = 0x0000000000000000;
Const unsigned _ int64 borrowfirst = 0x0000000100000000;
Const unsigned _ int64 borrownext = 0x0000000000000001;

/*
 * sub_mmx() -- MMX version of base-10^9 big-integer subtraction.
 *
 * Computes dest = a - b over len 32-bit limbs. Index 0 is the most
 * significant limb, so the routine walks from the end of the arrays toward
 * index 0, two limbs per MMX iteration, followed by one scalar step when
 * len is odd.
 *
 * Target : 32-bit x86, MSVC inline assembler, cdecl, naked function
 *          (arguments are read directly from the stack).
 * In     : [esp+4]  = dest, [esp+8] = a, [esp+12] = b, [esp+16] = len
 * Out    : eax = final borrow (1 if the subtraction went below zero, else 0);
 *          0 when len == 0.
 * Saves  : ebp, ebx, esi, edi (pushed/popped). Clobbers ecx, edx, flags,
 *          mm0-mm7; EMMS is executed before returning whenever MMX was used.
 * Assumes: every limb of a and b is < base, so each per-limb difference fits
 *          in a signed 32-bit lane -- TODO confirm against callers.
 *
 * Register roles inside the loop:
 *   edx = &b[i]          (i = index of the more significant limb of the pair)
 *   esi = a - b          (byte offset, so [esi+edx] = &a[i])
 *   edi = dest - b       (byte offset, so [edi+edx] = &dest[i])
 *   mm2 = incoming borrow, held in the HIGH lane (the less significant limb)
 *   mm5 = borrowfirst, mm6 = borrownext, mm7 = base in both lanes
 */
__declspec(naked)
long sub_mmx(unsigned long *dest, unsigned long *a, unsigned long *b, size_t len)
{
    __asm
    {
        mov     ecx, dword ptr [esp + 0x10]     // ecx = len
        xor     eax, eax                        // borrow returned for len == 0
        test    ecx, ecx
        jz      sub_exit

        push    ebp
        mov     ebp, ecx                        // ebp = len, kept for the odd test

        push    ebx
        mov     ebx, dword ptr [esp + 0x14]     // ebx = b
        push    esi
        mov     esi, dword ptr [esp + 0x14]     // esi = a
        push    edi
        mov     edi, dword ptr [esp + 0x14]     // edi = dest
        sub     esi, ebx                        // esi = a - b
        lea     edx, [ebx + 4 * ecx - 8]        // edx = &b[len - 2]
        sub     edi, ebx                        // edi = dest - b

        movd    mm7, base                       // low lane = base
        movq    mm5, borrowfirst                // 0x0000000100000000
        punpckldq mm7, mm7                      // both lanes = base
        movq    mm6, borrownext                 // 0x0000000000000001
        pxor    mm2, mm2                        // no incoming borrow

        shr     ecx, 1                          // ecx = number of limb pairs
        jz      sub_tail                        // len == 1: no pair to process

    sub_loop:
        movq    mm0, qword ptr [esi + edx]      // a[i], a[i+1]
        movq    mm1, qword ptr [edx]            // b[i], b[i+1]

        psubd   mm0, mm2                        // high lane -= incoming borrow
        pxor    mm3, mm3
        psubd   mm0, mm1                        // dif = a - b (both lanes)

        pcmpgtd mm3, mm0                        // mask of lanes with dif < 0
        pand    mm3, mm5                        // keep bit 0 of the high lane
        psrlq   mm3, 32                         // move it to the low lane

        pxor    mm4, mm4
        psubd   mm0, mm3                        // low lane -= borrow from high lane

        pcmpgtd mm4, mm0                        // re-test: low lane may have gone negative

        movq    mm2, mm4                        // keep the mask for the outgoing borrow
        pand    mm4, mm7                        // base where dif < 0, else 0
        pand    mm2, mm6                        // bit 0 of the low lane = outgoing borrow

        paddd   mm0, mm4                        // dif += base in the negative lanes
        psllq   mm2, 32                         // borrow -> high lane for the next pair

        movq    qword ptr [edi + edx], mm0      // dest[i], dest[i+1] = dif
        sub     edx, 8                          // step two limbs toward index 0
        dec     ecx
        jne     sub_loop

    sub_tail:
        psrlq   mm2, 32                         // borrow back to the low lane
        movd    eax, mm2                        // eax = borrow out of the pairs

        test    ebp, 1                          // odd len: limb 0 is still pending
        jz      sub_done

        // edx is 8 bytes below the last pair handled (or b - 4 for len == 1),
        // so limb 0 lives at edx + 4.
        mov     ecx, dword ptr [esi + edx + 4]  // ecx = a[0]
        mov     ebx, base                       // b pointer no longer needed
        sub     ecx, dword ptr [edx + 4]        // dif = a[0] - b[0]
        sub     ecx, eax                        // dif -= borrow
        xor     eax, eax                        // borrow = 0
        cmp     ecx, ebx                        // unsigned: wrapped negative is >= base
        jb      sub_store
        mov     eax, 1                          // borrow = 1
        add     ecx, ebx                        // dif += base

    sub_store:
        mov     dword ptr [edi + edx + 4], ecx  // dest[0] = dif

    sub_done:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp

        emms                                    // leave MMX state before returning
    sub_exit:
        ret
    }
}

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.