// When this code is used from the divide-and-conquer method it does not need to deal with data alignment (which would get complicated), and Len is also even there, so an odd Len does not have to be checked. It is not much faster than the general method; the gain is very small. It is offered here only as another idea, which I wrote myself. If you have a better method, please let us know.
// DEST = A + B. DEST, A and B are all stored high-order digit first and low-order digit last, that is, DEST[0] is the most significant digit of the number, and A and B are laid out the same way.
Const unsigned int base = 1000000000; // 10 ^ 9
Const unsigned _ int64 base64 = 0x3b9aca003b9aca00;
Const unsigned _ int64 carryfirst = 0x0000000100000000;
Const unsigned _ int64 carrynext = 0x0000000000000001;
/* Add_mmx () mmx command version */
_ Declspec (naked)
Long add_mmx (unsigned long * DEST, unsigned long * a, unsigned long * B, size_t Len)
{
_ ASM
{
MoV ECx, dword ptr [esp + 0x10] // Len
XOR eax, eax
Test ECx, ECx
JZ add_exit
Push EBP
MoV EBP, ECx
Push EBX
MoV EBX, dword ptr [esp + 0x14] // EBX = B
Push ESI
MoV ESI, dword ptr [esp + 0x14] // ESI =
Push EDI
MoV EDI, dword ptr [esp + 0x14] // EDI = dest
Sub ESI, EBX // ESI = A-B
Lea edX, dword ptr [EBX + 4 * ecx-8] // & B [I]
Sub EDI, EBX // EDI = DEST-B
SHR ECx, 1 // Len = Len/2
Movq mm7, base64 // 0x3b9aca003b9aca00
Movq MM5, carryfirst // 0x0000000100000000
Movq mm6, carrynext // 0x0000000000000001
Pxor mm2, mm2 // carry cleared
Add_loop:
Movq mm0, dword ptr [ESI + EDX] // A [I]
Movq MM1, dword ptr [edX] // B [I]
Paddd mm0, mm2 // sum = A [I] + carry
Movq mm3, mm7 // mm7 = base64
Paddd mm0, MM1 // sum + = B [I]
Pcmpgtd mm3, mm0 // sum> = base (10 ^ 9 )? It is complicated and exquisite. It must be compared twice.
Pandn mm3, MM5 // MM5 = carryfirst
Psrscsi mm3, 32 // get carry carryfirst, mm3> 32
Movq mm4, mm7 // mm7 = base64
Paddd mm0, mm3 // accumulate carry
Pcmpgtd mm4, mm0 // sum> = base (10 ^ 9 )? After carry, the second comparison
Movq mm2, mm4 // backup comparison result
Pandn mm4, mm7 // get the carry subtraction variable for sum-= base
Pandn mm2, mm6 // get the next carry, carrynext
Psubd mm0, mm4 // equivalent to sum-= base
Psllq mm2, 32 // carry = carrynext <32
Movq dword ptr [EDI + EDX], mm0 // Dest [I] = sum
Sub edX, 8 // edX = & B [I]-8, equivalent to I-= 2
Dec ECx // Len --
JNE add_loop
Test EBP, 1 // If Len is an odd number, add the last number.
JZ add_fast_ret
MoV ECx, dword ptr [ESI + EDX] // ESI = A [I]
MoV EBX, dword ptr [edX] // edX = B [I]
Add ECx, EBX // sum = A [I] + B [I]
MoV EBX, base // ESI = base
Add ECx, eax // sum + = carry
XOR eax, eax // carry = 0
CMP ECx, EBX // sum> = base?
JB add_sum // <
MoV eax, 1 // carry = 1
Sub ECx, EBX // sum-= base
Add_sum:
MoV dword ptr [EDI + EDX], ECx // Dest [I] = sum
Pop EDI
Pop ESI
Pop EBX
Pop EBP
Emms
Add_exit:
RET
Add_fast_ret:
Psrscsi mm2, 32 // carry> 32
Movd eax, mm2 // return carry
Pop EDI
Pop ESI
Pop EBX
Pop EBP
Emms
RET
}
}