// This code does not need to consider the data-alignment problem. When the divide-and-conquer method (which is more complicated) is used, Len is also always an even number, so in that case there is no need to check for an odd Len. It is not as fast as the general method, and the difference is very small. It is provided here only as another idea, which I originally wrote; if you have a better method, please let us know.
// DEST = A - B. DEST, A and B are all stored with the high digits first and the low digits last; that is, DEST[0] is the highest digit of the number, and A and B are laid out the same way.
Const unsigned int base = 1000000000; // 10 ^ 9
Const unsigned _ int64 sign64 = 0x0000000000000000;
Const unsigned _ int64 borrowfirst = 0x0000000100000000;
Const unsigned _ int64 borrownext = 0x0000000000000001;
/* Sub_mmx () mmx command version */
_ Declspec (naked)
Long sub_mmx (unsigned long * DEST, unsigned long * a, unsigned long * B, size_t Len)
{
_ ASM
{
MoV ECx, dword ptr [esp + 0x10] // Len
XOR eax, eax
Test ECx, ECx
JZ sub_exit
Push EBP
MoV EBP, ECx
Push EBX
MoV EBX, dword ptr [esp + 0x14] // EBX = B
Push ESI
MoV ESI, dword ptr [esp + 0x14] // ESI =
Push EDI
MoV EDI, dword ptr [esp + 0x14] // EDI = dest
Sub ESI, EBX // ESI = A-B
Lea edX, dword ptr [EBX + 4 * ecx-8] // & B [I]
Sub EDI, EBX // EDI = DEST-B
SHR ECx, 1 // Len = Len/2
Movq mm7, base64 // 0x3b9aca003b9aca00
Movq MM5, borrowfirst // 0x0000000100000000
Movq mm6, borrownext // 0x0000000000000001
Pxor mm2, mm2 // clear borrow
Sub_loop:
Movq mm0, dword ptr [ESI + EDX] // A [I]
Movq MM1, dword ptr [edX] // B [I]
Psubd mm0, mm2 // DIF = A [I]-borrow
Pxor mm3, mm3 // mm3 = sign64 (0x0000000000000000)
Psubd mm0, MM1 // dif-= B [I]
Pcmpgtd mm3, mm0 // DIF <0? It is complicated and exquisite. It must be compared twice.
Pand mm3, MM5 // MM5 = borrowfirst
Psrscsi mm3, 32 // get carry borrowfirst, mm3> 32
Pxor mm4, mm4 // mm4 = sign64 (0x0000000000000000)
Psubd mm0, mm3 // progressive subtraction carry
Pcmpgtd mm4, mm0 // DIF <0? After carry, the second comparison
Movq mm2, mm4 // backup comparison result
Pand mm4, mm7 // obtain the carry addition variable for dif + = base
Pand mm2, mm6 // get the next carry, borrownext
Paddd mm0, mm4 // equivalent to DIF + = base
Psllq mm2, 32 // borrow = borrownext <32
Movq dword ptr [EDI + EDX], mm0 // Dest [I] = dif
Sub edX, 8 // edX = & B [I]-8, equivalent to I-= 2
Dec ECx // Len --
JNE sub_loop
///*
Test EBP, 1 // If Len is an odd number, the last number is subtracted.
JZ sub_fast_ret
MoV ECx, dword ptr [ESI + EDX] // ESI = A [I]
MoV EBX, dword ptr [edX] // edX = B [I]
Sub ECx, EBX // DIF = A [I]-B [I]
MoV EBX, base // ESI = base
Add ECx, eax // dif-= borrow
XOR eax, eax // borrow = 0
CMP ECx, EBX // DIF <0?
JB sub_dif // <
MoV eax, 1 // borrow = 1
Add ECx, EBX // DIF + = base
Sub_dif:
MoV dword ptr [EDI + EDX], ECx // Dest [I] = dif
Pop EDI
Pop ESI
Pop EBX
Pop EBP
Emms
Sub_exit:
RET
Sub_fast_ret:
//*/
Psrscsi mm2, 32 // borrow> 32
Movd eax, mm2 // returns borrow
Pop EDI
Pop ESI
Pop EBX
Pop EBP
Emms
// Sub_exit:
RET
}
}