http://www.mathblog.dk/project-euler-46-odd-number-prime-square/
Not a hard one to code, but it can be optimized using SSE2 instructions. The code below runs with g++ 4.8.1:
g++ -g -c riddle.cpp -std=c++11 -msse2 -pg
g++ -o riddle.exe riddle.o -pg
objdump -d -M intel -S riddle.o > assembly.txt
riddle
gprof riddle.exe gmon.out > report.txt
And here is the code:
#if defined (__sse2__) #include <xmmintrin.h>//sse #include <emmintrin.h>//sse2 #endif #include <
ctime> #include <cstdio> #include <iostream> #include <chrono> using namespace std;
typedef unsigned long UL;
#define MAX_PRIME_CNT #define MAX_CNT 10001 extern int primes[max_prime_cnt];
BOOL Squarem[max_cnt] = {false};
int startprimeinx[max_cnt]; #if defined (__sse2__)//Debug only void printm128i (const __m128i &v) {unsigned* p = (unsigned*)
&v;
cout << p[0] << ":" << p[1] << ":" << p[2] << ":" << p[3] << Endl;
}//Calculate a[i] * b[i], with i[0..3] int sse_v[4] = {0};
Inline __m128i pwr2_sse (const int &A, const int &b) {sse_v[0] = A; sse_v[2] = b;
__m128i mv = _mm_loadu_si128 ((__m128i *) sse_v);
Return _mm_mul_epu32 (MV, MV);
}//Calculate (A[i]-b[i]) >> 1, with i[0..3] inline __m128i sub4_and_shl1_sse (int a[4], int *b) {__m128i va = _mm_loadu_si128 ((__m128i *) a);
__m128i VP = _mm_loadu_si128 ((__m128i *) b);
Return _mm_srli_epi32 (_MM_SUB_EPI32 (VA, VP), 1);
} #endif int main () {Auto start = Std::chrono::high_resolution_clock::now ();
Mark Perfect Square Numbers int vec4[4] = {0};
for (int i = 0; i < +=4) {#if defined (__sse2__) __m128i r = Pwr2_sse (i, i + 1);
unsigned* val = (unsigned*) &r;
Squarem[val[0]] = squarem[val[2]] = true;
R = Pwr2_sse (i + 2, i + 3);
val = (unsigned*) &r;
Squarem[val[0]] = squarem[val[2]] = true; #else squarem[i * i] = squarem[(i + 1) * (i + 1)] = squarem[(i + 2) * (i +
2)] = squarem[(i + 3) * (i + 3)] = true;
#endif}//pre-calculate start Prime Index Register UL Prevprime, currprime;
for (int i = 1; i < max_prime_cnt; i + +) {prevprime = primes[i-1];
Currprime = Primes[i];
Startprimeinx[prevprime] =-2;
for (int j = Prevprime + 2, J < Currprime; J +=2)//Skip all evens startprimeinx[j] = i-1;
}//Main Logic Register UL v = 1;
register int offset;
while (v + = 2) {//register int offset = find1stsmallerprime (v); offset = startprimeinx[v];
Pre-calculate it.
while (offset >= 0) {#if defined (__sse2__)//If we still has more than 4 primes to check, We use SSE2 ins to check 4 primes all together if (Offset > 4) {i
NT Vv[4] = {V,V,V,V};
__m128i r = Sub4_and_shl1_sse (vv, primes + offset-3); Unsigned * PinX = (unsigned *) &r; if (squarem[pinx[3]) | | SQUAREM[PINX[2] | | SQUAREM[PINX[1] | |
Squarem[pinx[0]]) break;
Offset-= 4;
} else {if (squarem[(V-primes[offset]) >> 1]) break;
offset--;
} #else if (squarem[(V-primes[offset]) >> 1]) break;
offset--;
#endif} if (offset = =-1) break;
} printf ("%lu\n", V);
Output time spent in milli-seconds auto end = Std::chrono::high_resolution_clock::now ();
Std::chrono::d uration<double> diff = End-start;
cout << "Time (in second):" <<diff.count () << Endl;
return 0; }//pre-loaded Primes//memory-performance exchange//int primes[max_prime_cnt] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 2 27, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 35 9, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653 , 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 98 3, 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 110 9, 1117, 1123, 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1 277, 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 157 9, 1583, 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1 723, 1733, 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 202 9, 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2 203, 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 
2357, 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 247 7, 2503, 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687,
2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819 , 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 299 9, 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3 181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 349 9, 3511, 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3 637, 3643, 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4 003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231, 4241, 4