This program tests the effect of prefetching, using the CPU tick counter to observe the result. In the experiment, issuing the prefetch for an address roughly 10 lines of code before that address is used gave the best effect.
The code is as follows:
#include <stdio.h>
#define Max_len 1000000
/*
 * Prefetch part: issue a non-temporal prefetch hint (x86 PREFETCHNTA) for
 * the cache line that contains addr.
 *
 * addr: address whose cache line should be prefetched. The instruction is
 *       only a hint; no data is read or written through addr by this
 *       function from the C program's point of view.
 *
 * The hint is emitted as ONE asm statement with a memory operand, so the
 * compiler chooses the addressing register itself. Loading the address in
 * one asm statement and using it in a second one is not reliable: nothing
 * guarantees the register keeps its value between two separate statements.
 *
 * Requires an x86/x86-64 target and a GNU-compatible compiler.
 */
static inline void Prefetchnta (void *addr)
{
	/* volatile: the statement has no outputs and must not be discarded. */
	__asm__ __volatile__ ("prefetchnta %0"
			      :
			      : "m" (*(const volatile char *) addr));
}
/*
 * Read the CPU time-stamp counter with the RDTSC instruction.
 *
 * Returns the 64-bit counter value, assembled from the two 32-bit halves
 * that RDTSC delivers in EDX (high) and EAX (low).
 *
 * static inline: a plain C99/C11 `inline` definition provides no external
 * definition, so the program would fail to link whenever the call is not
 * actually inlined.
 *
 * Requires an x86/x86-64 target and a GNU-compatible compiler.
 */
static inline unsigned long long getcputickcount (void)
{
	unsigned int low32 = 0;
	unsigned int high32 = 0;

	/*
	 * volatile: every call must execute its own RDTSC; without it the
	 * compiler may treat two reads as identical and merge them.
	 */
	__asm__ __volatile__ ("rdtsc" : "=a" (low32), "=d" (high32));

	return ((unsigned long long) high32 << 32) | low32;
}
/*
 * Measure the tick cost of a single access to array[0] twice:
 *   1. after sweeping the whole array, with no prefetch;
 *   2. after sweeping the whole array again and then prefetching array[0].
 * Both tick differences are printed to stdout.
 *
 * argc, argv: unused.
 * Returns 0.
 */
int main (int argc, char* argv[])
{
	/*
	 * static: Max_len long longs (about 8 MB) is too large to place safely
	 * on the stack, and static storage starts zeroed, so the increments
	 * below never read an indeterminate value.
	 * volatile: keeps the compiler from removing the otherwise unused
	 * array accesses that this benchmark is trying to time.
	 */
	static volatile long long array[Max_len];
	unsigned long long start, end;
	int i;

	(void) argc;
	(void) argv;

	/*
	 * Sweep the whole array; presumably intended to push array[0] out of
	 * the cache before the timed access (original note: "disable cache").
	 */
	for (i = 0; i < Max_len; i++) {
		array[i]++;
	}

	start = getcputickcount ();
	array[0]++;
	end = getcputickcount ();
	printf ("Don't use prefetch time:%llu\n", end - start);

	/* Same sweep again, then prefetch array[0] before timing it. */
	for (i = 0; i < Max_len; i++) {
		array[i]++;
	}

	/* The cast only drops the volatile qualifier for the void * parameter. */
	Prefetchnta ((void *) array);
	start = getcputickcount ();
	array[0]++;
	end = getcputickcount ();
	printf ("Use prefetch time:%llu\n", end - start);

	return 0;
}