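Our teacher asked us to compute e, the base of the natural logarithm, with several different methods in order to understand the characteristics of different parallel programming models. Having already done this with plain multithreading and with OpenMP, I now want to implement it with OpenCL. First I will introduce the algorithm: both methods below evaluate the series $e = \sum_{k=0}^{\infty} \frac{1}{k!}$ and differ only in how the terms are distributed over the work-items.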
Method 1.
Host code
/* Project: multiply the matrix of opencl by: Liu Rong time: 2012.11.20 */# include <iostream> # include <time. h> # include <string> # include <math. h> # include <vector> # include <CL/Cl. h> # include <fstream> using namespace STD; // kernel function STD: stringconverttostring (const char * filename) // convert the kernel source code, that is, the self-written parallel function, convert to string {size_t size; char * STR; STD: String s; STD: fstream F (filename, (STD: fstream: In | STD: fstream :: binary); If (F. is_open () {size _ T filesize; F. seekg (0, STD: fstream: End); size = filesize = (size_t) F. tellg (); F. seekg (0, STD: fstream: Beg); STR = new char [size + 1]; If (! Str) {f. close (); STD: cout <"memory allocation failed"; return NULL;} f. read (STR, filesize); F. close (); STR [size] = '\ 0'; S = STR; Delete [] STR; return s;} else {STD :: cout <"\ nfile containg the kernel code (\". cl \ ") not found. please copy the required file in the folder containg the executable. \ n "; exit (1);} return NULL;} int main () {// Double Start, end, time1, time2; // query platform cl_int cierrnum; cl_pla Tform_id platform; cierrnum = clgetplatformids (1, & platform, null); If (cierrnum! = Cl_success) {cout <"failed to get the device" <Endl; return 0 ;}// obtain the device information cl_device_id device; cl_int status; cl_uint maxdims; cl_event events [3]; size_t globalthreads [1]; size_t localthreads [1]; size_t maxworkgroupsize; size_t maxworkitemsizes [3]; // create the cierrnum = clgetdeviceids (platform, platform, 1, & device, null); // create the context cl_context_properties CPS [3] = {cl_context_platform, (cl_context_properties) platform, 0}; cl_context CTX = clcreatecontext (CPS, 1, & device, null, null, & cierrnum); If (cierrnum! = Cl_success) {cout <"failed to create context" <Endl; return 0;} cl_command_queue myqueue = clcreatecommandqueue (CTX, device, 0, & cierrnum); If (cierrnum! 
= Cl_success) {cout <"command queue failed" <Endl; return 0 ;}// declare the buffer and transmit data double * c = NULL; // output array int maxitem = 1000; int stepnum = 100000000; size_t datasize = sizeof (double) * maxitem * 2; // allocate memory space c = (double *) malloc (datasize); // initialize the input array cl_mem bufferc = clcreatebuffer (CTX, cl_mem_write_only, datasize, null, & cierrnum ); // run the kernel to compile const char * filename = "calue. cl "; STD: String sourcestr = converttostring (filename); cons T char * Source = sourcestr. c_str (); size_t sourcesize [] = {strlen (source)}; // read the CL file directly to the memory cl_program myprog = clcreateprogramwithsource (CTX, 1, & source, sourcesize, & cierrnum); // cl_program myprog = clcreateprogramwithsource (CTX, 1, (const char **) & programsource, null, & cierrnum); If (cierrnum! = 0) {cout <"createprogram failed" <Endl;} cierrnum = clbuildprogram (myprog, 0, null, null); If (cierrnum! = 0) {cout <"clbuildprogram failed" <Endl;} cl_kernel mykernel = clcreatekernel (myprog, "calue", & cierrnum); If (cierrnum! = 0) {cout <"clcreatekernel failed" <Endl;} // run the program, set the parameter clsetkernelarg (mykernel, 0, sizeof (cl_mem), (void *) & bufferc); clsetkernelarg (mykernel, 1, sizeof (INT), & stepnum); clsetkernelarg (mykernel, 2, sizeof (INT), & maxitem); size_t globalworksize [1]; globalworksize [0] = maxitem; // start = clock (); cierrnum = clenqueuendrangekernel (myqueue, mykernel, 1, null, globalworksize, null, 0, null, & events [0]); If (cierrnum! = 0) {cout <"clenqueuendrangekernel failed" <Endl;} // time synchronization status = clwaitforevents (1, & events [0]); If (status! = Cl_success) {STD: cout <"error: Waiting For kernel run to finish. 
\ (clwaitforevents0) \ n "; return 0 ;}cout <" O "<Endl; status = clreleaseevent (events [0]); // copy the result to the host end = clock (); time1 = end-start; cout <"Shijian" <time1 <Endl; cierrnum = clenqueuereadbuffer (myqueue, bufferc, cl_true, 0, datasize, C, 0, null, & events [1]); status = clwaitforevents (1, & events [1]); If (status! = Cl_success) {STD: cout <"error: Waiting For read buffer call to finish. \ (clwaitforevents1) N "; return 0;} status = clreleaseevent (events [1]); If (status! = Cl_success) {STD: cout <"error: Release event object. \ (clreleaseevent) \ n "; return 0;} Double E = 0; double result = 0; double temp = 1; // For (INT I = 0; I <maxitem; I ++) {result = C [I * 2]; e + = (1/temp) * result; temp = C [I * 2 + 1];} printf ("e = % 1.22f", e); Return 0 ;}
Kernel Function
// Enter your kernel in this window
// Doubles are an optional feature before OpenCL 1.2; request them explicitly.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Method 1 kernel: each work-item handles one contiguous chunk of the series
 * sum 1/k!, k = 1 .. StepNum.
 *
 * result   - output, two doubles per work-item:
 *              result[2*id]     sum over the chunk of 1/(start * ... * k)
 *              result[2*id + 1] product of all integers in the chunk
 * StepNum  - total number of series terms
 * MaxItem  - number of work-items the terms are split over; when StepNum is
 *            not a multiple of MaxItem the trailing StepNum % MaxItem terms
 *            are not computed
 *
 * The factorial is taken relative to the start of the chunk, so the caller has
 * to divide result[2*id] by the product of all earlier chunks.
 */
__kernel void CaluE(__global double* result, int StepNum, int MaxItem)
{
    int id = get_global_id(0);
    int offset = StepNum / MaxItem;

    // Chunk [start, end): chunks of consecutive work-items tile 1..StepNum
    // without overlapping.
    int start = id * offset + 1;
    int end = start + offset;

    double res = 0;
    double fact = 1;
    for (int i = start; i < end; i++) {
        fact *= i;            // overflows to +inf for long chunks; 1.0/inf is 0
        res += (1.0 / fact);
    }

    result[id * 2] = res;
    result[id * 2 + 1] = fact;
}
Method 2
Host Program
/* Project: multiply the matrix of opencl by: Liu Rong time: 2012.11.20 */# include <iostream> # include <time. h> # include <string> # include <math. h> # include <vector> # include <CL/Cl. h> # include <fstream> using namespace STD; // kernel function STD: stringconverttostring (const char * filename) // convert the kernel source code, that is, the self-written parallel function, convert to string {size_t size; char * STR; STD: String s; STD: fstream F (filename, (STD: fstream: In | STD: fstream :: binary); If (F. is_open () {size _ T filesize; F. seekg (0, STD: fstream: End); size = filesize = (size_t) F. tellg (); F. seekg (0, STD: fstream: Beg); STR = new char [size + 1]; If (! Str) {f. close (); STD: cout <"memory allocation failed"; return NULL;} f. read (STR, filesize); F. close (); STR [size] = '\ 0'; S = STR; Delete [] STR; return s;} else {STD :: cout <"\ nfile containg the kernel code (\". cl \ ") not found. please copy the required file in the folder containg the executable. \ n "; exit (1);} return NULL;} int main () {// Double Start, end, time1, time2; // query platform cl_int cierrnum; cl_pla Tform_id platform; cierrnum = clgetplatformids (1, & platform, null); If (cierrnum! = Cl_success) {cout <"failed to get the device" <Endl; return 0 ;}// obtain the device information cl_device_id device; cl_int status; cl_uint maxdims; cl_event events [3]; size_t globalthreads [1]; size_t localthreads [1]; size_t maxworkgroupsize; size_t maxworkitemsizes [3]; // create the cierrnum = clgetdeviceids (platform, platform, 1, & device, null); // create the context cl_context_properties CPS [3] = {cl_context_platform, (cl_context_properties) platform, 0}; cl_context CTX = clcreatecontext (CPS, 1, & device, null, null, & cierrnum); If (cierrnum! = Cl_success) {cout <"failed to create context" <Endl; return 0;} cl_command_queue myqueue = clcreatecommandqueue (CTX, device, 0, & cierrnum); If (cierrnum! 
= Cl_success) {cout <"command queue failed" <Endl; return 0 ;}// declare the buffer and transmit data double * c = NULL; // output array int maxitem = 10; int stepnum = 1000000000; size_t datasize = sizeof (double) * maxitem; // allocate memory space c = (double *) malloc (datasize ); // initialize the input array cl_mem bufferc = clcreatebuffer (CTX, cl_mem_write_only, datasize * sizeof (float), null, & cierrnum ); // run the kernel to compile const char * filename = "calue. cl "; STD: String sourcestr = converttostring (File Name); const char * Source = sourcestr. c_str (); size_t sourcesize [] = {strlen (source)}; // read the CL file directly to the memory cl_program myprog = clcreateprogramwithsource (CTX, 1, & source, sourcesize, & cierrnum); // cl_program myprog = clcreateprogramwithsource (CTX, 1, (const char **) & programsource, null, & cierrnum); If (cierrnum! = 0) {cout <"createprogram failed" <Endl;} cierrnum = clbuildprogram (myprog, 0, null, null); If (cierrnum! = 0) {cout <"clbuildprogram failed" <Endl;} cl_kernel mykernel = clcreatekernel (myprog, "calue", & cierrnum); If (cierrnum! = 0) {cout <"clcreatekernel failed" <Endl;} // run the program, set the parameter clsetkernelarg (mykernel, 0, sizeof (cl_mem), (void *) & bufferc); clsetkernelarg (mykernel, 1, sizeof (INT), & stepnum); clsetkernelarg (mykernel, 2, sizeof (INT), & maxitem); size_t globalworksize [1]; globalworksize [0] = maxitem; // start = clock (); cierrnum = clenqueuendrangekernel (myqueue, mykernel, 1, null, globalworksize, null, 0, null, & events [0]); If (cierrnum! = 0) {cout <"clenqueuendrangekernel failed" <Endl;} // time synchronization status = clwaitforevents (1, & events [0]); If (status! = Cl_success) {STD: cout <"error: Waiting For kernel run to finish. 
\ (clwaitforevents0) \ n "; return 0 ;}cout <" O "<Endl; status = clreleaseevent (events [0]); // copy the result to the host end = clock (); time1 = end-start; cout <"Shijian" <time1 <Endl; cierrnum = clenqueuereadbuffer (myqueue, bufferc, cl_true, 0, datasize, C, 0, null, & events [1]); status = clwaitforevents (1, & events [1]); If (status! = Cl_success) {STD: cout <"error: Waiting For read buffer call to finish. \ (clwaitforevents1) N "; return 0;} status = clreleaseevent (events [1]); If (status! = Cl_success) {STD: cout <"error: Release event object. \ (clreleaseevent) \ n "; return 0;} Double E = 0; // For (INT I = 0; I <maxitem; I ++) {cout <C [I] <Endl; e + = C [I];} printf ("e = % 1.22f", e); Return 0 ;}
Kernel Function
// Enter your kernel in this window
// Doubles are an optional feature before OpenCL 1.2; request them explicitly.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Method 2 kernel: the terms of sum 1/k!, k = 1 .. StepNum, are interleaved
 * over the work-items. Work-item id handles k = id+1, id+1+MaxItem, ...
 *
 * result   - output, one double per work-item: its partial sum
 * StepNum  - index of the last series term
 * MaxItem  - stride between the terms of one work-item; the loop assumes it
 *            equals the number of work-items launched
 */
__kernel void CaluE(__global double* result, int StepNum, int MaxItem)
{
    int id = get_global_id(0);

    // Kept in double like the result: a float factorial is inexact from 14!
    // and overflows at 35!.
    double fact = 1;
    double e = 0;

    for (int i = id + 1; i <= StepNum; i += MaxItem) {
        // Advance fact from (i - MaxItem)! to i! by multiplying in the
        // MaxItem newest factors; on the first pass (i <= MaxItem) the j < i
        // bound stops at the factor 1, giving i! directly.
        for (int j = 0; j < MaxItem && j < i; j++) {
            fact *= (i - j);
        }
        e += (1.0 / fact); // once fact overflows to +inf the term is 0
    }

    result[id] = e;
}
// Enter your kernel in this window
// (Repeated listing of the Method 1 kernel.)
// Doubles are an optional feature before OpenCL 1.2; request them explicitly.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Method 1 kernel: each work-item handles one contiguous chunk of the series
 * sum 1/k!, k = 1 .. StepNum.
 *
 * result   - output, two doubles per work-item:
 *              result[2*id]     sum over the chunk of 1/(start * ... * k)
 *              result[2*id + 1] product of all integers in the chunk
 * StepNum  - total number of series terms
 * MaxItem  - number of work-items the terms are split over; when StepNum is
 *            not a multiple of MaxItem the trailing StepNum % MaxItem terms
 *            are not computed
 *
 * The factorial is taken relative to the start of the chunk, so the caller has
 * to divide result[2*id] by the product of all earlier chunks.
 */
__kernel void CaluE(__global double* result, int StepNum, int MaxItem)
{
    int id = get_global_id(0);
    int offset = StepNum / MaxItem;

    // Chunk [start, end): chunks of consecutive work-items tile 1..StepNum
    // without overlapping.
    int start = id * offset + 1;
    int end = start + offset;

    double res = 0;
    double fact = 1;
    for (int i = start; i < end; i++) {
        fact *= i;            // overflows to +inf for long chunks; 1.0/inf is 0
        res += (1.0 / fact);
    }

    result[id * 2] = res;
    result[id * 2 + 1] = fact;
}