Use OpenCL to calculate e, the base of the natural logarithm

Source: Internet
Author: User
Tags: natural logarithm

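Our teacher asked us to compute e, the base of the natural logarithm, with several different methods in order to understand the characteristics of different parallel programming models. I have already done this with plain multithreading and with OpenMP; here I implement it with OpenCL. First, the algorithm: both methods below evaluate the series $e = \sum_{k=0}^{\infty} \frac{1}{k!}$ and differ only in how the terms are distributed over the work-items. Each work-item returns a partial result, and the host combines the partial results into the final value.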

 

 

Method 1.

Host code

/* Project: multiply the matrix of opencl by: Liu Rong time: 2012.11.20 */# include <iostream> # include <time. h> # include <string> # include <math. h> # include <vector> # include <CL/Cl. h> # include <fstream> using namespace STD; // kernel function STD: stringconverttostring (const char * filename) // convert the kernel source code, that is, the self-written parallel function, convert to string {size_t size; char * STR; STD: String s; STD: fstream F (filename, (STD: fstream: In | STD: fstream :: binary); If (F. is_open () {size _ T filesize; F. seekg (0, STD: fstream: End); size = filesize = (size_t) F. tellg (); F. seekg (0, STD: fstream: Beg); STR = new char [size + 1]; If (! Str) {f. close (); STD: cout <"memory allocation failed"; return NULL;} f. read (STR, filesize); F. close (); STR [size] = '\ 0'; S = STR; Delete [] STR; return s;} else {STD :: cout <"\ nfile containg the kernel code (\". cl \ ") not found. please copy the required file in the folder containg the executable. \ n "; exit (1);} return NULL;} int main () {// Double Start, end, time1, time2; // query platform cl_int cierrnum; cl_pla Tform_id platform; cierrnum = clgetplatformids (1, & platform, null); If (cierrnum! = Cl_success) {cout <"failed to get the device" <Endl; return 0 ;}// obtain the device information cl_device_id device; cl_int status; cl_uint maxdims; cl_event events [3]; size_t globalthreads [1]; size_t localthreads [1]; size_t maxworkgroupsize; size_t maxworkitemsizes [3]; // create the cierrnum = clgetdeviceids (platform, platform, 1, & device, null); // create the context cl_context_properties CPS [3] = {cl_context_platform, (cl_context_properties) platform, 0}; cl_context CTX = clcreatecontext (CPS, 1, & device, null, null, & cierrnum); If (cierrnum! = Cl_success) {cout <"failed to create context" <Endl; return 0;} cl_command_queue myqueue = clcreatecommandqueue (CTX, device, 0, & cierrnum); If (cierrnum! 
= Cl_success) {cout <"command queue failed" <Endl; return 0 ;}// declare the buffer and transmit data double * c = NULL; // output array int maxitem = 1000; int stepnum = 100000000; size_t datasize = sizeof (double) * maxitem * 2; // allocate memory space c = (double *) malloc (datasize); // initialize the input array cl_mem bufferc = clcreatebuffer (CTX, cl_mem_write_only, datasize, null, & cierrnum ); // run the kernel to compile const char * filename = "calue. cl "; STD: String sourcestr = converttostring (filename); cons T char * Source = sourcestr. c_str (); size_t sourcesize [] = {strlen (source)}; // read the CL file directly to the memory cl_program myprog = clcreateprogramwithsource (CTX, 1, & source, sourcesize, & cierrnum); // cl_program myprog = clcreateprogramwithsource (CTX, 1, (const char **) & programsource, null, & cierrnum); If (cierrnum! = 0) {cout <"createprogram failed" <Endl;} cierrnum = clbuildprogram (myprog, 0, null, null); If (cierrnum! = 0) {cout <"clbuildprogram failed" <Endl;} cl_kernel mykernel = clcreatekernel (myprog, "calue", & cierrnum); If (cierrnum! = 0) {cout <"clcreatekernel failed" <Endl;} // run the program, set the parameter clsetkernelarg (mykernel, 0, sizeof (cl_mem), (void *) & bufferc); clsetkernelarg (mykernel, 1, sizeof (INT), & stepnum); clsetkernelarg (mykernel, 2, sizeof (INT), & maxitem); size_t globalworksize [1]; globalworksize [0] = maxitem; // start = clock (); cierrnum = clenqueuendrangekernel (myqueue, mykernel, 1, null, globalworksize, null, 0, null, & events [0]); If (cierrnum! = 0) {cout <"clenqueuendrangekernel failed" <Endl;} // time synchronization status = clwaitforevents (1, & events [0]); If (status! = Cl_success) {STD: cout <"error: Waiting For kernel run to finish. 
\ (clwaitforevents0) \ n "; return 0 ;}cout <" O "<Endl; status = clreleaseevent (events [0]); // copy the result to the host end = clock (); time1 = end-start; cout <"Shijian" <time1 <Endl; cierrnum = clenqueuereadbuffer (myqueue, bufferc, cl_true, 0, datasize, C, 0, null, & events [1]); status = clwaitforevents (1, & events [1]); If (status! = Cl_success) {STD: cout <"error: Waiting For read buffer call to finish. \ (clwaitforevents1) N "; return 0;} status = clreleaseevent (events [1]); If (status! = Cl_success) {STD: cout <"error: Release event object. \ (clreleaseevent) \ n "; return 0;} Double E = 0; double result = 0; double temp = 1; // For (INT I = 0; I <maxitem; I ++) {result = C [I * 2]; e + = (1/temp) * result; temp = C [I * 2 + 1];} printf ("e = % 1.22f", e); Return 0 ;}

Kernel Function

// Method 1 kernel: work-item `id` handles one contiguous chunk of
// offset = stepnum / maxitem series terms, k = id*offset + 1 .. (id + 1)*offset.
//   result[2*id]     = sum over the chunk of 1 / (start * (start + 1) * ... * k)
//   result[2*id + 1] = product of all k in the chunk
// Dividing result[2*id] by the product of all earlier chunks gives this chunk's
// share of sum 1/k!. Terms beyond maxitem * offset (when stepnum is not a
// multiple of maxitem) are not computed.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

__kernel void calue(__global double *result, int stepnum, int maxitem)
{
    const int id = get_global_id(0);
    const int offset = stepnum / maxitem;

    // First term of this chunk and one past its last term.
    const int start = id * offset + 1;
    const int end = start + offset;

    double res = 0.0;
    double fact = 1.0;
    for (int i = start; i < end; i++) {
        fact *= i;
        if (isinf(fact)) {
            // The running product left the double range: every further term
            // is exactly 0 and fact stays +inf, so the results cannot change.
            break;
        }
        res += 1.0 / fact;
    }

    result[id * 2] = res;
    result[id * 2 + 1] = fact;
}

 

Method 2

Host Program

/* Project: multiply the matrix of opencl by: Liu Rong time: 2012.11.20 */# include <iostream> # include <time. h> # include <string> # include <math. h> # include <vector> # include <CL/Cl. h> # include <fstream> using namespace STD; // kernel function STD: stringconverttostring (const char * filename) // convert the kernel source code, that is, the self-written parallel function, convert to string {size_t size; char * STR; STD: String s; STD: fstream F (filename, (STD: fstream: In | STD: fstream :: binary); If (F. is_open () {size _ T filesize; F. seekg (0, STD: fstream: End); size = filesize = (size_t) F. tellg (); F. seekg (0, STD: fstream: Beg); STR = new char [size + 1]; If (! Str) {f. close (); STD: cout <"memory allocation failed"; return NULL;} f. read (STR, filesize); F. close (); STR [size] = '\ 0'; S = STR; Delete [] STR; return s;} else {STD :: cout <"\ nfile containg the kernel code (\". cl \ ") not found. please copy the required file in the folder containg the executable. \ n "; exit (1);} return NULL;} int main () {// Double Start, end, time1, time2; // query platform cl_int cierrnum; cl_pla Tform_id platform; cierrnum = clgetplatformids (1, & platform, null); If (cierrnum! = Cl_success) {cout <"failed to get the device" <Endl; return 0 ;}// obtain the device information cl_device_id device; cl_int status; cl_uint maxdims; cl_event events [3]; size_t globalthreads [1]; size_t localthreads [1]; size_t maxworkgroupsize; size_t maxworkitemsizes [3]; // create the cierrnum = clgetdeviceids (platform, platform, 1, & device, null); // create the context cl_context_properties CPS [3] = {cl_context_platform, (cl_context_properties) platform, 0}; cl_context CTX = clcreatecontext (CPS, 1, & device, null, null, & cierrnum); If (cierrnum! = Cl_success) {cout <"failed to create context" <Endl; return 0;} cl_command_queue myqueue = clcreatecommandqueue (CTX, device, 0, & cierrnum); If (cierrnum! 
= Cl_success) {cout <"command queue failed" <Endl; return 0 ;}// declare the buffer and transmit data double * c = NULL; // output array int maxitem = 10; int stepnum = 1000000000; size_t datasize = sizeof (double) * maxitem; // allocate memory space c = (double *) malloc (datasize ); // initialize the input array cl_mem bufferc = clcreatebuffer (CTX, cl_mem_write_only, datasize * sizeof (float), null, & cierrnum ); // run the kernel to compile const char * filename = "calue. cl "; STD: String sourcestr = converttostring (File Name); const char * Source = sourcestr. c_str (); size_t sourcesize [] = {strlen (source)}; // read the CL file directly to the memory cl_program myprog = clcreateprogramwithsource (CTX, 1, & source, sourcesize, & cierrnum); // cl_program myprog = clcreateprogramwithsource (CTX, 1, (const char **) & programsource, null, & cierrnum); If (cierrnum! = 0) {cout <"createprogram failed" <Endl;} cierrnum = clbuildprogram (myprog, 0, null, null); If (cierrnum! = 0) {cout <"clbuildprogram failed" <Endl;} cl_kernel mykernel = clcreatekernel (myprog, "calue", & cierrnum); If (cierrnum! = 0) {cout <"clcreatekernel failed" <Endl;} // run the program, set the parameter clsetkernelarg (mykernel, 0, sizeof (cl_mem), (void *) & bufferc); clsetkernelarg (mykernel, 1, sizeof (INT), & stepnum); clsetkernelarg (mykernel, 2, sizeof (INT), & maxitem); size_t globalworksize [1]; globalworksize [0] = maxitem; // start = clock (); cierrnum = clenqueuendrangekernel (myqueue, mykernel, 1, null, globalworksize, null, 0, null, & events [0]); If (cierrnum! = 0) {cout <"clenqueuendrangekernel failed" <Endl;} // time synchronization status = clwaitforevents (1, & events [0]); If (status! = Cl_success) {STD: cout <"error: Waiting For kernel run to finish. 
\ (clwaitforevents0) \ n "; return 0 ;}cout <" O "<Endl; status = clreleaseevent (events [0]); // copy the result to the host end = clock (); time1 = end-start; cout <"Shijian" <time1 <Endl; cierrnum = clenqueuereadbuffer (myqueue, bufferc, cl_true, 0, datasize, C, 0, null, & events [1]); status = clwaitforevents (1, & events [1]); If (status! = Cl_success) {STD: cout <"error: Waiting For read buffer call to finish. \ (clwaitforevents1) N "; return 0;} status = clreleaseevent (events [1]); If (status! = Cl_success) {STD: cout <"error: Release event object. \ (clreleaseevent) \ n "; return 0;} Double E = 0; // For (INT I = 0; I <maxitem; I ++) {cout <C [I] <Endl; e + = C [I];} printf ("e = % 1.22f", e); Return 0 ;}

 

Kernel Function

// Method 2 kernel: the series terms 1/k! are dealt out to the work-items in a
// strided fashion. Work-item `id` sums the terms k = id + 1, id + 1 + MaxItem,
// id + 1 + 2*MaxItem, ... up to StepNum and stores the partial sum in result[id].
// Assumes the kernel is launched with MaxItem work-items - confirm against the host.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

__kernel void CaluE(__global double* result,
                    int StepNum,
                    int MaxItem)
{
    int id = get_global_id(0);

    // fact holds i! for the current i. It is a double (not float) so the terms
    // keep full double precision and the factorial overflows much later.
    double fact = 1.0;
    double e = 0.0;

    for (int i = id + 1; i <= StepNum; i += MaxItem) {
        // Advance fact from (i - MaxItem)! to i!; on the first pass (i <= MaxItem)
        // the `j < i` bound makes this build i! from 1.
        for (int j = 0; j < MaxItem && j < i; j++) {
            fact *= (i - j);
        }
        if (isinf(fact)) {
            // The factorial left the double range: this and every later term
            // would add exactly 0, so the partial sum is already final.
            break;
        }
        e += (1.0 / fact);
    }

    result[id] = e;
}

 

// Method 1 kernel: work-item `id` handles one contiguous chunk of
// offset = stepnum / maxitem series terms, k = id*offset + 1 .. (id + 1)*offset.
//   result[2*id]     = sum over the chunk of 1 / (start * (start + 1) * ... * k)
//   result[2*id + 1] = product of all k in the chunk
// Dividing result[2*id] by the product of all earlier chunks gives this chunk's
// share of sum 1/k!. Terms beyond maxitem * offset (when stepnum is not a
// multiple of maxitem) are not computed.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

__kernel void calue(__global double *result, int stepnum, int maxitem)
{
    const int id = get_global_id(0);
    const int offset = stepnum / maxitem;

    // First term of this chunk and one past its last term.
    const int start = id * offset + 1;
    const int end = start + offset;

    double res = 0.0;
    double fact = 1.0;
    for (int i = start; i < end; i++) {
        fact *= i;
        if (isinf(fact)) {
            // The running product left the double range: every further term
            // is exactly 0 and fact stays +inf, so the results cannot change.
            break;
        }
        res += 1.0 / fact;
    }

    result[id * 2] = res;
    result[id * 2 + 1] = fact;
}

 

 

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.