Understanding the parallel execution of OpenCL work-items

Source: Internet
Author: User

I have recently been studying OpenCL programs, but I was not very familiar with how work-items actually execute. To get an intuitive feel for it, I wrote a few small programs that borrow a testing idea from OpenMP: have every work-item output its own ID together with the data it processed. I found this very helpful for understanding the execution model. One of those programs follows:

Host program: main.cpp

/* Project: multiply the matrix of opencl by: Liu Rong time: 2012.11.20 */# include <iostream> # include <time. h> # include <string> # include <math. h> # include <vector> # include <CL/Cl. h> # include <fstream> using namespace STD; // kernel function STD: stringconverttostring (const char * filename) // convert the kernel source code, that is, the self-written parallel function, convert to string {size_t size; char * STR; STD: String s; STD: fstream F (filename, (STD: fstream: In | STD: fstream :: binary); If (F. is_open () {size _ T filesize; F. seekg (0, STD: fstream: End); size = filesize = (size_t) F. tellg (); F. seekg (0, STD: fstream: Beg); STR = new char [size + 1]; If (! Str) {f. close (); STD: cout <"memory allocation failed"; return NULL;} f. read (STR, filesize); F. close (); STR [size] = '\ 0'; S = STR; Delete [] STR; return s;} else {STD :: cout <"\ nfile containg the kernel code (\". cl \ ") not found. please copy the required file in the folder containg the executable. \ n "; exit (1);} return NULL;} int main () {Double Start, end, time1, time2; // query platform cl_int cierrnum; cl_platf Orm_id platform; cierrnum = clgetplatformids (1, & platform, null); If (cierrnum! = Cl_success) {cout <"failed to get the device" <Endl; return 0 ;}// obtain the device information cl_device_id device; cl_int status; cl_uint maxdims; cl_event events [3]; size_t globalthreads [1]; size_t localthreads [1]; size_t maxworkgroupsize; size_t maxworkitemsizes [3]; //////////////////////////////////////// //// // Step 7 analyzing proper workgroup size for the kernel/ /by querying device information // 7.1 Device Info cl_device_max_work_group_size // 7.2 device info cl_device_max_work_item_dimensions // 7.3 device Info packages ////////////////////////// //////////////////////////////////////// /// *** query device capabilities. 
maximum * Work Item dimensions and the maximmum * Work Item sizes */cierrnum = clgetdeviceids (platform, cl_device_type_all, 1, & device, null); status = clgetde Viceinfo (device, cl_device_max_work_group_size, sizeof (size_t), (void *) & maxworkgroupsize, null); If (status! = Cl_success) {STD: cout <"error: Getting device info. (clgetdeviceinfo) \ n "; return 0;} status = clgetdeviceinfo (device, cl_device_max_work_item_dimensions, sizeof (cl_uint), (void *) & maxdims, null); If (status! = Cl_success) {STD: cout <"error: Getting device info. (clgetdeviceinfo) \ n "; return 0;} status = clgetdeviceinfo (device, identifier, sizeof (size_t) * maxdims, (void *) maxworkitemsizes, null); If (status! = Cl_success) {STD: cout <"error: Getting device info. (clgetdeviceinfo) \ n "; return 0 ;}cout <" maxworkitemsizes "<maxworkitemsizes <Endl; cout <" maxdims "<maxdims <Endl; cout <"maxworkgroupsize" <(INT) maxworkgroupsize <Endl; // create the context cl_context_properties CPS [3] = {cl_context_platform, (cl_context_properties) platform, 0 }; cl_context CTX = clcreatecontext (CPS, 1, & device, null, null, & cierrnum); If (cier Rnum! = Cl_success) {cout <"failed to create context" <Endl; return 0;} cl_command_queue myqueue = clcreatecommandqueue (CTX, device, 0, & cierrnum); If (cierrnum! = Cl_success) {cout <"command queue failed" <Endl; return 0 ;}// declare the buffer and transmit data float * c = NULL; // output array float * B = NULL; // output array int c = 10; size_t datasize = sizeof (float) * C; // allocate memory space c = (float *) malloc (datasize); B = (float *) malloc (datasize); // initialize the input array cl_mem bufferc = clcreatebuffer (CTX, cl_mem_write_only, C * sizeof (float), null, & cierrnum); cl_mem bufferb = clcreatebuffer (CTX, cl_mem_write_only, C * sizeof (float), null, & cierrnum );/ /Run the kernel to compile const char * filename = "simplemultiply. cl "; STD: String sourcestr = converttostring (filename); const char * Source = sourcestr. 
c_str (); size_t sourcesize [] = {strlen (source)}; // read the CL file directly to the memory cl_program myprog = clcreateprogramwithsource (CTX, 1, & source, sourcesize, & cierrnum); // cl_program myprog = clcreateprogramwithsource (CTX, 1, (const char **) & programsource, null, & cierrnum); If (cierrn Um! = 0) {cout <"createprogram failed" <Endl;} cierrnum = clbuildprogram (myprog, 0, null, null); If (cierrnum! = 0) {cout <"clbuildprogram failed" <Endl;} cl_kernel mykernel = clcreatekernel (myprog, "vecadd", & cierrnum); If (cierrnum! = 0) {cout <"clcreatekernel failed" <Endl;} // run the program clsetkernelarg (mykernel, 0, sizeof (cl_mem), (void *) & bufferb ); clsetkernelarg (mykernel, 1, sizeof (cl_mem), (void *) & bufferc); size_t globalworksize [1]; globalworksize [0] = C/2; /// start = clock (); cierrnum = clenqueuendrangekernel (myqueue, mykernel, 1, null, globalworksize, null, 0, null, & events [0]); if (cierrnum! = 0) {cout <"clenqueuendrangekernel failed" <Endl;} // time synchronization status = clwaitforevents (1, & events [0]); If (status! = Cl_success) {STD: cout <"error: Waiting For kernel run to finish. \ (clwaitforevents0) \ n "; return 0 ;}cout <" O "<Endl; status = clreleaseevent (events [0]); // copy the result to the host end = clock (); time1 = end-start; cout <"Shijian" <time1 <Endl; cierrnum = clenqueuereadbuffer (myqueue, bufferc, cl_true, 0, datasize, C, 0, null, & events [1]); status = clwaitforevents (1, & events [1]); If (status! = Cl_success) {STD: cout <"error: Waiting For read buffer call to finish. \ (clwaitforevents1) N "; return 0;} status = clreleaseevent (events [1]); If (status! = Cl_success) {STD: cout <"error: Release event object. \ (clreleaseevent) \ n "; return 0;} cierrnum = clenqueuereadbuffer (myqueue, bufferb, cl_true, 0, datasize, B, 0, null, & events [2]); status = clwaitforevents (1, & events [2]); If (status! = Cl_success) {STD: cout <"error: Waiting For read buffer call to finish. 
\ (clwaitforevents1) N "; return 0;} status = clreleaseevent (events [2]); If (status! = Cl_success) {STD: cout <"error: Release event object. \ (clreleaseevent) \ n "; return 0 ;}// for (INT I = 0; I <C/2; I ++) {cout <" work-item: "<B [I] <": "; for (Int J = 0; j <2; j ++) {cout <C [I + J] <";}cout <Endl;} return 0 ;}

Kernel function: simplemultiply.cl

// Enter your kernel in this window

/*
 * vecadd: each work-item records its own global ID and fills its own
 * two-element slice of C.
 *
 *   B  output; B[id] receives the work-item's global ID (dimension 0).
 *   C  output; C[id*2] and C[id*2 + 1] receive 0 and 1 respectively.
 *
 * There is no bounds check: B must hold at least one float per work-item
 * and C at least two floats per work-item for the launched global size.
 */
__kernel void vecadd(__global float *B, __global float *C)
{
    int id = get_global_id(0);

    // barrier(CLK_LOCAL_MEM_FENCE);
    B[id] = id;
    for (int i = 0; i < 2; i++) {
        C[id * 2 + i] = i;
    }
    // barrier(CLK_LOCAL_MEM_FENCE);
}

Running result:

From the above results, we can see that each work-item runs independently: every work-item reports its own global ID in B and writes only its own two elements of C.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.