Implementing Matrix Addition
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <opencv2/opencv.hpp>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

using namespace std;
using namespace cv;

// Abort with file/line and the CUDA error string when a runtime call fails.
// CUDA errors are sticky, so continuing after a failure only hides the cause.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

/**
 * Element-wise addition of two 2-D arrays of int2: d_c[i] = d_a[i] + d_b[i].
 *
 * Layout: row-major and tightly packed, `width` int2 elements per row and
 * `height` rows (element index = x + y * width).
 * Launch: 2-D grid, x covers columns and y covers rows. Threads that fall
 * outside width x height do nothing, so the grid may overshoot the data.
 *
 * @param d_a    device pointer to the first operand  (width * height int2)
 * @param d_b    device pointer to the second operand (width * height int2)
 * @param d_c    device pointer to the result         (width * height int2)
 * @param width  number of int2 elements per row
 * @param height number of rows
 */
__global__ void Add_kernel(const int2* d_a, const int2* d_b, int2* d_c, int width, int height)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    if (x < width && y < height)
    {
        const int offset = x + y * width;
        d_c[offset].x = d_a[offset].x + d_b[offset].x;
        d_c[offset].y = d_a[offset].y + d_b[offset].y;
    }
}

/**
 * Demo: fill a 3x4 matrix of 32-bit ints with i + j, add it to itself on the
 * GPU and print the matrix before and after.
 */
int main()
{
    Mat img(3, 4, CV_32S, Scalar_<int>(0));

    cout << img << endl;
    cout << endl;

    for (int i = 0; i < img.rows; i++)
    {
        for (int j = 0; j < img.cols; j++)
        {
            img.at<int>(i, j) = i + j;
        }
    }
    cout << endl;
    cout << img << endl;

    // The kernel works on int2, i.e. on pairs of adjacent ints. That view is
    // only valid when rows are stored back to back and every row holds an
    // even number of ints; otherwise a pair would straddle padding or a row end.
    const int intsPerRow = img.cols * img.channels();
    if (!img.isContinuous() || intsPerRow % 2 != 0)
    {
        fprintf(stderr, "matrix cannot be viewed as packed int2 rows\n");
        return EXIT_FAILURE;
    }
    const int width  = intsPerRow / 2;   // int2 elements per row
    const int height = img.rows;         // number of rows
    const size_t memsize = img.total() * img.elemSize();

    int2* d_a = NULL;
    int2* d_b = NULL;
    int2* d_c = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_a, memsize));
    CUDA_CHECK(cudaMalloc((void**)&d_b, memsize));
    CUDA_CHECK(cudaMalloc((void**)&d_c, memsize));

    // Both operands are the same matrix, so the result is img + img.
    CUDA_CHECK(cudaMemcpy(d_a, img.data, memsize, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, img.data, memsize, cudaMemcpyHostToDevice));

    // Ceil-divide so partially filled blocks at the right/bottom edge are launched.
    dim3 threads(16, 16);
    dim3 grids((width + threads.x - 1) / threads.x,
               (height + threads.y - 1) / threads.y);
    Add_kernel<<<grids, threads>>>(d_a, d_b, d_c, width, height);
    CUDA_CHECK(cudaGetLastError());   // launch-configuration errors

    // Blocking copy: also waits for the kernel and reports execution errors.
    CUDA_CHECK(cudaMemcpy(img.data, d_c, memsize, cudaMemcpyDeviceToHost));
    cout << "GPU" << endl;
    cout << img << endl;

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

#ifdef _WIN32
    // Keep the console window open when started from the IDE (Windows only).
    system("pause");
#endif
    return 0;
}
CUDA and OpenCV: Matrix Addition