File directory:
cudatest
|--utils.cu
|--utils.h
|--squaresum.cu
|--squaresum.h
|--test.cpp
|--CMakeLists.txt
Compile commands:
$ cd /root/cudatest
$ mkdir build
$ cd build
$ cmake ..
$ make
The relationship between the files:
utils: provides common tools; here it provides a function that queries and prints the device information.
squaresum: computes the sum of squares; it implements the core function that runs on CUDA.
test: calls the sum-of-squares function.
CMakeLists.txt: organizes the compilation of all files to generate the executable.
Note: when a .cpp file calls a function defined in a .cu file, declare that function as extern "C" in the header file.
File contents:
CMakeLists.txt
# CMakeLists.txt to build the squaresum CUDA example.
cmake_minimum_required(VERSION 2.8)

# The FindCUDA module provides cuda_add_executable(); it must be loaded
# before that command is used.
find_package(CUDA REQUIRED)

# Specify binary name and source files to build it from.
# test.cpp is compiled by the host compiler, the .cu files by nvcc.
#add_library(utils utils.cpp)
cuda_add_executable(squaresum test.cpp squaresum.cu utils.cu)
#target_link_libraries(squaresum utils)
test.cpp
#include <iostream>"squaresum.h"//extern "C" int squaresum ( ); int Main () { squaresum (); return 0 ;}
squaresum.h
" Utils.h " <cuda_runtime.h>extern"C" { int Squaresum ();}
squaresum.cu
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// #include "utils.h"   // Initcuda() is declared via squaresum.h
#include <iostream>
#include "squaresum.h"

//======== Define Area ========
#define DATA_SIZE 1048576  // 1M elements

//======== Global Area ========
// Host-side input buffer shared by the GPU and CPU passes.
int data[DATA_SIZE];

/**
 * Kernel: serially sums the squares of the DATA_SIZE ints in `data`.
 *
 * Launch layout: meant to be launched as <<<1, 1>>>. There is no thread
 * indexing, so every launched thread would walk the whole array and write
 * the same outputs. Uses no shared memory.
 *
 * @param data device pointer to DATA_SIZE ints
 * @param sum  device pointer that receives the total
 * @param time device pointer that receives the elapsed device clock() ticks
 */
__global__ static void squaresSum(const int *data, int *sum, clock_t *time) {
    int sum_t = 0;
    clock_t start = clock();
    for (int i = 0; i < DATA_SIZE; ++i) {
        sum_t += data[i] * data[i];
    }
    *sum = sum_t;
    *time = clock() - start;
}

//======== used to generate rand datas ========
/**
 * Fills data[0..size) with pseudo-random values in the range 0..9.
 *
 * rand() is not seeded here, so the sequence depends on whatever seed the
 * process currently has.
 */
void generateData(int *data, int size) {
    for (int i = 0; i < size; ++i) {
        // Values stay below 10, so each square is at most 81 and the sum of
        // DATA_SIZE squares fits comfortably in an int.
        data[i] = rand() % 10;
    }
}

// Reports a failed CUDA runtime call on stderr; returns true on success.
static bool cudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Copies host_data to the device, runs squaresSum on one thread, and copies
// the sum and tick count back. Device memory is released on every path.
static bool runOnGpu(const int *host_data, int *result, clock_t *elapsed) {
    int *gpudata = NULL;
    int *sum = NULL;
    clock_t *time = NULL;

    // malloc space for datas in GPU, then upload the input.
    bool ok =
        cudaOk(cudaMalloc((void **)&gpudata, sizeof(int) * DATA_SIZE),
               "cudaMalloc(gpudata)") &&
        cudaOk(cudaMalloc((void **)&sum, sizeof(int)), "cudaMalloc(sum)") &&
        cudaOk(cudaMalloc((void **)&time, sizeof(clock_t)),
               "cudaMalloc(time)") &&
        cudaOk(cudaMemcpy(gpudata, host_data, sizeof(int) * DATA_SIZE,
                          cudaMemcpyHostToDevice),
               "cudaMemcpy(host to device)");

    if (ok) {
        // calculate the squares' sum
        squaresSum<<<1, 1, 0>>>(gpudata, sum, time);
        // A launch returns no status; bad launch configurations are
        // reported through cudaGetLastError().
        ok = cudaOk(cudaGetLastError(), "squaresSum launch");
    }

    // copy the result from the GPU to HOST. These blocking copies also wait
    // for the kernel, so an in-kernel fault is reported here.
    ok = ok &&
         cudaOk(cudaMemcpy(result, sum, sizeof(int), cudaMemcpyDeviceToHost),
                "cudaMemcpy(sum)") &&
         cudaOk(cudaMemcpy(elapsed, time, sizeof(clock_t),
                           cudaMemcpyDeviceToHost),
                "cudaMemcpy(time)");

    // free GPU spaces (cudaFree(NULL) is a no-op, so this is safe after a
    // partial allocation failure).
    cudaFree(gpudata);
    cudaFree(sum);
    cudaFree(time);
    return ok;
}

/**
 * Runs the sum-of-squares demo: fills `data` with random digits, sums their
 * squares once on the GPU and once on the CPU, and prints both sums with
 * their timings.
 *
 * The two "time" figures are in different units: the GPU figure is the
 * difference of two device clock() readings, the CPU figure is the
 * difference of two host clock() readings.
 *
 * @return 0 when the demo completes, and also when no CUDA device could be
 *         initialized (unchanged from the original); 1 if a CUDA call fails.
 */
int squaresum() {
    // init CUDA device
    if (!Initcuda()) {
        return 0;
    }
    printf("CUDA initialized.\n");

    // generate rand datas
    generateData(data, DATA_SIZE);

    int result = 0;
    clock_t time_used = 0;
    if (!runOnGpu(data, &result, &time_used)) {
        return 1;
    }

    // print result
    printf("(GPU) sum:%d time:%ld\n", result, (long)time_used);

    // CPU calculate
    result = 0;
    clock_t start = clock();
    for (int i = 0; i < DATA_SIZE; ++i) {
        result += data[i] * data[i];
    }
    time_used = clock() - start;
    printf("(CPU) sum:%d time:%ld\n", result, (long)time_used);

    return 0;
}
utils.h
#include <stdio.h><cuda_runtime.h>extern"C" { bool Initcuda ();}
utils.cu
#include "utils.h"
#include <cuda_runtime.h>
#include <stdio.h>
#include <iostream>

/**
 * Prints selected fields of a cudaDeviceProp to stdout, one per line.
 *
 * The size_t fields (totalGlobalMem, sharedMemPerBlock, memPitch,
 * totalConstMem, textureAlignment) are printed with %zu; passing them to %d
 * is undefined behaviour where size_t is wider than int.
 *
 * NOTE: deviceOverlap is documented as deprecated in favour of
 * asyncEngineCount; it is kept because the original listing prints it.
 */
void printDeviceProp(const cudaDeviceProp &prop) {
    printf("Device Name:%s.\n", prop.name);
    printf("Totalglobalmem:%zu.\n", prop.totalGlobalMem);
    printf("Sharedmemperblock:%zu.\n", prop.sharedMemPerBlock);
    printf("Regsperblock:%d.\n", prop.regsPerBlock);
    printf("warpsize:%d.\n", prop.warpSize);
    printf("Mempitch:%zu.\n", prop.memPitch);
    printf("Maxthreadsperblock:%d.\n", prop.maxThreadsPerBlock);
    // The three dimensions are separated by spaces so they stay readable.
    printf("Maxthreadsdim[0-2]:%d %d %d.\n", prop.maxThreadsDim[0],
           prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
    printf("Maxgridsize[0-2]:%d %d %d.\n", prop.maxGridSize[0],
           prop.maxGridSize[1], prop.maxGridSize[2]);
    printf("Totalconstmem:%zu.\n", prop.totalConstMem);
    printf("Major.minor:%d.%d.\n", prop.major, prop.minor);
    printf("clockrate:%d.\n", prop.clockRate);
    printf("texturealignment:%zu.\n", prop.textureAlignment);
    printf("Deviceoverlap:%d.\n", prop.deviceOverlap);
    printf("Multiprocessorcount:%d.\n", prop.multiProcessorCount);
}

/**
 * Picks the first CUDA device whose compute capability major version is at
 * least 1, prints its properties, and makes it the current device.
 *
 * Also prints the device count to stdout.
 *
 * @return true if a device was found and selected; false if the device
 *         count could not be read, no device exists, no device qualifies,
 *         or cudaSetDevice() fails (a message is written to stderr).
 */
bool Initcuda() {
    // used to count the device numbers
    int count = 0;

    // get the CUDA device count; on failure `count` would be meaningless
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n",
                cudaGetErrorString(err));
        return false;
    }
    // printf("%d\n", count);
    std::cout << count << std::endl;
    if (count == 0) {
        fprintf(stderr, "there is no device.\n");
        return false;
    }

    // find the device >= 1.X
    int i;
    for (i = 0; i < count; ++i) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            if (prop.major >= 1) {
                printDeviceProp(prop);
                break;
            }
        }
    }

    // if can't find the device (the loop ran to the end without a break)
    if (i == count) {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        return false;
    }

    // set CUDA device
    err = cudaSetDevice(i);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice(%d) failed: %s\n", i,
                cudaGetErrorString(err));
        return false;
    }
    return true;
}

// int main() {
//     Initcuda();
// }
Compiling CUDA programs on Linux with CMakeLists.txt