Compiling CUDA programs on Linux with CMake (CMakeLists.txt)

Source: Internet
Author: User

File directory:

cudatest

|--utils.cu

|--utils.h

|--squaresum.cu

|--squaresum.h

|--test.cpp

|--CMakeLists.txt

Compile command:

$ cd /root/cudatest

$ mkdir build

$ cd build

$ cmake ..

$ make

How the files relate to each other:

utils: provides common utilities; here, a function that queries and prints the CUDA device information.

squaresum: computes the sum of squares; this is the core part that actually runs on the GPU through CUDA.

test: calls the sum-of-squares function.

CMakeLists.txt: Organize all file compilation to generate executable files

Note: When a .cpp file calls a function that is defined in a .cu file, declare that function in the header inside an extern "C" block.

File contents:

CMakeLists.txt

# CMakeLists.txt to build the squaresum CUDA example
cmake_minimum_required(VERSION 2.8)
project(cudatest)

# cuda_add_executable() is provided by the FindCUDA module, so the module has
# to be loaded before the command is used. (FindCUDA is deprecated since
# CMake 3.10 in favour of first-class CUDA language support, but it matches
# the 2.8 minimum version requested above.)
find_package(CUDA REQUIRED)

# Specify binary name and source files to build it from
#add_library(utils utils.cpp)
cuda_add_executable(
    squaresum
    test.cpp squaresum.cu utils.cu)
#target_link_libraries(squaresum utils)

test.cpp

#include <iostream>"squaresum.h"//extern "C" int squaresum ( ); int Main () {  squaresum ();   return 0 ;}

squaresum.h

" Utils.h "  <cuda_runtime.h>extern"C"  {   int  Squaresum ();}

squaresum.cu

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
//#include "utils.h"
#include <iostream>
#include "squaresum.h"

//======== Define Area ========
#define DATA_SIZE 1048576  // 1M elements

//======== Global Area ========
// Host-side input buffer; filled by generateData() and copied to the GPU.
int data[DATA_SIZE];

// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Kernel: sums the squares of the DATA_SIZE ints in `data`.
//   data    - device pointer to DATA_SIZE ints (input)
//   sum     - device pointer to one int (output: sum of squares)
//   elapsed - device pointer to one clock_t (output: device clock() ticks
//             spent in the loop)
// The loop is sequential over the whole array, so the kernel is launched
// with a single block of a single thread (<<<1, 1>>>).
__global__ static void squaresSum(int *data, int *sum, clock_t *elapsed) {
    int sum_t = 0;
    clock_t start = clock();
    for (int i = 0; i < DATA_SIZE; ++i) {
        sum_t += data[i] * data[i];
    }
    *sum = sum_t;
    *elapsed = clock() - start;
}

//======== used to generate random data ========
// Fills data[0..size) with pseudo-random values in the range 0..9.
void generateData(int *data, int size) {
    for (int i = 0; i < size; ++i) {
        data[i] = rand() % 10;
    }
}

// Runs the sum of squares once on the GPU and once on the CPU and prints both
// results with their timings. The GPU time is in device clock() ticks and the
// CPU time in host clock() ticks, so the two numbers use different units.
// Always returns 0; failures are reported on stderr.
int squaresum() {
    // Init CUDA device
    if (!Initcuda()) {
        return 0;
    }
    printf("CUDA initialized.\n");

    // Generate random data
    generateData(data, DATA_SIZE);

    // Allocate space for the data on the GPU and upload it
    int *gpuData = NULL;
    int *sum = NULL;
    clock_t *gpuTime = NULL;
    int result = 0;
    clock_t time_used = 0;

    bool ok =
        cudaOk(cudaMalloc((void **)&gpuData, sizeof(int) * DATA_SIZE), "cudaMalloc(gpuData)") &&
        cudaOk(cudaMalloc((void **)&sum, sizeof(int)), "cudaMalloc(sum)") &&
        cudaOk(cudaMalloc((void **)&gpuTime, sizeof(clock_t)), "cudaMalloc(time)") &&
        cudaOk(cudaMemcpy(gpuData, data, sizeof(int) * DATA_SIZE, cudaMemcpyHostToDevice),
               "cudaMemcpy(host to device)");

    if (ok) {
        // Calculate the sum of squares
        squaresSum<<<1, 1, 0>>>(gpuData, sum, gpuTime);

        // A launch does not return an error itself; query it explicitly.
        // The blocking cudaMemcpy calls below wait for the kernel to finish
        // and also surface any error raised while it was running.
        ok = cudaOk(cudaGetLastError(), "squaresSum launch") &&
             cudaOk(cudaMemcpy(&result, sum, sizeof(int), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(sum to host)") &&
             cudaOk(cudaMemcpy(&time_used, gpuTime, sizeof(clock_t), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(time to host)");
    }

    // Free GPU memory (cudaFree on a null pointer performs no operation)
    cudaFree(gpuData);
    cudaFree(sum);
    cudaFree(gpuTime);

    if (!ok) {
        return 0;
    }

    // Print result
    printf("(GPU) sum:%d time:%ld\n", result, (long)time_used);

    // CPU calculation, for comparison
    result = 0;
    clock_t start = clock();
    for (int i = 0; i < DATA_SIZE; ++i) {
        result += data[i] * data[i];
    }
    time_used = clock() - start;
    printf("(CPU) sum:%d time:%ld\n", result, (long)time_used);

    return 0;
}

utils.h

#include <stdio.h><cuda_runtime.h>extern"C"  {   bool  Initcuda ();}

utils.cu

#include "utils.h"
#include <cuda_runtime.h>
#include <iostream>

// Prints selected fields of a cudaDeviceProp structure to stdout.
// Fields of type size_t are printed with %zu; the rest are ints.
void printDeviceProp(const cudaDeviceProp &prop) {
    printf("Device Name:%s.\n", prop.name);
    printf("Totalglobalmem:%zu.\n", prop.totalGlobalMem);
    printf("Sharedmemperblock:%zu.\n", prop.sharedMemPerBlock);
    printf("Regsperblock:%d.\n", prop.regsPerBlock);
    printf("warpsize:%d.\n", prop.warpSize);
    printf("Mempitch:%zu.\n", prop.memPitch);
    printf("Maxthreadsperblock:%d.\n", prop.maxThreadsPerBlock);
    // Separate the three values with spaces so they do not run together.
    printf("Maxthreadsdim[0-2]:%d %d %d.\n",
           prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
    printf("Maxgridsize[0-2]:%d %d %d.\n",
           prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
    printf("Totalconstmem:%zu.\n", prop.totalConstMem);
    printf("Major.minor:%d.%d.\n", prop.major, prop.minor);
    printf("clockrate:%d.\n", prop.clockRate);
    printf("texturealignment:%zu.\n", prop.textureAlignment);
    printf("Deviceoverlap:%d.\n", prop.deviceOverlap);
    printf("Multiprocessorcount:%d.\n", prop.multiProcessorCount);
}

// Finds the first device with compute capability major version >= 1, prints
// its properties and makes it the current device.
// Returns true on success; returns false (after a message on stderr) when
// the device count cannot be read, no device exists, no device qualifies, or
// the device cannot be selected.
bool Initcuda() {
    // Used to count the devices; initialised so that a failed query cannot
    // leave it indeterminate.
    int count = 0;

    // Get the CUDA device count
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return false;
    }
    // printf("%d\n", count);
    std::cout << count << std::endl;
    if (count == 0) {
        fprintf(stderr, "there is no device.\n");
        return false;
    }

    // Find the first device with compute capability >= 1.x
    int i;
    for (i = 0; i < count; ++i) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            if (prop.major >= 1) {
                printDeviceProp(prop);
                break;
            }
        }
    }

    // The loop ran to the end without a match
    if (i == count) {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        return false;
    }

    // Set the CUDA device
    err = cudaSetDevice(i);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed: %s\n", cudaGetErrorString(err));
        return false;
    }
    return true;
}

//int main() {
//    Initcuda();
//}

Compiling CUDA programs on Linux with CMake (CMakeLists.txt)

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.