AMD Graphics Ubuntu System OPENCL Environment Setup
1. Install the video driver
1) Download the driver from http://support.amd.com/zh-cn/download/linux and take note of the version you download.
2) Install the driver packages, starting with fglrx-core_15.302-0ubuntu1_amd64_UB_14.01.deb (use the exact file names of the packages you downloaded). If dpkg reports that libc6-i386, lib32gcc1 or dkms is missing, clean up and install the dependencies first, then install the four packages in this order:
sudo apt-get autoremove && sudo apt-get autoclean
sudo apt-get -f update
sudo apt-get -f install libc6-i386 lib32gcc1 dkms
sudo dpkg -i fglrx-core_15.302-0ubuntu1_amd64_UB_14.01.deb
sudo dpkg -i fglrx_15.302-0ubuntu1_amd64_UB_14.01.deb
sudo dpkg -i fglrx-amdcccle_15.302-0ubuntu1_amd64_UB_14.01.deb
sudo dpkg -i fglrx-dev_15.302-0ubuntu1_amd64_UB_14.01.deb
2. Installing the OPENCL Environment
1) Download SDK
http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/
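2) Extract AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2 and run the installer script it contains (the script name may differ slightly; use the .sh file the archive extracts):
tar -xjf AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2
sh AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.sh
3) With OpenCL set up, test it with the demo below.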
hello_world.cpp
// stdafx.h and Windows.h exist only in Visual Studio projects, so they are
// skipped on Linux. (With MSVC precompiled headers enabled, stdafx.h must be
// the first include outside any #if: disable /Yu or drop the guard there.)
#ifdef _WIN32
#include "stdafx.h"
#include <Windows.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <CL/cl.h>
using namespace std;
// Edge length of the square 2-D work grid; the output holds NUM * NUM values.
#define NUM 16

// Kernel source, option 1: stringify the kernel body with a variadic macro so
// it can be written as ordinary code rather than a quoted string. There must
// be no space between KERNEL and '(' or the macro becomes object-like.
// (The kernel can also live in a separate .cl file that the host reads at
// run time instead of being embedded like this.)
#define KERNEL(...) #__VA_ARGS__
const char *kernelSourceCode = KERNEL(
    __kernel void Hello_world(__global uint *buffer)
    {
        size_t gidx = get_global_id(0);
        size_t gidy = get_global_id(1);
        buffer[gidx + 16 * gidy] = gidx + 16 * gidy;
    });

// Kernel source, option 2: the same idea written as a plain string literal
// (this variant stores a bit pattern with a row stride of 4 instead of the
// linear index). Only one definition of kernelSourceCode may be active, so
// this one is kept as a comment.
// const char *kernelSourceCode =
//     "__kernel void Hello_world(__global uint *buffer) {"
//     "size_t gidx = get_global_id(0);\n"
//     "size_t gidy = get_global_id(1);\n"
//     "size_t lidx = get_global_id(0);\n"
//     "buffer[gidx + 4 * gidy] = (1 << gidx) | (0x10 << gidy);\n}";
int _tmain (int argc, _tchar* argv[])
{
Cl_int status=0;
size_t Devicelistsize;
Cl_uint numplatforms;
CL_PLATFORM_ID Platform=null;
Get the number of platforms, the first parameter is the number of platform list to join, the second parameter is the platform list, the third parameter is the number of platforms
The first argument is 0, the second argument is generally null
status= Clgetplatformids (0,null,&numplatforms);
if (status!=cl_success)//If the return is unsuccessful
{
printf ("error:getting platforms.\n");
System ("pause");
return exit_failure;
}
if (numplatforms>0)//If there is a platform
{
Allocates numplatforms sizeof (CL_PLATFORM_ID) memory for the platform.
cl_platform_id* platforms= (cl_platform_id*) malloc (numplatforms*sizeof (cl_platform_id));
Status = Clgetplatformids (Numplatforms,platforms,null);//Get platform list stored in platforms, a total of numplatforms platform.
if (status!=cl_success)//If you get unsuccessful
{
printf ("Error:getting Platform ids.\n");
System ("pause");
return-1;
}
for (unsigned int i=0;i<numplatforms;++i)//traversal of each platform
{
Char pbuff[100];
To get platform information, the first parameter is the platform ID, the second parameter is the platform information to query, the third parameter is the number of bytes in the memory block referred to in the fourth parameter,
The fourth parameter corresponds to the value of the first argument
Status=clgetplatforminfo (Platforms[i],cl_platform_vendor,sizeof (pbuff), pbuff,null);
Platform=platforms[i];
if (!strcmp (Pbuff, "Advanced Micro Devices, Inc.")) If you find a platform, jump
printf ("%s\n", Pbuff);
if (!strcmp (Pbuff, "NVIDIA Corporation"))
if (!strcmp (Pbuff, "inter<r> Corporation"))
{
Break
}
}
Delete platforms;
}
Cl_context_properties Cps[3]={cl_context_platform, (cl_context_properties) platform,0};//Specify the platform to use
cl_context_properties* cprops= (null==platform)? Null:cps;
An OPENCL context is created based on the device type, the first parameter lists the context property name and its corresponding value, and the name of each property immediately follows its corresponding value, which ends with 0.
The second parameter identifies the device type, and the third parameter is the one that the application registers to destroy the function that reports the error that occurred in this context. The fourth parameter is just the data that you want the user to provide.
The fifth parameter is used to return an error code.
Cl_context context = Clcreatecontextfromtype (cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
Cl_context context = Clcreatecontextfromtype (cprops,cl_device_type_cpu,null,null,&status);
if (status!=cl_success)
{
printf ("error:creating context.\n");
System ("pause");
return exit_failure;
}
Status=clgetcontextinfo (context,cl_context_devices,0,null,&devicelistsize);//Gets the size to allocate memory for DeviceID.
if (status!=cl_success)
{
printf ("error:getting context info.\n");
return exit_failure;
}
cl_device_id *devices= (cl_device_id *) malloc (devicelistsize);//Allocate memory for DeviceID
if (devices==0)
{
printf ("Error:no device found!");
return exit_failure;
}
Get a list of devices
The first parameter is the context to query, the second parameter is the information to query, and the third parameter is the number of bytes of memory referred to in the fourth parameter
The fourth parameter is the return query result, and the fifth is the actual number of bytes returned
Status = Clgetcontextinfo (Context,cl_context_devices,devicelistsize,devices,null);
if (status!=cl_success)
{
printf ("error:getting context info.\n");
return exit_failure;
}
FILE *FP;
Char *source=null;
size_t sourcelength=0;
if ((Fopen_s (&FP, "hello_world.cl", "RB")!=0)//Open File
{
printf ("Unable to load source\n");
}
Fseek (fp,0,2);//positioned at the end of the file stream.
Sourcelength=ftell (FP);//function Ftell The number of offset bytes from the current position of the file position pointer relative to the top of the file.
Fseek (fp,0,0);//Locate the header in the file stream.
Source= (char*) malloc ((sourcelength+1) *sizeof (char));//Allocating memory
Memset (source,0, (sourcelength+1) *sizeof (char)),//void *memset (void *s, int ch, size_t N), function interpretation: the first n bytes in s (typedef unsigned int size_t) Replace with CH and return s
if (Fread (SOURCE,SOURCELENGTH,1,FP)!=1)
{
printf ("Cannot read source file\n");
}
size_t Sourcesize[]={strlen (Kernelsourcecode)};//get the length of the kernel code
To create a program object for the context
The first argument must be a valid context, the second parameter represents the number of string pointer arrays, the third parameter is the source code source,
The fourth parameter represents the source code length, and the fifth represents the return error type.
Cl_program Program=clcreateprogramwithsource (context,1, (const char * *) &source,&sourcelength,&status);
if (status!=cl_success)
{
printf ("error:loading source into cl_program.\n");
}
All device builder execution body or binary code for the context associated with the program.
The first parameter is a program object, the second parameter is the number of devices, and the third parameter is the device list.
The fourth parameter is the compile option, the fifth one is the callback function, and sixth is the parameter of the callback function.
Status=clbuildprogram (Program,1,devices,null,null,null);
if (status!=cl_success)
{
printf ("error:building program\n");
System ("pause");
}
Creating Kernel objects
The first parameter is a program object with a successfully constructed executor, and a function name with the _kernel qualifier in the second program.
The third is a return error type.
Cl_kernel Kernel=clcreatekernel (program, "Hello_world", &status);
if (status!=cl_success)
{
printf ("Error:creating kernel form program.\n");
System ("pause");
return exit_failure;
}
To create a command queue on a specific device
The first parameter is a valid context, and the second parameter is a device associated with the context.
The third parameter is a property that points to the command queue. The fourth parameter is the return type of the error
Cl_command_queue Commendqueue=clcreatecommandqueue (context,devices[0],0,&status);
if (status!=cl_success)
{
printf ("Error:creating commend queue.\n");
return exit_failure;
}
unsigned int *outbuffer=new unsigned int [num*num];
memset (outbuffer,0,num*num*4);//Fill Outbuffer is 0
Create a Cache object
The first parameter is a valid context, and the second parameter is used to specify the dispatch and usage information. , the third parameter is the number of bytes of the allocated cache object
The fourth parameter points to the cached data to which the app is assigned, and the fifth is the return error type.
Cl_mem Outputbuffer=clcreatebuffer (context,cl_mem_alloc_host_ptr,num*num*4,null,&status);
if (status!=cl_success)
{
printf ("error:clcreatebuffer.\n");
return exit_failure;
}
Used to set the value of the kernel function parameter
The first parameter is a valid kernel object, the second parameter is the index of the parameter, and the third parameter is the size of the parameter
The fourth parameter is a parameter
int a=num;
Status=clsetkernelarg (kernel,1,sizeof (int), (void*) &a);
Status=clsetkernelarg (Kernel,0,sizeof (Cl_mem), (void*) &outputbuffer);
if (status!=cl_success)
{
printf ("Error:setting lernel argument.\n");
return exit_failure;
}
size_t Globalthreads[]={num,num};
size_t LOCALTHREADS[]={NUM/2,NUM/2};
To queue a command
The first parameter is a valid command, the second argument is a valid kernel, the third parameter is the dimension, and the fourth parameter is NULL
The fifth parameter is the number of global workgroups, the sixth is the number of local workgroups, and 7,8,9 is the set wait event before the kernel executes.
Status=clenqueuendrangekernel (Commendqueue,kernel,2,null,globalthreads,localthreads,0,null,null);
if (status!=cl_success)
{
printf ("error:enqueueing kernel\n");
System ("pause");
return exit_failure;
}
Will block, knowing that all OpenCL commands that were queued before are committed and completed
A valid command queue
Status=clfinish (Commendqueue);
if (status!=cl_success)
{
printf ("Error:finish commend queue\n");
return exit_failure;
}
Read cache memory into host
The first parameter holds the command queue for read and write commands, and the second is a valid cache object.
The fifth parameter is the number of bytes read, and the sixth parameter points to the host memory, and 7,8,9 is the event setting to wait before executing the read command.
Status=clenqueuereadbuffer (Commendqueue,outputbuffer,cl_true,0,num*num*4,outbuffer,0,null,null);
if (status!=cl_success)
{
printf ("Error:read buffer queue\n");
return exit_failure;
}
printf ("out:\n");
for (int i=0;i<num*num;i++)
{
printf ("%d\t", Outbuffer[i]);
if (i%4==3)
printf ("\ n");
}
Status=clreleasekernel (kernel);
Status=clreleaseprogram (program);
Status=clreleasememobject (OutputBuffer);
Status=clreleasecommandqueue (Commendqueue);
Status=clreleasecontext (context);
Free (devices);
Delete Outbuffer;
System ("pause");
return 0;
}
hello_world.cl
/* Please Write the OpenCL Kernel (s) Code here*/
/*
 * Each work-item writes its own linear index into `buffer`.
 * The index is x + 16 * y, where x and y are the work-item's global IDs in
 * dimensions 0 and 1; the row stride 16 is hard-coded.
 */
__kernel void Hello_world (__global uint *buffer)
{
    size_t gidx = get_global_id (0);
    size_t gidy = get_global_id (1);
    buffer[gidx + 16 * gidy] = gidx + 16 * gidy;
}
Compile: g++ hello_world.cpp -I $AMDAPPSDKROOT/include -L $AMDAPPSDKROOT/lib/x86_64 -lOpenCL -o hello_world
Run: ./hello_world
Caffe Installation
First, install the build-essential
sudo apt-get install build-essential
If the essential package is unavailable, sudo apt-get update. This problem does not occur with the replacement of the software source.
Second, install clBLAS
Download https://github.com/clMathLibraries/clBLAS
Extract the archive
cd clBLAS-master/src
Reference https://travis-ci.org/clMathLibraries/clBLAS/jobs/128050473#L434
{
export DEBIAN_FRONTEND=noninteractive
git clone --depth=50 --branch=develop https://github.com/clMathLibraries/clBLAS.git clMathLibraries/clBLAS
cd clMathLibraries/clBLAS
export DEBIAN_FRONTEND=noninteractive
sudo -E apt-add-repository -y "ppa:kubuntu-ppa/backports"
sudo -E apt-add-repository -y "ppa:boost-latest/ppa"
sudo -E apt-get -yq update &>> ~/apt-get-update.log
sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install gfortran cmake libboost-program-options1.55-dev
export CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
export OPENCL_REGISTRY=https://www.khronos.org/registry/cl
export OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl
export CXX=g++
export CC=gcc
mkdir -p ${CLBLAS_ROOT}
pushd ${CLBLAS_ROOT}
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=OFF -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
make package
}
You may need to install the Fortran compiler: sudo apt-get install gfortran
ACML may also be required:
{
Download ACML
http://developer.amd.com/tools-and-sdks/archive/amd-core-math-library-acml/acml-downloads-resources/
Install ACML
tar -zxvf acml-5-3-1-gfortran-64bit.tgz
sudo sh install-acml-5-3-1-gfortran-64bit.sh
}
Third, install OpenCV
Reference: http://www.tuicool.com/articles/nYJrYra
http://docs.opencv.org/3.0-last-rst/doc/tutorials/introduction/linux_install/linux_install.html
3.1 Libraries required to install OPENCV
GCC 4.4.x or later
CMake 2.6 or higher
Git
gtk+2.x or higher, including headers (Libgtk2.0-dev)
Pkg-config
Python 2.6 or later and Numpy 1.5 or later with developer packages (Python-dev, python-numpy)
FFmpeg or LIBAV development Packages:libavcodec-dev, Libavformat-dev, Libswscale-dev
[Optional] libtbb2 Libtbb-dev
[Optional] libdc1394 2.x
[optional] Libjpeg-dev, Libpng-dev, Libtiff-dev, Libjasper-dev, Libdc1394-22-dev
[Compiler] sudo apt-get install build-essential
[Required] sudo apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
[Optional] sudo apt-get install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev
3.2 Official website Download the latest OPENCV source code
3.3 Compiling OpenCV
A. Place opencv-3.0.0.zip in any directory and unzip it: unzip opencv-3.0.0.zip
B. Create a build directory and compile
cd ~/opencv-3.0.0
mkdir release
cd release
%% The first run hit a problem: the build tried to download ippicv_linux_20141027.tgz and the download failed.
%% Following http://my.oschina.net/u/1046919/blog/479947, download the file separately and use it to replace
%% the original file (the original was only 14 MB; the downloaded one is 28 MB).
%% Download address: http://sourceforge.net/projects/opencvlibrary/files/3rdparty/ippicv/
%% Path of the file to replace: ~/opencv-3.0.0/3rdparty/ippicv/downloads/linux-8b449a536a2157bcad08a2b9f266828b
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local ..
make -j4
%% The number after -j is best set to the number of CPU cores in the computer
sudo make install
3.4 Test OpenCV (optional)
3.4.1 Creating a working directory
mkdir ~/opencv-lena
cd ~/opencv-lena
gedit DisplayImage.cpp
3.4.2 Edit the following code
#include <stdio.h>
#include <opencv2/opencv.hpp>
using namespace CV;
int main (int argc, char** argv)
{
if (argc! = 2)
{
printf ("Usage:DisplayImage.out <image_path>\n");
return-1;
}
Mat image;
Image = Imread (argv[1], 1);
if (!image.data)
{
printf ("No image data \ n");