As we all know, GPU acceleration has a great impact on image processing; a previous post verified by comparison how much GPU acceleration improves the efficiency of image filtering. But GPU technology is not omnipotent: this post shows that computing a histogram on the GPU is less efficient than computing it the traditional way on the CPU. The table below gives the comparison result; each time is the average over 20 runs, and the corresponding comparison code is given afterwards. The results show that computing the histogram on the CPU is more efficient. When training on a picture database with, say, 5000 pictures to process, using the CPU calculation saves about 75 minutes, which is a considerable saving.
Comparison of GPU and CPU histogram computation efficiency
| Method | CPU memory | GPU memory |
|---|---|---|
| Efficiency (average run time) | 0.855328s | 1.71659s |
The test image is shown below. Its size is 400*300, and it is converted to grayscale when it is loaded, before the timed part of the test program runs.
Using the `histEven` function to calculate the histogram in GPU memory, the average run time over 20 runs is 1.71659s. Note that the following code includes some header files that are not actually required.
#include <stdio.h> #include <tchar.h> #include <afxwin.h> #include <opencv.hpp> #include < opencv2/gpu/gpu.hpp> #include <fstream> #include <algorithm> #include "my_function.h" #include < memory> #include <vector> #define PI 3.14156#define classnum 102#define tr_num 15#define te_num 15using Namespac e std; using namespace CV; using namespace cv::gpu;int _tmain (int argc, _tchar* argv[]) {Mat img = imread ("Sunflower.jpg", 0 );//Read image file Ptr<filterengine_gpu> fdct[11][11]; Gpumat Dst_gpu, Src_gpu; Mat KD; Mat dst;double Fmin,fmax;dctfilter (kd,11);//Set up DCT transform filter for (int i=0;i<11;i++) for (int j=0;j<11;j++) fdct[i][j]= Createseparablelinearfilter_gpu (cv_8u,cv_32f, Kd.col (i), Kd.col (j));//construct OPENCV Filter class object Double T = (double) Cvgettickcount (); Src_gpu.upload (IMG);//upload data into GPU memory for (int k=0;k<20;k++) {for (int u=0;u<11;u++) for (int v=0;v <11;v++) {fdct[u][v]->apply (SRC_GPU,DST_GPU);//Perform filtering operation normalize (DST_GPU,DST_GPU,255.0,0.0,NORM_MINMAX);//filter matrix value range normalized to 0~255 Dst_gpu.convertto (dst_gpu,cv_8u);//histeven function requires cv_8u, cv_16u, or cv_16s data type Histeven (dst_ gpu,dst_gpu,10,0,255);//Calculate histogram normalize (Dst_gpu,dst_gpu,1.0,0.0,norm_minmax) in GPU memory,//normalize histogram statistic values to 0~1 dst_ Gpu.download (DST);//read data from GPU Memory}} t= (double) cvgettickcount ()-t; printf ("Run time =%gs\n", t/(Cvgettickfrequency () *1000000)/20); System ("Pause");}
Using the `calcHist` function to calculate the histogram in computer (CPU) memory, the average run time over 20 runs is 0.855328s. Note that the following code includes some header files that are not actually required.
#include <stdio.h> #include <tchar.h> #include <afxwin.h> #include <opencv.hpp> #include < opencv2/gpu/gpu.hpp> #include <fstream> #include <algorithm> #include "my_function.h" #include < memory> #include <vector> #define PI 3.14156#define classnum 102#define tr_num 15#define te_num 15using Namespac e std; using namespace CV; using namespace cv::gpu;int _tmain (int argc, _tchar* argv[]) {Mat img = imread ("Sunflower.jpg", 0 );//Read image file Ptr<filterengine_gpu> fdct[11][11]; Gpumat Dst_gpu, Src_gpu; Mat KD; Mat dst;double Fmin,fmax;dctfilter (kd,11);//Set up DCT transform filter for (int i=0;i<11;i++) for (int j=0;j<11;j++) fdct[i][j]= Createseparablelinearfilter_gpu (cv_8u,cv_32f, Kd.col (i), Kd.col (j));//construct OPENCV Filter class object Double T = (double) Cvgettickcount (); Src_gpu.upload (IMG);//upload data into GPU memory for (int k=0;k<20;k++) {for (int u=0;u<11;u++) for (int v=0;v <11;v++) {fdct[u][v]->apply (SRC_GPU,DST_GPU);//Perform filtering operations Dst_gpu.download (DST);//read data from GPU Memory Minmaxidx (DST,&AMp;fmin,&fmax); Dst.convertto (dst,cv_8u,255.0/(fmax-fmin), -255.0*fmin/(fmax-fmin));//filter matrix value range normalized to 0~255 int histsize[] = {10}; Float hranges[] = {0, 256}; Const float* ranges[] = {Hranges}; Matnd hist; int channels[] = {0}; Calchist (&DST, 1, channels, Mat (),//Don't use mask hist, 1, histsize, ranges, true,//-the histogram is Unifor M false); Double maxval=0; Minmaxloc (hist, 0, &maxval, 0, 0); Hist=hist.mul (1.0f/maxval);//Normalize the histogram statistic value to 0~1}} t= (double) cvgettickcount ()-t; printf ("Run time =%gs\n", t/(Cvgettickfrequency () *1000000)/20.0); System ("Pause");}
Does OpenCV histogram calculation need GPU acceleration?