In image processing, the most time-consuming step is usually the convolution. If you get the chance to do an image processing project in C++ in the future, the convolution is the part worth optimizing. (In MATLAB there is no need to optimize, because it is only used to verify the algorithm.) So I pulled the convolution out into a single function and implemented it in assembly. I studied electronic engineering, so assembly is of course no problem.
The function is written in assembly, using inline ASM embedded in C++. A C++ version with the same functionality is also included, but it is commented out; removing the comment markers (and commenting out the assembly instead) gives the same result.
The OpenCV library is used only to read and display the image. I don't think I have touched this library since my previous project ended. After all, you cannot really understand the underlying principles just by calling library functions.
# Include <iostream> # include "CV. H "# include" highgui. H "using namespace STD; // * IMG is the image data, I _H is the image height, I _w is the image width // * m is the convolution template, m_h is the template height, m_w is the width of the template // X, and Y is the convolution at the coordinate of the image (x, y) // return the convolution value int Conv (int * IMG, int I _H, int I _w, int * m, int m_h, int M_w, int y, int X) {int re; int sum1; int sum2; int half_m_w; int half_m_h; int I; Int J; int II; int JJ; _ ASM {mov re, 0; MoV sum1, 0; MoV sum2, 0; MoV eax, M_w; // half_m_w = (m_w-1)/2; dec eax; m Ov bl, 2; Div BL; MoV ah, 0; MoV half_m_w, eax; MoV eax, m_h; // half_m_h = (m_h-1)/2; Dec eax; MoV BL, 2; Div BL; MoV ah, 0; MoV half_m_h, eax; MoV ECx, M_w; // M_w * h_h imul ECx, m_h; MoV eax, 0; label1: mov EBX, m; // for (I = 0; I <M_w * m_h; I ++) add eax, [EBX]; // {Add EBX, 4; // sum2 + = m [I]; Dec ECx; //} jnz label1; MoV sum2, eax; MoV I, 0; MoV II, 0; MoV eax, Y; // I = y-half_m_h; sub eax, half_m_h; MoV I, eax; label2: mov J, 0; MoV JJ, 0; MoV eax, X; // J = x-half_m_w; sub eax, half_m_w; MoV J, eax; label3: mov EBX, IMG; // re-assign IMG base address mov edX, m in each loop; // re-assign M base address mov eax, I; // I * I _w + J imul eax, 4; imul eax, I _w; MoV ECx, J; imul ECx, 4; add eax, ECx; add EBX, eax; MoV eax, II; // II * M_w + JJ imul eax, 4; imul eax, M_w; MoV ECx, JJ; imul ECx, 4; add eax, ECx; add edX, eax; MoV eax, [EBX]; // sum1 + = IMG [I * I _w + J] * m [II * M_w + JJ]; imul eax, [edX]; add eax, Sum1; MoV sum1, eax; MoV eax, JJ; // JJ ++ Inc eax; MoV JJ, eax; MoV eax, X; // J? <X + half_m_w add eax, half_m_w; MoV ECx, eax; sub ECx, J; MoV eax, J; // J ++ Inc eax; MoV J, eax; test ECx, ECX; jnz label3; MoV eax, II; // II ++ Inc eax; MoV II, eax; MoV eax, Y; // I? 
<Y + half_m_h add eax, half_m_h; MoV ECx, eax; sub ECx, I; MoV eax, I; // I ++ Inc eax; MoV I, eax; test ECx, ECX; jnz label2; MoV eax, sum1; // sum1/sum2 mov EBX, sum2; Div BL; MoV ah, 0; MoV re, eax ;} /* half_m_h = (m_h-1)/2; half_m_w = (m_w-1)/2; sum1 = 0; sum2 = 0; for (I = 0; I <M_w * m_h; I ++) {sum2 + = m [I];} for (I = y-half_m_h, II = 0; I <= Y + half_m_h; I ++, II ++) {for (j = x-half_m_w, JJ = 0; j <= x + half_m_w; j ++, JJ ++) {sum1 + = IMG [I * I _w + J] * m [II * M_w + JJ];} Re = int (sum1/sum2); */return re ;} int main () {iplimage * image; cvscalar s; image = cvloadimage ("C:/users/TC/documents/Visual Studio 2010/projects/Vm/debug/lena.jpg ", 0); int * IMG; IMG = new int [Image-> height * image-> width]; for (INT I = 0; I <image-> height; I ++) {for (Int J = 0; j <image-> width; j ++) {S = cvget2d (image, I, j ); IMG [I * image-> width + J] = (INT) s. val [0] ;}} int * m; M = new int [9]; for (INT I = 0; I <9; I ++) {M [I] = 1 ;}for (INT I = 1; I <image-> height-1; I ++) {for (Int J = 1; j <image-> width-1; j ++) {S = cvget2d (image, I, j); S. val [0] = Conv (IMG, image-> height, image-> width, M, 3, I, j); cvset2d (image, I, J, s) ;}} cvnamedwindow ("Lena", 1); cvshowimage ("Lena", image); cvwaitkey (0); cvreleaseimage (& image); cvdestroyallwindows (); delete [] IMG; Delete [] m; return 0 ;}
NOTE: Using MMX or SSE instructions would give even better performance, but I am not familiar with them.