The principle behind mean blur is simple, so I will not explain it here.
In my tests, the assembly version takes about 1-2 ms to process the image.
The C++ version takes about 3-4 ms.
Only the key code is shown here; the rest of the code can be found here.
Void asmmeanfilter (bitmapdata * Data) {uint Height = data-> height; uint s_height = height-1; uint width = data-> width; uint s_width = width-1; PIX * P = (pix *) Data-> scan0; uint H, W, I, j; _ ASM {push ESI; push EDI; pxor mm7, mm7; MoV ESI, [P]; MoV EDI, [p]; MoV h, 1; h_loop: mov W, 1; w_loop: pxor MM1, MM1; MoV ECx, H; Dec ECx; moV I, ECx; I _loop: mov edX, W; Dec edX; MoV J, EDX; j_loop: mov eax, I; MoV EBX, width; Mul EBX; add eax, J; moV EBX, 4; // each pixel occupies 4 bytes of mul EBX; movd mm0, [ESI + eax]; punpcklbw mm0, mm7; paddw MM1, mm0; inc j; edX mov, w; add edX, 2; CMP edX, J; jnz j_loop; // end J loop Inc I; MoV ECx, H; add ECx, 2; CMP ECx, I; jnz I _loop; // end I loop movq mm2, MM1; movq mm3, MM1; psrlw MM1, 4; // each word in the register shifts four psrlw mm2, 5; psrlw mm3, 6; paddw MM1, mm2; paddw MM1, mm3; // here, we used to divide every word in MM1 by 9. I used 1/16 + 1/32 + 1/64 to simulate packuswb MM1 and mm7; // shrink the word into the byte mov eax, H; MoV EBX, width; Mul EBX; add eax, W; MoV EBX, 4; Mul EBX; movd [EDI + eax], MM1; MoV [EDI + eax + 3], 0xff; // set the image alpha channel to 255 Inc W; MoV EBX, W; cmp ebx, s_width; jnz w_loop; // end W loop Inc h; MoV eax, H; CMP eax, s_height; jnz h_loop; // end H loop pop EDI; pop ESI; Emms ;}} void cppmeanfilter (bitmapdata * Data) {pix * P = (pix *) Data-> scan0; For (uint H = 1; H <data-> height-1; ++ h) {for (uint W = 1; W <data-> width-1; ++ W) {unsigned int R, G, B; r = 0; G = 0; B = 0; For (uint I = h-1; I <= H + 1; I ++) {for (uint J = W-1; j <= W + 1; j ++) {r = P [J + I * Data-> width]. red + R; G = P [J + I * Data-> width]. green + G; B = P [J + I * Data-> width]. blue + B ;}} P [W + H * Data-> width]. red = r/9; P [W + H * Data-> width]. green = g/9; P [W + H * Data-> width]. blue = B/9 ;;}}}
Processing result: