I have been bored for the past two days while waiting for a job offer, so I am recalling something I did more than a year ago: receiving a standard MPEG-4 RTP stream and playing it back locally.
I used DirectDraw (ddraw) for display. When I first started debugging the display, the focus was on synchronizing and receiving the stream, so I used yuv2bmp:
convert YV12 to a BMP, and then paste it to the window DC second by second. (Haha, you can imagine the effect.) After the matrix (colour-space) conversion, the image is simply drawn to the DC.
Later, the project entered the optimization stage and we decided to use a DirectDraw overlay to display the YV12 video. The approach follows the "Mosquito" sample program in the DirectX 7 SDK (dxsdk7):
the YV12 data is uploaded directly to the overlay surface for display.
The Mosquito sample displays YUV422. Compared with YV12, only the lines that memcpy the data into the surface (psurf) are different; just refer to the YUV format specifications.
(This also shows what ddraw is useful for. dxsdk7 is the last SDK version of ddraw.)
Implementation of yuv2bmp:
Void yuv2rgb_32 (uint8_t * puc_y, int stride_y,
Uint8_t * puc_u, uint8_t * puc_v, int stride_uv,
Uint8_t * puc_out, int width_y, int height_y,
Unsigned int _ stride_out)
{
/* Int X, Y;
Int stride_diff = 4 * (_ stride_out-width_y );
If (height_y <0 ){
// We are flipping our output upside-down
Height_y =-height_y;
Puc_y + = (height_y-1) * stride_y;
Puc_u + = (height_y/2-1) * stride_uv;
Puc_v + = (height_y/2-1) * stride_uv;
Stride_y =-stride_y;
Stride_uv =-stride_uv;
}
For (y = 0; y {
For (x = 0; x <width_y; X ++)
{
Signed int _ r, _ g, _ B;
Signed int R, G, B;
Signed int y, U, V;
Y = puc_y [x] + 10; //-16;
U = puc_u [x> 1]-128;
V = puc_v [x> & gt; 1]-128;
_ R = _ r (Y, U, V );
_ G = _ g (Y, U, V );
_ B = _ B (Y, U, V );
R = _ S (_ R );
G = _ S (_ g );
B = _ S (_ B );
Puc_out [0] = R;
Puc_out [1] = g;
Puc_out [2] = B;
Puc_out [3] = 0;
Puc_out + = 4;
}
Puc_y + = stride_y;
If (Y % 2 ){
Puc_u + = stride_uv;
Puc_v + = stride_uv;
}
Puc_out + = stride_diff;
}*/
//// // Intel MMX ///////////////
Int y, horiz_count;
Int stride_out = width_y <2;
If (height_y <0 ){
// We are flipping our output upside-down
Height_y =-height_y;
Puc_y + = (height_y-1) * stride_y;
Puc_u + = (height_y> 1)-1) * stride_uv;
Puc_v + = (height_y> 1)-1) * stride_uv;
Stride_y =-stride_y;
Stride_uv =-stride_uv;
}
Horiz_count =-(width_y> 3 );
For (y = 0; y
_ ASM {
Push eax
Push EBX
Push ECx
Push edX
Push EDI
MoV eax, puc_out
MoV EBX, puc_y
MoV ECx, puc_u
MoV edX, puc_v
MoV EDI, horiz_count
Horiz_loop:
Movd mm2, [ECx]
Pxor mm7, mm7
Movd mm3, [edX]
Punpcklbw mm2, mm7; mm2 = _ U3 _ U2 _ U1 _ U0
Movq mm0, [EBX]; mm0 = y7y6y5y4y3y2y1y0
Punpcklbw mm3, mm7; mm3 = _ V3 _ V2 _ V1 _ V0
Movq MM1, mmw_0x00ff; MM1 = 00ff00ff00ff00ff
Psubusb mm0, mmb_0x10; mm0-= 16
Psubw mm2, mmw_0x0080, and mm2-= 128
Pand MM1, mm0; MM1 = _ y6 _ Y4 _ Y2 _ y0
Psubw mm3, mmw_0x0080, mm3-= 128
Psllw MM1, 3; MM1 * = 8
Psrlw mm0, 8; mm0 = _ y7 _ Y5 _ Y3 _ Y1
Psllw mm2, 3; mm2 * = 8
Pmulhw MM1, mmw_mult_y; MM1 * = Luma coeff
Psllw mm0, 3; mm0 * = 8
Psllw mm3, 3; mm3 * = 8
Movq MM5, mm3; MM5 = mm3 = V
Pmulhw MM5, mmw_mult_v_r; MM5 = red chroma
Movq mm4, mm2; mm4 = mm2 = u
Pmulhw mm0, mmw_mult_y; mm0 * = Luma coeff
Movq mm7, MM1; even Luma part
Pmulhw mm2, mmw_mult_u_g; mm2 * = u green coeff
Paddsw mm7, mm5; mm7 = Luma + chroma _ R6 _ R4 _ R2 _ R0
Pmulhw mm3, mmw_mult_v_g; mm3 * = V green coeff
Packuswb mm7, mm7; mm7 = r6r4r2r0r6r4r2r0
Pmulhw mm4, mmw_mult_u_ B; mm4 = blue chroma
Paddsw MM5, mm0; MM5 = Luma + chroma _ R7 _ R5 _ r3 _ r1
Packuswb MM5, mm5; mm6 = r7r5r3r1r7r5r3r1
Paddsw mm2, mm3; mm2 = green chroma
Movq mm3, MM1; mm3 = _ y6 _ Y4 _ Y2 _ y0
Movq mm6, MM1; mm6 = _ y6 _ Y4 _ Y2 _ y0
Paddsw mm3, mm4; mm3 = Luma + chroma _ B6 _ B4 _ b2 _ B0
Paddsw mm6, mm2; mm6 = Luma + chroma _ G6 _ G4 _ G2 _ G0
Punpcklbw mm7, mm5; mm7 = r7r6r5r4r3r2r1r0
Paddsw mm2, mm0; odd Luma part plus chroma part _ G7 _ G5 _ G3 _ G1
Packuswb mm6, mm6; mm2 = g6g4g21_g6g4g2g0
Packuswb mm2, mm2; mm2 = g7g5g3161g7g5g3g1
Packuswb mm3, mm3; mm3 = b6b4b2b0b6b4b2b0
Paddsw mm4, mm0; odd Luma part plus chroma part _ B7 _ B5 _ B3 _ b1
Packuswb mm4, mm4; mm4 = b7b5b3b1b7b5b3b1
Punpcklbw mm6, mm2; mm6 = g7g6g5g4g3162g1g0
Punpcklbw mm3, mm4; mm3 = b7b6b5b4b3b2b1b0
// 32-bit shuffle ....
Pxor mm0, mm0; is this needed?
Movq MM1, mm6; MM1 = g7g6g5g4g3162g1g0
Punpcklbw MM1, mm0; MM1 = _ G3 _ G2 _ G1 _ G0
Movq mm0, mm3; mm0 = b7b6b5b4b3b2b1b0
Punpcklbw mm0, mm7; mm0 = r3b3r2b2r1b1r0b0
Movq mm2, mm0; mm2 = r3b3r2b2r1b1r0b0
Punpcklbw mm0, MM1; mm0 = _ r1g1b1 _ r01_b0
Punpckhbw mm2, MM1; mm2 = _ r%3b3 _ r2g2b2
// 32-bit save...
Movq [eax], mm0; eax [0] = _ r1g1b1 _ r01_b0
Movq MM1, mm6; MM1 = g7g6g5g4g3162g1g0
Movq 8 [eax], mm2; eax [8] = _ r1_3b3 _ r2g2b2
// 32-bit shuffle ....
Pxor mm0, mm0; is this needed?
Punpckhbw MM1, mm0; MM1 = _ G7 _ G6 _ G5 _ G4
Movq mm0, mm3; mm0 = b7b6b5b4b3b2b1b0
Punpckhbw mm0, mm7; mm0 = r7b7r6b6r5b5r4b4
Movq mm2, mm0; mm2 = r7b7r6b6r5b5r4b4
Punpcklbw mm0, MM1; mm0 = _ r5g5b5 _ r4g4b4
Punpckhbw mm2, MM1; mm2 = _ r7g7b7 _ r6g6b6
// 32-bit save...
Add EBX, 8; puc_y + = 8;
Add ECx, 4; puc_u + = 4;
Movq 16 [eax], mm0; eax [16] = _ r5g5b5 _ r4g4b4
Add edX, 4; puc_v + = 4;
Movq 24 [eax], mm2; eax [24] = _ r7g7b7 _ r6g6b6
// 0 1 2 3 4 5 6 7 RGB save order
Add eax, 32; puc_out + = 32
INC EDI
JNE horiz_loop
Pop EDI
Pop edX
Pop ECx
Pop EBX
Pop eax
Emms
}
Puc_y + = stride_y;
If (Y & 0x01) {// % 2 ){
Puc_u + = stride_uv;
Puc_v + = stride_uv;
}
Puc_out + = stride_out;
}
}