Recently we used Intel's Media SDK to play H.264 video. We found that the frames decoded by the SDK are in the NV12 pixel format, so we have to convert them to RGB32 ourselves. Although plenty of code for this conversion can be found on the Internet, it is easy enough to write it yourself.
The conversion formula (found via a Google search):
C = Y-16
D = U-128
E = V-128
R = clip(round(1.164383 * C + 1.596027 * E))
G = clip(round(1.164383 * C - 0.391762 * D - 0.812968 * E))
B = clip(round(1.164383 * C + 2.017232 * D))
I spent about an hour learning how to use SSE2. The code worked on the first debugging run, which put me in a good mood, so here is the core of it.
_ M128i c0 = _ mm_setzero_si128 ();
_ M128i c128 = _ mm_setpaiepi16 (128 );
_ M128i c128_32 = _ mm_setpaiepi32 (128 );
_ M128i c16 = _ mm_setpaiepi16 (16 );
_ M128i c255 = _ mm_setpaiepi16 (255 );
_ M128i c_1_1596 = _ mm_set1_epi32 (0x199012a );
_ M128i c_1_2017 = _ mm_set1_epi32 (0x204012a );
_ M128i c_0_392 = _ mm_set1_epi32 (0xff9c0000 );
_ M128i c_0000813 = _ mm_set0000epi32 (0xff30012a );
For (int y = 0; y <src. Height; y ++)
{
BYTE * dest = (BYTE *) data. Scan0 + data. Stride * y;
BYTE * srcY = src. Y + src. Pitch * y;
BYTE * srcUV = src. UV + src. Pitch * (y/2 );
For (int x = 0; x <src. Width; x + = 4)
{
// Y0Y1Y2Y30000-16
_ M128i Ymm = _ mm_sub_epi16 (_ mm_unpacklo_epi8 (_ mm_cvtsi32_si128 (* (int *) (srcY + x), c0), c16 );
// U0V0U2V20000-128
_ M128i UVmm = _ mm_sub_epi16 (_ mm_unpacklo_epi8 (_ mm_cvtsi32_si128 (* (int *) (srcUV + x), c0), c128 );
// U0U0U2U20000
_ M128i Umm = _ mm_shufflelo_epi16 (UVmm, _ MM_SHUFFLE (2, 2, 0, 0 ));
// V0V0V2V20000
_ M128i Vmm = _ mm_shufflelo_epi16 (UVmm, _ MM_SHUFFLE (3, 3, 1, 1 ));
// Y0V0Y1V0Y2V2Y3V2
_ M128i YVmm = _ mm_unpacklo_epi16 (Ymm, Vmm );
// Y0U0Y1U0Y2U2Y3U2
_ M128i YUmm = _ mm_unpacklo_epi16 (Ymm, Umm );
_ M128i Rmm = _ mm_srai_epi32 (_ mm_add_epi32 (_ mm_madd_epi16 (YVmm, c_00001596), c128_32), 8 );
_ M128i Bmm = _ mm_srai_epi32 (_ mm_add_epi32 (_ mm_madd_epi16 (YUmm, c_1_2017), c128_32), 8 );
_ M128i Gmm = _ second (_ mm_madd_epi16 (YVmm, c_1_813), _ mm_madd_epi16 (YUmm, c_0_392), c128_32), 8 );
Rmm = _ mm_slli_epi32 (_ mm_and_si128 (Rmm, _ mm_cmpgt_epi32 (Rmm, c0), 16 );
Bmm = _ mm_and_si128 (Bmm, _ mm_cmpgt_epi32 (Bmm, c0 ));
Gmm = _ mm_slli_epi32 (_ mm_min_epi16 (_ mm_and_si128 (Gmm, _ mm_cmpgt_epi32 (Gmm, c0), c255), 8 );
* (_ M128i *) dest = _ mm_or_si128 (_ mm_min_epi16 (_ mm_or_si128 (Rmm, Bmm), c255), Gmm );
Dest + = 16;
}
}