Today I tried to write a camera application based on v4l2. For now I only want to capture video from the camera and display it locally.
As shown in the preceding figure, the small window on the left is the local preview (monitor) window; the one on the right went through x264 compression, TCP transmission, libavcodec decompression, and Qt display. The latency is very, very low :)
It mainly involves the following knowledge points:
1. The v4l2 interface
2. Local display with X11
3. libswscale for scaling and format conversion
4. libx264 for compression
1. The v4l2 interface: as you can see, it is really just a series of VIDIOC_XXXX ioctls. The general flow is as follows:
capture_open(name)
open /dev/video0 // open the device
check driver caps // query the driver capabilities
VIDIOC_REQBUFS // streaming mode with mmap buffers; request allocation
VIDIOC_QUERYBUF // get the allocated buffers
mmap // map them into the process address space
VIDIOC_QBUF // queue the buffers
VIDIOC_STREAMON // start streaming
Data structures used:
struct Buffer {
    void *start;
    size_t length;
};
typedef struct Buffer Buffer;

struct Ctx {
    int vid;
    int width, height;              // output image size
    struct SwsContext *sws;         // used for format conversion
    int rows;                       // used by sws_scale()
    int bytesperrow;                // used to fill pic_src
    AVPicture pic_src, pic_target;  // used by sws_scale()
    Buffer bufs[2];                 // for mmap
};
typedef struct Ctx Ctx;
capture_open(...) opens the device:
void *capture_open(const char *dev_name, int t_width, int t_height)
{
    int id = open(dev_name, O_RDWR);
    if (id < 0) return 0;

    Ctx *ctx = new Ctx;
    ctx->vid = id;

    // query caps
    v4l2_capability caps;
    ioctl(id, VIDIOC_QUERYCAP, &caps);
    if (caps.capabilities & V4L2_CAP_VIDEO_CAPTURE) {
        if (caps.capabilities & V4L2_CAP_READWRITE) {
            // TODO: ...
        }
        if (caps.capabilities & V4L2_CAP_STREAMING) {
            // check whether MMAP or USERPTR is supported
            v4l2_requestbuffers bufs;
            memset(&bufs, 0, sizeof(bufs));
            bufs.count = 2;
            bufs.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
            bufs.memory = V4L2_MEMORY_MMAP;
            if (ioctl(id, VIDIOC_REQBUFS, &bufs) < 0) {
                fprintf(stderr, "%s: don't support MEMORY_MMAP mode!\n", __func__);
                close(id);
                delete ctx;
                return 0;
            }
            fprintf(stderr, "%s: using MEMORY_MMAP mode, buf cnt = %d\n", __func__, bufs.count);

            // mmap
            for (int i = 0; i < 2; i++) {
                v4l2_buffer buf;
                memset(&buf, 0, sizeof(buf));
                buf.type = bufs.type;
                buf.memory = bufs.memory;
                buf.index = i;   // index was lost in the original listing; VIDIOC_QUERYBUF needs it
                if (ioctl(id, VIDIOC_QUERYBUF, &buf) < 0) {
                    fprintf(stderr, "%s: VIDIOC_QUERYBUF ERR\n", __func__);
                    close(id);
                    delete ctx;
                    return 0;
                }
                ctx->bufs[i].length = buf.length;
                ctx->bufs[i].start = mmap(0, buf.length, PROT_READ | PROT_WRITE,
                                          MAP_SHARED, id, buf.m.offset);
            }
        }
        else {
            fprintf(stderr, "%s: can't support read()/write() mode and streaming mode\n", __func__);
            close(id);
            delete ctx;
            return 0;
        }
    }
    else {
        fprintf(stderr, "%s: can't support video capture!\n", __func__);
        close(id);
        delete ctx;
        return 0;
    }

    int rc;

    // enumerate all supported image formats
    // It seems planar formats are not supported here; use YUYV directly,
    // then convert with libswscale.
#if 0
    v4l2_fmtdesc fmt_desc;
    uint32_t index = 0;
    do {
        fmt_desc.index = index;
        fmt_desc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        rc = ioctl(id, VIDIOC_ENUM_FMT, &fmt_desc);
        if (rc >= 0) {
            fprintf(stderr, "\t support %s\n", fmt_desc.description);
        }
        index++;
    } while (rc >= 0);
#endif // 0

    v4l2_format fmt;
    fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    rc = ioctl(id, VIDIOC_G_FMT, &fmt);
    if (rc < 0) {
        fprintf(stderr, "%s: can't VIDIOC_G_FMT...\n", __func__);
        return 0;
    }

    PixelFormat pixfmt = PIX_FMT_NONE;
    switch (fmt.fmt.pix.pixelformat) {
    case V4L2_PIX_FMT_YUYV:
        pixfmt = PIX_FMT_YUYV422;
        break;
    }
    if (pixfmt == PIX_FMT_NONE) {
        fprintf(stderr, "%s: can't support %.4s\n", __func__, (char *)&fmt.fmt.pix.pixelformat);
        return 0;
    }

    // build the sws context
    fprintf(stderr, "capture_width = %d, height = %d, stride = %d\n",
            fmt.fmt.pix.width, fmt.fmt.pix.height, fmt.fmt.pix.bytesperline);
    ctx->width = t_width;
    ctx->height = t_height;
    ctx->sws = sws_getContext(fmt.fmt.pix.width, fmt.fmt.pix.height, pixfmt,
                              ctx->width, ctx->height, PIX_FMT_YUV420P,  // PIX_FMT_YUV420P matches x264's I420 input
                              SWS_FAST_BILINEAR, 0, 0, 0);               // flag reconstructed; lost in the original listing
    ctx->rows = fmt.fmt.pix.height;
    ctx->bytesperrow = fmt.fmt.pix.bytesperline;
    avpicture_alloc(&ctx->pic_target, PIX_FMT_YUV420P, ctx->width, ctx->height);

    // queue the buffers
    for (int i = 0; i < (int)(sizeof(ctx->bufs) / sizeof(Buffer)); i++) {
        v4l2_buffer buf;
        memset(&buf, 0, sizeof(buf));
        buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        buf.memory = V4L2_MEMORY_MMAP;
        buf.index = i;
        if (ioctl(id, VIDIOC_QBUF, &buf) < 0) {
            fprintf(stderr, "%s: VIDIOC_QBUF err\n", __func__);
            exit(-1);
        }
    }

    int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    if (ioctl(id, VIDIOC_STREAMON, &type) < 0) {
        fprintf(stderr, "%s: VIDIOC_STREAMON err\n", __func__);
        exit(-1);
    }

    return ctx;
}
capture_get_pic()
VIDIOC_DQBUF // dequeue a filled buffer
sws_scale // convert/scale to PIX_FMT_YUV420P for easier compression
VIDIOC_QBUF // re-queue the buffer
capture_get_picture(...) fetches one image from the camera:
int capture_get_picture(void *id, Picture *pic)
{
    // dequeue a buffer and convert it
    Ctx *ctx = (Ctx *)id;
    v4l2_buffer buf;
    memset(&buf, 0, sizeof(buf));
    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    if (ioctl(ctx->vid, VIDIOC_DQBUF, &buf) < 0) {
        fprintf(stderr, "%s: VIDIOC_DQBUF err\n", __func__);
        return -1;
    }

    // _save_pic(ctx->bufs[buf.index].start, buf.length);
    // __asm__("int $3");

    ctx->pic_src.data[0] = (unsigned char *)ctx->bufs[buf.index].start;
    ctx->pic_src.data[1] = ctx->pic_src.data[2] = ctx->pic_src.data[3] = 0;
    ctx->pic_src.linesize[0] = ctx->bytesperrow;
    ctx->pic_src.linesize[1] = ctx->pic_src.linesize[2] = ctx->pic_src.linesize[3] = 0;

    // sws_scale
    int rs = sws_scale(ctx->sws, ctx->pic_src.data, ctx->pic_src.linesize,
                       0, ctx->rows, ctx->pic_target.data, ctx->pic_target.linesize);

    // out
    for (int i = 0; i < 4; i++) {
        pic->data[i] = ctx->pic_target.data[i];
        pic->stride[i] = ctx->pic_target.linesize[i];
    }

    // re-queue the buffer
    if (ioctl(ctx->vid, VIDIOC_QBUF, &buf) < 0) {
        fprintf(stderr, "%s: VIDIOC_QBUF err\n", __func__);
        return -1;
    }
    return 1;
}
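For reference, here is a minimal sketch of how this capture API would be driven. The Picture struct itself is not listed in the article; its fields below are inferred from how capture_get_picture() fills pic->data and pic->stride, so treat the exact declaration as an assumption.

// Sketch only: Picture layout inferred from capture_get_picture() above.
struct Picture {
    unsigned char *data[4];   // plane pointers (YUV420P after sws_scale)
    int stride[4];            // bytes per row of each plane
};

void *capture_open(const char *dev_name, int t_width, int t_height);
int capture_get_picture(void *id, Picture *pic);

int capture_demo()
{
    void *cap = capture_open("/dev/video0", 320, 240);
    if (!cap) return -1;

    for (int n = 0; n < 100; n++) {   // grab 100 frames
        Picture pic;
        if (capture_get_picture(cap, &pic) < 0)
            break;
        // pic.data[0..2] now point at the Y/U/V planes held by the capture
        // context; they stay valid until the next capture_get_picture() call.
    }
    return 0;
}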
2. Local display with X11: XShm is used, and the efficiency is good.
vs_open()
XOpenDisplay()
XCreateSimpleWindow()
XCreateGC()
XMapWindow()
XShmCreateImage()
shmget()
shmat()
Data structure used:
struct Ctx {
    Display *display;
    int screen;
    Window window;
    GC gc;
    XVisualInfo vinfo;
    XImage *image;
    XShmSegmentInfo segment;
    SwsContext *sws;
    PixelFormat target_pixfmt;
    AVPicture pic_target;
    int v_width, v_height;        // video source size
    int curr_width, curr_height;  // current window size
};
typedef struct Ctx Ctx;
vs_open(...) opens the display window:
void *vs_open(int v_width, int v_height)
{
    Ctx *ctx = new Ctx;
    ctx->v_width = v_width;
    ctx->v_height = v_height;

    // window
    ctx->display = XOpenDisplay(0);
    ctx->window = XCreateSimpleWindow(ctx->display, RootWindow(ctx->display, 0),
                                      100, 100, v_width, v_height, 0,
                                      BlackPixel(ctx->display, 0),
                                      WhitePixel(ctx->display, 0));
    ctx->screen = 0;
    ctx->gc = XCreateGC(ctx->display, ctx->window, 0, 0);
    XMapWindow(ctx->display, ctx->window);

    // current window geometry and screen pixel format
    Window root;
    unsigned int cx, cy, border, depth;
    int x, y;
    XGetGeometry(ctx->display, ctx->window, &root, &x, &y, &cx, &cy, &border, &depth);

    // visual info
    XMatchVisualInfo(ctx->display, ctx->screen, depth, DirectColor, &ctx->vinfo);

    // image
    ctx->image = XShmCreateImage(ctx->display, ctx->vinfo.visual, depth, ZPixmap,
                                 0, &ctx->segment, cx, cy);
    if (!ctx->image) {
        fprintf(stderr, "%s: can't XShmCreateImage!\n", __func__);
        exit(-1);
    }
    ctx->segment.shmid = shmget(IPC_PRIVATE,
                                ctx->image->bytes_per_line * ctx->image->height,
                                IPC_CREAT | 0777);
    if (ctx->segment.shmid < 0) {
        fprintf(stderr, "%s: shmget err\n", __func__);
        exit(-1);
    }
    ctx->segment.shmaddr = (char *)shmat(ctx->segment.shmid, 0, 0);
    if (ctx->segment.shmaddr == (char *)-1) {
        fprintf(stderr, "%s: shmat err\n", __func__);
        exit(-1);
    }
    ctx->image->data = ctx->segment.shmaddr;
    ctx->segment.readOnly = 0;
    XShmAttach(ctx->display, &ctx->segment);

    PixelFormat target_pix_fmt = PIX_FMT_NONE;
    switch (ctx->image->bits_per_pixel) {
    case 32:
        target_pix_fmt = PIX_FMT_RGB32;   // reconstructed: the 32-bpp value was lost in the original listing
        break;
    case 24:
        target_pix_fmt = PIX_FMT_RGB24;
        break;
    default:
        break;
    }
    if (target_pix_fmt == PIX_FMT_NONE) {
        fprintf(stderr, "%s: screen depth format err\n", __func__);
        delete ctx;
        return 0;
    }

    // sws
    ctx->target_pixfmt = target_pix_fmt;
    ctx->curr_width = cx;
    ctx->curr_height = cy;
    ctx->sws = sws_getContext(v_width, v_height, PIX_FMT_YUV420P,
                              cx, cy, target_pix_fmt,
                              SWS_FAST_BILINEAR, 0, 0, 0);
    avpicture_alloc(&ctx->pic_target, target_pix_fmt, cx, cy);

    XFlush(ctx->display);
    return ctx;
}
vs_show()
sws_scale() // scale to the current window size and convert the format
XShmPutImage() // display it, haha, really simple
Most of the code in vs_show(...) deals with window size changes:
int vs_show(void *ctx, unsigned char *data[4], int stride[4])
{
    // first check whether the sws context is still valid for the current window size
    Ctx *c = (Ctx *)ctx;
    Window root;
    int x, y;
    unsigned int cx, cy, border, depth;
    XGetGeometry(c->display, c->window, &root, &x, &y, &cx, &cy, &border, &depth);

    if (cx != c->curr_width || cy != c->curr_height) {
        avpicture_free(&c->pic_target);
        sws_freeContext(c->sws);

        c->sws = sws_getContext(c->v_width, c->v_height, PIX_FMT_YUV420P,
                                cx, cy, c->target_pixfmt,
                                SWS_FAST_BILINEAR, 0, 0, 0);
        avpicture_alloc(&c->pic_target, c->target_pixfmt, cx, cy);
        c->curr_width = cx;
        c->curr_height = cy;

        // re-create the shared image
        XShmDetach(c->display, &c->segment);
        shmdt(c->segment.shmaddr);
        shmctl(c->segment.shmid, IPC_RMID, 0);
        XDestroyImage(c->image);

        c->image = XShmCreateImage(c->display, c->vinfo.visual, depth, ZPixmap,
                                   0, &c->segment, cx, cy);
        c->segment.shmid = shmget(IPC_PRIVATE,
                                  c->image->bytes_per_line * c->image->height,
                                  IPC_CREAT | 0777);
        c->segment.shmaddr = (char *)shmat(c->segment.shmid, 0, 0);
        c->image->data = c->segment.shmaddr;
        c->segment.readOnly = 0;
        XShmAttach(c->display, &c->segment);
    }

    // convert/scale into the target picture
    sws_scale(c->sws, data, stride, 0, c->v_height, c->pic_target.data, c->pic_target.linesize);

    // copy into the shared image, row by row (the two strides may differ)
    unsigned char *p = c->pic_target.data[0], *q = (unsigned char *)c->image->data;
    int xx = MIN(c->image->bytes_per_line, c->pic_target.linesize[0]);
    for (int i = 0; i < c->curr_height; i++) {
        memcpy(q, p, xx);
        p += c->pic_target.linesize[0];
        q += c->image->bytes_per_line;
    }

    // display the image
    XShmPutImage(c->display, c->window, c->gc, c->image,
                 0, 0, 0, 0, c->curr_width, c->curr_height, 1);
    return 1;
}
3. libswscale: used for picture format and size conversion. CPU usage is quite high :). It is easy to use; basically:
sws = sws_getContext(...);
sws_scale(sws, ...);
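As a concrete example, the conversion used here (packed YUYV422 from the camera to planar YUV420P at the output size) looks roughly like this. It is a sketch against the old PIX_FMT_* / AVPicture API that the rest of the code uses; the helper's name and the SWS_FAST_BILINEAR flag are my own choices, not taken from the article.

extern "C" {
#include <libavcodec/avcodec.h>   // AVPicture, avpicture_alloc (old API)
#include <libswscale/swscale.h>
}

// Convert one packed YUYV422 frame (src_w x src_h, src_stride bytes per row)
// into a freshly allocated planar YUV420P picture of dst_w x dst_h.
static int yuyv_to_yuv420p(unsigned char *src, int src_w, int src_h, int src_stride,
                           AVPicture *dst, int dst_w, int dst_h)
{
    SwsContext *sws = sws_getContext(src_w, src_h, PIX_FMT_YUYV422,
                                     dst_w, dst_h, PIX_FMT_YUV420P,
                                     SWS_FAST_BILINEAR, 0, 0, 0);
    if (!sws)
        return -1;

    uint8_t *src_data[4] = { src, 0, 0, 0 };     // packed format: a single "plane"
    int src_linesize[4] = { src_stride, 0, 0, 0 };

    avpicture_alloc(dst, PIX_FMT_YUV420P, dst_w, dst_h);
    sws_scale(sws, src_data, src_linesize, 0, src_h, dst->data, dst->linesize);

    sws_freeContext(sws);
    return 0;
}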
4. libx264 compression: this is mainly for interactive use, so preset=fast, tune=zerolatency, 320x240, 10 fps, 300 kbps. In informal testing the delay is very low, under 100 ms.
Data structure used:
struct Ctx {
    x264_t *x264;
    x264_picture_t picture;
    x264_param_t param;

    void *output;                        // output buffer for the encoded stream
    int output_bufsize, output_datasize;

    int64_t pts;                         // input pts
    int64_t (*get_pts)(struct Ctx *);

    int64_t info_pts, info_dts;
    int info_key_frame;
    int info_valid;
};
vc_open(...) sets the necessary parameters and opens the encoder:
void *vc_open(int width, int height)
{
    Ctx *ctx = new Ctx;

    // set the encoding parameters
    // x264_param_default(&ctx->param);
    x264_param_default_preset(&ctx->param, "fast", "zerolatency");
    ctx->param.i_width = width;
    ctx->param.i_height = height;
    ctx->param.b_repeat_headers = 1;     // repeat SPS/PPS before every key frame
    ctx->param.b_cabac = 1;
    ctx->param.i_fps_num = 10;
    ctx->param.i_fps_den = 1;
    ctx->param.i_keyint_max = 30;
    ctx->param.i_keyint_min = 10;

    // rc
    ctx->param.rc.i_rc_method = X264_RC_CRF;
    ctx->param.rc.i_bitrate = 300;
    // ctx->param.rc.f_rate_tolerance = 0.1;
    // ctx->param.rc.i_vbv_max_bitrate = ctx->param.rc.i_bitrate * 1.3;
    // ctx->param.rc.f_rf_constant = 600;
    // ctx->param.rc.f_rf_constant_max = ctx->param.rc.f_rf_constant * 1.3;

#ifdef DEBUG
    ctx->param.i_log_level = X264_LOG_WARNING;
#else
    ctx->param.i_log_level = X264_LOG_NONE;
#endif  // release

    ctx->x264 = x264_encoder_open(&ctx->param);
    if (!ctx->x264) {
        fprintf(stderr, "%s: x264_encoder_open err\n", __func__);
        delete ctx;
        return 0;
    }

    x264_picture_init(&ctx->picture);
    ctx->picture.img.i_csp = X264_CSP_I420;
    ctx->picture.img.i_plane = 3;

    ctx->output = malloc(128 * 1024);
    ctx->output_bufsize = 128 * 1024;
    ctx->output_datasize = 0;

    ctx->get_pts = first_pts;   // first_pts() is defined elsewhere in vcompress.cpp (not shown)
    ctx->info_valid = 0;
    return ctx;
}
vc_compress(...) does the compression; when it succeeds, the encoded stream is returned:
static int encode_nals(Ctx *c, x264_nal_t *nals, int nal_cnt)
{
    char *pout = (char *)c->output;
    c->output_datasize = 0;
    for (int i = 0; i < nal_cnt; i++) {
        if (c->output_datasize + nals[i].i_payload > c->output_bufsize) {
            // grow the output buffer
            c->output_bufsize = (c->output_datasize + nals[i].i_payload + 4095) / 4096 * 4096;
            c->output = realloc(c->output, c->output_bufsize);
            pout = (char *)c->output;   // refresh after realloc
        }
        memcpy(pout + c->output_datasize, nals[i].p_payload, nals[i].i_payload);
        c->output_datasize += nals[i].i_payload;
    }
    return c->output_datasize;
}

int vc_compress(void *ctx, unsigned char *data[4], int stride[4], const void **out, int *len)
{
    Ctx *c = (Ctx *)ctx;

    // set the picture data
    for (int i = 0; i < 4; i++) {
        c->picture.img.plane[i] = data[i];
        c->picture.img.i_stride[i] = stride[i];
    }

    // encode
    x264_nal_t *nals;
    int nal_cnt;
    x264_picture_t pic_out;

    c->picture.i_pts = c->get_pts(c);

#ifdef DEBUG_MORE
    static int64_t _last_pts = c->picture.i_pts;
    fprintf(stderr, "DBG: pts delta = %lld\n", c->picture.i_pts - _last_pts);
    _last_pts = c->picture.i_pts;
#endif

    x264_picture_t *pic = &c->picture;
    do {
        // should delayed frames be drained here???
        // with the zerolatency preset the result is already good enough
        int rc = x264_encoder_encode(c->x264, &nals, &nal_cnt, pic, &pic_out);
        if (rc < 0) return -1;
        encode_nals(c, nals, nal_cnt);
    } while (0);

    *out = c->output;
    *len = c->output_datasize;

    if (nal_cnt > 0) {
        c->info_valid = 1;
        c->info_key_frame = pic_out.b_keyframe;
        c->info_pts = pic_out.i_pts;
        c->info_dts = pic_out.i_dts;
    }
    else {
        fprintf(stderr, ".");
        return 0;   // continue
    }

#ifdef DEBUG_MORE
    static size_t _seq = 0;
    fprintf(stderr, "#%lu: [%c] frame type=%d, size=%d\n",
            _seq, pic_out.b_keyframe ? '*' : '.', pic_out.i_type, c->output_datasize);
    _seq++;
#endif // DEBUG_MORE
    return 1;
}
Attached source code: alas, the source keeps changing, and CSDN does not offer a git- or svn-style repository, so forget it. If anyone wants the code, send me an email.
main.cpp: the main flow (a rough sketch of it follows the file list below)
capture.cpp, capture.h: grab image frames via v4l2
vcompress.cpp, vcompress.h: x264 compression
vshow.cpp, vshow.h: display real-time images with X11
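The article does not list main.cpp itself, so the following is only a rough sketch of how the local-preview path would wire the pieces above together; the loop structure, frame pacing, and error handling are my assumptions, and the TCP sending side is only indicated by a comment.

// Rough reconstruction of the main flow; the real main.cpp is not shown in
// the article, so treat this control flow as an assumption.
#include <unistd.h>

struct Picture { unsigned char *data[4]; int stride[4]; };

void *capture_open(const char *dev_name, int t_width, int t_height);
int   capture_get_picture(void *cap, Picture *pic);
void *vs_open(int v_width, int v_height);
int   vs_show(void *vs, unsigned char *data[4], int stride[4]);
void *vc_open(int width, int height);
int   vc_compress(void *vc, unsigned char *data[4], int stride[4],
                  const void **out, int *len);

int main()
{
    const int W = 320, H = 240;
    void *cap = capture_open("/dev/video0", W, H);
    void *vs  = vs_open(W, H);
    void *vc  = vc_open(W, H);
    if (!cap || !vs || !vc)
        return 1;

    for (;;) {
        Picture pic;
        if (capture_get_picture(cap, &pic) < 0)
            continue;

        vs_show(vs, pic.data, pic.stride);      // local preview via XShm

        const void *es;
        int es_len;
        if (vc_compress(vc, pic.data, pic.stride, &es, &es_len) > 0) {
            // es/es_len now hold the H.264 NAL units for this frame; the article
            // sends them over TCP to the libavcodec/Qt receiver (not shown here).
        }

        usleep(100 * 1000);   // ~10 fps pacing to match the encoder settings
    }
    return 0;
}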