結論:在 Linux 上,libpcap 自 1.0.0 起預設會以 mmap(PACKET_MMAP / PACKET_RX_RING)方式讀取封包,只有在核心不支援時才退回一般的讀取方式。所以通常不需要過多地去考慮抓包的效率問題,內部已經做得相當完善了。
ref: http://www.diybl.com/course/3_program/c++/cppxl/20100408/202002_3.html
在libpcap-1.0.0中引入了zerocopy BPF,那麼這個zerocopy BPF又是什麼呢?PACKET_MMAP
查看兩個版本libpcap編譯的程式的strace的差異,除了poll之外,對於setsockopt還有一個差異:
setsockopt(4, SOL_PACKET, PACKET_RX_RING, "\0@\0\0\376\0\0\0@ \0\0\376\0\0\0", 16) = 0
mmap2(NULL, 4161536, PROT_READ|PROT_WRITE, MAP_SHARED, 4, 0) = 0xb7a54000
從字面上來猜猜看:setsockopt設定socket的PACKET_RX_RING選項,至於這個選項是做什麼的,只能夠猜測是一個接收環形緩衝區相關的東西,具體其他的要看其他的參數了。
mmap2將一段核心空間地址映射到使用者空間,這樣使用者空間就可以直接操作核心緩衝區中的資料了,至於核心緩衝區中的資料如何來的,就是所謂的zerocopy BPF底層實現的了。
這個zerocopy叫做PACKET_MMAP,之前也叫做PACKET_RING,查看kernel的config檔案的話是:
CONFIG_PACKET_MMAP=y【viktor:3.1版本核心裡面沒有這個選項。
packet: Kill CONFIG_PACKET_MMAP. Early on this was an experimental facility that few people other than Alexey Kuznetsov played with. Now it's a pretty fundamental thing and as people add more features to AF_PACKET sockets this config options creates ifdef spaghetti. So kill it off.
】
以前的時候有一個專門的PACKET_MMAP版本的libpcap,但是在libpcap-1.0.0中已經增加了部分平台的PACKET_MMAP/PACKET_RING支援。
以上為轉載。
源碼分析:libpcap 1.3.0-pre-git
pcap_open_live調用的……pcap-linux.c: pcap_activate_linux
static int
pcap_activate_linux(pcap_t *handle)
{
    const char *device;
    int status = 0;

    //viktor: 首先設定成普通的檔案讀取方式。
    device = handle->opt.source;

    handle->inject_op = pcap_inject_linux;
    handle->setfilter_op = pcap_setfilter_linux;
    handle->setdirection_op = pcap_setdirection_linux;
    handle->set_datalink_op = NULL; /* can't change data link type */
    handle->getnonblock_op = pcap_getnonblock_fd;
    handle->setnonblock_op = pcap_setnonblock_fd;
    handle->cleanup_op = pcap_cleanup_linux;
    handle->read_op = pcap_read_linux;
    handle->stats_op = pcap_stats_linux;
    ......
    /*
     * viktor:activate_new啟用 PF_PACKET 串連。老版本核心是 SOCK_PACKET,維持相容性.把判錯代碼都刪掉了
     */
    status = activate_new(handle);
    if (status < 0) {
        goto fail;
    }
    if (status == 1) {
        /*
         * Success.
         * Try to use memory-mapped access.
         */
        switch (activate_mmap(handle, &status)) { // <<-----------------------------這裡

        case 1:
            /*
             * We succeeded. status has been
             * set to the status to return,
             * which might be 0, or might be
             * a PCAP_WARNING_ value.
             */
            return status;

        case 0:
            /*
             * Kernel doesn't support it - just continue
             * with non-memory-mapped access.
             */
            break;
    ......
pcap-linux.c: activate_mmap
#ifdef HAVE_PACKET_RING
/*
 * viktor: 成功返回1.不支援返回0.出錯返回-1 以下代碼把判錯的部分都刪掉了。
 */
static int
activate_mmap(pcap_t *handle, int *status)
{
    int ret;

    handle->md.oneshot_buffer = malloc(handle->snapshot);

    if (handle->opt.buffer_size == 0) {
        /* by default request 2M for the ring buffer */
        handle->opt.buffer_size = 2*1024*1024;
    }
    ret = prepare_tpacket_socket(handle);
    ret = create_ring(handle, status); // <<-----------------------------這裡

    /*
     * Success. 把操作都改成mmap操作......
     */
    handle->read_op = pcap_read_linux_mmap;
    handle->cleanup_op = pcap_cleanup_linux_mmap;
    handle->setfilter_op = pcap_setfilter_linux_mmap;
    handle->setnonblock_op = pcap_setnonblock_mmap;
    handle->getnonblock_op = pcap_getnonblock_mmap;
    handle->oneshot_callback = pcap_oneshot_mmap;
    handle->selectable_fd = handle->fd;
    return 1;
}
#else /* HAVE_PACKET_RING */
static int
activate_mmap(pcap_t *handle _U_, int *status _U_)
{
    return 0;
}
#endif /* HAVE_PACKET_RING */
pcap-linux.c: create_ring
/*
 * Attempt to set up memory-mapped access.
 *......
 */
static int
create_ring(pcap_t *handle, int *status)
{
    unsigned i, j, frames_per_block;
    struct tpacket_req req;
    socklen_t len;
    unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
    unsigned int frame_size;

    /*
     * Start out assuming no warnings or errors.
     */
    *status = 0;
.....................Line #3471
    req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
    req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size;

    /* compute the minumum block size that will handle this frame.
     * The block has to be page size aligned.
     * The max block size allowed by the kernel is arch-dependent and
     * it's not explicitly checked here. */
    req.tp_block_size = getpagesize();
    while (req.tp_block_size < req.tp_frame_size)
        req.tp_block_size <<= 1;

    frames_per_block = req.tp_block_size/req.tp_frame_size;
.....................Line #3588
retry:
    req.tp_block_nr = req.tp_frame_nr / frames_per_block;

    /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
    req.tp_frame_nr = req.tp_block_nr * frames_per_block;

    if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING, //<<------------------------這裡
                    (void *) &req, sizeof(req))) {
        if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
            /*
             * Memory failure; try to reduce the requested ring
             * size.
             *
             * We used to reduce this by half -- do 5% instead.
             * That may result in more iterations and a longer
             * startup, but the user will be much happier with
             * the resulting buffer size.
             */
            if (req.tp_frame_nr < 20)
                req.tp_frame_nr -= 1;
            else
                req.tp_frame_nr -= req.tp_frame_nr/20;
            goto retry;
        }
        if (errno == ENOPROTOOPT) {
            /*
             * We don't have ring buffer support in this kernel.
             */
            return 0;
        }
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
            "can't create rx ring on packet socket: %s",
            pcap_strerror(errno));
        *status = PCAP_ERROR;
        return -1;
    }

    /* memory map the rx ring */
    handle->md.mmapbuflen = req.tp_block_nr * req.tp_block_size;
    handle->md.mmapbuf = mmap(0, handle->md.mmapbuflen, // <<-----------------------------這裡
        PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0);
    if (handle->md.mmapbuf == MAP_FAILED) {
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
            "can't mmap rx ring: %s", pcap_strerror(errno));

        /* clear the allocated ring on error*/
        destroy_ring(handle);
        *status = PCAP_ERROR;
        return -1;
    }

    /* allocate a ring for each frame header pointer*/
    handle->cc = req.tp_frame_nr;
    handle->buffer = malloc(handle->cc * sizeof(union thdr *));
    if (!handle->buffer) {
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
            "can't allocate ring of frame headers: %s",
            pcap_strerror(errno));

        destroy_ring(handle);
        *status = PCAP_ERROR;
        return -1;
    }

    /* fill the header ring with proper frame ptr*/
    handle->offset = 0;
    for (i=0; i<req.tp_block_nr; ++i) {
        void *base = &handle->md.mmapbuf[i*req.tp_block_size];
        for (j=0; j<frames_per_block; ++j, ++handle->offset) {
            RING_GET_FRAME(handle) = base;
            base += req.tp_frame_size;
        }
    }

    handle->bufsize = req.tp_frame_size;
    handle->offset = 0;
    return 1;
}
我自己的strace結果:
bind(3, {sa_family=AF_PACKET, proto=0x03, if3, pkttype=PACKET_HOST, addr(0)={0, }, 20) = 0
getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
setsockopt(3, SOL_PACKET, PACKET_AUXDATA, [1], 4) = 0
getsockopt(3, SOL_PACKET, PACKET_HDRLEN, [28], [4]) = 0
setsockopt(3, SOL_PACKET, PACKET_VERSION, [1], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RESERVE, [4], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RX_RING, {block_size=131072, block_nr=31, frame_size=65600, frame_nr=31}, 16) = 0
mmap2(NULL, 4063232, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0xb714d000 //<<----------------這裡
write(2, "capture_short.cpp:42: Start capt"..., 47capture_short.cpp:42: Start capture on ��...) = 47
之後就是抓包-讀時間-輸出的迴圈:
poll([{fd=3, events=POLLIN}], 1, 1000) = 1 ([{fd=3, revents=POLLIN}])
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=834, ...}) = 0
write(2, "capture_short.cpp:46: 10:16:29,4"..., 49capture_short.cpp:46: 10:16:29,483605 us, len:79) = 49