libpcap steps (3) pcap記憶體映射分析

來源:互聯網
上載者:User

結論:在支援PACKET_MMAP的Linux核心上,libpcap(1.0.0及之後的版本)預設使用mmap方式讀取包,因此一般不需要過多地考慮抓包的效率問題,函式庫內部已經處理得很好了。

ref: http://www.diybl.com/course/3_program/c++/cppxl/20100408/202002_3.html

在libpcap-1.0.0中引入了zerocopy BPF,那麼這個zerocopy BPF又是什麼呢?答案是PACKET_MMAP。

查看兩個版本libpcap編譯的程式的strace的差異,除了poll之外,對於setsockopt還有一個差異:
setsockopt(4,
SOL_PACKET, PACKET_RX_RING, "\0@\0\0\376\0\0\0@ \0\0\376\0\0\0", 16) = 0
mmap2(NULL,
4161536, PROT_READ|PROT_WRITE, MAP_SHARED, 4, 0) = 0xb7a54000
從字面上來猜猜看:setsockopt設定socket的PACKET_RX_RING選項,至於這個選項是做什麼的,只能夠猜測是一個接收環形緩衝區相關的東西,具體其他的要看其他的參數了。
mmap2將一段核心空間地址映射到使用者空間,這樣使用者空間就可以直接操作核心緩衝區中的資料了,至於核心緩衝區中的資料如何來的,就是所謂的zerocopy BPF底層實現的了。

這個zerocopy叫做PACKET_MMAP,之前也叫做PACKET_RING,查看kernel的config檔案的話是:
CONFIG_PACKET_MMAP=y【viktor:3.1版本核心裡面沒有這個選項,移除該選項的核心提交說明如下。】

packet: Kill CONFIG_PACKET_MMAP.        Early on this was an experimental facility that few    people other than Alexey Kuznetsov played with.        Now it's a pretty fundamental thing and as people add    more features to AF_PACKET sockets this config options    creates ifdef spaghetti.        So kill it off.

以前的時候有一個專門的PACKET_MMAP版本的libpcap,但是在libpcap-1.0.0中已經增加了部分平台的PACKET_MMAP/PACKET_RING支援。


以上為轉載。

源碼分析:libpcap 1.3.0-pre-git

pcap_open_live經過若干層調用後,會進入pcap-linux.c: pcap_activate_linux

static intpcap_activate_linux(pcap_t *handle){        const char      *device;        int             status = 0;//viktor: 首先設定成普通的檔案讀取方式。        device = handle->opt.source;        handle->inject_op = pcap_inject_linux;        handle->setfilter_op = pcap_setfilter_linux;        handle->setdirection_op = pcap_setdirection_linux;        handle->set_datalink_op = NULL; /* can't change data link type */        handle->getnonblock_op = pcap_getnonblock_fd;        handle->setnonblock_op = pcap_setnonblock_fd;        handle->cleanup_op = pcap_cleanup_linux;        handle->read_op = pcap_read_linux;        handle->stats_op = pcap_stats_linux;......        /*         * viktor:activate_new啟用 PF_PACKET 串連。老版本核心是 SOCK_PACKET,維持相容性.把判錯代碼都刪掉了         */        status = activate_new(handle);        if (status < 0) {                goto fail;        }        if (status == 1) {                /*                 * Success.                 * Try to use memory-mapped access.                 */                switch (activate_mmap(handle, &status)) {            //  <<-----------------------------這裡                case 1:                        /*                         * We succeeded.  status has been                         * set to the status to return,                         * which might be 0, or might be                         * a PCAP_WARNING_ value.                         */                        return status;                case 0:                        /*                         * Kernel doesn't support it - just continue                         * with non-memory-mapped access.                         */                        break;......

pcap-linux.c: activate_mmap

#ifdef HAVE_PACKET_RING/* * viktor: 成功返回1.不支援返回0.出錯返回-1 以下代碼把判錯的部分都刪掉了。 */static intactivate_mmap(pcap_t *handle, int *status){        int ret;        handle->md.oneshot_buffer = malloc(handle->snapshot);        if (handle->opt.buffer_size == 0) {                /* by default request 2M for the ring buffer */                handle->opt.buffer_size = 2*1024*1024;        }        ret = prepare_tpacket_socket(handle);        ret = create_ring(handle, status);                         //  <<-----------------------------這裡        /*         * Success.  把操作都改成mmap操作......         */        handle->read_op = pcap_read_linux_mmap;        handle->cleanup_op = pcap_cleanup_linux_mmap;        handle->setfilter_op = pcap_setfilter_linux_mmap;        handle->setnonblock_op = pcap_setnonblock_mmap;        handle->getnonblock_op = pcap_getnonblock_mmap;        handle->oneshot_callback = pcap_oneshot_mmap;        handle->selectable_fd = handle->fd;        return 1;}#else /* HAVE_PACKET_RING */static intactivate_mmap(pcap_t *handle _U_, int *status _U_){        return 0;}#endif /* HAVE_PACKET_RING */

pcap-linux.c: create_ring

/* * Attempt to set up memory-mapped access. *...... */static intcreate_ring(pcap_t *handle, int *status){        unsigned i, j, frames_per_block;        struct tpacket_req req;        socklen_t len;        unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;        unsigned int frame_size;        /*         * Start out assuming no warnings or errors.         */        *status = 0;.....................Line #3471        req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);        req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size;        /* compute the minumum block size that will handle this frame.          * The block has to be page size aligned.          * The max block size allowed by the kernel is arch-dependent and          * it's not explicitly checked here. */        req.tp_block_size = getpagesize();        while (req.tp_block_size < req.tp_frame_size)                req.tp_block_size <<= 1;        frames_per_block = req.tp_block_size/req.tp_frame_size;.....................Line #3588
retry:        req.tp_block_nr = req.tp_frame_nr / frames_per_block;        /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */        req.tp_frame_nr = req.tp_block_nr * frames_per_block;        if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,              //<<------------------------這裡                                        (void *) &req, sizeof(req))) {                if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {                        /*                         * Memory failure; try to reduce the requested ring                         * size.                         *                         * We used to reduce this by half -- do 5% instead.                         * That may result in more iterations and a longer                         * startup, but the user will be much happier with                         * the resulting buffer size.                         */                        if (req.tp_frame_nr < 20)                                req.tp_frame_nr -= 1;                        else                                req.tp_frame_nr -= req.tp_frame_nr/20;                        goto retry;                }                if (errno == ENOPROTOOPT) {                        /*                         * We don't have ring buffer support in this kernel.                         
*/                        return 0;                }                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,                    "can't create rx ring on packet socket: %s",                    pcap_strerror(errno));                *status = PCAP_ERROR;                return -1;        }        /* memory map the rx ring */        handle->md.mmapbuflen = req.tp_block_nr * req.tp_block_size;        handle->md.mmapbuf = mmap(0, handle->md.mmapbuflen,              //  <<-----------------------------這裡            PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0);        if (handle->md.mmapbuf == MAP_FAILED) {                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,                    "can't mmap rx ring: %s", pcap_strerror(errno));                /* clear the allocated ring on error*/                destroy_ring(handle);                *status = PCAP_ERROR;                return -1;        }        /* allocate a ring for each frame header pointer*/        handle->cc = req.tp_frame_nr;        handle->buffer = malloc(handle->cc * sizeof(union thdr *));        if (!handle->buffer) {                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,                    "can't allocate ring of frame headers: %s",                    pcap_strerror(errno));                destroy_ring(handle);                *status = PCAP_ERROR;                return -1;        }        /* fill the header ring with proper frame ptr*/        handle->offset = 0;        for (i=0; i<req.tp_block_nr; ++i) {                void *base = &handle->md.mmapbuf[i*req.tp_block_size];                for (j=0; j<frames_per_block; ++j, ++handle->offset) {                        RING_GET_FRAME(handle) = base;                        base += req.tp_frame_size;                }        }        handle->bufsize = req.tp_frame_size;        handle->offset = 0;        return 1;}

我自己的strace結果:

bind(3, {sa_family=AF_PACKET, proto=0x03, if3, pkttype=PACKET_HOST, addr(0)={0, }, 20) = 0
getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
setsockopt(3, SOL_PACKET, PACKET_AUXDATA, [1], 4) = 0
getsockopt(3, SOL_PACKET, PACKET_HDRLEN, [28], [4]) = 0
setsockopt(3, SOL_PACKET, PACKET_VERSION, [1], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RESERVE, [4], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RX_RING, {block_size=131072, block_nr=31, frame_size=65600, frame_nr=31}, 16) = 0
mmap2(NULL, 4063232, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0xb714d000             //<<----------------這裡
write(2, "capture_short.cpp:42: Start capt"..., 47capture_short.cpp:42: Start capture on ��...) = 47

之後就是抓包-讀時間-輸出的迴圈:

poll([{fd=3, events=POLLIN}], 1, 1000)  = 1 ([{fd=3, revents=POLLIN}])
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=834, ...}) = 0
write(2, "capture_short.cpp:46: 10:16:29,4"..., 49capture_short.cpp:46: 10:16:29,483605 us, len:79) = 49

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.