linux核心網路通訊協定棧學習筆記:vlan

來源:互聯網
上載者:User

這篇是我臨時加的,本來不打算放在整個核心協議棧分析的系列裡的,但我現在覺得vlan還是蠻重要的,而且討論vlan源碼的文章很少,不知道我這篇算不算第一篇

vlan的代碼都在net/8021q/的核心目錄下,首先我們來看8021q模組 (net/8021q/vlan.c)

vlan_proto_init , vlan_cleanup_module 是模組的init/exit函數,我們來看vlan_proto_init,vlan_cleanup_module基本就是反過來做一遍

/*
 * Module init for the 802.1Q VLAN code.
 *
 * Registers the per-net device ops and the netdevice notifier, initialises
 * GVRP and netlink support, then installs the VLAN packet type and the
 * ioctl handler.
 *
 * Returns 0 on success; otherwise the negative error of the step that
 * failed, after undoing the steps that had already succeeded.
 */
static int __init vlan_proto_init(void)
{   
    int err;

    pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
    pr_info("All bugs added by %s\n", vlan_buggyright);
    
    err = register_pernet_gen_device(&vlan_net_id, &vlan_net_ops);
    if (err < 0)
        goto err0;
        
    err = register_netdevice_notifier(&vlan_notifier_block);
    if (err < 0)
        goto err2; 

register_pernet_gen_device, register_netdevice_notifier是網路裝置註冊的常規流程

    err = vlan_gvrp_init();
    if (err < 0)
        goto err3;

    err = vlan_netlink_init();
    if (err < 0)
        goto err4;

    /* The two calls below have no error check in this function. */
    dev_add_pack(&vlan_packet_type);

dev_add_pack,把802.1q當做另一種協議來處理
    vlan_ioctl_set(vlan_ioctl_handler);

把vlan_ioctl_handler註冊為vconfig命令的handler
    return 0;

/*
 * Error unwinding: the labels fall through, so jumping to errN undoes every
 * step that succeeded before the failing one, in reverse order.
 */
err4:
    vlan_gvrp_uninit();
err3:
    unregister_netdevice_notifier(&vlan_notifier_block);
err2:
    unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops);
err0:
    return err;
}

vconfig配置vlan介面的參數被封裝在vlan_ioctl_args中

/*
 * Argument block passed through the VLAN ioctl interface.
 *
 * cmd selects the operation. Which member of the union u is meaningful
 * presumably depends on cmd -- confirm against the ioctl handler.
 */
struct vlan_ioctl_args {
    int cmd;                        /* one of the vlan_ioctl_cmds enum values */
    char device1[24];

    union {
        char device2[24];
        int VID;
        unsigned int skb_priority;
        unsigned int name_type;
        unsigned int bind_type;
        unsigned int flag;          /* matches vlan_dev_info flags */
    } u;

    short vlan_qos;
};

vlan_ioctl_handler就是針對不同的vconfig的cmd參數有不同的行為,目前已知的cmd有:

SET_VLAN_INGRESS_PRIORITY_CMD
SET_VLAN_EGRESS_PRIORITY_CMD
SET_VLAN_FLAG_CMD
ADD_VLAN_CMD
DEL_VLAN_CMD
GET_VLAN_REALDEV_NAME_CMD
GET_VLAN_VID_CMD

對於添加vlan裝置而言,最重要的無非是register_vlan_device咯

先提下vlan group的概念,我的理解是同一個物理裝置上的vlan裝置屬於同一個vlan group,核心用全域雜湊表struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]儲存所有的vlan group,雜湊表的大小為32,以net_device的ifindex為雜湊值。

/*
 * Bookkeeping for the VLAN devices attached to one underlying device.
 * vlan_devices_arrays holds VLAN_GROUP_ARRAY_SPLIT_PARTS pointers, each to
 * an array of net_device pointers.
 */
struct vlan_group {
    /* The ethernet(like) device the vlan is attached to. */
    struct net_device *real_dev;
    unsigned int nr_vlans;
    /* linked list */
    struct hlist_node hlist;
    struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
    struct rcu_head rcu;
};

基於real_device的vlan_group可以有多個vlan_device,基於vlan_id,vlan_group把所有的vlan_device分在一個二維net_device數組裡,即vlan_devices_arrays。該二維數組是一個VLAN_GROUP_ARRAY_SPLIT_PARTS * VLAN_GROUP_ARRAY_PART_LEN的二維數組,可以從vlan_group_get_device看出來

/*
 * Look up the net_device stored for vlan_id in group vg.
 *
 * vlan_id / VLAN_GROUP_ARRAY_PART_LEN selects the sub-array and
 * vlan_id % VLAN_GROUP_ARRAY_PART_LEN selects the slot inside it.
 * Returns NULL when that sub-array pointer is NULL.
 */
static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
                               u16 vlan_id)
{
    struct net_device **part =
        vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];

    if (!part)
        return NULL;

    return part[vlan_id % VLAN_GROUP_ARRAY_PART_LEN];
}

下面來看register_vlan_device:

首先進行一系列的check,之後調用alloc_netdev建立vlan_device,這個函數我們之前的文章討論過。這裡建立的net_device之後會接一個線性空間,裡面是一個vlan_dev_info結構。alloc_netdev會調用vlan_setup,vlan_setup的代碼相當直觀:    

/*
 * Set up a freshly allocated VLAN net_device: apply ether_setup() first,
 * then adjust the private flags and install the VLAN ops tables and
 * destructor.
 */
void vlan_setup(struct net_device *dev)
{
    ether_setup(dev);

    /* Set IFF_802_1Q_VLAN and clear IFF_XMIT_DST_RELEASE in one step. */
    dev->priv_flags = (dev->priv_flags | IFF_802_1Q_VLAN) & ~IFF_XMIT_DST_RELEASE;
    netdev_extended(dev)->ext_priv_flags &= ~IFF_TX_SKB_SHARING;

    dev->netdev_ops = &vlan_netdev_ops;
    dev->ethtool_ops = &vlan_ethtool_ops;
    dev->destructor = free_netdev;
    dev->tx_queue_len = 0;

    /* Zero the broadcast address. */
    memset(dev->broadcast, 0, ETH_ALEN);
}

static const struct net_device_ops vlan_netdev_ops = {
    .ndo_change_mtu     = vlan_dev_change_mtu,
    .ndo_init       = vlan_dev_init,
    .ndo_uninit     = vlan_dev_uninit,
    .ndo_open       = vlan_dev_open,
    .ndo_stop       = vlan_dev_stop,
    .ndo_start_xmit =  vlan_dev_hard_start_xmit,
    .ndo_validate_addr  = eth_validate_addr,
    .ndo_set_mac_address    = vlan_dev_set_mac_address,
    .ndo_set_rx_mode    = vlan_dev_set_rx_mode,
    .ndo_set_multicast_list = vlan_dev_set_rx_mode,
    .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
    .ndo_do_ioctl       = vlan_dev_ioctl,
    .ndo_neigh_setup    = vlan_dev_neigh_setup,
    .ndo_get_stats      = vlan_dev_get_stats,

}

與vlan_netdev_ops對應的是vlan_netdev_accel_ops,如果網卡有vlan acceleration功能(比如由網卡硬體自動給報文加上vlan頭),dev->netdev_ops就會被設定為vlan_netdev_accel_ops

之後調用register_vlan_dev,該函數主要就是初始化對應的vlan_group->vlan_devices_arrays的數組成員,還有調用相應驅動的註冊代碼

/*
 * Register the VLAN device dev and record it in the vlan_group of its
 * underlying real device.
 *
 * If the real device has no group yet, one is allocated and
 * vlan_gvrp_init_applicant() is called for it. After register_netdevice()
 * succeeds the device is stored in the group under its vlan_id and the
 * real device's ndo_vlan_rx_* hooks are invoked when the matching feature
 * bits are set.
 *
 * Returns 0 on success, -ENOBUFS if the group cannot be allocated, or the
 * negative error of the failing step. A group allocated by this call is
 * torn down again on failure.
 */
int register_vlan_dev(struct net_device *dev)
{       
    struct vlan_dev_info *vlan = vlan_dev_info(dev);
    struct net_device *real_dev = vlan->real_dev;
    const struct net_device_ops *ops = real_dev->netdev_ops;
    u16 vlan_id = vlan->vlan_id;
    /* ngrp stays NULL unless this call allocates the group itself. */
    struct vlan_group *grp, *ngrp = NULL;
    int err;
    
    grp = __vlan_find_group(real_dev);
    if (!grp) {
        ngrp = grp = vlan_group_alloc(real_dev);
        if (!grp)
            return -ENOBUFS;
        err = vlan_gvrp_init_applicant(real_dev);
        if (err < 0)
            goto out_free_group;
    }
先拿到real_dev對應的vlan_group,如果沒有就調用vlan_group_alloc一個

    err = vlan_group_prealloc_vid(grp, vlan_id);
    if (err < 0)
        goto out_uninit_applicant;
vlan_group_prealloc_vid用來初始化vlan_group->vlan_devices_arrays對應的雜湊數組

    err = register_netdevice(dev);
    if (err < 0)
        goto out_uninit_applicant;
註冊網路裝置

    /* Account for reference in struct vlan_dev_info */
    dev_hold(real_dev);

    vlan_transfer_operstate(real_dev, dev);
    linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */

    /* So, got the sucker initialized, now lets place
     * it into our local structure.
     */
    vlan_group_set_device(grp, vlan_id, dev);
    grp->nr_vlans++;

    /* ndo_vlan_rx_register is only called for a group created by this call. */
    if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
        ops->ndo_vlan_rx_register(real_dev, ngrp);
    if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
        ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
調用驅動的ndo_vlan_rx_register, ndo_vlan_rx_add_vid初始化裝置

    return 0;

/* Both labels act only when ngrp is set, i.e. the group is new. */
out_uninit_applicant:
    if (ngrp)
        vlan_gvrp_uninit_applicant(real_dev);
out_free_group:
    if (ngrp) {
        hlist_del_rcu(&ngrp->hlist);
        /* Free the group, after all cpu's are done. */
        call_rcu(&ngrp->rcu, vlan_rcu_free);
    }
    return err;
}

下面來研究下vlan_netdev_ops的操作:

vlan_dev_change_mtu,設定mtu,對於vlan裝置而言mtu在vlan_dev_info->mtu中

vlan_dev_init,主要是設定dev->flags, dev->iflink, dev->state, dev->features, dev->dev_id, dev->gso_max_size,然後判斷真實裝置有沒有NETIF_F_HW_VLAN_TX,如果設定了NETIF_F_HW_VLAN_TX,說明網卡可以自動處理802.1q的vlan頭,因此上層無需考慮二層頭的tci空間,直接有dev->hard_header_len = real_dev->hard_header_len,否則需要有dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;同樣的根據有沒有NETIF_F_HW_VLAN_TX,設定的dev->netdev_ops也不同,但vlan_netdev_accel_ops和vlan_netdev_ops的唯一差別只是在發送函數ndo_start_xmit上

vlan_dev_open,很多是和net_device開啟重複的調用,代碼很清晰不多說了,最重要的就是調用netif_carrier_on;同樣的vlan_dev_stop最重要的就是調用netif_carrier_off

下面是vlan發送需要調用的兩個重要函數:vlan_dev_hard_start_xmit,以及vlan_dev_hwaccel_hard_start_xmit

vlan_dev_hard_start_xmit

static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
                        struct net_device *dev)
{
    int i = skb_get_queue_mapping(skb);
    struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
    struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
    unsigned int len;
    int ret;

    /* Handle non-VLAN frames if they are sent to us, for example by DHCP.
     *
     * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
     * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
     */
    if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
        vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
        unsigned int orig_headroom = skb_headroom(skb);
        u16 vlan_tci;
這個分支用來加上vlan頭,前提是要麼veth->h_vlan_proto != htons(ETH_P_8021Q),此時報文還沒有vlan頭;要麼vlan裝置打上了VLAN_FLAG_REORDER_HDR flag

        vlan_dev_info(dev)->cnt_encap_on_xmit++;
        vlan_tci = vlan_dev_info(dev)->vlan_id;
        vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
        skb = __vlan_put_tag(skb, vlan_tci);
        if (!skb) {
            txq->tx_dropped++;
            return NETDEV_TX_OK;
        }       
        if (orig_headroom < VLAN_HLEN)
            vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
    }

    skb->dev = vlan_dev_info(dev)->real_dev;

vlan_dev_info(dev)->real_dev才是真正發送的裝置
    len = skb->len;
    ret = dev_queue_xmit(skb);
調用dev_queue_xmit發送skb

    if (likely(ret == NET_XMIT_SUCCESS)) {
        txq->tx_packets++;
        txq->tx_bytes += len;
    } else
        txq->tx_dropped++;

    return NETDEV_TX_OK;
}

vlan_dev_hwaccel_hard_start_xmit

static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
                            struct net_device *dev)
{   
    int i = skb_get_queue_mapping(skb);
    struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
    u16 vlan_tci;
    unsigned int len;
    int ret;

    vlan_tci = vlan_dev_info(dev)->vlan_id;
    vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
    skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
這裡__vlan_hwaccel_put_tag只是在產生vlan_tci之後,簡單地把它放到skb->vlan_tci裡面,給報文添加vlan頭的工作交給網卡去做

                             
    skb->dev = vlan_dev_info(dev)->real_dev;
    len = skb->len; 
    ret = dev_queue_xmit(skb);
   
    if (likely(ret == NET_XMIT_SUCCESS)) {
        txq->tx_packets++;
        txq->tx_bytes += len;
    } else
        txq->tx_dropped++;

    return NETDEV_TX_OK;
}

對於接收報文而言,在裝置接收到之後,如果報文的協議類型是802.1q(ETH_P_8021Q),就會通過下面註冊的vlan_packet_type再調用vlan_skb_recv:

/* Packet type entry that binds ETH_P_8021Q to vlan_skb_recv. */
static struct packet_type vlan_packet_type __read_mostly = {
    /* VLAN receive method */
    .func = vlan_skb_recv,
    .type = cpu_to_be16(ETH_P_8021Q),
};

vlan_skb_recv代碼很直觀,不多說了

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.