這篇是我臨時加的,本來不打算放在整個核心協議棧分析的系列裡的,但我現在覺得vlan還是蠻重要的,而且討論vlan源碼的文章很少,不知道我這篇算不算第一篇
vlan的代碼都在核心的net/8021q/目錄下,首先我們來看8021q模組 (net/8021q/vlan.c)
vlan_proto_init , vlan_cleanup_module 是模組的init/exit函數,我們來看vlan_proto_init,vlan_cleanup_module基本就是反過來做一遍
/*
 * Module init function for the 802.1Q VLAN code.
 *
 * Runs the registration steps in order: pernet device, netdevice notifier,
 * vlan_gvrp_init(), vlan_netlink_init(), then the packet type and the ioctl
 * hook.  If a checked step fails, the err* labels undo the steps that had
 * already succeeded, in reverse order.
 *
 * Returns 0 on success, or the negative error code of the failing step.
 */
static int __init vlan_proto_init(void)
{
int err;
pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
pr_info("All bugs added by %s\n", vlan_buggyright);
/* Nothing has been registered yet, so a failure here has nothing to undo. */
err = register_pernet_gen_device(&vlan_net_id, &vlan_net_ops);
if (err < 0)
goto err0;
err = register_netdevice_notifier(&vlan_notifier_block);
if (err < 0)
goto err2;
register_pernet_gen_device, register_netdevice_notifier是網路裝置註冊的常規流程
err = vlan_gvrp_init();
if (err < 0)
goto err3;
err = vlan_netlink_init();
if (err < 0)
goto err4;
/* The last two steps are not checked for errors in this function. */
dev_add_pack(&vlan_packet_type);
dev_add_pack,把802.1q當做另一種協議來處理
vlan_ioctl_set(vlan_ioctl_handler);
把vlan_ioctl_handler註冊為vconfig命令的handler
return 0;
/* vlan_netlink_init() failed: undo vlan_gvrp_init(). */
err4:
vlan_gvrp_uninit();
/* vlan_gvrp_init() failed: undo the notifier registration. */
err3:
unregister_netdevice_notifier(&vlan_notifier_block);
/* Notifier registration failed: undo the pernet device registration. */
err2:
unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops);
err0:
return err;
}
vconfig配置vlan介面的參數被封裝在vlan_ioctl_args中
/*
 * Argument block for the VLAN ioctl; cmd selects the operation.
 * NOTE(review): which member of the union is meaningful presumably depends
 * on cmd -- confirm against vlan_ioctl_handler.
 */
struct vlan_ioctl_args {
int cmd; /* Should be one of the vlan_ioctl_cmds enum above. */
char device1[24]; /* presumably a device name -- TODO confirm */
union {
char device2[24]; /* presumably a second device name -- TODO confirm */
int VID;
unsigned int skb_priority;
unsigned int name_type;
unsigned int bind_type;
unsigned int flag; /* Matches vlan_dev_info flags */
} u;
short vlan_qos;
};
vlan_ioctl_handler就是針對不同的vconfig的cmd參數有不同的行為,目前已知的cmd有:
SET_VLAN_INGRESS_PRIORITY_CMD
SET_VLAN_EGRESS_PRIORITY_CMD
SET_VLAN_FLAG_CMD
ADD_VLAN_CMD
DEL_VLAN_CMD
GET_VLAN_REALDEV_NAME_CMD
GET_VLAN_VID_CMD
對於添加vlan裝置而言,最重要的無非是register_vlan_device咯
先提下vlan group的概念,我的理解是同一個物理裝置上的vlan裝置屬於同一個vlan group,核心用全域雜湊表struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]儲存所有的vlan group,雜湊表的大小為32,以net_device的ifindex為雜湊鍵(key)。
/*
 * Per-real-device container for the VLAN devices stacked on top of it.
 */
struct vlan_group {
struct net_device *real_dev; /* The ethernet(like) device
* the vlan is attached to.
*/
/* Incremented by register_vlan_dev() each time a device is stored. */
unsigned int nr_vlans;
struct hlist_node hlist; /* linked list */
/*
 * Two-level table of VLAN devices: the first index is
 * vlan_id / VLAN_GROUP_ARRAY_PART_LEN, the second is
 * vlan_id % VLAN_GROUP_ARRAY_PART_LEN (see vlan_group_get_device()).
 * A first-level entry may be NULL.
 */
struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
/* Passed to call_rcu() when the group is freed. */
struct rcu_head rcu;
};
基於real_device的vlan_group可以有多個vlan_device,vlan_group依照vlan_id把所有的vlan_device放在一個二維的net_device指標數組裡,即vlan_devices_arrays。該二維數組的大小是VLAN_GROUP_ARRAY_SPLIT_PARTS * VLAN_GROUP_ARRAY_PART_LEN,可以從vlan_group_get_device看出來
/*
 * Look up the VLAN device recorded for vlan_id in group vg.
 *
 * The id is split into a part index (vlan_id / VLAN_GROUP_ARRAY_PART_LEN)
 * and a slot within that part (vlan_id % VLAN_GROUP_ARRAY_PART_LEN).
 * Returns NULL when the pointer for that part is NULL; otherwise returns
 * whatever is stored in the slot.
 */
static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
						       u16 vlan_id)
{
	struct net_device **part;

	part = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
	if (!part)
		return NULL;

	return part[vlan_id % VLAN_GROUP_ARRAY_PART_LEN];
}
下面來看register_vlan_device:
首先進行一系列的check,之後調用alloc_netdev建立vlan_device,這個函數我們之前的文章討論過。這裡建立的net_device之後會接一個線性空間,裡面是一個vlan_dev_info結構。alloc_netdev會調用vlan_setup,vlan_setup的代碼相當直觀:
/*
 * Setup callback for a newly allocated VLAN net_device.
 *
 * ether_setup() runs first; every assignment below is applied on top of
 * whatever it left in *dev.
 */
void vlan_setup(struct net_device *dev)
{
	ether_setup(dev);

	/* Set IFF_802_1Q_VLAN and clear IFF_XMIT_DST_RELEASE in one update. */
	dev->priv_flags = (dev->priv_flags | IFF_802_1Q_VLAN) &
			  ~IFF_XMIT_DST_RELEASE;
	netdev_extended(dev)->ext_priv_flags &= ~IFF_TX_SKB_SHARING;

	/* Operation tables and destructor for the VLAN device. */
	dev->netdev_ops = &vlan_netdev_ops;
	dev->ethtool_ops = &vlan_ethtool_ops;
	dev->destructor = free_netdev;

	dev->tx_queue_len = 0;
	memset(dev->broadcast, 0, ETH_ALEN);
}
/*
 * net_device_ops table that vlan_setup() installs on a VLAN device
 * (dev->netdev_ops = &vlan_netdev_ops).
 *
 * .ndo_set_rx_mode and .ndo_set_multicast_list both point at
 * vlan_dev_set_rx_mode.
 */
static const struct net_device_ops vlan_netdev_ops = {
	.ndo_change_mtu = vlan_dev_change_mtu,
	.ndo_init = vlan_dev_init,
	.ndo_uninit = vlan_dev_uninit,
	.ndo_open = vlan_dev_open,
	.ndo_stop = vlan_dev_stop,
	.ndo_start_xmit = vlan_dev_hard_start_xmit,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_set_mac_address = vlan_dev_set_mac_address,
	.ndo_set_rx_mode = vlan_dev_set_rx_mode,
	.ndo_set_multicast_list = vlan_dev_set_rx_mode,
	.ndo_change_rx_flags = vlan_dev_change_rx_flags,
	.ndo_do_ioctl = vlan_dev_ioctl,
	.ndo_neigh_setup = vlan_dev_neigh_setup,
	.ndo_get_stats = vlan_dev_get_stats,
};	/* the terminating ';' was missing in the original excerpt */
與vlan_netdev_ops對應的是vlan_netdev_accel_ops,如果網卡有vlan acceleration功能(比如自動計算vlan校驗和等),dev->netdev_ops就會被初始化為vlan_netdev_accel_ops
之後調用register_vlan_dev,該函數主要就是初始化對應的vlan_group->vlan_devices_arrays的數組成員,還有調用相應驅動的註冊代碼
/*
 * Register the VLAN device dev on top of its real device.
 *
 * Finds the vlan_group of the real device (allocating one if none exists),
 * prepares the group's table for this vlan_id, registers the net_device,
 * records it in the group and finally calls the real device's VLAN
 * callbacks when the corresponding feature bits are set.
 *
 * Returns 0 on success, -ENOBUFS if a new group cannot be allocated, or
 * the negative error code of the step that failed.
 */
int register_vlan_dev(struct net_device *dev)
{
struct vlan_dev_info *vlan = vlan_dev_info(dev);
struct net_device *real_dev = vlan->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
u16 vlan_id = vlan->vlan_id;
/*
 * ngrp stays NULL unless the group is created by this call; the error
 * paths and the ndo_vlan_rx_register() call below key off it.
 */
struct vlan_group *grp, *ngrp = NULL;
int err;
grp = __vlan_find_group(real_dev);
if (!grp) {
ngrp = grp = vlan_group_alloc(real_dev);
if (!grp)
return -ENOBUFS;
err = vlan_gvrp_init_applicant(real_dev);
if (err < 0)
goto out_free_group;
}
先拿到real_dev對應的vlan_group,如果沒有就調用vlan_group_alloc一個
err = vlan_group_prealloc_vid(grp, vlan_id);
if (err < 0)
goto out_uninit_applicant;
vlan_group_prealloc_vid用來初始化vlan_group->vlan_devices_arrays對應的雜湊數組
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_applicant;
註冊網路裝置
/* Account for reference in struct vlan_dev_info */
dev_hold(real_dev);
vlan_transfer_operstate(real_dev, dev);
linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
/* So, got the sucker initialized, now lets place
* it into our local structure.
*/
vlan_group_set_device(grp, vlan_id, dev);
grp->nr_vlans++;
/* Only a group created by this call is handed to ndo_vlan_rx_register(). */
if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
ops->ndo_vlan_rx_register(real_dev, ngrp);
if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
調用驅動的ndo_vlan_rx_register, ndo_vlan_rx_add_vid初始化裝置
return 0;
/* Undo the applicant init only if this call performed it (ngrp set). */
out_uninit_applicant:
if (ngrp)
vlan_gvrp_uninit_applicant(real_dev);
/*
 * Release a group created by this call; a pre-existing group is left alone.
 * NOTE(review): hlist_del_rcu() implies vlan_group_alloc() linked the group
 * into a list -- confirm.
 */
out_free_group:
if (ngrp) {
hlist_del_rcu(&ngrp->hlist);
/* Free the group, after all cpu's are done. */
call_rcu(&ngrp->rcu, vlan_rcu_free);
}
return err;
}
下面來研究下vlan_netdev_ops的操作:
vlan_dev_change_mtu,設定mtu,對於vlan裝置而言mtu在vlan_dev_info->mtu中
vlan_dev_init,主要是設定dev->flags, dev->iflink, dev->state, dev->features, dev->dev_id, dev->gso_max_size,然後判斷真實裝置有沒有NETIF_F_HW_VLAN_TX,如果設定了NETIF_F_HW_VLAN_TX,說明網卡可以自動處理802.1q的vlan頭,因此上層無需考慮二層頭的tci空間,直接有dev->hard_header_len = real_dev->hard_header_len,否則需要有dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;同樣的根據有沒有NETIF_F_HW_VLAN_TX,設定的dev->netdev_ops也不同,但vlan_netdev_accel_ops和vlan_netdev_ops的唯一差別只是在發送函數ndo_start_xmit上
vlan_dev_open,很多是和net_device開啟重複的調用,代碼很清晰不多說了,最重要的就是調用netif_carrier_on;同樣的vlan_dev_stop最重要的就是調用netif_carrier_off
下面是vlan發送需要調用的兩個重要函數:vlan_dev_hard_start_xmit,以及vlan_dev_hwaccel_hard_start_xmit
vlan_dev_hard_start_xmit
static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
int i = skb_get_queue_mapping(skb);
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
unsigned int len;
int ret;
/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
*
* NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
unsigned int orig_headroom = skb_headroom(skb);
u16 vlan_tci;
這個分支用來加上vlan頭,前提是要麼veth->h_vlan_proto != htons(ETH_P_8021Q)(即0x8100),此時沒有vlan頭;要麼vlan裝置打上了REORDER flag(VLAN_FLAG_REORDER_HDR)
vlan_dev_info(dev)->cnt_encap_on_xmit++;
vlan_tci = vlan_dev_info(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_put_tag(skb, vlan_tci);
if (!skb) {
txq->tx_dropped++;
return NETDEV_TX_OK;
}
if (orig_headroom < VLAN_HLEN)
vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
}
skb->dev = vlan_dev_info(dev)->real_dev;
vlan_dev_info(dev)->real_dev才是真正發送的裝置
len = skb->len;
ret = dev_queue_xmit(skb);
調用dev_queue_xmit發送skb
if (likely(ret == NET_XMIT_SUCCESS)) {
txq->tx_packets++;
txq->tx_bytes += len;
} else
txq->tx_dropped++;
return NETDEV_TX_OK;
}
vlan_dev_hwaccel_hard_start_xmit
static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
int i = skb_get_queue_mapping(skb);
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
u16 vlan_tci;
unsigned int len;
int ret;
vlan_tci = vlan_dev_info(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
這裡__vlan_hwaccel_put_tag只是把上面算好的vlan_tci簡單放到skb->vlan_tci裡面,給報文添加vlan頭的工作交給網卡去做
skb->dev = vlan_dev_info(dev)->real_dev;
len = skb->len;
ret = dev_queue_xmit(skb);
if (likely(ret == NET_XMIT_SUCCESS)) {
txq->tx_packets++;
txq->tx_bytes += len;
} else
txq->tx_dropped++;
return NETDEV_TX_OK;
}
對於接收報文而言,在裝置接收到之後,如果報文的協議類型是ETH_P_8021Q(也就是帶有vlan頭),會再調用vlan_packet_type中註冊的vlan_skb_recv:
/*
 * Packet-type entry for protocol ETH_P_8021Q; vlan_proto_init() registers
 * it with dev_add_pack(), and vlan_skb_recv is its receive function.
 */
static struct packet_type vlan_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_8021Q),
.func = vlan_skb_recv, /* VLAN receive method */
};
vlan_skb_recv代碼很直觀,不多說了