Packet Processing Flow in the Linux Kernel Bridge

Source: Internet
Author: User
This article describes how the Linux kernel bridge processes data packets (category: general Linux technology, Linux programming and kernel information). A detailed description follows.

1. Preface

This article briefly introduces how data packets are handled by the bridge code in the Linux network protocol stack after they enter the NIC, and describes how the netfilter hook points are invoked along the way. Detailed handling of specific parts will be described in subsequent articles.

The following kernel code version is 2.6.19.2.

2. Function call flow
Bridge entry point handle_bridge ()
/* Net/core/dev. c */

Int netif_receive_skb (struct sk_buff * skb)
{
......
If (handle_bridge (& skb, & pt_prev, & ret, orig_dev ))
Goto out;
......
}

Bridge: br_handle_frame_hook ()

Static _ inline _ int handle_bridge (struct sk_buff ** pskb,
Struct packet_type ** pt_prev, int * ret,
Struct net_device * orig_dev)
{
Struct net_bridge_port * port;
If (* pskb)-> pkt_type = PACKET_LOOPBACK |
(Port = rcu_dereference (* pskb)-> dev-> br_port) = NULL)
Return 0;
If (* pt_prev ){
* Ret = deliver_skb (* pskb, * pt_prev, orig_dev );
* Pt_prev = NULL;
}

Return br_handle_frame_hook (port, pskb );
}

The actual implementation behind br_handle_frame_hook:
/* Net/bridge/br. c */
Static int _ init br_init (void)
{
......
Br_handle_frame_hook = br_handle_frame;
......
}

br_handle_frame: PREROUTING point of PF_BRIDGE
/* Net/bridge/br_input.c */
Int br_handle_frame (struct net_bridge_port * p, struct sk_buff ** pskb)
{
Struct sk_buff * skb = * pskb;
Const unsigned char * dest = eth_hdr (skb)-> h_dest;
If (! Is_valid_ether_addr (eth_hdr (skb)-> h_source ))
Goto err;
If (unlikely (is_link_local (dest ))){
// Its package enters the INPUT Point of PF_BEIDGE. Generally, the number of packets processed is small.
Skb-> pkt_type = PACKET_HOST;
// Normally, 1 is returned, and 1 is returned, indicating that the bridge module has full permission to process the package.
Return NF_HOOK (PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb-> dev,
NULL, br_handle_local_finish )! = 0;
}
If (p-> state = BR_STATE_FORWARDING | p-> state = BR_STATE_LEARNING ){
// The br_should_route_hook function is not defined.
If (br_should_route_hook ){
If (br_should_route_hook (pskb ))
Return 0;
Skb = * pskb;
Dest = eth_hdr (skb)-> h_dest;
}
If (! Compare_ether_addr (p-> br-> dev-> dev_addr, dest ))
Skb-> pkt_type = PACKET_HOST;
// Enter br_handle_frame_finish after the prerouting of PF_BRIDGE is processed.
NF_HOOK (PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb-> dev, NULL,
Br_handle_frame_finish );
// After processing, 1 is always returned, indicating that no other protocol families are processed. The data packet has been completely processed by bridge.
Return 1;
}
Err:
Kfree_skb (skb );
// After processing, 1 is always returned, indicating that no other protocol families are processed. The data packet has been completely processed by bridge.
Return 1;
}
br_handle_frame_finish forwards the frame through the bridge:
/* Note: already called with rcu_read_lock (preempt_disabled )*/
Int br_handle_frame_finish (struct sk_buff * skb)
{
Const unsigned char * dest = eth_hdr (skb)-> h_dest;
Struct net_bridge_port * p = rcu_dereference (skb-> dev-> br_port );
Struct net_bridge * br;
Struct net_bridge_fdb_entry * dst;
Int passedup = 0;
If (! P | p-> state = BR_STATE_DISABLED)
Goto drop;
/* Insert into forwarding database after filtering to avoid spoofing */
Br = p-> br;
Br_fdb_update (br, p, eth_hdr (skb)-> h_source );
If (p-> state = BR_STATE_LEARNING)
Goto drop;
If (br-> dev-> flags & IFF_PROMISC ){
Struct sk_buff * skb2;
Skb2 = skb_clone (skb, GFP_ATOMIC );
If (skb2! = NULL ){
Passedup = 1;
Br_pass_frame_up (br, skb2 );
}
}
If (is_multicast_ether_addr (dest )){
// Multicast forwarding, also called Broadcast Processing
Br-> statistics. multicast ++;
Br_flood_forward (br, skb ,! Passedup );
If (! Passedup)
Br_pass_frame_up (br, skb );
Goto out;
}
// Find the target exit based on the target MAC address
Dst = _ br_fdb_get (br, dest );
If (dst! = NULL & dst-> is_local ){
If (! Passedup)
Br_pass_frame_up (br, skb );
Else
Kfree_skb (skb );
Goto out;
}
If (dst! = NULL ){
// Unicast forwarding
Br_forward (dst-> dst, skb );
Goto out;
}
// Broadcast forwarding
Br_flood_forward (br, skb, 0 );
Out:
Return 0;
Drop:
Kfree_skb (skb );
Goto out;
}
Broadcast/multicast forwarding: br_flood_forward/br_flood
/* Called under bridge lock */
Void br_flood_forward (struct net_bridge * br, struct sk_buff * skb, int clone)
{
Br_flood (br, skb, clone, _ br_forward );
}
/* Called under bridge lock */
Static void br_flood (struct net_bridge * br, struct sk_buff * skb, int clone,
Void (* _ packet_hook) (const struct net_bridge_port * p,
Struct sk_buff * skb ))
{
Struct net_bridge_port * p;
Struct net_bridge_port * prev;
If (clone ){
Struct sk_buff * skb2;
If (skb2 = skb_clone (skb, GFP_ATOMIC) = NULL ){
Br-> statistics. tx_dropped ++;
Return;
}
Skb = skb2;
}
Prev = NULL;
List_for_each_entry_rcu (p, & br-> port_list, list ){
If (should_deliver (p, skb )){
If (prev! = NULL ){
Struct sk_buff * skb2;
If (skb2 = skb_clone (skb, GFP_ATOMIC) = NULL ){
Br-> statistics. tx_dropped ++;
Kfree_skb (skb );
Return;
}
// Here is actually _ br_forward
_ Packet_hook (prev, skb2 );
}
Prev = p;
}
}
If (prev! = NULL ){
// Here is actually _ br_forward
_ Packet_hook (prev, skb );
Return;
}
Kfree_skb (skb );
}

Unicast forwarding: br_forward
/* net/bridge/br_forward.c */

/*
 * br_forward - forward a frame to a single port.
 * @to:  destination port
 * @skb: the frame; freed here if should_deliver() rejects it
 *
 * called with rcu_read_lock
 */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	if (should_deliver(to, skb)) {
		/* Same __br_forward() that the flood path uses. */
		__br_forward(to, skb);
		return;
	}

	kfree_skb(skb);
}
FORWARD point:
Static void _ br_forward (const struct net_bridge_port * to, struct sk_buff * skb)
{
Struct net_device * indev;
Indev = skb-> dev;
Skb-> dev = to-> dev;
Skb-> ip_summed = CHECKSUM_NONE;
// Enter the forward hook of PF_BRIDGE, and then enter br_forward_finish ()
NF_HOOK (PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb-> dev,
Br_forward_finish );
}

POSTROUTING point:
// Directly go to the POSTROUTING point after processing from the FORWARD point
Int br_forward_finish (struct sk_buff * skb)
{
// Enter the postrouting hook of PF_BRIDGE, and then enter br_dev_queue_push_xmit ()
Return NF_HOOK (PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb-> dev,
Br_dev_queue_push_xmit );
}
Packet sending:
Int br_dev_queue_push_xmit (struct sk_buff * skb)
{
/* Drop mtu oversized packets into T gso */
If (packet_length (skb)> skb-> dev-> mtu &&! Skb_is_gso (skb ))
Kfree_skb (skb );
Else {
/* Ip_refrag CILS ip_fragment, doesn't copy the MAC header .*/
If (nf_bridge_maybe_copy_header (skb ))
Kfree_skb (skb );
Else {
Skb_push (skb, ETH_HLEN );
// Call the hard_start_xmit () function of the dev device.
Dev_queue_xmit (skb );
}
}
Return 0;
}
The hard_start_xmit() function of the bridge device is set up as follows:
/* Net/bridge/br_device.c */
Void br_dev_setup (struct net_device * dev)
{
......
Dev-> hard_start_xmit = br_dev_xmit;
......
}
/* Net device transmit always called with no BH (preempt_disabled )*/
Int br_dev_xmit (struct sk_buff * skb, struct net_device * dev)
{
Struct net_bridge * br = netdev_priv (dev );
Const unsigned char * dest = skb-> data;
Struct net_bridge_fdb_entry * dst;
Br-> statistics. tx_packets ++;
Br-> statistics. tx_bytes + = skb-> len;
Skb-> mac. raw = skb-> data;
Skb_pull (skb, ETH_HLEN );
If (dest [0] & 1)
// Multicast Transmission
Br_flood_deliver (br, skb, 0 );
Else if (dst = _ br_fdb_get (br, dest ))! = NULL)
// Unicast Transmission
Br_deliver (dst-> dst, skb );
Else
// Broadcast Transmission
Br_flood_deliver (br, skb, 0 );
// These sending functions will eventually call the _ br_deliver () function.
Return 0;
}

/* Net/bridge/br_forward.c */
Static void _ br_deliver (const struct net_bridge_port * to, struct sk_buff * skb)
{
Skb-> dev = to-> dev;
// OUTPUT point of PF_BRIDGE
NF_HOOK (PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb-> dev,
Br_forward_finish );
}

Summary: The hooks in PF_BRIDGE are different from those in PF_INET. They are shown in the following figure:

PREROUTING --+-- FORWARD ----- POSTROUTING ------+---- OUTPUT
             |
             |
           INPUT

3. PF_BRIDGE hook points

The following hook functions are registered in net/bridge/br_netfilter.c. Note that most of them are attached to hook points of the PF_BRIDGE protocol family.
/* net/bridge/br_netfilter.c */

/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * ip_refrag() can return NF_STOLEN. */
static struct nf_hook_ops br_nf_ops[] = {
	/* Hooks attached to PF_BRIDGE */

	/* PREROUTING point */
	{ .hook = br_nf_pre_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_PRE_ROUTING,
	  .priority = NF_BR_PRI_BRNF, },
	/* INPUT point */
	{ .hook = br_nf_local_in,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_IN,
	  .priority = NF_BR_PRI_BRNF, },
	/* FORWARD point (IP); priority value one below the ARP hook's */
	{ .hook = br_nf_forward_ip,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF - 1, },
	/* FORWARD point (ARP) */
	{ .hook = br_nf_forward_arp,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF, },
	/* OUTPUT point */
	{ .hook = br_nf_local_out,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_OUT,
	  .priority = NF_BR_PRI_FIRST, },
	/* POSTROUTING point */
	{ .hook = br_nf_post_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_POST_ROUTING,
	  .priority = NF_BR_PRI_LAST, },

	/*
	 * Hooks attached to PF_INET/PF_INET6. According to the article these
	 * do not modify packets; they only short-circuit IP-level hook
	 * processing for traffic that crosses the bridge.
	 */
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_PRE_ROUTING,
	  .priority = NF_IP_PRI_FIRST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_PRE_ROUTING,
	  .priority = NF_IP6_PRI_FIRST, },
	{ .hook = ip_sabotage_out,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_FORWARD,
	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
	{ .hook = ip_sabotage_out,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_FORWARD,
	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
	{ .hook = ip_sabotage_out,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_LOCAL_OUT,
	  .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
	{ .hook = ip_sabotage_out,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_LOCAL_OUT,
	  .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
	{ .hook = ip_sabotage_out,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_POST_ROUTING,
	  .priority = NF_IP_PRI_FIRST, },
	{ .hook = ip_sabotage_out,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_POST_ROUTING,
	  .priority = NF_IP6_PRI_FIRST, },
};

// PRROUTING point processing function of PF_BRIDGE
Static unsigned int br_nf_pre_routing (unsigned int hook, struct sk_buff ** pskb,
Const struct net_device * in,
Const struct net_device * out,
Int (* okfn) (struct sk_buff *))
{
......
// Continue to call the hook processing of the prerouting point of the PF_INET family.
NF_HOOK (PF_INET, NF_IP_PRE_ROUTING, skb, skb-> dev, NULL,
Br_nf_pre_routing_finish );
Return NF_STOLEN;
Inhdr_error:
// IP_INC_STATS_BH (IpInHdrErrors );
Out:
Return NF_DROP;
}

// FORWARD point processing of PF_BRIDGE
Static unsigned int br_nf_forward_ip (unsigned int hook, struct sk_buff ** pskb,
Const struct net_device * in,
Const struct net_device * out,
Int (* okfn) (struct sk_buff *))
{
......
// Call the FORWARD point hook of the PF_INET/PF_INET6 family.
NF_HOOK (pf, NF_IP_FORWARD, skb, bridge_parent (in), parent,
Br_nf_forward_finish );
Return NF_STOLEN;
}
// PF_BRIDGE OUTPUT point Processing
Static unsigned int br_nf_local_out (unsigned int hook, struct sk_buff ** pskb,
Const struct net_device * in,
Const struct net_device * out,
Int (* okfn) (struct sk_buff *))
{
......
/* IP forwarded traffic has a physindev, locally
* Generated traffic hasn' t .*/
If (realindev! = NULL ){
If (! (Nf_bridge-> mask & BRNF_DONT_TAKE_PARENT )){
Struct net_device * parent = bridge_parent (realindev );
If (parent)
Realindev = parent;
}
// Continue to call the FORWARD point hook of the PF_INET/PF_INET6 family here, but the priority value must be //
NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1 or more
NF_HOOK_THRESH (pf, NF_IP_FORWARD, skb, realindev,
Realoutdev, br_nf_local_out_finish,
NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1 );
} Else {
// Call the FORWARD point hook of the PF_INET/PF_INET6 family. However, the priority value must be in
// NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1 or more
NF_HOOK_THRESH (pf, NF_IP_LOCAL_OUT, skb, realindev,
Realoutdev, br_nf_local_out_finish,
NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1 );
}
Out:
Return NF_STOLEN;
}

// POSTROUTING point of PF_BRIDGE
Static unsigned int br_nf_post_routing (unsigned int hook, struct sk_buff ** pskb,
Const struct net_device * in,
Const struct net_device * out,
Int (* okfn) (struct sk_buff *))
{
......
// Continue to call the hook processing of the postrouting point of the PF_INET/PF_INET6 family.
NF_HOOK (pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
Br_nf_dev_queue_xmit );
Return NF_STOLEN;
# Ifdef CONFIG_NETFILTER_DEBUG
Print_error:
If (skb-> dev! = NULL ){
Printk ("[% s]", skb-> dev-> name );
If (realoutdev)
Printk ("[% s]", realoutdev-> name );
}
Printk ("head: % p, raw: % p, data: % p \ n", skb-> head, skb-> mac. raw,
Skb-> data );
Dump_stack ();
Return NF_ACCEPT;
# Endif
}

It can be seen that the hook functions at the PF_BRIDGE hook points in turn invoke the corresponding hook points of PF_INET. Therefore, filtering and NAT operations can be performed on traffic that passes through the bridge.

4. Conclusion

The bridge's packet processing is a self-contained path: if processing proceeds normally, the packet is not handed back to other protocols for further handling.
The bridge is processed at the same level as the IP protocol. Multiple hook functions can be registered at the PF_BRIDGE hook points, and these functions call the hook points of the PF_INET family, which is how functions such as filtering and NAT are implemented on a bridge.
Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.