Copyleft: this document belongs to yfydz. It is published under the GPL and may be freely copied and redistributed; use for any commercial purpose is strictly prohibited.
MSN: yfydz_no1@hotmail.com
Source: http://yfydz.cublog.cn
1. Preface
Reassembling IP fragments is an important way for a firewall to improve security: by reassembling fragments before inspecting them, it can effectively defend against various fragmentation attacks. The Linux kernel firewall, netfilter, reassembles fragmented IP packets automatically. This article describes the IP fragment reassembly process in the Linux kernel; the kernel code version is 2.4.26.
2. Processing flow
The basic function for IP fragment reassembly is ip_defrag(), which is implemented in net/ipv4/ip_fragment.c. The basic approach is to maintain a set of fragment queues; each queue node holds a linked list of the fragments that belong to the same original datagram (same source address, destination address, protocol, and ID). When all fragments have arrived, the datagram is reassembled; if they do not all arrive within a certain period of time (30 seconds by default), the queued fragments are released.
2.1 Data Structure
While a fragment is being processed, the cb field of its skb is used to hold the fragment control information, struct ipfrag_skb_cb.
# Define frag_cb (SKB) (struct ipfrag_skb_cb *) (SKB)-> CB ))
Struct ipfrag_skb_cb
{
Struct inet_skb_parm h;
Int offset;
};
The ipq queue node structure:
/* Describe an entry in the "incomplete into Rams" queue .*/
Struct ipq {
// Next
Struct ipq * Next;/* linked list pointers */
// The latest linked list
Struct list_head lru_list;/* LRU list member */
// The following four items are used to match a group of IP addresses.
U32 saddr;
U32 daddr;
2017-11-id;
U8 protocol;
// Status flag
U8 last_in;
# Define complete 4 // The data is complete
# Define first_in 2 // The first package arrives
# Define last_in 1 // the last package arrives
// List of received IP Fragments
Struct sk_buff * fragments;/* linked list of supported ed fragments */
// Len is the total length of data obtained based on the offset information in the latest IP fragment.
Int Len;/* total length of original datasync */
// Meat is the sum of the actual lengths of all fragments.
Int meat;
Spinlock_t lock;
Atomic_t refcnt;
// Timeout
Struct timer_list timer;/* When will this queue expire? */
// The address of the previous queue
Struct ipq ** pprev;
// The index number for the data to enter the NIC
Int IIF;
// Timestamp of the latest shard
Struct timeval stamp;
};
2.2 ip_defrag () function:
This is the entry function for fragment reassembly. It returns the reassembled skb, or NULL if the datagram is not yet complete or an error occurred.
Struct sk_buff * ip_defrag (struct sk_buff * SKB)
{
Struct iphdr * IPH = SKB-> NH. iph;
Struct ipq * QP;
Struct net_device * dev;
// Statistics
Ip_inc_stats_bh (ipreasmreqds );
/* Start by cleaning up the memory .*/
// Check whether the allocated fragment memory exceeds the configured Upper Limit
If (atomic_read (& ip_frag_mem)> sysctl_ipfrag_high_thresh)
// The ip_evictor () function releases data packets that cannot be reorganized in the current buffer, making ip_frag_mem smaller
// Sysctl_ipfrag_low_thresh (low buffer limit)
Ip_evictor ();
Dev = SKB-> dev;
/* Lookup (or create) queue header */
// Query queue Nodes Based on IP header information
If (QP = ip_find (IPH ))! = NULL ){
Struct sk_buff * ret = NULL;
Spin_lock (& QP-> lock );
// The SKB data packet enters the linked list of the queue Node
Ip_frag_queue (qP, SKB );
If (QP-> last_in = (first_in | last_in )&&
QP-> meat = QP-> Len)
// Reassembles the data packets to meet the regrouping conditions and return the restructured data packets.
Ret = ip_frag_reasm (qP, Dev );
Spin_unlock (& QP-> lock );
// If the number of queue nodes is 0, release the queue Node
Ipq_put (qP );
Return ret;
}
// The related node cannot be found and the packet is discarded.
Ip_inc_stats_bh (ipreasmfails );
Kfree_skb (SKB );
Return NULL;
}
2.3 ip_find () function
The ip_find() function looks up the queue node that matches the packet's source address, destination address, protocol, and ID. If no such node is found, a new one is created:
Static inline struct ipq * ip_find (struct iphdr * iph)
{
_ 2010id = IPH-> ID;
_ U32 saddr = IPH-> saddr;
_ U32 daddr = IPH-> daddr;
_ U8 protocol = IPH-> protocol;
// The Shard queue is implemented in the form of a hash table
// The hash function uses four IP header parameters: source, Destination Address, protocol, and ID.
Unsigned int hash = ipqhashfn (ID, saddr, daddr, Protocol );
Struct ipq * QP;
Read_lock (& ipfrag_lock );
For (QP = ipq_hash [hash]; QP = QP-> next ){
If (QP-> id = ID &&
QP-> saddr = saddr &&
QP-> daddr = daddr &&
QP-> protocol = protocol ){
Atomic_inc (& QP-> refcnt );
Read_unlock (& ipfrag_lock );
Return QP;
}
}
Read_unlock (& ipfrag_lock );
// If the node does not exist, create a queue node.
Return ip_frag_create (hash, IPH );
}
The ip_frag_create() function creates and returns a new fragment queue node:
Static struct ipq * ip_frag_create (unsigned hash, struct iphdr * iph)
{
Struct ipq * QP;
// Allocate a new shard queue Node
If (QP = frag_alloc_queue () = NULL)
Goto out_nomem;
QP-> protocol = IPH-> protocol;
QP-> last_in = 0;
QP-> id = IPH-> ID;
QP-> saddr = IPH-> saddr;
QP-> daddr = IPH-> daddr;
QP-> Len = 0;
// Meat is the total length of all fragments in the current queue.
QP-> meat = 0;
QP-> fragments = NULL;
QP-> IIF = 0;
/* Initialize a timer for this entry .*/
// Queue node timer settings
Init_timer (& QP-> timer );
QP-> timer. Data = (unsigned long) QP;/* pointer to queue */
// Timeout processing, releasing memory, and sending ICMP fragment timeout error
QP-> timer. Function = ip_expire;/* expire function */
QP-> lock = spin_lock_unlocked;
// The number of nodes used to initialize the queue is 1. Note that the value cannot be 0.
Atomic_set (& QP-> refcnt, 1 );
// Put the shard nodes in the queue hash table
Return ip_frag_intern (hash, qP );
Out_nomem:
Netdebug (if (net_ratelimit () printk (kern_err "ip_frag_create: no memory left
! /N "));
Return NULL;
}
2.4 ip_frag_queue () function
The ip_frag_queue() function inserts a new skb into a queue node. This function is the key to defending against fragment attacks, so it must handle every kind of abnormal fragment correctly:
// Ping of death, Teardrop, and so on are attacked by abnormal fragmentation offset. Therefore, you need to check carefully.
// Whether the part offset is abnormal
Static void ip_frag_queue (struct ipq * Qp, struct sk_buff * SKB)
{
Struct sk_buff * Prev, * next;
Int flags, offset;
Int IHL, end;
// An error is returned when a new packet is sent to the queue node with the complete mark.
If (QP-> last_in & complete)
Goto err;
// Calculate the offset value of the current package. The Offset Value in the IP header is only 13 BITs, but it represents a multiple of 8 bytes.
Offset = ntohs (SKB-> NH. iph-> frag_off );
Flags = offset &~ Ip_offset;
Offset & = ip_offset;
Offset <= 3;/* offset is in 8-byte chunks */
IHL = SKB-> NH. iph-> IHL * 4;
/* Determine the position of this fragment .*/
// End is the position of the end of the current package in the complete package
End = offset + SKB-> len-IHL;
/* Is this the final fragment? */
If (flags & ip_mf) = 0 ){
// No more multipart packages are available.
/* If we already have some bits beyond end
* Or have different end, the segment is already rupted.
*/
If (end <QP-> Len |
(QP-> last_in & last_in) & End! = QP-> Len ))
Goto err;
QP-> last_in | = last_in;
QP-> Len = end;
} Else {
// Check whether the data length is 8 bytes aligned.
If (end & 7 ){
End & = ~ 7;
If (SKB-> ip_summed! = Checksum_unnecessary)
SKB-> ip_summed = checksum_none;
}
If (end> QP-> Len ){
// The length exceeds the length of the current record
/* Some bits beyond end-> upload uption .*/
If (QP-> last_in & last_in)
Goto err;
QP-> Len = end;
}
}
If (END = offset)
Goto err;
// Remove the IP header and retain only the data
If (pskb_pull (SKB, IHL) = NULL)
Goto err;
// Adjust the SKB package length to end-offset. The value is the actual valid data length in the SKB package.
If (pskb_trim (SKB, end-offset ))
Goto err;
/* Find out which fragments are in front and at the back of us
* In the chain of fragments so far. We must know where to put
* This fragment, right?
*/
// Determine the position of the current package in the complete package. The multipart package may not necessarily arrive at the destination in order, but may be in a messy order.
// Adjust the package order.
Prev = NULL;
For (next = QP-> fragments; next! = NULL; next = Next-> next ){
If (frag_cb (next)-> Offset> = offset)
Break;/* bingo! */
Prev = next;
}
/* We found where to put this one. Check for overlap
* Preceding fragment, and, if needed, align things so that
* Any overlaps are eliminated.
*/
// Check whether the offset overlaps. The overlap is allowed, as long as it is correct.
If (prev ){
Int I = (frag_cb (prev)-> Offset + Prev-> Len)-offset;
If (I> 0 ){
Offset + = I;
If (end <= offset)
Goto err;
If (! Pskb_pull (SKB, I ))
Goto err;
If (SKB-> ip_summed! = Checksum_unnecessary)
SKB-> ip_summed = checksum_none;
}
}
// If the packets overlap, the offset values of all packets after the queue must be adjusted, and the accumulated value of the packet length must be reduced accordingly.
While (next & frag_cb (next)-> offset <End ){
Int I = end-frag_cb (next)-> offset;/* overlap is 'I' bytes */
If (I <next-> Len ){
/* Eat head of the next overlapped Fragment
* And leave the loop. the next ones cannot overlap.
*/
If (! Pskb_pull (next, I ))
Goto err;
Frag_cb (next)-> Offset + = I;
QP-> meat-= I;
If (next-> ip_summed! = Checksum_unnecessary)
Next-> ip_summed = checksum_none;
Break;
} Else {
Struct sk_buff * free_it = next;
/* Old fragmnet is completely overridden
* New one drop it.
*/
Next = Next-> next;
If (prev)
Prev-> next = next;
Else
QP-> fragments = next;
QP-> meat-= free_it-> Len;
Frag_kfree_skb (free_it );
}
}
// SKB records its own Offset Value
Frag_cb (SKB)-> offset = offset;
// Insert the current SKB into the queue
/* Insert this fragment in the chain of fragments .*/
SKB-> next = next;
If (prev)
Prev-> next = SKB;
Else
QP-> fragments = SKB;
If (SKB-> Dev)
QP-> IIF = SKB-> Dev-> ifindex;
SKB-> Dev = NULL;
// Time update
QP-> stamp = SKB-> stamp;
// Accumulate the total length of the current data packet
QP-> meat + = SKB-> Len;
// Add the SKB size to the shard memory
Atomic_add (SKB-> truesize, & ip_frag_mem );
If (offset = 0)
QP-> last_in | = first_in;
Write_lock (& ipfrag_lock );
// Adjust the shard node location in the recently used queue. When the storage area exceeds the limit, the last unused Shard is released.
// Fragments
List_move_tail (& QP-> lru_list, & ipq_lru_list );
Write_unlock (& ipfrag_lock );
Return;
Err:
// Directly discards the data packet when an error occurs, but the existing data packet in the queue is not released. If the reorganization fails
// Release when the fragment memory limit is exceeded
Kfree_skb (SKB );
}
2.5 ip_frag_reasm () function
The ip_frag_reasm() function performs the final reassembly once all the data has been received correctly.
Static struct sk_buff * ip_frag_reasm (struct ipq * Qp, struct net_device * Dev)
{
Struct iphdr * IPH;
Struct sk_buff * FP, * head = QP-> fragments;
Int Len;
Int Ihlen;
// Disconnect the node from the linked list and delete the timer.
Ipq_kill (qP );
Bug_trap (Head! = NULL );
Bug_trap (frag_cb (head)-> offset = 0 );
/* Allocate a new buffer for the datax .*/
Ihlen = head-> NH. iph-> IHL * 4;
Len = Ihlen + QP-> Len;
// The total IP address length exceeds the limit and is discarded.
If (LEN> 65535)
Goto out_oversize;
/* Head of list must not be cloned .*/
If (skb_cloned (head) & pskb_expand_head (Head, 0, 0, gfp_atomic ))
Goto out_nomem;
/* If the first fragment is fragmented itself, we split
* It to two chunks: The first with data and paged part
* And the second, holding only fragments .*/
If (skb_shinfo (head)-> frag_list ){
// The first SKB in the queue cannot be sharded. If sharded, allocate another SKB and its data length is 0,
// The final result of the head is this SKB, which does not include data, but its end pointer, that is
// The frag_list In the struct skb_shared_info structure contains all the shards SKB, which is also SKB
// Is a form of representation, not necessarily a continuous data block, but the final result is through skb_linearize ()
// The function copies the data in these linked list nodes to a continuous data block.
Struct sk_buff * clone;
Int I, Plen = 0;
If (clone = alloc_skb (0, gfp_atomic) = NULL)
Goto out_nomem;
Clone-> next = head-> next;
Head-> next = clone;
Skb_shinfo (clone)-> frag_list = skb_shinfo (head)-> frag_list;
Skb_shinfo (head)-> frag_list = NULL;
For (I = 0; I <skb_shinfo (head)-> nr_frags; I ++)
Plen + = skb_shinfo (head)-> frags [I]. size;
Clone-> Len = clone-> data_len = head-> data_len-plen;
Head-> data_len-= clone-> Len;
Head-> len-= clone-> Len;
Clone-> csum = 0;
Clone-> ip_summed = head-> ip_summed;
Atomic_add (Clone-> truesize, & ip_frag_mem );
}
Skb_shinfo (head)-> frag_list = head-> next;
Skb_push (Head, head-> data-head-> NH. Raw );
Atomic_sub (Head-> truesize, & ip_frag_mem );
// Accumulate the Data Length of all subsequent packages in sequence, and delete the data length from the allocated memory count.
For (FP = head-> next; FP = FP-> next ){
Head-> data_len + = FP-> Len;
Head-> Len + = FP-> Len;
If (Head-> ip_summed! = FP-> ip_summed)
Head-> ip_summed = checksum_none;
Else if (Head-> ip_summed = checksum_hw)
Head-> csum = csum_add (Head-> csum, FP-> csum );
Head-> truesize + = FP-> truesize;
Atomic_sub (FP-> truesize, & ip_frag_mem );
}
Head-> next = NULL;
Head-> Dev = dev;
Head-> stamp = QP-> stamp;
// Reset the length and offset mark in the IP Header
IPH = head-> NH. iph;
IPH-> frag_off = 0;
IPH-> tot_len = htons (LEN );
Ip_inc_stats_bh (ipreasmoks );
// SKB of each shard has been processed and will not be released again when QP is released
QP-> fragments = NULL;
Return head;
Out_nomem:
Netdebug (if (net_ratelimit ())
Printk (kern_err
"IP: queue_glue: no memory for gluing queue % P/N ",
QP ));
Goto out_fail;
Out_oversize:
If (net_ratelimit ())
Printk (kern_info
"Oversized IP packet from % d. % d./N ",
Nipquad (QP-> saddr ));
Out_fail:
Ip_inc_stats_bh (ipreasmfails );
Return NULL;
}
2.6 Releasing the ipq
After reassembly, the fragment queue is released:
/* Drop one reference to a fragment queue; the queue is destroyed when
 * the last reference goes away.
 */
static __inline__ void ipq_put(struct ipq *ipq)
{
	if (atomic_dec_and_test(&ipq->refcnt))
		ip_frag_destroy(ipq);
}
/* Complete destruction of ipq .*/
Static void ip_frag_destroy (struct ipq * QP)
{
Struct sk_buff * FP;
Bug_trap (QP-> last_in & complete );
Bug_trap (del_timer (& QP-> timer) = 0 );
/* Release all fragment data .*/
Fp = QP-> fragments;
While (FP ){
Struct sk_buff * XP = FP-> next;
// Release each shard SKB
Frag_kfree_skb (FP );
Fp = XP;
}
/* Finally, release the queue descriptor itself .*/
// Release the fragment Node itself
Frag_free_queue (qP );
}
3. Conclusion
In Linux, the IP fragment reassembly process takes many possible anomalies into account, which makes it highly robust. Reassembling packets before they enter the netfilter framework therefore defends against a variety of fragment attacks.