Netfilter Analysis in linux2.6.16 Kernel
By cyliu
1. Brief Introduction
In the 2.6.16 kernel, a major change to netfilter is that it is being turned into a protocol-independent framework, with a separate net/netfilter directory created in the kernel source tree. In the past, netfilter code lived under the individual protocol directories, such as net/ipv4 and net/ipv6. Those protocol directories still contain netfilter code, but it now mainly deals with protocol-specific matters, while the common parts have been moved to net/netfilter, and the file names have changed accordingly. The separation is still incomplete: for example, net/netfilter/nf_conntrack_core.c and net/ipv4/netfilter/ip_conntrack_core.c remain very similar, which makes the split feel somewhat unnecessary. However, many protocol-independent match and target modules have been separated from the protocols and now exist only in net/netfilter, not in the protocol directories.
The file names of the match and target modules in net/netfilter are prefixed with "xt_", such as xt_comment.c and xt_policy.c.
Target modules include:
xt_CLASSIFY.c
xt_NFQUEUE.c
xt_NOTRACK.c
To stay compatible with iptables (which locates a module file by the prefix "ipt_" or "ip6t_"), a macro, MODULE_ALIAS, is added to these files to declare module aliases.
For example, in xt_limit.c, it is defined as follows:
/* Aliases so that the module is also found under the old per-protocol names. */
MODULE_ALIAS("ipt_limit");
MODULE_ALIAS("ip6t_limit");
The following macros are defined in include/linux/netfilter_ipv4/ip_tables.h:
/* Map the old IPv4-specific type names onto the generic x_tables types. */
#define ipt_match xt_match
#define ipt_target xt_target
#define ipt_table xt_table
2. Code Analysis
The structure definition of the new matching and target modules is as follows:
Struct xt_match
{
Struct list_head list;
Const char name [XT_FUNCTION_MAXNAMELEN-1];
/* Return true or false: Return false and Set * hotdrop = 1
Force immediate packet drop .*/
/* Arguments changed since 2.6.9, as this must now handle
Non-linear SKB, using skb_header_pointer and
Skb_ip_make_writable .*/
INT (* match) (const struct sk_buff * SKB,
Const struct net_device * In,
Const struct net_device * Out,
Const struct xt_match * match,
Const void * matchinfo,
Int offset,
Unsigned int protoff,
Int * hotdrop );
/* Called when user tries to insert an entry of this type .*/
/* Shoshould return true or false .*/
INT (* checkentry) (const char * tablename,
Const void * IP,
Const struct xt_match * match,
Void * matchinfo,
Unsigned int matchinfosize,
Unsigned int hook_mask );
/* Called when entry of this type deleted .*/
Void (* destroy) (const struct xt_match * match, void * matchinfo,
Unsigned int matchinfosize );
/* Called when userspace align differs from kernel space one */
INT (* compat) (void * match, void ** dstptr, int * size, int convert );
/* Set this to this_module if you are a module, otherwise null */
Struct module * Me;
Char * table;
Unsigned int matchsize;
Unsigned int hooks;
Unsigned short proto;
Unsigned short family;
U_int8_t revision;
};
/* Registration hooks for targets .*/
Struct xt_target
{
Struct list_head list;
Const char name [XT_FUNCTION_MAXNAMELEN-1];
/* Returns verdict. Argument order changed since 2.6.9, as this
Must Now handle non-linear skbs, using skb_copy_bits and
Skb_ip_make_writable .*/
Unsigned int (* Target) (struct sk_buff ** pskb,
Const struct net_device * In,
Const struct net_device * Out,
Unsigned int hooknum,
Const struct xt_target * target,
Const void * targinfo,
Void * userdata );
/* Called when user tries to insert an entry of this type:
Hook_mask is a bitmask of hooks from which it can be
Called .*/
/* Shoshould return true or false .*/
INT (* checkentry) (const char * tablename,
Const void * entry,
Const struct xt_target * target,
Void * targinfo,
Unsigned int targinfosize,
Unsigned int hook_mask );
/* Called when entry of this type deleted .*/
Void (* destroy) (const struct xt_target * target, void * targinfo,
Unsigned int targinfosize );
/* Called when userspace align differs from kernel space one */
INT (* compat) (void * target, void ** dstptr, int * size, int convert );
/* Set this to this_module if you are a module, otherwise null */
Struct module * Me;
Char * table;
Unsigned int targetsize;
Unsigned int hooks;
Unsigned short proto;
Unsigned short family;
U_int8_t revision;
};
/* Furniture shopping ...*/
Struct xt_table
{
Struct list_head list;
/* A unique name ...*/
Char name [xt_table_maxnamelen];
/* What hooks you will enter on */
Unsigned int valid_hooks;
/* Lock for the curtain */
Rwlock_t lock;
/* Man behind the curtain ...*/
// Struct ip6t_table_info * private;
Void * private;
/* Set this to this_module if you are a module, otherwise null */
Struct module * Me;
Int AF;/* address/protocol family */
};
/* The table itself */
Struct xt_table_info
{
/* Size per table */
Unsigned int size;
/* Number of entries: fixme. -- RR */
Unsigned int number;
/* Initial number of entries. Needed for module usage count */
Unsigned int initial_entries;
/* Entry points and underflows */
Unsigned int hook_entry [nf_ip_numhooks];
Unsigned int underflow [nf_ip_numhooks];
/* Ipt_entry tables: one per CPU */
Char * entries [nr_cpus];
};
/* Main Structure */
Struct xt_af {
Struct mutex;
Struct list_head match;
Struct list_head target;
Struct list_head tables;
Struct mutex compat_mutex;
};
/* Array of per-family registries, indexed by protocol family;
 * allocated in xt_init(). */
static struct xt_af *xt;
/* Initialize the netfilter module */
Static int _ init xt_init (void)
{
Int I;
/* Allocate a resource for each protocol */
XT = kmalloc (sizeof (struct xt_af) * nproto, gfp_kernel );
If (! XT)
Return-enomem;
For (I = 0; I <nproto; I ++ ){
Mutex_init (& XT. mutex );
# Ifdef config_compat
Mutex_init (& XT. compat_mutex );
# Endif
/* Initialize table, target, and match resources */
Init_list_head(&xt.tar get );
Init_list_head (& XT. Match );
Init_list_head (& XT. Tables );
}
Return 0;
}
At present, the 2.6.16 kernel supports three protocol families: IPv4, IPv6, and ARP. For each protocol family, the prefix of the corresponding modules is:
Static const char * xt_prefix [nproto] = {
[Af_inet] = "ip ",
[Af_inet6] = "ip6 ",
[Nf_arp] = "ARP ",
};
The resulting module-name prefixes are "ipt", "ip6t", and "arpt", respectively.
The main differences from the struct ipt_match and struct ipt_target structures of the old 2.4 kernel are the addition of the compat function and the series of fields following the struct module *me field, which are protocol-related. For example, when the limit match is defined for IPv4 and for IPv6, only the family field differs: one is AF_INET and the other is AF_INET6; everything else is identical. This causes no conflict at registration, because the modules are linked into the lists of different protocol families:
/*
 * Register a target: link it onto the target list of its own protocol
 * family (target->family), under that family's mutex.
 *
 * Returns 0 on success, or the non-zero result of
 * mutex_lock_interruptible() if the lock could not be taken.
 */
int
xt_register_target(struct xt_target *target)
{
	int ret, af = target->family;

	ret = mutex_lock_interruptible(&xt[af].mutex);
	if (ret != 0)
		return ret;
	/* Add target */
	list_add(&target->list, &xt[af].target);
	mutex_unlock(&xt[af].mutex);
	return ret;
}
Int
Xt_register_match (struct xt_match * match)
{
Int ret, AF = match-> family;
Ret = mutex_lock_interruptible (& XT [af]. mutex );
If (Ret! = 0)
Return ret;
/* Add a match */
List_add (& Match-> list, & XT [af]. Match );
Mutex_unlock (& XT [af]. mutex );
Return ret;
}
Table registration is performed in the netfilter code of each protocol:
Int ipt_register_table (struct xt_table * Table, const struct ipt_replace * repl)
{
Int ret;
Struct xt_table_info * newinfo;
Static struct xt_table_info Bootstrap
= {0, 0, 0, {0}, {0 },{}};
Void * loc_cpu_entry;
Newinfo = xt_alloc_table_info (repl-> size );
If (! Newinfo)
Return-enomem;
/* Choose the copy on our node/CPU
* But dont care of Preemption
*/
Loc_cpu_entry = newinfo-> entries [raw_smp_processor_id ()];
Memcpy (loc_cpu_entry, REPL-> entries, REPL-> size );
Ret = translate_table (Table-> name, table-> valid_hooks,
Newinfo, loc_cpu_entry, REPL-> size,
Repl-> num_entries,
Repl-> hook_entry,
Repl-> underflow );
If (Ret! = 0 ){
Xt_free_table_info (newinfo );
Return ret;
}
If (xt_register_table (table, & Bootstrap, newinfo )! = 0 ){
Xt_free_table_info (newinfo );
Return ret;
}
Return 0;
}
/* Allocate the table_info resource. Note that each CPU corresponds to an entry */
Struct xt_table_info * xt_alloc_table_info (unsigned int size)
{
Struct xt_table_info * newinfo;
Int CPU;
/* Pedantry: prevent them from hitting bug () in vmalloc. c -- RR */
If (smp_align (size)> page_shift) + 2> num_physpages)/* exceeds the physical memory space */
Return NULL;
Newinfo = kzarloc (sizeof (struct xt_table_info), gfp_kernel );
If (! Newinfo)
Return NULL;
Newinfo-> size = size;
For_each_possible_cpu (CPU) {/* traverse each CPU */
If (size <= page_size)
Newinfo-> entries [CPU] = kmalloc_node (size,
Gfp_kernel,
Cpu_to_node (CPU);/* directly allocate physical space */
Else
Newinfo-> entries [CPU] = vmalloc_node (size,
Cpu_to_node (CPU);/* allocate virtual space */
If (newinfo-> entries [CPU] = NULL ){
Xt_free_table_info (newinfo );
Return NULL;
}
}
Return newinfo;
}
Int xt_register_table (struct xt_table * table,
Struct xt_table_info * Bootstrap,
Struct xt_table_info * newinfo)
{
Int ret;
Struct xt_table_info * private;
Ret = mutex_lock_interruptible (& XT [Table-> af]. mutex );
If (Ret! = 0)
Return ret;
/* Don't autoload: We 'd eat our tail ...*/
If (list_named_find (& XT [Table-> af]. tables, table-> name )){
Ret =-eexist;
Goto unlock;
}
/* Simplifies replace_table code .*/
Table-> private = Bootstrap;
Rwlock_init (& table-> lock );
If (! Xt_replace_table (table, 0, newinfo, & RET ))
Goto unlock;
Private = table-> private;
Duprintf ("table-> private-> Number = % u/N", private-> number );
/* Save Number of Initial entries */
Private-> initial_entries = private-> number;
List_prepend (& XT [Table-> af]. tables, table );
Ret = 0;
Unlock:
Mutex_unlock (& XT [Table-> af]. mutex );
Return ret;
}
However, when a match or target is actually looked up and validated, the name, protocol family, table name, hooks, protocol, and so on are all compared; see, for example, the match check:
Int xt_check_match (const struct xt_match * match, unsigned short family,
Unsigned int size, const char * Table, unsigned int hook_mask,
Unsigned short proto, int inv_proto)
{
If (xt_align (match-> matchsize )! = Size ){
Printk ("% s_tables: % s match: Invalid size % zu! = % U/N ",
Xt_prefix [Family], match-> name,
Xt_align (match-> matchsize), size );
Return-einval;
}
If (match-> table & strcmp (match-> table, table )){
Printk ("% s_tables: % s match: only valid in % s table, not % s/n ",
Xt_prefix [Family], match-> name, match-> table, table );
Return-einval;
}
If (match-> hooks & (hook_mask &~ Match-> hooks )! = 0 ){
Printk ("% s_tables: % s match: Bad hook_mask % u/N ",
Xt_prefix [Family], match-> name, hook_mask );
Return-einval;
}
If (match-> proto & (match-> proto! = Proto | inv_proto )){
Printk ("% s_tables: % s match: only valid for protocol % u/N ",
Xt_prefix [Family], match-> name, match-> PROTO );
Return-einval;
}
Return 0;
}
Int xt_check_target (const struct xt_target * target, unsigned short family,
Unsigned int size, const char * Table, unsigned int hook_mask,
Unsigned short proto, int inv_proto)
{
If (xt_align (Target-> targetsize )! = Size ){
Printk ("% s_tables: % s target: Invalid size % zu! = % U/N ",
Xt_prefix [Family], target-> name,
Xt_align (Target-> targetsize), size );
Return-einval;
}
If (Target-> table & strcmp (Target-> table, table )){
Printk ("% s_tables: % s target: only valid in % s table, not % s/n ",
Xt_prefix [Family], target-> name, target-> table, table );
Return-einval;
}
If (Target-> hooks & (hook_mask &~ Target-> hooks )! = 0 ){
Printk ("% s_tables: % s target: Bad hook_mask % u/N ",
Xt_prefix [Family], target-> name, hook_mask );
Return-einval;
}
If (Target-> proto & (Target-> proto! = Proto | inv_proto )){
Printk ("% s_tables: % s target: only valid for protocol % u/N ",
Xt_prefix [Family], target-> name, target-> PROTO );
Return-einval;
}
Return 0;
}
/* The following is the IPsec Policy validation process */
Static struct xt_match policy_match = {
. Name = "policy ",
. Family = af_inet,
. Match = match,
. Matchsize = sizeof (struct xt_policy_info ),
. Checkentry = checkentry,
. Family = af_inet,
. Me = this_module,
};
/*
 * Module init: register policy_match and then policy6_match.
 *
 * If the second registration fails, the first is rolled back so that
 * neither stays registered. Returns 0 on success, or the error from
 * whichever xt_register_match() call failed.
 */
static int __init init(void)
{
	int ret;

	ret = xt_register_match(&policy_match);
	if (ret)
		return ret;
	ret = xt_register_match(&policy6_match);
	if (ret)
		xt_unregister_match(&policy_match);
	return ret;
}
Static int
Match_policy_in (const struct sk_buff * SKB, const struct xt_policy_info * info,
Unsigned short family)
{
Const struct xt_policy_elem * E;
Struct sec_path * sp = SKB-> sp;
Int strict = Info-> flags & xt_policy_match_strict;
Int I, Pos;
If (sp = NULL)
Return-1;
If (strict & info-> Len! = Sp-> Len)
Return 0;
For (I = Sp-> len-1; I> = 0; I --){
Pos = strict? I-SP-> Len + 1: 0;
If (Pos> = Info-> Len)
Return 0;
E = & info-> Pol [POS];
/* Check policy */
If (match_xfrm_state (SP-> xvec, E, family )){
If (! Strict)
Return 1;
} Else if (strict)
Return 0;
}
Return strict? 1: 0;
}