標籤:icmp
ICMP是網路層的一個協議,可以看作IP協議的附屬協議,因為它主要被IP用來與其他主機或路由器交換錯誤報文及其他需要注意的資訊。當然,更高層協議(TCP/UDP)甚至有些使用者進程也可能用到ICMP報文。
註冊ICMP協議和ICMP協議的處理涉及以下檔案:
net/ipv4/icmp.c ICMP協議處理入口
net/ipv4/af_inet.c 網路層和傳輸層介面
ICMP報文結構
參見tcp/ip協議學習筆記(5)Internet Control Message Protocol(ICMP)
註冊ICMP報文類型
ICMP的net_protocol結構為icmp_protocol,定義了接收ICMP報文常式為icmp_rcv。
/*
 * Protocol descriptor registered for ICMP.
 * The receive handler for incoming ICMP packets is icmp_rcv().
 */
static const struct net_protocol icmp_protocol = {
	.handler	= icmp_rcv,
	.no_policy	= 1,
	.netns_ok	= 1,
};
ICMP的初始化
/*
 * Per-network-namespace ICMP initialisation.
 *
 * Allocates an array of nr_cpu_ids socket pointers, creates one raw ICMP
 * control socket for every possible CPU, and loads the default values of
 * the ICMP sysctls.
 *
 * Returns 0 on success, -ENOMEM when the pointer array cannot be
 * allocated, or the negative error reported by inet_ctl_sock_create().
 */
static int __net_init icmp_sk_init(struct net *net)
{
	int cpu;
	int rc;

	net->ipv4.icmp_sk =
		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
	if (!net->ipv4.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *ctl_sk;

		rc = inet_ctl_sock_create(&ctl_sk, PF_INET,
					  SOCK_RAW, IPPROTO_ICMP, net);
		if (rc < 0)
			goto fail;

		net->ipv4.icmp_sk[cpu] = ctl_sk;

		/*
		 * Send buffer sized for two 64K ICMP packets, including
		 * the sk_buff struct overhead.
		 */
		ctl_sk->sk_sndbuf = 2 * (64 * 1024 + sizeof(struct sk_buff));
		inet_sk(ctl_sk)->pmtudisc = IP_PMTUDISC_DONT;
	}

	/* Control parameters for ECHO replies. */
	net->ipv4.sysctl_icmp_echo_ignore_all = 0;
	net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;

	/* Control parameter - ignore bogus broadcast responses? */
	net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;

	/*
	 * Configurable global rate limit.
	 *
	 * ratelimit defines tokens/packet consumed for dst->rate_token
	 * bucket; ratemask defines which ICMP types are rate limited by
	 * setting the bit at the type's position.
	 *
	 * Default mask 0x1818 selects:
	 *	dest unreachable (3), source quench (4),
	 *	time exceeded (11), parameter problem (12)
	 */
	net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
	net->ipv4.sysctl_icmp_ratemask = 0x1818;
	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;

	return 0;

fail:
	/*
	 * Every slot is passed to the destroy helper, including the ones
	 * not filled yet (those still hold the zero value from kzalloc()).
	 */
	for_each_possible_cpu(cpu)
		inet_ctl_sock_destroy(net->ipv4.icmp_sk[cpu]);
	kfree(net->ipv4.icmp_sk);
	return rc;
}

/* Per-namespace hooks: icmp_sk_init() on creation, icmp_sk_exit() on teardown. */
static struct pernet_operations __net_initdata icmp_sk_ops = {
	.init = icmp_sk_init,
	.exit = icmp_sk_exit,
};

/* Boot-time entry point: registers the per-namespace ICMP operations. */
int __init icmp_init(void)
{
	return register_pernet_subsys(&icmp_sk_ops);
}
輸入處理
ICMP輸入處理函數是icmp_rcv()。ICMP報文到達時,IP層通過inet_protos[IPPROTO_ICMP]找到該函數進行輸入處理。進入icmp_rcv()後,首先對ICMP報文作適當的校驗(安全性原則檢查、校驗和、報文類型是否超出NR_ICMP_TYPES),然後就會根據ICMP報文類型進行不同處理。
一個類型的ICMP報文對應一個icmp_control結構,在核心中定義了一個該結構類型的數組icmp_pointers[NR_ICMP_TYPES + 1]用來管理ICMP報文,icmp_control結構如下所示:
/*
 * Per-type ICMP dispatch entry.
 *
 * @handler: function invoked for a received message of this type
 * @error:   non-zero when this ICMP type is classed as an error message
 */
struct icmp_control {
	void (*handler)(struct sk_buff *skb);
	short error;
};
handler對應輸入該類型ICMP報文的處理函數
error值為1時表示是一個差錯報文;為0則是一個查詢ICMP報文。
#define ICMP_ECHOREPLY0/* Echo Reply*/#define ICMP_DEST_UNREACH3/* Destination Unreachable*/#define ICMP_SOURCE_QUENCH4/* Source Quench*/#define ICMP_REDIRECT5/* Redirect (change route)*/#define ICMP_ECHO8/* Echo Request*/#define ICMP_TIME_EXCEEDED11/* Time Exceeded*/#define ICMP_PARAMETERPROB12/* Parameter Problem*/#define ICMP_TIMESTAMP13/* Timestamp Request*/#define ICMP_TIMESTAMPREPLY14/* Timestamp Reply*/#define ICMP_INFO_REQUEST15/* Information Request*/#define ICMP_INFO_REPLY16/* Information Reply*/#define ICMP_ADDRESS17/* Address Mask Request*/#define ICMP_ADDRESSREPLY18/* Address Mask Reply*/#define NR_ICMP_TYPES18/* *This table is the definition of how we handle ICMP. */static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {[ICMP_ECHOREPLY] = {.handler = icmp_discard,},[1] = {.handler = icmp_discard,.error = 1,},[2] = {.handler = icmp_discard,.error = 1,},[ICMP_DEST_UNREACH] = {.handler = icmp_unreach,.error = 1,},[ICMP_SOURCE_QUENCH] = {.handler = icmp_unreach,.error = 1,},[ICMP_REDIRECT] = {.handler = icmp_redirect,.error = 1,},[6] = {.handler = icmp_discard,.error = 1,},[7] = {.handler = icmp_discard,.error = 1,},[ICMP_ECHO] = {.handler = icmp_echo,},[9] = {.handler = icmp_discard,.error = 1,},[10] = {.handler = icmp_discard,.error = 1,},[ICMP_TIME_EXCEEDED] = {.handler = icmp_unreach,.error = 1,},[ICMP_PARAMETERPROB] = {.handler = icmp_unreach,.error = 1,},[ICMP_TIMESTAMP] = {.handler = icmp_timestamp,},[ICMP_TIMESTAMPREPLY] = {.handler = icmp_discard,},[ICMP_INFO_REQUEST] = {.handler = icmp_discard,},[ICMP_INFO_REPLY] = {.handler = icmp_discard,},[ICMP_ADDRESS] = {.handler = icmp_address,},[ICMP_ADDRESSREPLY] = {.handler = icmp_address_reply,},};
ICMP報文的接收處理的函數調用過程:
/* *Deal with incoming ICMP packets. */int icmp_rcv(struct sk_buff *skb){struct icmphdr *icmph;struct rtable *rt = skb_rtable(skb);struct net *net = dev_net(rt->u.dst.dev);if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {struct sec_path *sp = skb_sec_path(skb);int nh;if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP))goto drop;if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))goto drop;nh = skb_network_offset(skb);skb_set_network_header(skb, sizeof(*icmph));if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))goto drop;skb_set_network_header(skb, nh);}ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);switch (skb->ip_summed) {case CHECKSUM_COMPLETE:if (!csum_fold(skb->csum))break;/* fall through */case CHECKSUM_NONE:skb->csum = 0;if (__skb_checksum_complete(skb))goto error;}if (!pskb_pull(skb, sizeof(*icmph)))goto error;icmph = icmp_hdr(skb);ICMPMSGIN_INC_STATS_BH(net, icmph->type);/* *18 is the highest 'known' ICMP type. Anything else is a mystery * *RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */if (icmph->type > NR_ICMP_TYPES)goto error;/* *Parse the ICMP message */if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {/* *RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be * silently ignored (we let user decide with a sysctl). *RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently * discarded if to broadcast/multicast. */if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {goto error;}if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP && icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) {goto error;}}icmp_pointers[icmph->type].handler(skb);drop:kfree_skb(skb);return 0;error:ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);goto drop;}
輸出處理
發送ICMP報文
icmp_send()用於輸出各種指定類型和編碼的ICMP報文,但用該函數不能應答目的地址為組播或廣播類型的硬體地址或IP地址的報文
/*
 *	Send an ICMP message in response to a situation
 *
 *	RFC 1122: 3.2.2	MUST send at least the IP header and 8 bytes of header.
 *		  MAY send more (we do).
 *			MUST NOT change this header information.
 *			MUST NOT reply to a multicast/broadcast IP address.
 *			MUST NOT reply to a multicast/broadcast MAC address.
 *			MUST reply to only the first fragment.
 *
 *	@skb_in: the packet that triggered the ICMP message
 *	@type:   ICMP type to send; values outside 0..NR_ICMP_TYPES are
 *		 rejected and nothing is sent
 *	@code:   ICMP code to send
 *	@info:   stored in the un.gateway member of the outgoing ICMP header
 *
 *	Returns nothing; every failure path silently gives up.
 */
void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
	struct iphdr *iph;
	int room;
	struct icmp_bxm icmp_param;
	struct rtable *rt = skb_rtable(skb_in);
	struct ipcm_cookie ipc;
	__be32 saddr;
	u8 tos;
	struct net *net;
	struct sock *sk;

	if (!rt)
		goto out;
	net = dev_net(rt->u.dst.dev);

	/*
	 *	Find the original header. It is expected to be valid, of course.
	 *	Check this, icmp_send is called from the most obscure devices
	 *	sometimes.
	 */
	iph = ip_hdr(skb_in);

	if ((u8 *)iph < skb_in->head ||
	    (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
		goto out;

	/*
	 *	No replies to physical multicast/broadcast
	 */
	if (skb_in->pkt_type != PACKET_HOST)
		goto out;

	/*
	 *	Now check at the protocol level
	 */
	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto out;

	/*
	 *	Only reply to fragment 0. We byte re-order the constant
	 *	mask for efficiency.
	 */
	if (iph->frag_off & htons(IP_OFFSET))
		goto out;

	/*
	 *	icmp_pointers[] has NR_ICMP_TYPES + 1 entries and is indexed
	 *	by 'type' below; refuse anything that would index outside it.
	 */
	if (type < 0 || type > NR_ICMP_TYPES)
		goto out;

	/*
	 *	If we send an ICMP error to an ICMP error a mess would result..
	 */
	if (icmp_pointers[type].error) {
		/*
		 *	We are an error, check if we are replying to an
		 *	ICMP error
		 */
		if (iph->protocol == IPPROTO_ICMP) {
			u8 _inner_type, *itp;

			itp = skb_header_pointer(skb_in,
						 skb_network_header(skb_in) +
						 (iph->ihl << 2) +
						 offsetof(struct icmphdr,
							  type) -
						 skb_in->data,
						 sizeof(_inner_type),
						 &_inner_type);
			if (itp == NULL)
				goto out;

			/*
			 *	Assume any unknown ICMP type is an error. This
			 *	isn't specified by the RFC, but think about it..
			 */
			if (*itp > NR_ICMP_TYPES ||
			    icmp_pointers[*itp].error)
				goto out;
		}
	}

	sk = icmp_xmit_lock(net);
	if (sk == NULL)
		return;

	/*
	 *	Construct source address and options.
	 */

	saddr = iph->daddr;
	if (!(rt->rt_flags & RTCF_LOCAL)) {
		struct net_device *dev = NULL;

		if (rt->fl.iif &&
		    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
			dev = dev_get_by_index(net, rt->fl.iif);

		if (dev) {
			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
			dev_put(dev);
		} else {
			/* No inbound device chosen: leave the source unset (0). */
			saddr = 0;
		}
	}

	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
					   IPTOS_PREC_INTERNETCONTROL) :
					  iph->tos;

	if (ip_options_echo(&icmp_param.replyopts, skb_in))
		goto out_unlock;

	/*
	 *	Prepare data for ICMP header.
	 */

	icmp_param.data.icmph.type	 = type;
	icmp_param.data.icmph.code	 = code;
	icmp_param.data.icmph.un.gateway = info;
	icmp_param.data.icmph.checksum	 = 0;
	icmp_param.skb	  = skb_in;
	icmp_param.offset = skb_network_offset(skb_in);
	inet_sk(sk)->tos = tos;
	ipc.addr = iph->saddr;
	ipc.opt = &icmp_param.replyopts;
	ipc.shtx.flags = 0;

	{
		struct flowi fl = {
			.nl_u = {
				.ip4_u = {
					.daddr = icmp_param.replyopts.srr ?
						icmp_param.replyopts.faddr :
						iph->saddr,
					.saddr = saddr,
					.tos = RT_TOS(tos)
				}
			},
			.proto = IPPROTO_ICMP,
			.uli_u = {
				.icmpt = {
					.type = type,
					.code = code
				}
			}
		};
		int err;
		struct rtable *rt2;

		security_skb_classify_flow(skb_in, &fl);
		if (__ip_route_output_key(net, &rt, &fl))
			goto out_unlock;

		/* No need to clone since we're just using its address. */
		rt2 = rt;

		err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
		switch (err) {
		case 0:
			/* xfrm_lookup() replaced the route: use it as is. */
			if (rt != rt2)
				goto route_done;
			break;
		case -EPERM:
			rt = NULL;
			break;
		default:
			goto out_unlock;
		}

		if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
			goto relookup_failed;

		if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
			err = __ip_route_output_key(net, &rt2, &fl);
		else {
			struct flowi fl2 = {};
			struct dst_entry *odst;

			fl2.fl4_dst = fl.fl4_src;
			if (ip_route_output_key(net, &rt2, &fl2))
				goto relookup_failed;

			/*
			 * Ugh! ip_route_input() stores its result on skb_in,
			 * so save the skb's dst first and restore it after.
			 */
			odst = skb_dst(skb_in);
			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
					     RT_TOS(tos), rt2->u.dst.dev);

			dst_release(&rt2->u.dst);
			rt2 = skb_rtable(skb_in);
			skb_dst_set(skb_in, odst);
		}

		if (err)
			goto relookup_failed;

		err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL,
				  XFRM_LOOKUP_ICMP);
		switch (err) {
		case 0:
			dst_release(&rt->u.dst);
			rt = rt2;
			break;
		case -EPERM:
			goto ende;
		default:
relookup_failed:
			/* Fall back to the first route if there is one. */
			if (!rt)
				goto out_unlock;
			break;
		}
	}

route_done:
	if (!icmpv4_xrlim_allow(net, rt, type, code))
		goto ende;

	/* RFC says return as much as we can without exceeding 576 bytes. */

	room = dst_mtu(&rt->u.dst);
	if (room > 576)
		room = 576;
	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
	room -= sizeof(struct icmphdr);

	icmp_param.data_len = skb_in->len - icmp_param.offset;
	if (icmp_param.data_len > room)
		icmp_param.data_len = room;
	icmp_param.head_len = sizeof(struct icmphdr);

	icmp_push_reply(&icmp_param, &ipc, &rt);
ende:
	ip_rt_put(rt);
out_unlock:
	icmp_xmit_unlock(sk);
out:;
}
一般的差錯和請求ICMP報文是通過icmp_send()來發送的,而回顯應答和時間戳記應答報文則是通過icmp_reply()來輸出的。
/* *Driving logic for building and sending ICMP messages. */static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb){struct ipcm_cookie ipc;struct rtable *rt = skb_rtable(skb);struct net *net = dev_net(rt->u.dst.dev);struct sock *sk;struct inet_sock *inet;__be32 daddr;if (ip_options_echo(&icmp_param->replyopts, skb))return;sk = icmp_xmit_lock(net);if (sk == NULL)return;inet = inet_sk(sk);icmp_param->data.icmph.checksum = 0;inet->tos = ip_hdr(skb)->tos;daddr = ipc.addr = rt->rt_src;ipc.opt = NULL;ipc.shtx.flags = 0;if (icmp_param->replyopts.optlen) {ipc.opt = &icmp_param->replyopts;if (ipc.opt->srr)daddr = icmp_param->replyopts.faddr;}{struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr,.saddr = rt->rt_spec_dst,.tos = RT_TOS(ip_hdr(skb)->tos) } }, .proto = IPPROTO_ICMP };security_skb_classify_flow(skb, &fl);if (ip_route_output_key(net, &rt, &fl))goto out_unlock;}if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code))icmp_push_reply(icmp_param, &ipc, &rt);ip_rt_put(rt);out_unlock:icmp_xmit_unlock(sk);}
ICMP:Internet控制報文協議實現學習筆記