From 81aded24675ebda5de8a68843250ad15584ac38a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 15 Jun 2012 14:54:11 -0700 Subject: ipv6: Handle PMTU in ICMP error handlers. One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order whereas ipv4 passes it around in host byte order. Like the ipv4 side, we have two helper functions. One for when we have a socket context and one for when we do not. ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender. This patch allows us to get rid of rt6_do_pmtu_disc(). It handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route. In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there. Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing since this is the context where we have the most accurate context in which to make a PMTU update, namely we have a fully connected socket and associated cached socket route. Signed-off-by: David S. Miller --- net/ipv6/udp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f05099fc5901..051ad481973f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) + ip6_sk_update_pmtu(skb, sk, info); + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) -- cgit v1.2.3 From 22911fc581f6a241e2897a7a8603e97344a6ec82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jun 2012 00:23:44 +0000 Subject: net: skb_free_datagram_locked() doesnt drop all packets dropwatch wrongly diagnose all received UDP packets as drops. This patch removes trace_kfree_skb() done in skb_free_datagram_locked(). Locations calling skb_free_datagram_locked() should do it on their own. As a result, drops are accounted on the right function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 1 - net/ipv4/udp.c | 5 ++++- net/ipv6/udp.c | 8 +++++--- net/sunrpc/svcsock.c | 12 ++++++------ 4 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/net/core/datagram.c b/net/core/datagram.c index ae6acf6a3dea..0337e2b76862 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -248,7 +248,6 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ - trace_kfree_skb(skb, skb_free_datagram_locked); __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db017efb76ea..ee37d47d472e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -108,6 +108,7 @@ #include #include #include +#include #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -1220,8 +1221,10 @@ try_again: goto csum_copy_err; } - if (err) + if (unlikely(err)) { + trace_kfree_skb(skb, udp_recvmsg); goto out_free; + } if (!peeked) UDP_INC_STATS_USER(sock_net(sk), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 051ad481973f..1ecd10249488 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -48,6 +48,7 @@ #include #include +#include #include "udp_impl.h" int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) @@ -385,15 +386,16 @@ try_again: if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); + msg->msg_iov, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) goto csum_copy_err; } - if (err) + if (unlikely(err)) { + trace_kfree_skb(skb, udpv6_recvmsg); goto out_free; - + } if (!peeked) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a6de09de5d21..18bc130255a7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -619,6 +620,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (!svc_udp_get_dest_address(rqstp, cmh)) { net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", cmh->cmsg_level, cmh->cmsg_type); +out_free: + trace_kfree_skb(skb, svc_udp_recvfrom); skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } @@ -630,8 +633,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { local_bh_enable(); /* checksum error */ - skb_free_datagram_locked(svsk->sk_sk, skb); - return 0; + goto out_free; } local_bh_enable(); skb_free_datagram_locked(svsk->sk_sk, skb); @@ -640,10 +642,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); rqstp->rq_arg.head[0].iov_len = len; - if (skb_checksum_complete(skb)) { - skb_free_datagram_locked(svsk->sk_sk, skb); - return 0; - } + if (skb_checksum_complete(skb)) + goto out_free; rqstp->rq_xprt_ctxt = skb; } -- cgit v1.2.3 From ec18d9a2691d69cd14b48f9b919fddcef28b7f5c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:25:15 -0700 Subject: ipv6: Add redirect support to all protocol icmp error handlers. Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 ++ net/dccp/ipv6.c | 7 +++++++ net/ipv6/ah6.c | 10 ++++++---- net/ipv6/esp6.c | 11 +++++++---- net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/ipcomp6.c | 11 +++++++---- net/ipv6/raw.c | 2 ++ net/ipv6/sit.c | 8 ++++++++ net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 2 ++ net/ipv6/xfrm6_policy.c | 9 +++++++++ net/sctp/input.c | 4 ++-- net/sctp/ipv6.c | 3 +++ 13 files changed, 69 insertions(+), 14 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a2ef81466b00..1f2735dba753 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -162,6 +162,8 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *, void sctp_err_finish(struct sock *, struct sctp_association *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); +void sctp_icmp_redirect(struct sock *, struct sctp_transport *, + struct sk_buff *); void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 02162cfa5048..b4d7d28ce6d2 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -130,6 +130,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst, skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 49d4d26bda88..7e6139508ee7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -613,16 +613,18 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", - ntohl(ah->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 89a615ba84f8..6dc7fd353ef5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -434,16 +434,19 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", - ntohl(esph->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6af3fcfdcbbd..0b5b60ec6f4a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -550,6 +550,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; + case NDISC_REDIRECT: + rel_type = ICMP_REDIRECT; + rel_code = ICMP_REDIR_HOST; default: return 0; } @@ -608,6 +611,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); } + if (rel_type == ICMP_REDIRECT) { + if (skb_dst(skb2)->ops->redirect) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 92832385a8ef..7af5aee75d98 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,7 +64,9 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG) + if (type != ICMPV6_DEST_UNREACH && + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; spi = htonl(ntohs(ipcomph->cpi)); @@ -73,9 +75,10 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", - spi, &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b5c1dcb27737..ef0579d5bca6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -332,6 +332,8 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 49aea94c9be3..fbf1622fdeef 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -539,6 +539,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (code != ICMP_EXC_TTL) return 0; break; + case ICMP_REDIRECT: + break; } err = -ENOENT; @@ -557,6 +559,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = 0; goto out; } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + IPPROTO_IPV6, 0); + err = 0; + goto out; + } if (t->parms.iph.daddr == 0) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70458a9cd837..7249e4bb9b8a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -363,6 +363,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst,skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1ecd10249488..99d0077b56b8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -483,6 +483,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); np = inet6_sk(sk); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index bb02038b822b..f5a9cb8257b9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -215,6 +215,14 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + + path->ops->redirect(path, skb); +} + static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -261,6 +269,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, diff --git a/net/sctp/input.c b/net/sctp/input.c index 9fb4247f9a99..5943b7d77ddb 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -423,8 +423,8 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } -static void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, - struct sk_buff *skb) +void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, + struct sk_buff *skb) { struct dst_entry *dst; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 91f479121c55..ed7139ea7978 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -185,6 +185,9 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out_unlock; } break; + case NDISC_REDIRECT: + sctp_icmp_redirect(sk, transport, skb); + break; default: break; } -- cgit v1.2.3