diff options
Diffstat (limited to 'net/ipv6/route.c')
| -rw-r--r-- | net/ipv6/route.c | 585 | 
1 files changed, 296 insertions, 289 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982ad3fd..8e80fd279100 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,7 +78,10 @@ static int		 ip6_dst_gc(struct dst_ops *ops);  static int		ip6_pkt_discard(struct sk_buff *skb);  static int		ip6_pkt_discard_out(struct sk_buff *skb);  static void		ip6_link_failure(struct sk_buff *skb); -static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); +static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, +					   struct sk_buff *skb, u32 mtu); +static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk, +					struct sk_buff *skb);  #ifdef CONFIG_IPV6_ROUTE_INFO  static struct rt6_info *rt6_add_route_info(struct net *net, @@ -99,10 +102,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)  	if (!(rt->dst.flags & DST_HOST))  		return NULL; -	if (!rt->rt6i_peer) -		rt6_bind_peer(rt, 1); - -	peer = rt->rt6i_peer; +	peer = rt6_get_peer_create(rt);  	if (peer) {  		u32 *old_p = __DST_METRICS_PTR(old);  		unsigned long prev, new; @@ -123,21 +123,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)  	return p;  } -static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +static inline const void *choose_neigh_daddr(struct rt6_info *rt, +					     struct sk_buff *skb, +					     const void *daddr)  {  	struct in6_addr *p = &rt->rt6i_gateway;  	if (!ipv6_addr_any(p))  		return (const void *) p; +	else if (skb) +		return &ipv6_hdr(skb)->daddr;  	return daddr;  } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, +					  struct sk_buff *skb, +					  const void *daddr)  {  	struct rt6_info *rt = (struct rt6_info *) dst;  	struct neighbour *n; -	daddr = choose_neigh_daddr(rt, daddr); +	daddr = choose_neigh_daddr(rt, skb, daddr);  	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);  	if (n)  		return n; @@ -152,7 +158,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)  		if (IS_ERR(n))  			return PTR_ERR(n);  	} -	dst_set_neighbour(&rt->dst, n); +	rt->n = n;  	return 0;  } @@ -171,6 +177,7 @@ static struct dst_ops ip6_dst_ops_template = {  	.negative_advice	=	ip6_negative_advice,  	.link_failure		=	ip6_link_failure,  	.update_pmtu		=	ip6_rt_update_pmtu, +	.redirect		=	rt6_do_redirect,  	.local_out		=	__ip6_local_out,  	.neigh_lookup		=	ip6_neigh_lookup,  }; @@ -182,7 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)  	return mtu ? : dst->dev->mtu;  } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, +					 struct sk_buff *skb, u32 mtu) +{ +} + +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, +				      struct sk_buff *skb)  {  } @@ -200,6 +213,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {  	.mtu			=	ip6_blackhole_mtu,  	.default_advmss		=	ip6_default_advmss,  	.update_pmtu		=	ip6_rt_blackhole_update_pmtu, +	.redirect		=	ip6_rt_blackhole_redirect,  	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,  	.neigh_lookup		=	ip6_neigh_lookup,  }; @@ -261,16 +275,20 @@ static struct rt6_info ip6_blk_hole_entry_template = {  #endif  /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, +static inline struct rt6_info *ip6_dst_alloc(struct net *net,  					     struct net_device *dev, -					     int flags) +					     int flags, +					     struct fib6_table *table)  { -	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); +	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, +					0, DST_OBSOLETE_NONE, flags); -	if (rt) -		memset(&rt->rt6i_table, 0, -		       sizeof(*rt) - sizeof(struct dst_entry)); +	if (rt) { +		struct dst_entry *dst = &rt->dst; +		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); +		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); +	}  	return rt;  } @@ -278,7 +296,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)  {  	struct rt6_info *rt = (struct rt6_info *)dst;  	struct inet6_dev *idev = rt->rt6i_idev; -	struct inet_peer *peer = rt->rt6i_peer; + +	if (rt->n) +		neigh_release(rt->n);  	if (!(rt->dst.flags & DST_HOST))  		dst_destroy_metrics_generic(dst); @@ -291,8 +311,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)  	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)  		dst_release(dst->from); -	if (peer) { -		rt->rt6i_peer = NULL; +	if (rt6_has_peer(rt)) { +		struct inet_peer *peer = rt6_peer_ptr(rt);  		inet_putpeer(peer);  	}  } @@ -306,13 +326,20 @@ static u32 rt6_peer_genid(void)  void rt6_bind_peer(struct rt6_info *rt, int create)  { +	struct inet_peer_base *base;  	struct inet_peer *peer; -	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); -	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) -		inet_putpeer(peer); -	else -		rt->rt6i_peer_genid = rt6_peer_genid(); +	base = inetpeer_base_ptr(rt->_rt6i_peer); +	if (!base) +		return; + +	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); +	if (peer) { +		if (!rt6_set_peer(rt, peer)) +			inet_putpeer(peer); +		else +			rt->rt6i_peer_genid = rt6_peer_genid(); +	}  }  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -323,12 +350,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,  	struct net_device *loopback_dev =  		dev_net(dev)->loopback_dev; -	if (dev != loopback_dev && idev && idev->dev == dev) { -		struct inet6_dev *loopback_idev = -			in6_dev_get(loopback_dev); -		if (loopback_idev) { -			rt->rt6i_idev = loopback_idev; -			in6_dev_put(idev); +	if (dev != loopback_dev) { +		if (idev && idev->dev == dev) { +			struct inet6_dev *loopback_idev = +				in6_dev_get(loopback_dev); +			if (loopback_idev) { +				rt->rt6i_idev = loopback_idev; +				in6_dev_put(idev); +			} +		} +		if (rt->n && rt->n->dev == dev) { +			rt->n->dev = loopback_dev; +			dev_hold(loopback_dev); +			dev_put(dev);  		}  	}  } @@ -418,7 +452,7 @@ static void rt6_probe(struct rt6_info *rt)  	 * to no more than one per minute.  	 */  	rcu_read_lock(); -	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; +	neigh = rt ? rt->n : NULL;  	if (!neigh || (neigh->nud_state & NUD_VALID))  		goto out;  	read_lock_bh(&neigh->lock); @@ -465,7 +499,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)  	int m;  	rcu_read_lock(); -	neigh = dst_get_neighbour_noref(&rt->dst); +	neigh = rt->n;  	if (rt->rt6i_flags & RTF_NONEXTHOP ||  	    !(rt->rt6i_flags & RTF_GATEWAY))  		m = 1; @@ -812,7 +846,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,  	if (rt) {  		rt->rt6i_flags |= RTF_CACHE; -		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); +		rt->n = neigh_clone(ort->n);  	}  	return rt;  } @@ -846,7 +880,7 @@ restart:  	dst_hold(&rt->dst);  	read_unlock_bh(&table->tb6_lock); -	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) +	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))  		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);  	else if (!(rt->dst.flags & DST_HOST))  		nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -931,6 +965,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,  {  	int flags = 0; +	fl6->flowi6_iif = net->loopback_dev->ifindex; +  	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))  		flags |= RT6_LOOKUP_F_IFACE; @@ -949,12 +985,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori  	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;  	struct dst_entry *new = NULL; -	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); +	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);  	if (rt) { -		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); -  		new = &rt->dst; +		memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); +		rt6_init_peer(rt, net->ipv6.peers); +  		new->__use = 1;  		new->input = dst_discard;  		new->output = dst_discard; @@ -996,7 +1033,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)  	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {  		if (rt->rt6i_peer_genid != rt6_peer_genid()) { -			if (!rt->rt6i_peer) +			if (!rt6_has_peer(rt))  				rt6_bind_peer(rt, 0);  			rt->rt6i_peer_genid = rt6_peer_genid();  		} @@ -1038,11 +1075,15 @@ static void ip6_link_failure(struct sk_buff *skb)  	}  } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, +			       struct sk_buff *skb, u32 mtu)  {  	struct rt6_info *rt6 = (struct rt6_info*)dst; +	dst_confirm(dst);  	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { +		struct net *net = dev_net(dst->dev); +  		rt6->rt6i_flags |= RTF_MODIFIED;  		if (mtu < IPV6_MIN_MTU) {  			u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1051,9 +1092,66 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)  			dst_metric_set(dst, RTAX_FEATURES, features);  		}  		dst_metric_set(dst, RTAX_MTU, mtu); +		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);  	}  } +void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, +		     int oif, u32 mark) +{ +	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; +	struct dst_entry *dst; +	struct flowi6 fl6; + +	memset(&fl6, 0, sizeof(fl6)); +	fl6.flowi6_oif = oif; +	fl6.flowi6_mark = mark; +	fl6.flowi6_flags = 0; +	fl6.daddr = iph->daddr; +	fl6.saddr = iph->saddr; +	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + +	dst = ip6_route_output(net, NULL, &fl6); +	if (!dst->error) +		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); +	dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_update_pmtu); + +void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) +{ +	ip6_update_pmtu(skb, sock_net(sk), mtu, +			sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); + +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +{ +	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; +	struct dst_entry *dst; +	struct flowi6 fl6; + +	memset(&fl6, 0, sizeof(fl6)); +	fl6.flowi6_oif = oif; +	fl6.flowi6_mark = mark; +	fl6.flowi6_flags = 0; +	fl6.daddr = iph->daddr; +	fl6.saddr = iph->saddr; +	fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + +	dst = ip6_route_output(net, NULL, &fl6); +	if (!dst->error) +		rt6_do_redirect(dst, NULL, skb); +	dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_redirect); + +void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) +{ +	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_redirect); +  static unsigned int ip6_default_advmss(const struct dst_entry *dst)  {  	struct net_device *dev = dst->dev; @@ -1110,7 +1208,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,  	if (unlikely(!idev))  		return ERR_PTR(-ENODEV); -	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); +	rt = ip6_dst_alloc(net, dev, 0, NULL);  	if (unlikely(!rt)) {  		in6_dev_put(idev);  		dst = ERR_PTR(-ENOMEM); @@ -1120,7 +1218,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,  	if (neigh)  		neigh_hold(neigh);  	else { -		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); +		neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);  		if (IS_ERR(neigh)) {  			in6_dev_put(idev);  			dst_free(&rt->dst); @@ -1130,7 +1228,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,  	rt->dst.flags |= DST_HOST;  	rt->dst.output  = ip6_output; -	dst_set_neighbour(&rt->dst, neigh); +	rt->n = neigh;  	atomic_set(&rt->dst.__refcnt, 1);  	rt->rt6i_dst.addr = fl6->daddr;  	rt->rt6i_dst.plen = 128; @@ -1292,7 +1390,7 @@ int ip6_route_add(struct fib6_config *cfg)  	if (!table)  		goto out; -	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); +	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);  	if (!rt) {  		err = -ENOMEM; @@ -1546,107 +1644,94 @@ static int ip6_route_del(struct fib6_config *cfg)  	return err;  } -/* - *	Handle redirects - */ -struct ip6rd_flowi { -	struct flowi6 fl6; -	struct in6_addr gateway; -}; - -static struct rt6_info *__ip6_route_redirect(struct net *net, -					     struct fib6_table *table, -					     struct flowi6 *fl6, -					     int flags) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)  { -	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; -	struct rt6_info *rt; -	struct fib6_node *fn; +	struct net *net = dev_net(skb->dev); +	struct netevent_redirect netevent; +	struct rt6_info *rt, *nrt = NULL; +	const struct in6_addr *target; +	struct ndisc_options ndopts; +	const struct in6_addr *dest; +	struct neighbour *old_neigh; +	struct inet6_dev *in6_dev; +	struct neighbour *neigh; +	struct icmp6hdr *icmph; +	int optlen, on_link; +	u8 *lladdr; -	/* -	 * Get the "current" route for this destination and -	 * check if the redirect has come from approriate router. -	 * -	 * RFC 2461 specifies that redirects should only be -	 * accepted if they come from the nexthop to the target. -	 * Due to the way the routes are chosen, this notion -	 * is a bit fuzzy and one might need to check all possible -	 * routes. -	 */ +	optlen = skb->tail - skb->transport_header; +	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); -	read_lock_bh(&table->tb6_lock); -	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); -restart: -	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { -		/* -		 * Current route is on-link; redirect is always invalid. -		 * -		 * Seems, previous statement is not true. It could -		 * be node, which looks for us as on-link (f.e. proxy ndisc) -		 * But then router serving it might decide, that we should -		 * know truth 8)8) --ANK (980726). -		 */ -		if (rt6_check_expired(rt)) -			continue; -		if (!(rt->rt6i_flags & RTF_GATEWAY)) -			continue; -		if (fl6->flowi6_oif != rt->dst.dev->ifindex) -			continue; -		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) -			continue; -		break; +	if (optlen < 0) { +		net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); +		return;  	} -	if (!rt) -		rt = net->ipv6.ip6_null_entry; -	BACKTRACK(net, &fl6->saddr); -out: -	dst_hold(&rt->dst); +	icmph = icmp6_hdr(skb); +	target = (const struct in6_addr *) (icmph + 1); +	dest = target + 1; -	read_unlock_bh(&table->tb6_lock); - -	return rt; -}; - -static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, -					   const struct in6_addr *src, -					   const struct in6_addr *gateway, -					   struct net_device *dev) -{ -	int flags = RT6_LOOKUP_F_HAS_SADDR; -	struct net *net = dev_net(dev); -	struct ip6rd_flowi rdfl = { -		.fl6 = { -			.flowi6_oif = dev->ifindex, -			.daddr = *dest, -			.saddr = *src, -		}, -	}; +	if (ipv6_addr_is_multicast(dest)) { +		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); +		return; +	} -	rdfl.gateway = *gateway; +	on_link = 0; +	if (ipv6_addr_equal(dest, target)) { +		on_link = 1; +	} else if (ipv6_addr_type(target) != +		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { +		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); +		return; +	} -	if (rt6_need_strict(dest)) -		flags |= RT6_LOOKUP_F_IFACE; +	in6_dev = __in6_dev_get(skb->dev); +	if (!in6_dev) +		return; +	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) +		return; -	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, -						   flags, __ip6_route_redirect); -} +	/* RFC2461 8.1: +	 *	The IP source address of the Redirect MUST be the same as the current +	 *	first-hop router for the specified ICMP Destination Address. +	 */ -void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, -		  const struct in6_addr *saddr, -		  struct neighbour *neigh, u8 *lladdr, int on_link) -{ -	struct rt6_info *rt, *nrt = NULL; -	struct netevent_redirect netevent; -	struct net *net = dev_net(neigh->dev); +	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { +		net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); +		return; +	} -	rt = ip6_route_redirect(dest, src, saddr, neigh->dev); +	lladdr = NULL; +	if (ndopts.nd_opts_tgt_lladdr) { +		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, +					     skb->dev); +		if (!lladdr) { +			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); +			return; +		} +	} +	rt = (struct rt6_info *) dst;  	if (rt == net->ipv6.ip6_null_entry) {  		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); -		goto out; +		return;  	} +	/* Redirect received -> path was valid. +	 * Look, redirects are sent only in response to data packets, +	 * so that this nexthop apparently is reachable. --ANK +	 */ +	dst_confirm(&rt->dst); + +	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); +	if (!neigh) +		return; + +	/* Duplicate redirect: silently ignore. */ +	old_neigh = rt->n; +	if (neigh == old_neigh) +		goto out; +  	/*  	 *	We have finally decided to accept it.  	 */ @@ -1658,17 +1743,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,  				     NEIGH_UPDATE_F_ISROUTER))  		     ); -	/* -	 * Redirect received -> path was valid. -	 * Look, redirects are sent only in response to data packets, -	 * so that this nexthop apparently is reachable. --ANK -	 */ -	dst_confirm(&rt->dst); - -	/* Duplicate redirect: silently ignore. */ -	if (neigh == dst_get_neighbour_noref_raw(&rt->dst)) -		goto out; -  	nrt = ip6_rt_copy(rt, dest);  	if (!nrt)  		goto out; @@ -1678,132 +1752,25 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,  		nrt->rt6i_flags &= ~RTF_GATEWAY;  	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; -	dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); +	nrt->n = neigh_clone(neigh);  	if (ip6_ins_rt(nrt))  		goto out;  	netevent.old = &rt->dst; +	netevent.old_neigh = old_neigh;  	netevent.new = &nrt->dst; +	netevent.new_neigh = neigh; +	netevent.daddr = dest;  	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);  	if (rt->rt6i_flags & RTF_CACHE) { +		rt = (struct rt6_info *) dst_clone(&rt->dst);  		ip6_del_rt(rt); -		return; -	} - -out: -	dst_release(&rt->dst); -} - -/* - *	Handle ICMP "packet too big" messages - *	i.e. Path MTU discovery - */ - -static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, -			     struct net *net, u32 pmtu, int ifindex) -{ -	struct rt6_info *rt, *nrt; -	int allfrag = 0; -again: -	rt = rt6_lookup(net, daddr, saddr, ifindex, 0); -	if (!rt) -		return; - -	if (rt6_check_expired(rt)) { -		ip6_del_rt(rt); -		goto again;  	} -	if (pmtu >= dst_mtu(&rt->dst)) -		goto out; - -	if (pmtu < IPV6_MIN_MTU) { -		/* -		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link -		 * MTU (1280) and a fragment header should always be included -		 * after a node receiving Too Big message reporting PMTU is -		 * less than the IPv6 Minimum Link MTU. -		 */ -		pmtu = IPV6_MIN_MTU; -		allfrag = 1; -	} - -	/* New mtu received -> path was valid. -	   They are sent only in response to data packets, -	   so that this nexthop apparently is reachable. --ANK -	 */ -	dst_confirm(&rt->dst); - -	/* Host route. If it is static, it would be better -	   not to override it, but add new one, so that -	   when cache entry will expire old pmtu -	   would return automatically. -	 */ -	if (rt->rt6i_flags & RTF_CACHE) { -		dst_metric_set(&rt->dst, RTAX_MTU, pmtu); -		if (allfrag) { -			u32 features = dst_metric(&rt->dst, RTAX_FEATURES); -			features |= RTAX_FEATURE_ALLFRAG; -			dst_metric_set(&rt->dst, RTAX_FEATURES, features); -		} -		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); -		rt->rt6i_flags |= RTF_MODIFIED; -		goto out; -	} - -	/* Network route. -	   Two cases are possible: -	   1. It is connected route. Action: COW -	   2. It is gatewayed route or NONEXTHOP route. Action: clone it. -	 */ -	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) -		nrt = rt6_alloc_cow(rt, daddr, saddr); -	else -		nrt = rt6_alloc_clone(rt, daddr); - -	if (nrt) { -		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); -		if (allfrag) { -			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); -			features |= RTAX_FEATURE_ALLFRAG; -			dst_metric_set(&nrt->dst, RTAX_FEATURES, features); -		} - -		/* According to RFC 1981, detecting PMTU increase shouldn't be -		 * happened within 5 mins, the recommended timer is 10 mins. -		 * Here this route expiration time is set to ip6_rt_mtu_expires -		 * which is 10 mins. After 10 mins the decreased pmtu is expired -		 * and detecting PMTU increase will be automatically happened. -		 */ -		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); -		nrt->rt6i_flags |= RTF_DYNAMIC; -		ip6_ins_rt(nrt); -	}  out: -	dst_release(&rt->dst); -} - -void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, -			struct net_device *dev, u32 pmtu) -{ -	struct net *net = dev_net(dev); - -	/* -	 * RFC 1981 states that a node "MUST reduce the size of the packets it -	 * is sending along the path" that caused the Packet Too Big message. -	 * Since it's not possible in the general case to determine which -	 * interface was used to send the original packet, we update the MTU -	 * on the interface that will be used to send future packets. We also -	 * update the MTU on the interface that received the Packet Too Big in -	 * case the original packet was forced out that interface with -	 * SO_BINDTODEVICE or similar. This is the next best thing to the -	 * correct behaviour, which would be to update the MTU on all -	 * interfaces. -	 */ -	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); -	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); +	neigh_release(neigh);  }  /* @@ -1814,8 +1781,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,  				    const struct in6_addr *dest)  {  	struct net *net = dev_net(ort->dst.dev); -	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, -					    ort->dst.dev, 0); +	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, +					    ort->rt6i_table);  	if (rt) {  		rt->dst.input = ort->dst.input; @@ -2099,8 +2066,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,  				    bool anycast)  {  	struct net *net = dev_net(idev->dev); -	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, -					    net->loopback_dev, 0); +	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);  	int err;  	if (!rt) { @@ -2396,13 +2362,11 @@ static int rt6_fill_node(struct net *net,  			 int iif, int type, u32 pid, u32 seq,  			 int prefix, int nowait, unsigned int flags)  { -	const struct inet_peer *peer;  	struct rtmsg *rtm;  	struct nlmsghdr *nlh;  	long expires;  	u32 table;  	struct neighbour *n; -	u32 ts, tsage;  	if (prefix) {	/* user wants prefix routes only */  		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2440,10 +2404,12 @@ static int rt6_fill_node(struct net *net,  	rtm->rtm_protocol = rt->rt6i_protocol;  	if (rt->rt6i_flags & RTF_DYNAMIC)  		rtm->rtm_protocol = RTPROT_REDIRECT; -	else if (rt->rt6i_flags & RTF_ADDRCONF) -		rtm->rtm_protocol = RTPROT_KERNEL; -	else if (rt->rt6i_flags & RTF_DEFAULT) -		rtm->rtm_protocol = RTPROT_RA; +	else if (rt->rt6i_flags & RTF_ADDRCONF) { +		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) +			rtm->rtm_protocol = RTPROT_RA; +		else +			rtm->rtm_protocol = RTPROT_KERNEL; +	}  	if (rt->rt6i_flags & RTF_CACHE)  		rtm->rtm_flags |= RTM_F_CLONED; @@ -2500,7 +2466,7 @@ static int rt6_fill_node(struct net *net,  		goto nla_put_failure;  	rcu_read_lock(); -	n = dst_get_neighbour_noref(&rt->dst); +	n = rt->n;  	if (n) {  		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {  			rcu_read_unlock(); @@ -2514,22 +2480,10 @@ static int rt6_fill_node(struct net *net,  		goto nla_put_failure;  	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))  		goto nla_put_failure; -	if (!(rt->rt6i_flags & RTF_EXPIRES)) -		expires = 0; -	else if (rt->dst.expires - jiffies < INT_MAX) -		expires = rt->dst.expires - jiffies; -	else -		expires = INT_MAX; -	peer = rt->rt6i_peer; -	ts = tsage = 0; -	if (peer && peer->tcp_ts_stamp) { -		ts = peer->tcp_ts; -		tsage = get_seconds() - peer->tcp_ts_stamp; -	} +	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; -	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage, -			       expires, rt->dst.error) < 0) +	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)  		goto nla_put_failure;  	return nlmsg_end(skb, nlh); @@ -2722,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)  	seq_puts(m, "00000000000000000000000000000000 00 ");  #endif  	rcu_read_lock(); -	n = dst_get_neighbour_noref(&rt->dst); +	n = rt->n;  	if (n) {  		seq_printf(m, "%pi6", n->primary_key);  	} else { @@ -2957,10 +2911,6 @@ static int __net_init ip6_route_net_init(struct net *net)  	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;  	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; -#ifdef CONFIG_PROC_FS -	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); -	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#endif  	net->ipv6.ip6_rt_gc_expire = 30*HZ;  	ret = 0; @@ -2981,10 +2931,6 @@ out_ip6_dst_ops:  static void __net_exit ip6_route_net_exit(struct net *net)  { -#ifdef CONFIG_PROC_FS -	proc_net_remove(net, "ipv6_route"); -	proc_net_remove(net, "rt6_stats"); -#endif  	kfree(net->ipv6.ip6_null_entry);  #ifdef CONFIG_IPV6_MULTIPLE_TABLES  	kfree(net->ipv6.ip6_prohibit_entry); @@ -2993,11 +2939,58 @@ static void __net_exit ip6_route_net_exit(struct net *net)  	dst_entries_destroy(&net->ipv6.ip6_dst_ops);  } +static int __net_init ip6_route_net_init_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS +	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); +	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif +	return 0; +} + +static void __net_exit ip6_route_net_exit_late(struct net *net) +{ +#ifdef CONFIG_PROC_FS +	proc_net_remove(net, "ipv6_route"); +	proc_net_remove(net, "rt6_stats"); +#endif +} +  static struct pernet_operations ip6_route_net_ops = {  	.init = ip6_route_net_init,  	.exit = ip6_route_net_exit,  }; +static int __net_init ipv6_inetpeer_init(struct net *net) +{ +	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + +	if (!bp) +		return -ENOMEM; +	inet_peer_base_init(bp); +	net->ipv6.peers = bp; +	return 0; +} + +static void __net_exit ipv6_inetpeer_exit(struct net *net) +{ +	struct inet_peer_base *bp = net->ipv6.peers; + +	net->ipv6.peers = NULL; +	inetpeer_invalidate_tree(bp); +	kfree(bp); +} + +static struct pernet_operations ipv6_inetpeer_ops = { +	.init	=	ipv6_inetpeer_init, +	.exit	=	ipv6_inetpeer_exit, +}; + +static struct pernet_operations ip6_route_net_late_ops = { +	.init = ip6_route_net_init_late, +	.exit = ip6_route_net_exit_late, +}; +  static struct notifier_block ip6_route_dev_notifier = {  	.notifier_call = ip6_route_dev_notify,  	.priority = 0, @@ -3018,10 +3011,14 @@ int __init ip6_route_init(void)  	if (ret)  		goto out_kmem_cache; -	ret = register_pernet_subsys(&ip6_route_net_ops); +	ret = register_pernet_subsys(&ipv6_inetpeer_ops);  	if (ret)  		goto out_dst_entries; +	ret = register_pernet_subsys(&ip6_route_net_ops); +	if (ret) +		goto out_register_inetpeer; +  	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;  	/* Registering of the loopback is done before this portion of code, @@ -3047,19 +3044,25 @@ int __init ip6_route_init(void)  	if (ret)  		goto xfrm6_init; +	ret = register_pernet_subsys(&ip6_route_net_late_ops); +	if (ret) +		goto fib6_rules_init; +  	ret = -ENOBUFS;  	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||  	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||  	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) -		goto fib6_rules_init; +		goto out_register_late_subsys;  	ret = register_netdevice_notifier(&ip6_route_dev_notifier);  	if (ret) -		goto fib6_rules_init; +		goto out_register_late_subsys;  out:  	return ret; +out_register_late_subsys: +	unregister_pernet_subsys(&ip6_route_net_late_ops);  fib6_rules_init:  	fib6_rules_cleanup();  xfrm6_init: @@ -3068,6 +3071,8 @@ out_fib6_init:  	fib6_gc_cleanup();  out_register_subsys:  	unregister_pernet_subsys(&ip6_route_net_ops); +out_register_inetpeer: +	unregister_pernet_subsys(&ipv6_inetpeer_ops);  out_dst_entries:  	dst_entries_destroy(&ip6_dst_blackhole_ops);  out_kmem_cache: @@ -3078,9 +3083,11 @@ out_kmem_cache:  void ip6_route_cleanup(void)  {  	unregister_netdevice_notifier(&ip6_route_dev_notifier); +	unregister_pernet_subsys(&ip6_route_net_late_ops);  	fib6_rules_cleanup();  	xfrm6_fini();  	fib6_gc_cleanup(); +	unregister_pernet_subsys(&ipv6_inetpeer_ops);  	unregister_pernet_subsys(&ip6_route_net_ops);  	dst_entries_destroy(&ip6_dst_blackhole_ops);  	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);  | 
