diff options
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 1804 |
1 files changed, 945 insertions, 859 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f4d61736c41a..cc24ed3bc334 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,7 +78,6 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); @@ -97,25 +96,24 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static void rt6_dst_from_metrics_check(struct rt6_info *rt); -static int rt6_score_route(struct rt6_info *rt, int oif, int strict); -static size_t rt6_nlmsg_size(struct rt6_info *rt); -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static size_t rt6_nlmsg_size(struct fib6_info *rt); +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev); @@ -184,29 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) } } -static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) -{ - return dst_metrics_write_ptr(&rt->from->dst); -} - -static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) -{ - struct rt6_info *rt = (struct rt6_info *)dst; - - if (rt->rt6i_flags & RTF_PCPU) - return rt6_pcpu_cow_metrics(rt); - else if (rt->rt6i_flags & RTF_CACHE) - return NULL; - else - return dst_cow_metrics_generic(dst, old); -} - -static inline const void *choose_neigh_daddr(struct rt6_info *rt, +static inline const void *choose_neigh_daddr(const struct in6_addr *p, struct sk_buff *skb, const void *daddr) { - struct in6_addr *p = &rt->rt6i_gateway; - if (!ipv6_addr_any(p)) return (const void *) p; else if (skb) @@ -214,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt, return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, + struct sk_buff *skb, + const void *daddr) { - struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(dst->dev, daddr); + daddr = choose_neigh_daddr(gw, skb, daddr); + n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dst->dev); + return neigh_create(&nd_tbl, daddr, dev); +} + +static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + + return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) @@ -233,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; - daddr = choose_neigh_daddr(rt, NULL, daddr); + daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -250,7 +238,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .mtu = ip6_mtu, - .cow_metrics = ipv6_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -258,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = { .update_pmtu = ip6_rt_update_pmtu, .redirect = rt6_do_redirect, .local_out = __ip6_local_out, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, .confirm_neigh = ip6_confirm_neigh, }; @@ -288,13 +276,22 @@ static struct dst_ops ip6_dst_blackhole_ops = { .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, }; static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 0, }; +static const struct fib6_info fib6_null_entry_template = { + .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), + .fib6_protocol = RTPROT_KERNEL, + .fib6_metric = ~(u32)0, + .fib6_ref = ATOMIC_INIT(1), + .fib6_type = RTN_UNREACHABLE, + .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, +}; + static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -305,9 +302,6 @@ static const struct rt6_info ip6_null_entry_template = { .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -322,9 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = { .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; static const struct rt6_info ip6_blk_hole_entry_template = { @@ -337,9 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #endif @@ -349,14 +337,12 @@ static void rt6_info_init(struct rt6_info *rt) struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); INIT_LIST_HEAD(&rt->rt6i_uncached); } /* allocate dst with ip6_dst_ops */ -static struct rt6_info *__ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, flags); @@ -368,34 +354,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } - -struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) -{ - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); - - if (rt) { - rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); - if (!rt->rt6i_pcpu) { - dst_release_immediate(&rt->dst); - return NULL; - } - } - - return rt; -} EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from; struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -403,14 +370,12 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); - if (bucket) { - rt->rt6i_exception_bucket = NULL; - kfree(bucket); - } - rt->from = NULL; - dst_release(&from->dst); + rcu_read_lock(); + from = rcu_dereference(rt->from); + rcu_assign_pointer(rt->from, NULL); + fib6_info_release(from); + rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -440,23 +405,27 @@ static bool __rt6_check_expired(const struct rt6_info *rt) static bool rt6_check_expired(const struct rt6_info *rt) { + struct fib6_info *from; + + from = rcu_dereference(rt->from); + if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; - } else if (rt->from) { + } else if (from) { return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || - rt6_check_expired(rt->from); + fib6_check_expired(from); } return false; } -static struct rt6_info *rt6_multipath_select(const struct net *net, - struct rt6_info *match, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, - int strict) +struct fib6_info *fib6_multipath_select(const struct net *net, + struct fib6_info *match, + struct flowi6 *fl6, int oif, + const struct sk_buff *skb, + int strict) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@ -464,12 +433,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); - if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) return match; - list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, - rt6i_siblings) { - if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) + list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, + fib6_siblings) { + int nh_upper_bound; + + nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(sibling, oif, strict) < 0) break; @@ -484,38 +456,27 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, * Route lookup. rcu_read_lock() should be held. */ -static inline struct rt6_info *rt6_device_match(struct net *net, - struct rt6_info *rt, +static inline struct fib6_info *rt6_device_match(struct net *net, + struct fib6_info *rt, const struct in6_addr *saddr, int oif, int flags) { - struct rt6_info *local = NULL; - struct rt6_info *sprt; + struct fib6_info *sprt; - if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) + if (!oif && ipv6_addr_any(saddr) && + !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) return rt; - for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { - struct net_device *dev = sprt->dst.dev; + for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { + const struct net_device *dev = sprt->fib6_nh.nh_dev; - if (sprt->rt6i_nh_flags & RTNH_F_DEAD) + if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; if (oif) { if (dev->ifindex == oif) return sprt; - if (dev->flags & IFF_LOOPBACK) { - if (!sprt->rt6i_idev || - sprt->rt6i_idev->dev->ifindex != oif) { - if (flags & RT6_LOOKUP_F_IFACE) - continue; - if (local && - local->rt6i_idev->dev->ifindex == oif) - continue; - } - local = sprt; - } } else { if (ipv6_chk_addr(net, saddr, dev, flags & RT6_LOOKUP_F_IFACE)) @@ -523,15 +484,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } } - if (oif) { - if (local) - return local; + if (oif && flags & RT6_LOOKUP_F_IFACE) + return net->ipv6.fib6_null_entry; - if (flags & RT6_LOOKUP_F_IFACE) - return net->ipv6.ip6_null_entry; - } - - return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; + return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -553,10 +509,13 @@ static void rt6_probe_deferred(struct work_struct *w) kfree(work); } -static void rt6_probe(struct rt6_info *rt) +static void rt6_probe(struct fib6_info *rt) { struct __rt6_probe_work *work; + const struct in6_addr *nh_gw; struct neighbour *neigh; + struct net_device *dev; + /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -565,20 +524,25 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) + if (!rt || !(rt->fib6_flags & RTF_GATEWAY)) return; + + nh_gw = &rt->fib6_nh.nh_gw; + dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { + struct inet6_dev *idev; + if (neigh->nud_state & NUD_VALID) goto out; + idev = __in6_dev_get(dev); work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, - neigh->updated + - rt->rt6i_idev->cnf.rtr_probe_interval)) { + neigh->updated + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); @@ -590,9 +554,9 @@ static void rt6_probe(struct rt6_info *rt) if (work) { INIT_WORK(&work->work, rt6_probe_deferred); - work->target = rt->rt6i_gateway; - dev_hold(rt->dst.dev); - work->dev = rt->dst.dev; + work->target = *nh_gw; + dev_hold(dev); + work->dev = dev; schedule_work(&work->work); } @@ -600,7 +564,7 @@ out: rcu_read_unlock_bh(); } #else -static inline void rt6_probe(struct rt6_info *rt) +static inline void rt6_probe(struct fib6_info *rt) { } #endif @@ -608,28 +572,27 @@ static inline void rt6_probe(struct rt6_info *rt) /* * Default Router Selection (RFC 2461 6.3.6) */ -static inline int rt6_check_dev(struct rt6_info *rt, int oif) +static inline int rt6_check_dev(struct fib6_info *rt, int oif) { - struct net_device *dev = rt->dst.dev; + const struct net_device *dev = rt->fib6_nh.nh_dev; + if (!oif || dev->ifindex == oif) return 2; - if ((dev->flags & IFF_LOOPBACK) && - rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) - return 1; return 0; } -static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) { - struct neighbour *neigh; enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; + struct neighbour *neigh; - if (rt->rt6i_flags & RTF_NONEXTHOP || - !(rt->rt6i_flags & RTF_GATEWAY)) + if (rt->fib6_flags & RTF_NONEXTHOP || + !(rt->fib6_flags & RTF_GATEWAY)) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, + &rt->fib6_nh.nh_gw); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -650,8 +613,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) return ret; } -static int rt6_score_route(struct rt6_info *rt, int oif, - int strict) +static int rt6_score_route(struct fib6_info *rt, int oif, int strict) { int m; @@ -659,7 +621,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, if (!m && (strict & RT6_LOOKUP_F_IFACE)) return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF - m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; + m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; #endif if (strict & RT6_LOOKUP_F_REACHABLE) { int n = rt6_check_neigh(rt); @@ -669,23 +631,37 @@ static int rt6_score_route(struct rt6_info *rt, int oif, return m; } -static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match, +/* called with rc_read_lock held */ +static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) +{ + const struct net_device *dev = fib6_info_nh_dev(f6i); + bool rc = false; + + if (dev) { + const struct inet6_dev *idev = __in6_dev_get(dev); + + rc = !!idev->cnf.ignore_routes_with_linkdown; + } + + return rc; +} + +static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, + int *mpri, struct fib6_info *match, bool *do_rr) { int m; bool match_do_rr = false; - struct inet6_dev *idev = rt->rt6i_idev; - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) goto out; - if (idev->cnf.ignore_routes_with_linkdown && - rt->rt6i_nh_flags & RTNH_F_LINKDOWN && + if (fib6_ignore_linkdown(rt) && + rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); @@ -709,19 +685,19 @@ out: return match; } -static struct rt6_info *find_rr_leaf(struct fib6_node *fn, - struct rt6_info *leaf, - struct rt6_info *rr_head, +static struct fib6_info *find_rr_leaf(struct fib6_node *fn, + struct fib6_info *leaf, + struct fib6_info *rr_head, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match, *cont; + struct fib6_info *rt, *match, *cont; int mpri = -1; match = NULL; cont = NULL; - for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) { - if (rt->rt6i_metric != metric) { + for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) { + if (rt->fib6_metric != metric) { cont = rt; break; } @@ -730,8 +706,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, } for (rt = leaf; rt && rt != rr_head; - rt = rcu_dereference(rt->rt6_next)) { - if (rt->rt6i_metric != metric) { + rt = rcu_dereference(rt->fib6_next)) { + if (rt->fib6_metric != metric) { cont = rt; break; } @@ -742,22 +718,22 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, if (match || !cont) return match; - for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next)) + for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next)) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } -static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, +static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, int oif, int strict) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); - struct rt6_info *match, *rt0; + struct fib6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *match, *rt0; bool do_rr = false; int key_plen; - if (!leaf || leaf == net->ipv6.ip6_null_entry) - return net->ipv6.ip6_null_entry; + if (!leaf || leaf == net->ipv6.fib6_null_entry) + return net->ipv6.fib6_null_entry; rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) @@ -768,39 +744,39 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, * (This might happen if all routes under fn are deleted from * the tree and fib6_repair_tree() is called on the node.) */ - key_plen = rt0->rt6i_dst.plen; + key_plen = rt0->fib6_dst.plen; #ifdef CONFIG_IPV6_SUBTREES - if (rt0->rt6i_src.plen) - key_plen = rt0->rt6i_src.plen; + if (rt0->fib6_src.plen) + key_plen = rt0->fib6_src.plen; #endif if (fn->fn_bit != key_plen) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; - match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, + match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict, &do_rr); if (do_rr) { - struct rt6_info *next = rcu_dereference(rt0->rt6_next); + struct fib6_info *next = rcu_dereference(rt0->fib6_next); /* no entries matched; do round-robin */ - if (!next || next->rt6i_metric != rt0->rt6i_metric) + if (!next || next->fib6_metric != rt0->fib6_metric) next = leaf; if (next != rt0) { - spin_lock_bh(&leaf->rt6i_table->tb6_lock); + spin_lock_bh(&leaf->fib6_table->tb6_lock); /* make sure next is not being deleted from the tree */ - if (next->rt6i_node) + if (next->fib6_node) rcu_assign_pointer(fn->rr_ptr, next); - spin_unlock_bh(&leaf->rt6i_table->tb6_lock); + spin_unlock_bh(&leaf->fib6_table->tb6_lock); } } - return match ? match : net->ipv6.ip6_null_entry; + return match ? match : net->ipv6.fib6_null_entry; } -static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) { - return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); + return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -812,7 +788,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr prefix_buf, *prefix; unsigned int pref; unsigned long lifetime; - struct rt6_info *rt; + struct fib6_info *rt; if (len < sizeof(struct route_info)) { return -EINVAL; @@ -850,13 +826,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (rinfo->prefix_len == 0) - rt = rt6_get_dflt_router(gwaddr, dev); + rt = rt6_get_dflt_router(net, gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev); if (rt && !lifetime) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -864,21 +840,162 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev, pref); else if (rt) - rt->rt6i_flags = RTF_ROUTEINFO | - (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + rt->fib6_flags = RTF_ROUTEINFO | + (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { if (!addrconf_finite_timeout(lifetime)) - rt6_clean_expires(rt); + fib6_clean_expires(rt); else - rt6_set_expires(rt, jiffies + HZ * lifetime); + fib6_set_expires(rt, jiffies + HZ * lifetime); - ip6_rt_put(rt); + fib6_info_release(rt); } return 0; } #endif +/* + * Misc support functions + */ + +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +{ + struct net_device *dev = rt->fib6_nh.nh_dev; + + if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { + /* for copies of local routes, dst->dev needs to be the + * device if it is a master device, the master device if + * device is enslaved, and the loopback as the default + */ + if (netif_is_l3_slave(dev) && + !rt6_need_strict(&rt->fib6_dst.addr)) + dev = l3mdev_master_dev_rcu(dev); + else if (!netif_is_l3_master(dev)) + dev = dev_net(dev)->loopback_dev; + /* last case is netif_is_l3_master(dev) is true in which + * case we want dev returned to be dev + */ + } + + return dev; +} + +static const int fib6_prop[RTN_MAX + 1] = { + [RTN_UNSPEC] = 0, + [RTN_UNICAST] = 0, + [RTN_LOCAL] = 0, + [RTN_BROADCAST] = 0, + [RTN_ANYCAST] = 0, + [RTN_MULTICAST] = 0, + [RTN_BLACKHOLE] = -EINVAL, + [RTN_UNREACHABLE] = -EHOSTUNREACH, + [RTN_PROHIBIT] = -EACCES, + [RTN_THROW] = -EAGAIN, + [RTN_NAT] = -EINVAL, + [RTN_XRESOLVE] = -EINVAL, +}; + +static int ip6_rt_type_to_error(u8 fib6_type) +{ + return fib6_prop[fib6_type]; +} + +static unsigned short fib6_info_dst_flags(struct fib6_info *rt) +{ + unsigned short flags = 0; + + if (rt->dst_nocount) + flags |= DST_NOCOUNT; + if (rt->dst_nopolicy) + flags |= DST_NOPOLICY; + if (rt->dst_host) + flags |= DST_HOST; + + return flags; +} + +static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + + switch (ort->fib6_type) { + case RTN_BLACKHOLE: + rt->dst.output = dst_discard_out; + rt->dst.input = dst_discard; + break; + case RTN_PROHIBIT: + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; + break; + case RTN_THROW: + case RTN_UNREACHABLE: + default: + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + break; + } +} + +static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.flags |= fib6_info_dst_flags(ort); + + if (ort->fib6_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, ort); + return; + } + + rt->dst.error = 0; + rt->dst.output = ip6_output; + + if (ort->fib6_type == RTN_LOCAL) { + rt->dst.input = ip6_input; + } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { + rt->dst.input = ip6_mc_input; + } else { + rt->dst.input = ip6_forward; + } + + if (ort->fib6_nh.nh_lwtstate) { + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + lwtunnel_set_redirect(&rt->dst); + } + + rt->dst.lastuse = jiffies; +} + +static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) +{ + rt->rt6i_flags &= ~RTF_EXPIRES; + fib6_info_hold(from); + rcu_assign_pointer(rt->from, from); + dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + if (from->fib6_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&from->fib6_metrics->refcnt); + } +} + +static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +{ + struct net_device *dev = fib6_info_nh_dev(ort); + + ip6_rt_init_dst(rt, ort); + + rt->rt6i_dst = ort->fib6_dst; + rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; + rt->rt6i_gateway = ort->fib6_nh.nh_gw; + rt->rt6i_flags = ort->fib6_flags; + rt6_set_from(rt, ort); +#ifdef CONFIG_IPV6_SUBTREES + rt->rt6i_src = ort->fib6_src; +#endif + rt->rt6i_prefsrc = ort->fib6_prefsrc; + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); +} + static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { @@ -889,7 +1006,7 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn, pn = rcu_dereference(fn->parent); sn = FIB6_SUBTREE(pn); if (sn && sn != fn) - fn = fib6_lookup(sn, NULL, saddr); + fn = fib6_node_lookup(sn, NULL, saddr); else fn = pn; if (fn->fn_flags & RTN_RTINFO) @@ -914,50 +1031,74 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, return false; } +/* called with rcu_lock held */ +static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +{ + unsigned short flags = fib6_info_dst_flags(rt); + struct net_device *dev = rt->fib6_nh.nh_dev; + struct rt6_info *nrt; + + nrt = ip6_dst_alloc(dev_net(dev), dev, flags); + if (nrt) + ip6_rt_copy_init(nrt, rt); + + return nrt; +} + static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; struct fib6_node *fn; + struct rt6_info *rt; if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) flags &= ~RT6_LOOKUP_F_IFACE; rcu_read_lock(); - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = rcu_dereference(fn->leaf); - if (!rt) { - rt = net->ipv6.ip6_null_entry; + f6i = rcu_dereference(fn->leaf); + if (!f6i) { + f6i = net->ipv6.fib6_null_entry; } else { - rt = rt6_device_match(net, rt, &fl6->saddr, + f6i = rt6_device_match(net, f6i, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, - skb, flags); + if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) + f6i = fib6_multipath_select(net, f6i, fl6, + fl6->flowi6_oif, skb, + flags); } - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } - /* Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - if (ip6_hold_safe(net, &rt, true)) - dst_use_noref(&rt->dst, jiffies); + trace_fib6_table_lookup(net, f6i, table, fl6); - rcu_read_unlock(); + /* Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) + dst_use_noref(&rt->dst, jiffies); + } else if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } else { + rt = ip6_create_rt_rcu(f6i); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + } - trace_fib6_table_lookup(net, rt, table, fl6); + rcu_read_unlock(); return rt; - } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, @@ -999,55 +1140,28 @@ EXPORT_SYMBOL(rt6_lookup); * Caller must hold dst before calling it. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct mx6_config *mxc, +static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { int err; struct fib6_table *table; - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mxc, extack); + err = fib6_add(&table->tb6_root, rt, info, extack); spin_unlock_bh(&table->tb6_lock); return err; } -int ip6_ins_rt(struct rt6_info *rt) -{ - struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - struct mx6_config mxc = { .mx = NULL, }; - - /* Hold dst to account for the reference from the fib6 tree */ - dst_hold(&rt->dst); - return __ip6_ins_rt(rt, &info, &mxc, NULL); -} - -/* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) +int ip6_ins_rt(struct net *net, struct fib6_info *rt) { - struct net_device *dev = rt->dst.dev; - - if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { - /* for copies of local routes, dst->dev needs to be the - * device if it is a master device, the master device if - * device is enslaved, and the loopback as the default - */ - if (netif_is_l3_slave(dev) && - !rt6_need_strict(&rt->rt6i_dst.addr)) - dev = l3mdev_master_dev_rcu(dev); - else if (!netif_is_l3_master(dev)) - dev = dev_net(dev)->loopback_dev; - /* last case is netif_is_l3_master(dev) is true in which - * case we want dev returned to be dev - */ - } + struct nl_info info = { .nl_net = net, }; - return dev; + return __ip6_ins_rt(rt, &info, NULL); } -static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -1058,26 +1172,20 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - - rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); - rt = __ip6_dst_alloc(dev_net(dev), dev, 0); - rcu_read_unlock(); + rt = ip6_dst_alloc(dev_net(dev), dev, 0); if (!rt) return NULL; ip6_rt_copy_init(rt, ort); rt->rt6i_flags |= RTF_CACHE; - rt->rt6i_metric = 0; rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; if (!rt6_is_gw_or_nonexthop(ort)) { - if (ort->rt6i_dst.plen != 128 && - ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) + if (ort->fib6_dst.plen != 128 && + ipv6_addr_equal(&ort->fib6_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -1090,45 +1198,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, return rt; } -static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) { + unsigned short flags = fib6_info_dst_flags(rt); struct net_device *dev; struct rt6_info *pcpu_rt; rcu_read_lock(); dev = ip6_rt_get_dev_rcu(rt); - pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); if (!pcpu_rt) return NULL; ip6_rt_copy_init(pcpu_rt, rt); - pcpu_rt->rt6i_protocol = rt->rt6i_protocol; pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; } /* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) { struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) - rt6_dst_from_metrics_check(pcpu_rt); + if (pcpu_rt) + ip6_hold_safe(NULL, &pcpu_rt, false); return pcpu_rt; } -static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_make_pcpu_route(struct net *net, + struct fib6_info *rt) { struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { - struct net *net = dev_net(rt->dst.dev); - dst_hold(&net->ipv6.ip6_null_entry->dst); return net->ipv6.ip6_null_entry; } @@ -1138,7 +1245,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); - rt6_dst_from_metrics_check(pcpu_rt); return pcpu_rt; } @@ -1158,9 +1264,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, return; net = dev_net(rt6_ex->rt6i->dst.dev); - rt6_ex->rt6i->rt6i_node = NULL; hlist_del_rcu(&rt6_ex->hlist); - rt6_release(rt6_ex->rt6i); + dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; @@ -1268,20 +1373,36 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, return NULL; } +static unsigned int fib6_mtu(const struct fib6_info *rt) +{ + unsigned int mtu; + + if (rt->fib6_pmtu) { + mtu = rt->fib6_pmtu; + } else { + struct net_device *dev = fib6_info_nh_dev(rt); + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + } + + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); +} + static int rt6_insert_exception(struct rt6_info *nrt, - struct rt6_info *ort) + struct fib6_info *ort) { - struct net *net = dev_net(ort->dst.dev); + struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err = 0; - /* ort can't be a cache or pcpu route */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); - spin_lock_bh(&rt6_exception_lock); if (ort->exception_bucket_flushed) { @@ -1308,19 +1429,19 @@ static int rt6_insert_exception(struct rt6_info *nrt, * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (ort->rt6i_src.plen) + if (ort->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif /* Update rt6i_prefsrc as it could be changed * in rt6_remove_prefsrc() */ - nrt->rt6i_prefsrc = ort->rt6i_prefsrc; + nrt->rt6i_prefsrc = ort->fib6_prefsrc; /* rt6_mtu_change() might lower mtu on ort. * Only insert this exception route if its mtu * is less than ort's mtu value. */ - if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { + if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { err = -EINVAL; goto out; } @@ -1337,8 +1458,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, } rt6_ex->rt6i = nrt; rt6_ex->stamp = jiffies; - atomic_inc(&nrt->rt6i_ref); - nrt->rt6i_node = ort->rt6i_node; hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; @@ -1351,16 +1470,16 @@ out: /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { - spin_lock_bh(&ort->rt6i_table->tb6_lock); - fib6_update_sernum(ort); - spin_unlock_bh(&ort->rt6i_table->tb6_lock); + spin_lock_bh(&ort->fib6_table->tb6_lock); + fib6_update_sernum(net, ort); + spin_unlock_bh(&ort->fib6_table->tb6_lock); fib6_force_start_gc(net); } return err; } -void rt6_flush_exceptions(struct rt6_info *rt) +void rt6_flush_exceptions(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1390,7 +1509,7 @@ out: /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -1408,7 +1527,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (rt->rt6i_src.plen) + if (rt->fib6_src.plen) src_key = saddr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); @@ -1420,14 +1539,15 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, } /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; + struct fib6_info *from; int err; + from = rcu_dereference(rt->from); if (!from || !(rt->rt6i_flags & RTF_CACHE)) return -EINVAL; @@ -1445,7 +1565,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->rt6i_src.plen) + if (from->fib6_src.plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_spinlock(&bucket, @@ -1468,7 +1588,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; @@ -1486,7 +1606,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ - if (from->rt6i_src.plen) + if (from->fib6_src.plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, @@ -1498,7 +1618,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) rcu_read_unlock(); } -static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1540,7 +1660,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, } static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, - struct rt6_info *rt, int mtu) + struct fib6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1557,12 +1677,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, struct rt6_info *entry = rt6_ex->rt6i; /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected - * route), the metrics of its rt->dst.from have already + * route), the metrics of its rt->from have already * been updated. */ - if (entry->rt6i_pmtu && + if (dst_metric_raw(&entry->dst, RTAX_MTU) && rt6_mtu_change_route_allowed(idev, entry, mtu)) - entry->rt6i_pmtu = mtu; + dst_metric_set(&entry->dst, RTAX_MTU, mtu); } bucket++; } @@ -1570,7 +1690,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) -static void rt6_exceptions_clean_tohost(struct rt6_info *rt, +static void rt6_exceptions_clean_tohost(struct fib6_info *rt, struct in6_addr *gateway) { struct rt6_exception_bucket *bucket; @@ -1649,7 +1769,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, gc_args->more++; } -void rt6_age_exceptions(struct rt6_info *rt, +void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, unsigned long now) { @@ -1680,32 +1800,22 @@ void rt6_age_exceptions(struct rt6_info *rt, rcu_read_unlock_bh(); } -struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, - const struct sk_buff *skb, int flags) +/* must be called with rcu lock held */ +struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, int strict) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *rt_cache; - int strict = 0; + struct fib6_info *f6i; - strict |= flags & RT6_LOOKUP_F_IFACE; - strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; - if (net->ipv6.devconf_all->forwarding == 0) - strict |= RT6_LOOKUP_F_REACHABLE; - - rcu_read_lock(); - - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) oif = 0; redo_rt6_select: - rt = rt6_select(net, fn, oif, strict); - if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); - if (rt == net->ipv6.ip6_null_entry) { + f6i = rt6_select(net, fn, oif, strict); + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1717,45 +1827,57 @@ redo_rt6_select: } } - /*Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; + trace_fib6_table_lookup(net, f6i, table, fl6); - if (rt == net->ipv6.ip6_null_entry) { + return f6i; +} + +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) +{ + struct fib6_info *f6i; + struct rt6_info *rt; + int strict = 0; + + strict |= flags & RT6_LOOKUP_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; + if (net->ipv6.devconf_all->forwarding == 0) + strict |= RT6_LOOKUP_F_REACHABLE; + + rcu_read_lock(); + + f6i = fib6_table_lookup(net, table, oif, fl6, strict); + if (f6i->fib6_nsiblings) + f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict); + + if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); - trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } else if (rt->rt6i_flags & RTF_CACHE) { - if (ip6_hold_safe(net, &rt, true)) { + } + + /*Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - rt6_dst_from_metrics_check(rt); - } + rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(rt->rt6i_flags & RTF_GATEWAY))) { + !(f6i->fib6_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different * from the fl6->daddr used to look-up route here. */ - struct rt6_info *uncached_rt; - if (ip6_hold_safe(net, &rt, true)) { - dst_use_noref(&rt->dst, jiffies); - } else { - rcu_read_unlock(); - uncached_rt = rt; - goto uncached_rt_out; - } - rcu_read_unlock(); + uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); - uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); - dst_release(&rt->dst); + rcu_read_unlock(); if (uncached_rt) { /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() @@ -1768,36 +1890,21 @@ redo_rt6_select: dst_hold(&uncached_rt->dst); } -uncached_rt_out: - trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; - } else { /* Get a percpu copy */ struct rt6_info *pcpu_rt; - dst_use_noref(&rt->dst, jiffies); local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(rt); - - if (!pcpu_rt) { - /* atomic_inc_not_zero() is needed when using rcu */ - if (atomic_inc_not_zero(&rt->rt6i_ref)) { - /* No dst_hold() on rt is needed because grabbing - * rt->rt6i_ref makes sure rt can't be released. - */ - pcpu_rt = rt6_make_pcpu_route(rt); - rt6_release(rt); - } else { - /* rt is already removed from tree */ - pcpu_rt = net->ipv6.ip6_null_entry; - dst_hold(&pcpu_rt->dst); - } - } + pcpu_rt = rt6_get_pcpu_route(f6i); + + if (!pcpu_rt) + pcpu_rt = rt6_make_pcpu_route(net, f6i); + local_bh_enable(); rcu_read_unlock(); - trace_fib6_table_lookup(net, pcpu_rt, table, fl6); + return pcpu_rt; } } @@ -2020,7 +2127,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = in6_dev_get(loopback_dev); rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; - rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); #ifdef CONFIG_IPV6_SUBTREES @@ -2036,18 +2142,27 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ -static void rt6_dst_from_metrics_check(struct rt6_info *rt) +static bool fib6_check(struct fib6_info *f6i, u32 cookie) { - if (rt->from && - dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) - dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); + u32 rt_cookie = 0; + + if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) + return false; + + if (fib6_check_expired(f6i)) + return false; + + return true; } -static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) +static struct dst_entry *rt6_check(struct rt6_info *rt, + struct fib6_info *from, + u32 cookie) { u32 rt_cookie = 0; - if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) + if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || + rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -2056,11 +2171,13 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) return &rt->dst; } -static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) +static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, + struct fib6_info *from, + u32 cookie) { if (!__rt6_check_expired(rt) && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && - rt6_check(rt->from, cookie)) + fib6_check(from, cookie)) return &rt->dst; else return NULL; @@ -2068,22 +2185,30 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { + struct dst_entry *dst_ret; + struct fib6_info *from; struct rt6_info *rt; - rt = (struct rt6_info *) dst; + rt = container_of(dst, struct rt6_info, dst); + + rcu_read_lock(); /* All IPV6 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ - rt6_dst_from_metrics_check(rt); + from = rcu_dereference(rt->from); - if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) - return rt6_dst_from_check(rt, cookie); + if (from && (rt->rt6i_flags & RTF_PCPU || + unlikely(!list_empty(&rt->rt6i_uncached)))) + dst_ret = rt6_dst_from_check(rt, from, cookie); else - return rt6_check(rt, cookie); + dst_ret = rt6_check(rt, from, cookie); + + rcu_read_unlock(); + + return dst_ret; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -2092,10 +2217,12 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { + rcu_read_lock(); if (rt6_check_expired(rt)) { - ip6_del_rt(rt); + rt6_remove_exception_rt(rt); dst = NULL; } + rcu_read_unlock(); } else { dst_release(dst); dst = NULL; @@ -2112,35 +2239,60 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { + rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) - ip6_del_rt(rt); + rt6_remove_exception_rt(rt); } else { + struct fib6_info *from; struct fib6_node *fn; - rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); - if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; - rcu_read_unlock(); + from = rcu_dereference(rt->from); + if (from) { + fn = rcu_dereference(from->fib6_node); + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) + fn->fn_sernum = -1; + } } + rcu_read_unlock(); + } +} + +static void rt6_update_expires(struct rt6_info *rt0, int timeout) +{ + if (!(rt0->rt6i_flags & RTF_EXPIRES)) { + struct fib6_info *from; + + rcu_read_lock(); + from = rcu_dereference(rt0->from); + if (from) + rt0->dst.expires = from->expires; + rcu_read_unlock(); } + + dst_set_expires(&rt0->dst, timeout); + rt0->rt6i_flags |= RTF_EXPIRES; } static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) { struct net *net = dev_net(rt->dst.dev); + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt->rt6i_flags |= RTF_MODIFIED; - rt->rt6i_pmtu = mtu; rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); } static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { + bool from_set; + + rcu_read_lock(); + from_set = !!rcu_dereference(rt->from); + rcu_read_unlock(); + return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || - rcu_access_pointer(rt->rt6i_node)); + (rt->rt6i_flags & RTF_PCPU || from_set); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2176,14 +2328,18 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_CACHE) rt6_update_exception_stamp_rt(rt6); } else if (daddr) { + struct fib6_info *from; struct rt6_info *nrt6; - nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + rcu_read_lock(); + from = rcu_dereference(rt6->from); + nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - if (rt6_insert_exception(nrt6, rt6)) + if (rt6_insert_exception(nrt6, from)) dst_release_immediate(&nrt6->dst); } + rcu_read_unlock(); } } @@ -2264,7 +2420,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt, *rt_cache; + struct rt6_info *ret = NULL, *rt_cache; + struct fib6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2278,32 +2435,32 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, */ rcu_read_lock(); - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) continue; - if (rt->dst.error) + if (rt->fib6_flags & RTF_REJECT) break; - if (!(rt->rt6i_flags & RTF_GATEWAY)) + if (!(rt->fib6_flags & RTF_GATEWAY)) continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) + if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) continue; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { + if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); if (rt_cache && ipv6_addr_equal(&rdfl->gateway, &rt_cache->rt6i_gateway)) { - rt = rt_cache; + ret = rt_cache; break; } continue; @@ -2312,25 +2469,28 @@ restart: } if (!rt) - rt = net->ipv6.ip6_null_entry; - else if (rt->dst.error) { - rt = net->ipv6.ip6_null_entry; + rt = net->ipv6.fib6_null_entry; + else if (rt->fib6_flags & RTF_REJECT) { + ret = net->ipv6.ip6_null_entry; goto out; } - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } out: - ip6_hold_safe(net, &rt, true); + if (ret) + dst_hold(&ret->dst); + else + ret = ip6_create_rt_rcu(rt); rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); - return rt; + return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, @@ -2422,12 +2582,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - - if (mtu) - goto out; + unsigned int mtu; mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) @@ -2511,60 +2667,22 @@ out: return entries > rt_max_size; } -static int ip6_convert_metrics(struct mx6_config *mxc, - const struct fib6_config *cfg) +static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, + struct fib6_config *cfg) { - struct net *net = cfg->fc_nlinfo.nl_net; - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - u32 *mp; + struct dst_metrics *p; if (!cfg->fc_mx) return 0; - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (unlikely(!mp)) + p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL); + if (unlikely(!p)) return -ENOMEM; - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (unlikely(type > RTAX_MAX)) - goto err; + refcount_set(&p->refcnt, 1); + rt->fib6_metrics = p; - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - goto err; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - goto err; - - mp[type - 1] = val; - __set_bit(type - 1, mxc->mx_valid); - } - - if (ecn_ca) { - __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); - mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - } - - mxc->mx = mp; - return 0; - err: - kfree(mp); - return -EINVAL; + return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics); } static struct rt6_info *ip6_nh_lookup_table(struct net *net, @@ -2750,11 +2868,12 @@ out: return err; } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, +static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, + gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; @@ -2773,6 +2892,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -2831,35 +2955,30 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, - (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); + err = -ENOMEM; + rt = fib6_info_alloc(gfp_flags); + if (!rt) + goto out; - if (!rt) { - err = -ENOMEM; + if (cfg->fc_flags & RTF_ADDRCONF) + rt->dst_nocount = true; + + err = ip6_convert_metrics(net, rt, cfg); + if (err < 0) goto out; - } if (cfg->fc_flags & RTF_EXPIRES) - rt6_set_expires(rt, jiffies + + fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); else - rt6_clean_expires(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; - rt->rt6i_protocol = cfg->fc_protocol; + rt->fib6_protocol = cfg->fc_protocol; addr_type = ipv6_addr_type(&cfg->fc_dst); - if (addr_type & IPV6_ADDR_MULTICAST) - rt->dst.input = ip6_mc_input; - else if (cfg->fc_flags & RTF_LOCAL) - rt->dst.input = ip6_input; - else - rt->dst.input = ip6_forward; - - rt->dst.output = ip6_output; - if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; @@ -2868,22 +2987,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, &lwtstate, extack); if (err) goto out; - rt->dst.lwtstate = lwtstate_get(lwtstate); - lwtunnel_set_redirect(&rt->dst); + rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); } - ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); - rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; + ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); + rt->fib6_dst.plen = cfg->fc_dst_len; + if (rt->fib6_dst.plen == 128) + rt->dst_host = true; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); - rt->rt6i_src.plen = cfg->fc_src_len; + ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); + rt->fib6_src.plen = cfg->fc_src_len; #endif - rt->rt6i_metric = cfg->fc_metric; - rt->rt6i_nh_weight = 1; + rt->fib6_metric = cfg->fc_metric; + rt->fib6_nh.nh_weight = 1; + + rt->fib6_type = cfg->fc_type; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes @@ -2906,28 +3026,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } } - rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; - switch (cfg->fc_type) { - case RTN_BLACKHOLE: - rt->dst.error = -EINVAL; - rt->dst.output = dst_discard_out; - rt->dst.input = dst_discard; - break; - case RTN_PROHIBIT: - rt->dst.error = -EACCES; - rt->dst.output = ip6_pkt_prohibit_out; - rt->dst.input = ip6_pkt_prohibit; - break; - case RTN_THROW: - case RTN_UNREACHABLE: - default: - rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN - : (cfg->fc_type == RTN_UNREACHABLE) - ? -EHOSTUNREACH : -ENETUNREACH; - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; - break; - } + rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -2936,7 +3035,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (err) goto out; - rt->rt6i_gateway = cfg->fc_gateway; + rt->fib6_nh.nh_gw = cfg->fc_gateway; } err = -ENODEV; @@ -2961,96 +3060,82 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, err = -EINVAL; goto out; } - rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; - rt->rt6i_prefsrc.plen = 128; + rt->fib6_prefsrc.addr = cfg->fc_prefsrc; + rt->fib6_prefsrc.plen = 128; } else - rt->rt6i_prefsrc.plen = 0; + rt->fib6_prefsrc.plen = 0; - rt->rt6i_flags = cfg->fc_flags; + rt->fib6_flags = cfg->fc_flags; install_route: - if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && + if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; - rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->dst.dev = dev; - rt->rt6i_idev = idev; - rt->rt6i_table = table; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); + rt->fib6_nh.nh_dev = dev; + rt->fib6_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); + if (idev) + in6_dev_put(idev); + return rt; out: if (dev) dev_put(dev); if (idev) in6_dev_put(idev); - if (rt) - dst_release_immediate(&rt->dst); + fib6_info_release(rt); return ERR_PTR(err); } -int ip6_route_add(struct fib6_config *cfg, +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { - struct mx6_config mxc = { .mx = NULL, }; - struct rt6_info *rt; + struct fib6_info *rt; int err; - rt = ip6_route_info_create(cfg, extack); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; - goto out; - } - - err = ip6_convert_metrics(&mxc, cfg); - if (err) - goto out; - - err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); + rt = ip6_route_info_create(cfg, gfp_flags, extack); + if (IS_ERR(rt)) + return PTR_ERR(rt); - kfree(mxc.mx); - - return err; -out: - if (rt) - dst_release_immediate(&rt->dst); + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); + fib6_info_release(rt); return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) { - int err; + struct net *net = info->nl_net; struct fib6_table *table; - struct net *net = dev_net(rt->dst.dev); + int err; - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { err = -ENOENT; goto out; } - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); spin_unlock_bh(&table->tb6_lock); out: - ip6_rt_put(rt); + fib6_info_release(rt); return err; } -int ip6_del_rt(struct rt6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; + struct nl_info info = { .nl_net = net }; + return __ip6_del_rt(rt, &info); } -static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; struct net *net = info->nl_net; @@ -3058,20 +3143,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) struct fib6_table *table; int err = -ENOENT; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) goto out_put; - table = rt->rt6i_table; + table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); - if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { + struct fib6_info *sibling, *next_sibling; /* prefer to send a single notification with all hops */ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -3081,8 +3166,8 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) } list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, - rt6i_siblings) { + &rt->fib6_siblings, + fib6_siblings) { err = fib6_del(sibling, info); if (err) goto out_unlock; @@ -3093,7 +3178,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) out_unlock: spin_unlock_bh(&table->tb6_lock); out_put: - ip6_rt_put(rt); + fib6_info_release(rt); if (skb) { rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, @@ -3102,11 +3187,28 @@ out_put: return err; } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ + int rc = -ESRCH; + + if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) + goto out; + + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + goto out; + if (dst_hold_safe(&rt->dst)) + rc = rt6_remove_exception_rt(rt); +out: + return rc; +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt, *rt_cache; + struct rt6_info *rt_cache; struct fib6_table *table; + struct fib6_info *rt; struct fib6_node *fn; int err = -ESRCH; @@ -3126,25 +3228,31 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { + int rc; + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); - if (!rt_cache) - continue; - rt = rt_cache; + if (rt_cache) { + rc = ip6_del_cached_rt(rt_cache, cfg); + if (rc != -ESRCH) { + rcu_read_unlock(); + return rc; + } + } + continue; } if (cfg->fc_ifindex && - (!rt->dst.dev || - rt->dst.dev->ifindex != cfg->fc_ifindex)) + (!rt->fib6_nh.nh_dev || + rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) continue; - if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) + if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; - if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) + if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) continue; - if (!dst_hold_safe(&rt->dst)) - break; + fib6_info_hold(rt); rcu_read_unlock(); /* if gateway was specified only delete the one hop */ @@ -3166,6 +3274,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct ndisc_options ndopts; struct inet6_dev *in6_dev; struct neighbour *neigh; + struct fib6_info *from; struct rd_msg *msg; int optlen, on_link; u8 *lladdr; @@ -3247,7 +3356,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); - nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); + rcu_read_lock(); + from = rcu_dereference(rt->from); + fib6_info_hold(from); + rcu_read_unlock(); + + nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) goto out; @@ -3255,14 +3369,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; /* No need to remove rt from the exception table if rt is * a cached route because rt6_insert_exception() will * takes care of it */ - if (rt6_insert_exception(nrt, rt)) { + if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; } @@ -3274,47 +3387,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: + fib6_info_release(from); neigh_release(neigh); } -/* - * Misc support functions - */ - -static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - BUG_ON(from->from); - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(&from->dst); - rt->from = from; - dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); -} - -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) -{ - rt->dst.input = ort->dst.input; - rt->dst.output = ort->dst.output; - rt->rt6i_dst = ort->rt6i_dst; - rt->dst.error = ort->dst.error; - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; - rt6_set_from(rt, ort); - rt->rt6i_metric = ort->rt6i_metric; -#ifdef CONFIG_IPV6_SUBTREES - rt->rt6i_src = ort->rt6i_src; -#endif - rt->rt6i_prefsrc = ort->rt6i_prefsrc; - rt->rt6i_table = ort->rt6i_table; - rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); -} - #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev) @@ -3322,7 +3400,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; int ifindex = dev->ifindex; struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; table = fib6_get_table(net, tb_id); @@ -3335,13 +3413,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != ifindex) + if (rt->fib6_nh.nh_dev->ifindex != ifindex) continue; - if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) + if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; - if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) continue; - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); break; } out: @@ -3349,7 +3427,7 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, @@ -3362,6 +3440,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@ -3375,36 +3454,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net, if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, + struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), tb_id); + table = fib6_get_table(net, tb_id); if (!table) return NULL; rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->dst.dev && - ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->rt6i_gateway, addr)) + if (dev == rt->fib6_nh.nh_dev && + ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && + ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) break; } if (rt) - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); rcu_read_unlock(); return rt; } -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { @@ -3415,14 +3497,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = dev_net(dev), + .fc_nlinfo.nl_net = net, }; cfg.fc_gateway = *gwaddr; - if (!ip6_route_add(&cfg, NULL)) { + if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -3430,24 +3513,25 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; } - return rt6_get_dflt_router(gwaddr, dev); + return rt6_get_dflt_router(net, gwaddr, dev); } -static void __rt6_purge_dflt_routers(struct fib6_table *table) +static void __rt6_purge_dflt_routers(struct net *net, + struct fib6_table *table) { - struct rt6_info *rt; + struct fib6_info *rt; restart: rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - if (dst_hold_safe(&rt->dst)) { - rcu_read_unlock(); - ip6_del_rt(rt); - } else { - rcu_read_unlock(); - } + struct net_device *dev = fib6_info_nh_dev(rt); + struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL; + + if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!idev || idev->cnf.accept_ra != 2)) { + fib6_info_hold(rt); + rcu_read_unlock(); + ip6_del_rt(net, rt); goto restart; } } @@ -3468,7 +3552,7 @@ void rt6_purge_dflt_routers(struct net *net) head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) - __rt6_purge_dflt_routers(table); + __rt6_purge_dflt_routers(net, table); } } @@ -3489,6 +3573,7 @@ static void rtmsg_to_fib6_config(struct net *net, cfg->fc_dst_len = rtmsg->rtmsg_dst_len; cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; + cfg->fc_type = rtmsg->rtmsg_type; cfg->fc_nlinfo.nl_net = net; @@ -3518,7 +3603,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg, NULL); + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: err = ip6_route_del(&cfg, NULL); @@ -3546,7 +3631,8 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IP6_INC_STATS(dev_net(dst->dev), + __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INADDRERRORS); break; } @@ -3587,40 +3673,40 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff * Allocate a dst for local (unicast / anycast) address. */ -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, - bool anycast) +struct fib6_info *addrconf_f6i_alloc(struct net *net, + struct inet6_dev *idev, + const struct in6_addr *addr, + bool anycast, gfp_t gfp_flags) { u32 tb_id; - struct net *net = dev_net(idev->dev); struct net_device *dev = idev->dev; - struct rt6_info *rt; + struct fib6_info *f6i; - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); - if (!rt) + f6i = fib6_info_alloc(gfp_flags); + if (!f6i) return ERR_PTR(-ENOMEM); - in6_dev_hold(idev); - - rt->dst.flags |= DST_HOST; - rt->dst.input = ip6_input; - rt->dst.output = ip6_output; - rt->rt6i_idev = idev; - - rt->rt6i_protocol = RTPROT_KERNEL; - rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; - if (anycast) - rt->rt6i_flags |= RTF_ANYCAST; - else - rt->rt6i_flags |= RTF_LOCAL; + f6i->dst_nocount = true; + f6i->dst_host = true; + f6i->fib6_protocol = RTPROT_KERNEL; + f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; + if (anycast) { + f6i->fib6_type = RTN_ANYCAST; + f6i->fib6_flags |= RTF_ANYCAST; + } else { + f6i->fib6_type = RTN_LOCAL; + f6i->fib6_flags |= RTF_LOCAL; + } - rt->rt6i_gateway = *addr; - rt->rt6i_dst.addr = *addr; - rt->rt6i_dst.plen = 128; + f6i->fib6_nh.nh_gw = *addr; + dev_hold(dev); + f6i->fib6_nh.nh_dev = dev; + f6i->fib6_dst.addr = *addr; + f6i->fib6_dst.plen = 128; tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - rt->rt6i_table = fib6_get_table(net, tb_id); + f6i->fib6_table = fib6_get_table(net, tb_id); - return rt; + return f6i; } /* remove deleted ip from prefsrc entries */ @@ -3630,18 +3716,18 @@ struct arg_dev_net_ip { struct in6_addr *addr; }; -static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) +static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) { struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->dst.dev == dev || !dev) && - rt != net->ipv6.ip6_null_entry && - ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { + if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + rt != net->ipv6.fib6_null_entry && + ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ - rt->rt6i_prefsrc.plen = 0; + rt->fib6_prefsrc.plen = 0; /* need to update cache as well */ rt6_exceptions_remove_prefsrc(rt); spin_unlock_bh(&rt6_exception_lock); @@ -3663,12 +3749,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) /* Remove routers and update dst entries when gateway turn into host. */ -static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && + ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { return -1; } @@ -3694,85 +3780,85 @@ struct arg_netdev_event { }; }; -static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; struct fib6_node *fn; - fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); iter = rcu_dereference_protected(fn->leaf, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { - if (iter->rt6i_metric == rt->rt6i_metric && + if (iter->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(iter)) return iter; - iter = rcu_dereference_protected(iter->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + iter = rcu_dereference_protected(iter->fib6_next, + lockdep_is_held(&rt->fib6_table->tb6_lock)); } return NULL; } -static bool rt6_is_dead(const struct rt6_info *rt) +static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD || - (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && - rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + fib6_ignore_linkdown(rt))) return true; return false; } -static int rt6_multipath_total_weight(const struct rt6_info *rt) +static int rt6_multipath_total_weight(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; int total = 0; if (!rt6_is_dead(rt)) - total += rt->rt6i_nh_weight; + total += rt->fib6_nh.nh_weight; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->rt6i_nh_weight; + total += iter->fib6_nh.nh_weight; } return total; } -static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) { int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->rt6i_nh_weight; + *weight += rt->fib6_nh.nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); } -static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) { - struct rt6_info *iter; + struct fib6_info *iter; int weight = 0; rt6_upper_bound_set(rt, &weight, total); - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) rt6_upper_bound_set(iter, &weight, total); } -void rt6_multipath_rebalance(struct rt6_info *rt) +void rt6_multipath_rebalance(struct fib6_info *rt) { - struct rt6_info *first; + struct fib6_info *first; int total; /* In case the entire multipath route was marked for flushing, * then there is no need to rebalance upon the removal of every * sibling route. */ - if (!rt->rt6i_nsiblings || rt->should_flush) + if (!rt->fib6_nsiblings || rt->should_flush) return; /* During lookup routes are evaluated in order, so we need to @@ -3787,14 +3873,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt) rt6_multipath_upper_bound_set(first, total); } -static int fib6_ifup(struct rt6_info *rt, void *p_arg) +static int fib6_ifup(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; - const struct net *net = dev_net(arg->dev); + struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { - rt->rt6i_nh_flags &= ~arg->nh_flags; - fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); + if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { + rt->fib6_nh.nh_flags &= ~arg->nh_flags; + fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3816,95 +3902,96 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } -static bool rt6_multipath_uses_dev(const struct rt6_info *rt, +static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) + if (rt->fib6_nh.nh_dev == dev) return true; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == dev) return true; return false; } -static void rt6_multipath_flush(struct rt6_info *rt) +static void rt6_multipath_flush(struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; rt->should_flush = 1; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) iter->should_flush = 1; } -static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, +static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, const struct net_device *down_dev) { - struct rt6_info *iter; + struct fib6_info *iter; unsigned int dead = 0; - if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_dev == down_dev || + rt->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == down_dev || - iter->rt6i_nh_flags & RTNH_F_DEAD) + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == down_dev || + iter->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; return dead; } -static void rt6_multipath_nh_flags_set(struct rt6_info *rt, +static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, unsigned int nh_flags) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) - rt->rt6i_nh_flags |= nh_flags; - list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) - iter->rt6i_nh_flags |= nh_flags; + if (rt->fib6_nh.nh_dev == dev) + rt->fib6_nh.nh_flags |= nh_flags; + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) + if (iter->fib6_nh.nh_dev == dev) + iter->fib6_nh.nh_flags |= nh_flags; } /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *p_arg) +static int fib6_ifdown(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; const struct net_device *dev = arg->dev; - const struct net *net = dev_net(dev); + struct net *net = dev_net(dev); - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; switch (arg->event) { case NETDEV_UNREGISTER: - return rt->dst.dev == dev ? -1 : 0; + return rt->fib6_nh.nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; - if (!rt->rt6i_nsiblings) - return rt->dst.dev == dev ? -1 : 0; + if (!rt->fib6_nsiblings) + return rt->fib6_nh.nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; count = rt6_multipath_dead_count(rt, dev); - if (rt->rt6i_nsiblings + 1 == count) { + if (rt->fib6_nsiblings + 1 == count) { rt6_multipath_flush(rt); return -1; } rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - fib6_update_sernum(rt); + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); } return -2; case NETDEV_CHANGE: - if (rt->dst.dev != dev || - rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + if (rt->fib6_nh.nh_dev != dev || + rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -3936,7 +4023,7 @@ struct rt6_mtu_change_arg { unsigned int mtu; }; -static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) +static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; @@ -3956,12 +4043,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->fib6_nh.nh_dev == arg->dev && + !fib6_metric_locked(rt, RTAX_MTU)) { + u32 mtu = rt->fib6_pmtu; + + if (mtu >= arg->mtu || + (mtu < arg->mtu && mtu == idev->cnf.mtu6)) + fib6_metric_set(rt, RTAX_MTU, arg->mtu); + spin_lock_bh(&rt6_exception_lock); - if (dst_metric_raw(&rt->dst, RTAX_MTU) && - rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } @@ -4122,9 +4212,8 @@ errout: } struct rt6_nh { - struct rt6_info *rt6_info; + struct fib6_info *fib6_info; struct fib6_config r_cfg; - struct mx6_config mxc; struct list_head next; }; @@ -4139,23 +4228,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) } } -static int ip6_route_info_append(struct list_head *rt6_nh_list, - struct rt6_info *rt, struct fib6_config *r_cfg) +static int ip6_route_info_append(struct net *net, + struct list_head *rt6_nh_list, + struct fib6_info *rt, + struct fib6_config *r_cfg) { struct rt6_nh *nh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { - /* check if rt6_info already exists */ - if (rt6_duplicate_nexthop(nh->rt6_info, rt)) + /* check if fib6_info already exists */ + if (rt6_duplicate_nexthop(nh->fib6_info, rt)) return err; } nh = kzalloc(sizeof(*nh), GFP_KERNEL); if (!nh) return -ENOMEM; - nh->rt6_info = rt; - err = ip6_convert_metrics(&nh->mxc, r_cfg); + nh->fib6_info = rt; + err = ip6_convert_metrics(net, rt, r_cfg); if (err) { kfree(nh); return err; @@ -4166,8 +4257,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } -static void ip6_route_mpath_notify(struct rt6_info *rt, - struct rt6_info *rt_last, +static void ip6_route_mpath_notify(struct fib6_info *rt, + struct fib6_info *rt_last, struct nl_info *info, __u16 nlflags) { @@ -4177,10 +4268,10 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ - if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { - rt = list_first_entry(&rt_last->rt6i_siblings, - struct rt6_info, - rt6i_siblings); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { + rt = list_first_entry(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) @@ -4190,11 +4281,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; - struct rt6_info *rt; + struct fib6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; __u16 nlflags; @@ -4214,7 +4305,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, rtnh = (struct rtnexthop *)cfg->fc_mp; /* Parse a Multipath Entry and build a list (rt6_nh_list) of - * rt6_info structs per nexthop + * fib6_info structs per nexthop */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); @@ -4237,18 +4328,19 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, } r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); - rt = ip6_route_info_create(&r_cfg, extack); + rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto cleanup; } - rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; - err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); + err = ip6_route_info_append(info->nl_net, &rt6_nh_list, + rt, &r_cfg); if (err) { - dst_release_immediate(&rt->dst); + fib6_info_release(rt); goto cleanup; } @@ -4263,14 +4355,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - rt_last = nh->rt6_info; - err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); + rt_last = nh->fib6_info; + err = __ip6_ins_rt(nh->fib6_info, info, extack); + fib6_info_release(nh->fib6_info); + /* save reference to first route for notification */ if (!rt_notif && !err) - rt_notif = nh->rt6_info; + rt_notif = nh->fib6_info; - /* nh->rt6_info is used or freed at this point, reset to NULL*/ - nh->rt6_info = NULL; + /* nh->fib6_info is used or freed at this point, reset to NULL*/ + nh->fib6_info = NULL; if (err) { if (replace && nhn) ip6_print_replace_route_err(&rt6_nh_list); @@ -4311,9 +4405,8 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->rt6_info) - dst_release_immediate(&nh->rt6_info->dst); - kfree(nh->mxc.mx); + if (nh->fib6_info) + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } @@ -4390,20 +4483,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else - return ip6_route_add(&cfg, extack); + return ip6_route_add(&cfg, GFP_KERNEL, extack); } -static size_t rt6_nlmsg_size(struct rt6_info *rt) +static size_t rt6_nlmsg_size(struct fib6_info *rt) { int nexthop_len = 0; - if (rt->rt6i_nsiblings) { + if (rt->fib6_nsiblings) { nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->dst.lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); - nexthop_len *= rt->rt6i_nsiblings; + nexthop_len *= rt->fib6_nsiblings; } return NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -4419,38 +4512,41 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->dst.lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, unsigned int *flags, bool skip_oif) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; - if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { + if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; - if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) + + rcu_read_lock(); + if (fib6_ignore_linkdown(rt)) *flags |= RTNH_F_DEAD; + rcu_read_unlock(); } - if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) + if (rt->fib6_flags & RTF_GATEWAY) { + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) goto nla_put_failure; } - *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); - if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) + *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); + if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + if (!skip_oif && rt->fib6_nh.nh_dev && + nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) goto nla_put_failure; - if (rt->dst.lwtstate && - lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + if (rt->fib6_nh.nh_lwtstate && + lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) goto nla_put_failure; return 0; @@ -4460,8 +4556,9 @@ nla_put_failure: } /* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) +static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) { + const struct net_device *dev = rt->fib6_nh.nh_dev; struct rtnexthop *rtnh; unsigned int flags = 0; @@ -4469,8 +4566,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; - rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; + rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; + rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; if (rt6_nexthop_info(skb, rt, &flags, true) < 0) goto nla_put_failure; @@ -4486,16 +4583,16 @@ nla_put_failure: return -EMSGSIZE; } -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; - long expires; + long expires = 0; + u32 *pmetrics; u32 table; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); @@ -4504,53 +4601,31 @@ static int rt6_fill_node(struct net *net, rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; - rtm->rtm_dst_len = rt->rt6i_dst.plen; - rtm->rtm_src_len = rt->rt6i_src.plen; + rtm->rtm_dst_len = rt->fib6_dst.plen; + rtm->rtm_src_len = rt->fib6_src.plen; rtm->rtm_tos = 0; - if (rt->rt6i_table) - table = rt->rt6i_table->tb6_id; + if (rt->fib6_table) + table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) { - switch (rt->dst.error) { - case -EINVAL: - rtm->rtm_type = RTN_BLACKHOLE; - break; - case -EACCES: - rtm->rtm_type = RTN_PROHIBIT; - break; - case -EAGAIN: - rtm->rtm_type = RTN_THROW; - break; - default: - rtm->rtm_type = RTN_UNREACHABLE; - break; - } - } - else if (rt->rt6i_flags & RTF_LOCAL) - rtm->rtm_type = RTN_LOCAL; - else if (rt->rt6i_flags & RTF_ANYCAST) - rtm->rtm_type = RTN_ANYCAST; - else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) - rtm->rtm_type = RTN_LOCAL; - else - rtm->rtm_type = RTN_UNICAST; + + rtm->rtm_type = rt->fib6_type; rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = rt->rt6i_protocol; + rtm->rtm_protocol = rt->fib6_protocol; - if (rt->rt6i_flags & RTF_CACHE) + if (rt->fib6_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; - if (dst) { - if (nla_put_in6_addr(skb, RTA_DST, dst)) + if (dest) { + if (nla_put_in6_addr(skb, RTA_DST, dest)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { @@ -4558,12 +4633,12 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr)) goto nla_put_failure; #endif if (iif) { #ifdef CONFIG_IPV6_MROUTE - if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { + if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) { int err = ip6mr_get_route(net, skb, rtm, portid); if (err == 0) @@ -4574,34 +4649,32 @@ static int rt6_fill_node(struct net *net, #endif if (nla_put_u32(skb, RTA_IIF, iif)) goto nla_put_failure; - } else if (dst) { + } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - if (rt->rt6i_prefsrc.plen) { + if (rt->fib6_prefsrc.plen) { struct in6_addr saddr_buf; - saddr_buf = rt->rt6i_prefsrc.addr; + saddr_buf = rt->fib6_prefsrc.addr; if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt6i_pmtu) - metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; - if (rtnetlink_put_metrics(skb, metrics) < 0) + pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; + if (rtnetlink_put_metrics(skb, pmetrics) < 0) goto nla_put_failure; - if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) + if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) goto nla_put_failure; /* For multipath routes, walk the siblings list and add * each as a nexthop within RTA_MULTIPATH. */ - if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; struct nlattr *mp; mp = nla_nest_start(skb, RTA_MULTIPATH); @@ -4612,7 +4685,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, rt6i_siblings) { + &rt->fib6_siblings, fib6_siblings) { if (rt6_add_nexthop(skb, sibling) < 0) goto nla_put_failure; } @@ -4623,12 +4696,15 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; + if (rt->fib6_flags & RTF_EXPIRES) { + expires = dst ? dst->expires : rt->expires; + expires -= jiffies; + } - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) + if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; - if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags))) goto nla_put_failure; @@ -4640,12 +4716,12 @@ nla_put_failure: return -EMSGSIZE; } -int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct fib6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; struct net *net = arg->net; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { @@ -4653,16 +4729,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) /* user wants prefix routes only */ if (rtm->rtm_flags & RTM_F_PREFIX && - !(rt->rt6i_flags & RTF_PREFIX_RT)) { + !(rt->fib6_flags & RTF_PREFIX_RT)) { /* success since this is not a prefix route */ return 1; } } - return rt6_fill_node(net, - arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, - NLM_F_MULTI); + return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, + RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, + arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, @@ -4671,6 +4746,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; int err, iif = 0, oif = 0; + struct fib6_info *from; struct dst_entry *dst; struct rt6_info *rt; struct sk_buff *skb; @@ -4759,14 +4835,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } - if (fibmatch && rt->from) { - struct rt6_info *ort = rt->from; - - dst_hold(&ort->dst); - ip6_rt_put(rt); - rt = ort; - } - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); @@ -4775,14 +4843,21 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } skb_dst_set(skb, &rt->dst); + + rcu_read_lock(); + from = rcu_dereference(rt->from); + if (fibmatch) - err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); else - err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, + 0); + rcu_read_unlock(); + if (err < 0) { kfree_skb(skb); goto errout; @@ -4793,7 +4868,7 @@ errout: return err; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; @@ -4808,8 +4883,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, if (!skb) goto errout; - err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, nlm_flags); + err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, + event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -4834,6 +4909,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { + net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -5030,11 +5106,17 @@ static int __net_init ip6_route_net_init(struct net *net) if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) goto out_ip6_dst_ops; + net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, + sizeof(*net->ipv6.fib6_null_entry), + GFP_KERNEL); + if (!net->ipv6.fib6_null_entry) + goto out_ip6_dst_entries; + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) - goto out_ip6_dst_entries; + goto out_fib6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); @@ -5081,6 +5163,8 @@ out_ip6_prohibit_entry: out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif +out_fib6_null_entry: + kfree(net->ipv6.fib6_null_entry); out_ip6_dst_entries: dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: @@ -5089,6 +5173,7 @@ out_ip6_dst_ops: static void __net_exit ip6_route_net_exit(struct net *net) { + kfree(net->ipv6.fib6_null_entry); kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -5159,6 +5244,7 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ + init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES |