From 0ae8133586ad1c9be894411aaf8b17bb58c8efe5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 2 Feb 2017 12:37:08 -0800 Subject: net: ipv6: Allow shorthand delete of all nexthops in multipath route IPv4 allows multipath routes to be deleted using just the prefix and length. For example: $ ip ro ls vrf red unreachable default metric 8192 1.1.1.0/24 nexthop via 10.100.1.254 dev eth1 weight 1 nexthop via 10.11.200.2 dev eth11.200 weight 1 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 $ ip ro del 1.1.1.0/24 vrf red $ ip ro ls vrf red unreachable default metric 8192 10.11.200.0/24 dev eth11.200 proto kernel scope link src 10.11.200.3 10.100.1.0/24 dev eth1 proto kernel scope link src 10.100.1.3 The same notation does not work with IPv6 because of how multipath routes are implemented for IPv6. For IPv6 only the first nexthop of a multipath route is deleted if the request contains only a prefix and length. This leads to unnecessary complexity in userspace dealing with IPv6 multipath routes. This patch allows all nexthops to be deleted without specifying each one in the delete request. Internally, this is done by walking the sibling list of the route matching the specifications given (prefix, length, metric, protocol, etc). $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium 2001:db8:200::/120 via 2001:db8:1::2 dev eth1 metric 1024 pref medium 2001:db8:200::/120 via 2001:db8:2::2 dev eth2 metric 1024 pref medium ... $ ip -6 ro del vrf red 2001:db8:200::/120 $ ip -6 ro ls vrf red 2001:db8:1::/120 dev eth1 proto kernel metric 256 pref medium 2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium ... 
Because IPv6 allows individual nexthops to be deleted without deleting the entire route, the ip6_route_multipath_del and non-multipath code path (ip6_route_del) have to be discriminated so that all nexthops are only deleted for the latter case. This is done by making the existing fc_type in fib6_config a u16 and then adding a new u16 field with fc_delete_all_nh as the first bit. Suggested-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a74e2aa40ef4..c979c878df1c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -37,7 +37,9 @@ struct fib6_config { int fc_ifindex; u32 fc_flags; u32 fc_protocol; - u32 fc_type; /* only 8 bits are used */ + u16 fc_type; /* only 8 bits are used */ + u16 fc_delete_all_nh : 1, + __unused : 15; struct in6_addr fc_dst; struct in6_addr fc_src; -- cgit v1.2.3 From 3b1137fe74829e021f483756a648cbb87c8a1b4a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 2 Feb 2017 12:37:10 -0800 Subject: net: ipv6: Change notifications for multipath add to RTA_MULTIPATH Change ip6_route_multipath_add to send one notification with the full route encoded with RTA_MULTIPATH instead of a series of individual routes. This is done by adding a skip_notify flag to the nl_info struct. The flag is used to skip sending of the notification in the fib code that actually inserts the route. Once the full route has been added, a notification is generated with all nexthops. ip6_route_multipath_add handles 3 use cases: new routes, route replace, and route append. The multipath notification generated needs to be consistent with the order of the nexthops and it should be consistent with the order in a FIB dump which means the route with the first nexthop needs to be used as the route reference. 
For the first 2 cases (new and replace), a reference to the route used to send the notification is obtained by saving the first route added. For the append case, the last route added is used to loop back to its first sibling route which is the first nexthop in the multipath route. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/netlink.h | 1 + net/ipv6/ip6_fib.c | 6 ++++-- net/ipv6/route.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index d3938f11ae52..b239fcd33d80 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -229,6 +229,7 @@ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; + bool skip_notify; }; int netlink_rcv_skb(struct sk_buff *skb, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bf5e22fb95d..99c68ce6ef78 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -881,7 +881,8 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + if (!info->skip_notify) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { @@ -907,7 +908,8 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); + if (!info->skip_notify) + inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c740d9e249a6..cb3366d5e165 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3023,13 +3023,37 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } +static void ip6_route_mpath_notify(struct rt6_info *rt, + struct rt6_info *rt_last, + struct 
nl_info *info, + __u16 nlflags) +{ + /* if this is an APPEND route, then rt points to the first route + * inserted and rt_last points to last route inserted. Userspace + * wants a consistent dump of the route which starts at the first + * nexthop. Since sibling routes are always added at the end of + * the list, find the first sibling of the last route appended + */ + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { + rt = list_first_entry(&rt_last->rt6i_siblings, + struct rt6_info, + rt6i_siblings); + } + + if (rt) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); +} + static int ip6_route_multipath_add(struct fib6_config *cfg) { + struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct rt6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; + __u16 nlflags; int remaining; int attrlen; int err = 1; @@ -3038,6 +3062,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); LIST_HEAD(rt6_nh_list); + nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE; + if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) + nlflags |= NLM_F_APPEND; + remaining = cfg->fc_mp_len; rtnh = (struct rtnexthop *)cfg->fc_mp; @@ -3080,9 +3108,20 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) rtnh = rtnh_next(rtnh, &remaining); } + /* for add and replace send one notification with all nexthops. 
+ * Skip the notification in fib6_add_rt2node and send one with + * the full route when done + */ + info->skip_notify = 1; + err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); + rt_last = nh->rt6_info; + err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc); + /* save reference to first route for notification */ + if (!rt_notif && !err) + rt_notif = nh->rt6_info; + /* nh->rt6_info is used or freed at this point, reset to NULL*/ nh->rt6_info = NULL; if (err) { @@ -3104,9 +3143,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) nhn++; } + /* success ... tell user about new route */ + ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); goto cleanup; add_errout: + /* send notification for routes that were added so that + * the delete notifications sent by ip6_route_del are + * coherent + */ + if (rt_notif) + ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); + /* Delete routes that were already added */ list_for_each_entry(nh, &rt6_nh_list, next) { if (err_nh == nh) -- cgit v1.2.3