From 7efc0b6b666d757e07417f59397e7f5f340e74e0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:12 -0800 Subject: net/ipv4: Pass net to fib_multipath_hash instead of fib_info fib_multipath_hash only needs net struct to check a sysctl. Make it clear by passing net instead of fib_info. In the end this allows alignment between the ipv4 and ipv6 versions. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8812582a94d5..7c7522e8585b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); #ifdef CONFIG_IP_ROUTE_MULTIPATH -int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, +int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif void fib_select_multipath(struct fib_result *res, int hash); -- cgit v1.2.3 From 3192dac64c73d8c0eb4274a3da23d829fb5177af Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:16 -0800 Subject: net: Rename NETEVENT_MULTIPATH_HASH_UPDATE Rename NETEVENT_MULTIPATH_HASH_UPDATE to NETEVENT_IPV4_MPATH_HASH_UPDATE to denote it relates to a change in the IPv4 hash policy. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- include/net/netevent.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 69f16c605b9d..93d48c1b2bf8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2430,7 +2430,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; - case NETEVENT_MULTIPATH_HASH_UPDATE: + case NETEVENT_IPV4_MPATH_HASH_UPDATE: net = ptr; if (!net_eq(net, &init_net)) diff --git a/include/net/netevent.h b/include/net/netevent.h index 40e7bab68490..baee605a94ab 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -26,7 +26,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ - NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 89683d868b37..011de9a20ec6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); + call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); return ret; } -- cgit v1.2.3 From b75cc8f90f07342467b3bd51dbc0054f185032c9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:17 -0800 Subject: net/ipv6: Pass skb to route lookup IPv6 does path selection for multipath routes deep in the lookup functions. The next patch adds L4 hash option and needs the skb for the forward path. To get the skb to the relevant FIB lookup functions it needs to go through the fib rules layer, so add a lookup_data argument to the fib_lookup_arg struct. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/infiniband/core/cma.c | 2 +- drivers/net/ipvlan/ipvlan_core.c | 3 +- drivers/net/vrf.c | 7 ++-- include/net/fib_rules.h | 1 + include/net/ip6_fib.h | 4 ++- include/net/ip6_route.h | 11 +++--- net/ipv6/anycast.c | 2 +- net/ipv6/fib6_rules.c | 8 +++-- net/ipv6/icmp.c | 3 +- net/ipv6/ip6_fib.c | 3 +- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_tunnel.c | 4 +-- net/ipv6/ip6_vti.c | 2 +- net/ipv6/mcast.c | 4 +-- net/ipv6/netfilter/ip6t_rpfilter.c | 2 +- net/ipv6/netfilter/nft_fib_ipv6.c | 3 +- net/ipv6/route.c | 72 +++++++++++++++++++++++--------------- net/ipv6/seg6_local.c | 4 +-- 18 files changed, 83 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3ae32d1ddd27..915bbd867b61 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev, IPV6_ADDR_LINKLOCAL; struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, &src_addr->sin6_addr, net_dev->ifindex, - strict); + NULL, strict); bool ret; if (!rt) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 17daebd19e65..1a8132eb2a3e 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, }; skb_dst_drop(skb); - dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, + skb, flags); skb_dst_set(skb, dst); break; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e459e601c57f..c6be49d3a9eb 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, const struct net_device *dev, struct flowi6 *fl6, int ifindex, + const struct sk_buff *skb, int flags) { struct net_vrf *vrf = netdev_priv(dev); @@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, if (!table) return NULL; - return ip6_pol_route(net, table, ifindex, fl6, flags); + return ip6_pol_route(net, table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, @@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; - rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, + rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) return; @@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags); if (rt) dst = &rt->dst; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 1c9e17c11953..e5cfcfc7dd93 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -47,6 +47,7 @@ struct fib_rule { struct fib_lookup_arg { void *lookup_ptr; + const void *lookup_data; void *result; struct fib_rule *rule; u32 table; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8d906a35b534..5e86fd9dc857 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -350,7 +350,8 @@ struct fib6_table { typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, - struct flowi6 *, int); + struct flowi6 *, + const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -364,6 +365,7 @@ struct fib6_entry_notifier_info { struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup); struct fib6_node *fib6_lookup(struct fib6_node *root, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index da2bde5fda8f..9594f9317952 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, - struct flowi6 *fl6, int flags); + struct flowi6 *fl6, + const struct sk_buff *skb, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); @@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net, } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, - int flags); + const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int ifindex, struct flowi6 *fl6, int flags); + int ifindex, struct flowi6 *fl6, + const struct sk_buff *skb, int flags); void ip6_route_init_special_entries(void); int ip6_route_init(void); @@ -126,7 +128,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, } struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, - const struct in6_addr *saddr, int oif, int flags); + const struct in6_addr *saddr, int oif, + const struct sk_buff *skb, int flags); u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *hkeys); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index d7d0abc7fd0e..c61718dba2e6 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 04e5f523e50f..00ef9467f3c0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net) } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup) { if (net->ipv6.fib6_has_custom_rules) { struct fib_lookup_arg arg = { .lookup_ptr = lookup, + .lookup_data = skb, .flags = FIB_LOOKUP_NOREF, }; @@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, } else { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags); if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put(rt); - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put(rt); @@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } - rt = lookup(net, table, flp6, flags); + rt = lookup(net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b0778d323b6e..a5d929223820 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, skb_pull(skb2, nhs); skb_reset_network_header(skb2); - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, + skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cab95cf3b39f..2f995e9e3050 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup) { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error == -EAGAIN) { ip6_rt_put(rt); rt = net->ipv6.ip6_null_entry; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4f150a394387..83c7766c8c75 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1053,7 +1053,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (!rt) return; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 869e2e6750f7..1124f310df5a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Try to guess incoming interface */ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, - NULL, 0, 0); + NULL, 0, skb2, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (!rt) return; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c617ea17faa8..a482b854eeea 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (rt) tdev = rt->dst.dev; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d9bb933dd5c4..d1a0cefac273 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); @@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 94deb69bbbda..910a27318f58 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, lookup_flags |= RT6_LOOKUP_F_IFACE; } - rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); if (rt->dst.error) goto out; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index cc5174c7254c..3230b3d7b11b 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, *dest = 0; again: - rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, + lookup_flags); if (rt->dst.error) goto put_rt_err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5c89af2c54f4..d2b8368663cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -452,6 +452,7 @@ static bool rt6_check_expired(const struct rt6_info *rt) static struct rt6_info *rt6_multipath_select(struct rt6_info *match, struct flowi6 *fl6, int oif, + const struct sk_buff *skb, int strict) { struct rt6_info *sibling, *next_sibling; @@ -460,7 +461,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL); + fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL); if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) return match; @@ -914,7 +915,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { struct rt6_info *rt, *rt_cache; struct fib6_node *fn; @@ -929,8 +932,8 @@ restart: rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, - fl6->flowi6_oif, flags); + rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, + skb, flags); } if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); @@ -954,14 +957,15 @@ restart: } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, - int flags) + const struct sk_buff *skb, int flags) { - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); + return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup); } EXPORT_SYMBOL_GPL(ip6_route_lookup); struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, - const struct in6_addr *saddr, int oif, int strict) + const struct in6_addr *saddr, int oif, + const struct sk_buff *skb, int strict) { struct flowi6 fl6 = { .flowi6_oif = oif, @@ -975,7 +979,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -1647,7 +1651,8 @@ void rt6_age_exceptions(struct rt6_info *rt, } struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int flags) + int oif, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; struct rt6_info *rt, *rt_cache; @@ -1669,7 +1674,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, redo_rt6_select: rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, strict); + rt = rt6_multipath_select(rt, fl6, oif, skb, strict); if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) @@ -1768,20 +1773,25 @@ uncached_rt_out: } EXPORT_SYMBOL_GPL(ip6_pol_route); -static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) +static struct rt6_info *ip6_pol_route_input(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { - return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); + return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags); } struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, - struct flowi6 *fl6, int flags) + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); + return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input); } EXPORT_SYMBOL_GPL(ip6_route_input_lookup); @@ -1876,13 +1886,17 @@ void ip6_route_input(struct sk_buff *skb) if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); skb_dst_drop(skb); - skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); + skb_dst_set(skb, + ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); } -static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) +static struct rt6_info *ip6_pol_route_output(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { - return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); } struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, @@ -1910,7 +1924,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } EXPORT_SYMBOL_GPL(ip6_route_output_flags); @@ -2159,6 +2173,7 @@ struct ip6rd_flowi { static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; @@ -2232,8 +2247,9 @@ out: }; static struct dst_entry *ip6_route_redirect(struct net *net, - const struct flowi6 *fl6, - const struct in6_addr *gateway) + const struct flowi6 *fl6, + const struct sk_buff *skb, + const struct in6_addr *gateway) { int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip6rd_flowi rdfl; @@ -2241,7 +2257,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net, rdfl.fl6 = *fl6; rdfl.gateway = *gateway; - return fib6_rule_lookup(net, &rdfl.fl6, + return fib6_rule_lookup(net, &rdfl.fl6, skb, flags, __ip6_route_redirect); } @@ -2261,7 +2277,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_uid = uid; - dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); + dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } @@ -2283,7 +2299,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.saddr = iph->daddr; fl6.flowi6_uid = sock_net_uid(net, NULL); - dst = ip6_route_redirect(net, &fl6, &iph->saddr); + dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } @@ -2485,7 +2501,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, flags |= RT6_LOOKUP_F_HAS_SADDR; flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; - rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); + rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags); /* if table lookup failed, fall back to full lookup */ if (rt == net->ipv6.ip6_null_entry) { @@ -2548,7 +2564,7 @@ static int ip6_route_check_nh(struct net *net, } if (!grt) - grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1); if (!grt) goto out; @@ -4613,7 +4629,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - dst = ip6_route_input_lookup(net, dev, &fl6, flags); + dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags); rcu_read_unlock(); } else { diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ba3767ef5e93..45722327375a 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; if (!tbl_id) { - dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); } else { struct fib6_table *table; @@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (!table) goto out; - rt = ip6_pol_route(net, table, 0, &fl6, flags); + rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); dst = &rt->dst; } -- cgit v1.2.3 From b4bac172e90ce4a93df8adf44eb70d91b9d611eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:18 -0800 Subject: net/ipv6: Add support for path selection using hash of 5-tuple Some operators prefer IPv6 path selection to use a standard 5-tuple hash rather than just an L3 hash with the flow the label. To that end add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3 which covers source and destination addresses along with flow label and IPv6 protocol. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 ++++ include/net/ip6_route.h | 4 +- include/net/netevent.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv6/icmp.c | 2 +- net/ipv6/route.c | 68 ++++++++++++++++++++++++++-------- net/ipv6/sysctl_net_ipv6.c | 27 ++++++++++++++ 7 files changed, 91 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a553d4e4a0fb..783675a730e5 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN FALSE: disabled Default: FALSE +fib_multipath_hash_policy - INTEGER + Controls which hash policy to use for multipath routes. + Default: 0 (Layer 3) + Possible values: + 0 - Layer 3 (source and destination addresses plus flow label) + 1 - Layer 4 (standard 5-tuple) + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9594f9317952..ce2abc0ff102 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, - struct flow_keys *hkeys); +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb, struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); diff --git a/include/net/netevent.h b/include/net/netevent.h index baee605a94ab..d9918261701c 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -27,6 +27,7 @@ enum netevent_notif_type { NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index e286fda09fcf..5b51110435fc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; + int multipath_hash_policy; int flowlabel_consistency; int auto_flowlabels; int icmpv6_time; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a5d929223820..6f84668be6ea 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); - fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL); + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d2b8368663cb..f0ae58424c45 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static struct rt6_info *rt6_multipath_select(struct rt6_info *match, +static struct rt6_info *rt6_multipath_select(const struct net *net, + struct rt6_info *match, struct flowi6 *fl6, int oif, const struct sk_buff *skb, int strict) @@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL); + fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) return match; @@ -932,7 +933,7 @@ restart: rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, + rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, skb, flags); } if (rt == net->ipv6.ip6_null_entry) { @@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, redo_rt6_select: rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, skb, strict); + rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) @@ -1839,21 +1840,56 @@ out: } /* if skb is set it will be used and fl6 can be NULL */ -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, - struct flow_keys *flkeys) +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; u32 mhash; - memset(&hash_keys, 0, sizeof(hash_keys)); - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - if (skb) { - ip6_multipath_l3_keys(skb, &hash_keys, flkeys); - } else { - hash_keys.addrs.v6addrs.src = fl6->saddr; - hash_keys.addrs.v6addrs.dst = fl6->daddr; - hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; - hash_keys.basic.ip_proto = fl6->flowi6_proto; + switch (net->ipv6.sysctl.multipath_hash_policy) { + case 0: + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (skb) { + ip6_multipath_l3_keys(skb, &hash_keys, flkeys); + } else { + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; + hash_keys.basic.ip_proto = fl6->flowi6_proto; + } + break; + case 1: + if (skb) { + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + struct flow_keys keys; + + /* short-circuit if we already have L4 hash present */ + if (skb->l4_hash) + return skb_get_hash_raw(skb) >> 1; + + memset(&hash_keys, 0, sizeof(hash_keys)); + + if (!flkeys) { + skb_flow_dissect_flow_keys(skb, &keys, flag); + flkeys = &keys; + } + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; + hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; + } else { + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.ports.src = fl6->fl6_sport; + hash_keys.ports.dst = fl6->fl6_dport; + hash_keys.basic.ip_proto = fl6->flowi6_proto; + } + break; } mhash = flow_hash_from_keys(&hash_keys); @@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb) flkeys = &_flkeys; if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) - fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); skb_dst_drop(skb); skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 262f791f1b9b..966c42af92f4 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,14 +16,31 @@ #include #include #include +#include #ifdef CONFIG_NETLABEL #include #endif +static int zero; static int one = 1; static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; +static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net; + int ret; + + net = container_of(table->data, struct net, + ipv6.sysctl.multipath_hash_policy); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); + + return ret; +} static struct ctl_table ipv6_table_template[] = { { @@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fib_multipath_hash_policy", + .data = &init_net.ipv6.sysctl.multipath_hash_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_rt6_multipath_hash_policy, + .extra1 = &zero, + .extra2 = &one, + }, { } }; @@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; + ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy, ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- cgit v1.2.3 From de7a0f871fabe74fff7481caf7d3efe03b58fe58 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:20 -0800 Subject: net: Remove unused get_hash_from_flow functions __get_hash_from_flowi6 is still used for flowlabels, but the IPv4 variant and the wrappers to both are not used. Remove them. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/flow.h | 16 ---------------- net/core/flow_dissector.c | 16 ---------------- 2 files changed, 32 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 64e7ee9cb980..8ce21793094e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family) __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); -static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) -{ - struct flow_keys keys; - - return __get_hash_from_flowi6(fl6, &keys); -} - -__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys); - -static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4) -{ - struct flow_keys keys; - - return __get_hash_from_flowi4(fl4, &keys); -} - #endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 559db9ea8d86..d29f09bc5ff9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) } EXPORT_SYMBOL(__get_hash_from_flowi6); -__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys) -{ - memset(keys, 0, sizeof(*keys)); - - keys->addrs.v4addrs.src = fl4->saddr; - keys->addrs.v4addrs.dst = fl4->daddr; - keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - keys->ports.src = fl4->fl4_sport; - keys->ports.dst = fl4->fl4_dport; - keys->keyid.keyid = fl4->fl4_gre_key; - keys->basic.ip_proto = fl4->flowi4_proto; - - return flow_hash_from_keys(keys); -} -EXPORT_SYMBOL(__get_hash_from_flowi4); - static const struct flow_dissector_key flow_keys_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, -- cgit v1.2.3