diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2024-06-13 02:42:18 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2024-06-13 02:42:18 +0300 |
| commit | 05f43db7f011d0ebf95bd28562d7f23e30d7e107 (patch) | |
| tree | 4ca9e9ef05433d9621909f12bd8d93a05333cc34 /include | |
| parent | 32b06603f8790b34835af43d259152eb57827413 (diff) | |
| parent | 5f90d93b6108098f389f992e267588924e446049 (diff) | |
| download | linux-05f43db7f011d0ebf95bd28562d7f23e30d7e107.tar.xz | |
Merge branch 'allow-configuration-of-multipath-hash-seed'
Petr Machata says:
====================
Allow configuration of multipath hash seed
Let me just quote the commit message of patch #2 here to inform the
motivation and some of the implementation:
When calculating hashes for the purpose of multipath forwarding,
both IPv4 and IPv6 code currently fall back on
flow_hash_from_keys(). That uses a randomly-generated seed. That's a
fine choice by default, but unfortunately some deployments may need
a tighter control over the seed used.
In this patchset, make the seed configurable by adding a new sysctl
key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed
is used specifically for multipath forwarding and not for the other
concerns that flow_hash_from_keys() is used for, such as queue
selection. Expose the knob as sysctl because other such settings,
such as headers to hash, are also handled that way.
Despite being placed in the net.ipv4 namespace, the multipath seed
sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of
TCP variables. Like those, the multipath hash seed is a per-netns
variable.
The seed used by flow_hash_from_keys() is a 128-bit quantity.
However it seems that usually the seed is a much more modest value.
32 bits seem typical (Cisco, Cumulus), some systems go even lower.
For that reason, and to decouple the user interface from
implementation details, go with a 32-bit quantity, which is then
quadruplicated to form the siphash key.
One example of use of this interface is avoiding hash polarization,
where two ECMP routers, one behind the other, happen to make consistent
hashing decisions, and as a result, part of the ECMP space of the latter
router is never used. Another is a load balancer where several machines
forward traffic to one of a number of leaves, and the forwarding
decisions need to be made consistently. (This is a case of a desired
hash polarization, mentioned e.g. in chapter 6.3 of [0].)
There has already been a proposal to include a hash seed control
interface in the past[1].
- Patches #1-#2 contain the substance of the work
- Patch #3 is an mlxsw offload
- Patches #4 and #5 are a selftest
[0] https://www.usenix.org/system/files/conference/nsdi18/nsdi18-araujo.pdf
[1] https://lore.kernel.org/netdev/YIlVpYMCn%2F8WfE1P@rnd/
====================
Link: https://lore.kernel.org/r/20240607151357.421181-1-petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/flow_dissector.h | 2 | ||||
| -rw-r--r-- | include/net/ip_fib.h | 28 | ||||
| -rw-r--r-- | include/net/netns/ipv4.h | 8 |
3 files changed, 38 insertions, 0 deletions
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 99626475c3f4..3e47e123934d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9b2f69ba5e49..6e7984bfb986 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -520,7 +520,35 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); + +static void +fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed) +{ + u64 mp_seed_64 = mp_seed; + + key->key[0] = (mp_seed_64 << 32) | mp_seed_64; + key->key[1] = key->key[0]; +} + +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + siphash_aligned_key_t hash_key; + u32 mp_seed; + + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + fib_multipath_hash_construct_key(&hash_key, mp_seed); + + return flow_hash_from_keys_seed(keys, &hash_key); +} +#else +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + return flow_hash_from_keys(keys); +} #endif + int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a91bb971f901..5fcd61ada622 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,13 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; +#ifdef CONFIG_IP_ROUTE_MULTIPATH +struct sysctl_fib_multipath_hash_seed { + u32 user_seed; + u32 mp_seed; +}; +#endif + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -246,6 +253,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; |
