diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-03-05 03:54:12 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-03-05 03:54:13 +0300 |
| commit | db739ff277b4ba8713224a334d7e388d04473725 (patch) | |
| tree | 40ce3bd11cefc6f440d0664f68a0d62853630243 /include | |
| parent | c26b8c4e291c55c7b2138d7bcb27348ca3a5ae59 (diff) | |
| parent | a435163d3100b044d620990772a5ce1684ff02ca (diff) | |
| download | linux-db739ff277b4ba8713224a334d7e388d04473725.tar.xz | |
Merge branch 'rfs-use-high-order-allocations-for-hash-tables'
Eric Dumazet says:
====================
rfs: use high-order allocations for hash tables
This series adds rps_tag_ptr, which encodes both a pointer to a
power-of-two sized hash table and the log2 of its size in a single long word.
RFS hash tables (global and per rx-queue) are converted to rps_tag_ptr.
This removes a cache line miss, and allows high-order allocations.
The global hash table can benefit from huge pages.
====================
Link: https://patch.msgid.link/20260302181432.1836150-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/hotdata.h | 5 | ||||
| -rw-r--r-- | include/net/netdev_rx_queue.h | 3 | ||||
| -rw-r--r-- | include/net/rps-types.h | 24 | ||||
| -rw-r--r-- | include/net/rps.h | 49 |
4 files changed, 49 insertions, 32 deletions
diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6632b1aa7584..62534d1f3c70 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -6,6 +6,9 @@ #include <linux/types.h> #include <linux/netdevice.h> #include <net/protocol.h> +#ifdef CONFIG_RPS +#include <net/rps-types.h> +#endif struct skb_defer_node { struct llist_head defer_list; @@ -33,7 +36,7 @@ struct net_hotdata { struct kmem_cache *skbuff_fclone_cache; struct kmem_cache *skb_small_head_cache; #ifdef CONFIG_RPS - struct rps_sock_flow_table __rcu *rps_sock_flow_table; + rps_tag_ptr rps_sock_flow_table; u32 rps_cpu_mask; #endif struct skb_defer_node __percpu *skb_defer_nodes; diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index cfa72c485387..08f81329fc11 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -8,13 +8,14 @@ #include <net/xdp.h> #include <net/page_pool/types.h> #include <net/netdev_queues.h> +#include <net/rps-types.h> /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_RPS struct rps_map __rcu *rps_map; - struct rps_dev_flow_table __rcu *rps_flow_table; + rps_tag_ptr rps_flow_table; #endif struct kobject kobj; const struct attribute_group **groups; diff --git a/include/net/rps-types.h b/include/net/rps-types.h new file mode 100644 index 000000000000..6b90a66866c1 --- /dev/null +++ b/include/net/rps-types.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_RPS_TYPES_H +#define _NET_RPS_TYPES_H + +/* Define a rps_tag_ptr: + * Low order 5 bits are used to store the ilog2(size) of an RPS table. 
+ */ +typedef unsigned long rps_tag_ptr; + +static inline u8 rps_tag_to_log(rps_tag_ptr tag_ptr) +{ + return tag_ptr & 31U; +} + +static inline u32 rps_tag_to_mask(rps_tag_ptr tag_ptr) +{ + return (1U << rps_tag_to_log(tag_ptr)) - 1; +} + +static inline void *rps_tag_to_table(rps_tag_ptr tag_ptr) +{ + return (void *)(tag_ptr & ~31UL); +} +#endif /* _NET_RPS_TYPES_H */ diff --git a/include/net/rps.h b/include/net/rps.h index f1794cd2e7fb..e33c6a2fa8bb 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -8,6 +8,7 @@ #include <net/hotdata.h> #ifdef CONFIG_RPS +#include <net/rps-types.h> extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; @@ -39,17 +40,6 @@ struct rps_dev_flow { #define RPS_NO_FILTER 0xffff /* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - u8 log; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). * Each entry is a 32bit value. Upper part is the high-order bits @@ -60,41 +50,38 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. 
*/ struct rps_sock_flow_table { - struct rcu_head rcu; - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; + u32 ent; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) +static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash) { - unsigned int index = hash & table->mask; + unsigned int index = hash & rps_tag_to_mask(tag_ptr); u32 val = hash & ~net_hotdata.rps_cpu_mask; + struct rps_sock_flow_table *table; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); + table = rps_tag_to_table(tag_ptr); /* The following WRITE_ONCE() is paired with the READ_ONCE() * here, and another one in get_rps_cpu(). */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); + if (READ_ONCE(table[index].ent) != val) + WRITE_ONCE(table[index].ent, val); } static inline void _sock_rps_record_flow_hash(__u32 hash) { - struct rps_sock_flow_table *sock_flow_table; + rps_tag_ptr tag_ptr; if (!hash) return; rcu_read_lock(); - sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (sock_flow_table) - rps_record_sock_flow(sock_flow_table, hash); + tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (tag_ptr) + rps_record_sock_flow(tag_ptr, hash); rcu_read_unlock(); } @@ -121,6 +108,7 @@ static inline void _sock_rps_record_flow(const struct sock *sk) static inline void _sock_rps_delete_flow(const struct sock *sk) { struct rps_sock_flow_table *table; + rps_tag_ptr tag_ptr; u32 hash, index; hash = READ_ONCE(sk->sk_rxhash); @@ -128,11 +116,12 @@ static inline void _sock_rps_delete_flow(const struct sock *sk) return; rcu_read_lock(); - table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (table) { - index = hash & table->mask; - if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) - WRITE_ONCE(table->ents[index], RPS_NO_CPU); + 
tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (tag_ptr) { + index = hash & rps_tag_to_mask(tag_ptr); + table = rps_tag_to_table(tag_ptr); + if (READ_ONCE(table[index].ent) != RPS_NO_CPU) + WRITE_ONCE(table[index].ent, RPS_NO_CPU); } rcu_read_unlock(); } |
