summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
authorJiejian Wu <jiejian@linux.alibaba.com>2026-02-24 23:50:40 +0300
committerJakub Kicinski <kuba@kernel.org>2026-02-26 06:36:25 +0300
commit74455a5b4326add2499cb4a1f9706154b3a1eab4 (patch)
treef1696d8139e2f9503c63353daceedf2772f91824 /include/net
parent7717fbb14028be5735acb911aeb7553b7c662418 (diff)
downloadlinux-74455a5b4326add2499cb4a1f9706154b3a1eab4.tar.xz
ipvs: make ip_vs_svc_table and ip_vs_svc_fwm_table per netns
Current ipvs uses one global mutex "__ip_vs_mutex" to keep the global "ip_vs_svc_table" and "ip_vs_svc_fwm_table" safe. But when there are tens of thousands of services from different netns in the table, it takes a long time to look up the table, for example, using "ipvsadm -ln" from different netns simultaneously. We make "ip_vs_svc_table" and "ip_vs_svc_fwm_table" per netns, and we add "service_mutex" per netns to keep these two tables safe instead of the global "__ip_vs_mutex" in current version. To this end, looking up services from different netns simultaneously will not get stuck, shortening the time consumption in large-scale deployment. It can be reproduced using the simple scripts below. init.sh: #!/bin/bash for((i=1;i<=4;i++));do ip netns add ns$i ip netns exec ns$i ip link set dev lo up ip netns exec ns$i sh add-services.sh done add-services.sh: #!/bin/bash for((i=0;i<30000;i++)); do ipvsadm -A -t 10.10.10.10:$((80+$i)) -s rr done runtest.sh: #!/bin/bash for((i=1;i<4;i++));do ip netns exec ns$i ipvsadm -ln > /dev/null & done ip netns exec ns4 ipvsadm -ln > /dev/null Run "sh init.sh" to initiate the network environment. Then run "time ./runtest.sh" to evaluate the time consumption. Our testbed is a 4-core Intel Xeon ECS. The result of the original version is around 8 seconds, while the result of the modified version is only 0.8 seconds. Signed-off-by: Jiejian Wu <jiejian@linux.alibaba.com> Co-developed-by: Dust Li <dust.li@linux.alibaba.com> Signed-off-by: Dust Li <dust.li@linux.alibaba.com> Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Florian Westphal <fw@strlen.de> Link: https://patch.msgid.link/20260224205048.4718-2-fw@strlen.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/ip_vs.h13
1 files changed, 13 insertions, 0 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 29a36709e7f3..074a204ec6db 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -33,6 +33,12 @@
#define IP_VS_HDR_INVERSE 1
#define IP_VS_HDR_ICMP 2
+/*
+ * Hash table: for virtual service lookups
+ */
+#define IP_VS_SVC_TAB_BITS 8
+#define IP_VS_SVC_TAB_SIZE BIT(IP_VS_SVC_TAB_BITS)
+#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
/* Generic access of ipvs struct */
static inline struct netns_ipvs *net_ipvs(struct net* net)
@@ -1041,6 +1047,13 @@ struct netns_ipvs {
*/
unsigned int mixed_address_family_dests;
unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */
+
+ /* the service mutex that protect svc_table and svc_fwm_table */
+ struct mutex service_mutex;
+ /* the service table hashed by <protocol, addr, port> */
+ struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE];
+ /* the service table hashed by fwmark */
+ struct hlist_head svc_fwm_table[IP_VS_SVC_TAB_SIZE];
};
#define DEFAULT_SYNC_THRESHOLD 3