diff options
| author | Julian Anastasov <ja@ssi.bg> | 2026-04-30 10:44:13 +0300 |
|---|---|---|
| committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2026-05-05 02:52:55 +0300 |
| commit | afbd961305eb483515650ccfcb7743608e7add78 (patch) | |
| tree | b23174b7123d7014fa1a4a1f187ffea9f31ea56c | |
| parent | bd3a4795d5744f59a1f485379f1303e5e606f377 (diff) | |
| download | linux-afbd961305eb483515650ccfcb7743608e7add78.tar.xz | |
ipvs: fixes for the new ip_vs_status info
Sashiko reports some problems with the recently added
/proc/net/ip_vs_status:
* ip_vs_status_show() as a table reader may run long after the
conn_tab and svc_table tables are released. While ip_vs_conn_flush()
properly changes the conn_tab_changes counter when conn_tab is removed,
ip_vs_del_service() and ip_vs_flush() were missing such a change for
the svc_table_changes counter. As a result, readers like
ip_vs_dst_event() and ip_vs_status_show() may continue to use
a freed table after a cond_resched_rcu() call.
* While counting the buckets in ip_vs_status_show(), make sure we
traverse only the needed number of entries in the chain. This also
prevents a possible overflow of the 'count' variable.
* Add a check for 'loops' to prevent infinite loops while restarting
the traversal on table change.
* While IP_VS_CONN_TAB_MAX_BITS is 20 on 32-bit platforms and
there is no risk of overflow when multiplying the number of
conn_tab buckets by 100, prefer the div_u64() helper to make
the subsequent division safer.
* Use 0440 permissions for ip_vs_status to restrict the
info to root only, because it exposes information about the hash
distribution.
Link: https://sashiko.dev/#/patchset/20260410112352.23599-1-fw%40strlen.de
Fixes: 9a9ccef907a7 ("ipvs: add ip_vs_status info")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| -rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 51 |
1 files changed, 36 insertions, 15 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6632daa87ded..27e50afe9a54 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2032,6 +2032,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc) cancel_delayed_work_sync(&ipvs->svc_resize_work); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2078,6 +2081,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = rcu_dereference_protected(ipvs->svc_table, 1); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -3004,7 +3010,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) int old_gen, new_gen; u32 counts[8]; u32 bucket; - int count; + u32 count; + int loops; u32 sum1; u32 sum; int i; @@ -3020,6 +3027,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) if (!atomic_read(&ipvs->conn_count)) goto after_conns; old_gen = atomic_read(&ipvs->conn_tab_changes); + loops = 0; repeat_conn: smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ @@ -3032,8 +3040,11 @@ repeat_conn: resched_score++; ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; - hlist_bl_for_each_entry_rcu(hn, e, head, node) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3042,37 +3053,41 @@ repeat_conn: new_gen = atomic_read(&ipvs->conn_tab_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? 
*/ + if (++loops >= 5) + goto after_conns; old_gen = new_gen; goto repeat_conn; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_conns: t = rcu_dereference(ipvs->svc_table); count = ip_vs_get_num_services(ipvs); - seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Services:\t%u\n", count); seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); if (!count) goto after_svc; old_gen = atomic_read(&ipvs->svc_table_changes); + loops = 0; repeat_svc: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ @@ -3086,8 +3101,11 @@ repeat_svc: ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; hlist_bl_for_each_entry_rcu(svc, e, head, - s_list) + s_list) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3096,24 +3114,27 @@ repeat_svc: new_gen = atomic_read(&ipvs->svc_table_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? 
*/ + if (++loops >= 5) + goto after_svc; old_gen = new_gen; goto repeat_svc; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_svc: @@ -5039,7 +5060,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; - if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net, ip_vs_status_show, NULL)) goto err_status; #endif |
