|  |  |  |
|---|---|---|
| author | Jakub Kicinski <kuba@kernel.org> | 2025-08-28 04:23:04 +0300 |
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-08-28 04:24:22 +0300 |
| commit | 86b26768167ad030b9d4885d484e08a30e6b8df9 |  |
| tree | f7d2eb1ee55f69d071264ed6ae368109de4eae5e /net/core/dev.c |  |
| parent | f0c88a0d83b26bcfbb3463d3a283bc08007a5ae0 |  |
| parent | 48aa30443e52c9666d5cd5e67532e475f212337e |  |
Merge branch 'net-prevent-rps-table-overwrite-of-active-flows'
Krishna Kumar says:
====================
net: Prevent RPS table overwrite of active flows
This series splits the original RPS patch [1] into two patches for
net-next. It also addresses a kernel test robot warning by defining
rps_flow_is_active() only when aRFS is enabled. I tested v3 with
four builds and reboots: two for [PATCH 1/2] with aRFS enabled and
disabled, and two for [PATCH 2/2]. v4 and v5 contain no code
changes, only documentation updates. v6 has a one-line change to
keep the 'hash' field under #ifdef, and was test-built with aRFS=on
and aRFS=off. The same two builds were done for v7, along with 15
minutes of load testing with aRFS=on to confirm the new changes are
correct.
The first patch prevents the RPS table entry of an active flow from
being overwritten, thereby improving aRFS stability (a simplified
model of the new activity check follows the commit message). The
second patch caches the hash and flow_id computed in get_rps_cpu()
so that set_rps_cpu() does not have to recalculate them.
[1] https://lore.kernel.org/netdev/20250708081516.53048-1-krikku@gmail.com/
[2] https://lore.kernel.org/netdev/20250729104109.1687418-1-krikku@gmail.com/
====================
Link: https://patch.msgid.link/20250825031005.3674864-1-krikku@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
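The staleness test at the heart of the series' new rps_flow_is_active() helper (visible in the diff below) relies on a wrap-safe unsigned-counter comparison that is easy to misread. Here is a minimal userspace model of that test; the function name, sample values, and main() harness are illustrative, not part of the patch:

```c
/*
 * Standalone model (not kernel code) of the staleness test used by the
 * new rps_flow_is_active() helper: a flow counts as "recently active"
 * when the CPU's packet counter has advanced by fewer than 10 *
 * table_size entries since the flow's last_qtail snapshot. Casting the
 * unsigned difference to int keeps the test correct across wraparound.
 */
#include <stdbool.h>
#include <stdio.h>

static bool flow_is_active(unsigned int input_queue_head,
                           unsigned int last_qtail, unsigned int log)
{
        return (int)(input_queue_head - last_qtail) < (int)(10U << log);
}

int main(void)
{
        /* log = 9 -> 512-entry table, activity window of 5120 packets */
        printf("%d\n", flow_is_active(1000, 900, 9));       /* 1: recent */
        printf("%d\n", flow_is_active(100000, 900, 9));     /* 0: stale  */
        /* head wrapped past UINT_MAX; the flow is still recent */
        printf("%d\n", flow_is_active(50, 0xffffff00u, 9)); /* 1 */
        return 0;
}
```

The cast of the unsigned difference to int keeps the comparison meaningful even after input_queue_head wraps past UINT_MAX, as the third call demonstrates.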
Diffstat (limited to 'net/core/dev.c')
|  |  |  |
|---|---|---|
| -rw-r--r-- | net/core/dev.c | 71 |

1 file changed, 60 insertions(+), 11 deletions(-)
```diff
diff --git a/net/core/dev.c b/net/core/dev.c
index 93a25d87b86b..1d1650d9ecff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4849,9 +4849,40 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
 	return hash_32(hash, flow_table->log);
 }
 
+#ifdef CONFIG_RFS_ACCEL
+/**
+ * rps_flow_is_active - check whether the flow is recently active.
+ * @rflow: Specific flow to check activity.
+ * @flow_table: per-queue flowtable that @rflow belongs to.
+ * @cpu: CPU saved in @rflow.
+ *
+ * If the CPU has processed many packets since the flow's last activity
+ * (beyond 10 times the table size), the flow is considered stale.
+ *
+ * Return: true if flow was recently active.
+ */
+static bool rps_flow_is_active(struct rps_dev_flow *rflow,
+			       struct rps_dev_flow_table *flow_table,
+			       unsigned int cpu)
+{
+	unsigned int flow_last_active;
+	unsigned int sd_input_head;
+
+	if (cpu >= nr_cpu_ids)
+		return false;
+
+	sd_input_head = READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head);
+	flow_last_active = READ_ONCE(rflow->last_qtail);
+
+	return (int)(sd_input_head - flow_last_active) <
+	       (int)(10 << flow_table->log);
+}
+#endif
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
-	    struct rps_dev_flow *rflow, u16 next_cpu)
+	    struct rps_dev_flow *rflow, u16 next_cpu, u32 hash,
+	    u32 flow_id)
 {
 	if (next_cpu < nr_cpu_ids) {
 		u32 head;
@@ -4859,8 +4890,9 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
 		struct rps_dev_flow *old_rflow;
+		struct rps_dev_flow *tmp_rflow;
+		unsigned int tmp_cpu;
 		u16 rxq_index;
-		u32 flow_id;
 		int rc;
 
 		/* Should we steer this flow to a different hardware queue? */
@@ -4875,14 +4907,29 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
 		if (!flow_table)
 			goto out;
-		flow_id = rfs_slot(skb_get_hash(skb), flow_table);
+
+		tmp_rflow = &flow_table->flows[flow_id];
+		tmp_cpu = READ_ONCE(tmp_rflow->cpu);
+
+		if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) {
+			if (rps_flow_is_active(tmp_rflow, flow_table,
+					       tmp_cpu)) {
+				if (hash != READ_ONCE(tmp_rflow->hash) ||
+				    next_cpu == tmp_cpu)
+					goto out;
+			}
+		}
+
 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
 							rxq_index, flow_id);
 		if (rc < 0)
 			goto out;
+
 		old_rflow = rflow;
-		rflow = &flow_table->flows[flow_id];
+		rflow = tmp_rflow;
 		WRITE_ONCE(rflow->filter, rc);
+		WRITE_ONCE(rflow->hash, hash);
+
 		if (old_rflow->filter == rc)
 			WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER);
 	out:
@@ -4908,6 +4955,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	struct rps_dev_flow_table *flow_table;
 	struct rps_map *map;
 	int cpu = -1;
+	u32 flow_id;
 	u32 tcpu;
 	u32 hash;
 
@@ -4954,7 +5002,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		/* OK, now we know there is a match,
 		 * we can look at the local (per receive queue) flow table
 		 */
-		rflow = &flow_table->flows[rfs_slot(hash, flow_table)];
+		flow_id = rfs_slot(hash, flow_table);
+		rflow = &flow_table->flows[flow_id];
 		tcpu = rflow->cpu;
 
 		/*
@@ -4973,7 +5022,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		     ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
 		       rflow->last_qtail)) >= 0)) {
 			tcpu = next_cpu;
-			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash,
+					    flow_id);
 		}
 
 		if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
@@ -5017,17 +5067,16 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	struct rps_dev_flow_table *flow_table;
 	struct rps_dev_flow *rflow;
 	bool expire = true;
-	unsigned int cpu;
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id < (1UL << flow_table->log)) {
+		unsigned int cpu;
+
 		rflow = &flow_table->flows[flow_id];
 		cpu = READ_ONCE(rflow->cpu);
-		if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
-		    ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
-			   READ_ONCE(rflow->last_qtail)) <
-		     (int)(10 << flow_table->log)))
+		if (READ_ONCE(rflow->filter) == filter_id &&
+		    rps_flow_is_active(rflow, flow_table, cpu))
 			expire = false;
 	}
 	rcu_read_unlock();
```
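To recap the first patch's core change in isolation: before programming a hardware filter into slot flow_id, set_rps_cpu() now checks whether that slot already belongs to a different, recently active flow, and backs off instead of overwriting it. A simplified standalone model of that decision, with illustrative names (the real code operates on struct rps_dev_flow entries under RCU protection), might look like this:

```c
#include <stdbool.h>
#include <stdio.h>

#define NO_FILTER 0xffffu	/* sentinel: no hardware filter installed */

/* Illustrative stand-in for the fields of struct rps_dev_flow that the
 * new guard consults; the real entries live in a per-queue flow table. */
struct flow_slot {
        unsigned int filter;	/* hardware filter id, or NO_FILTER */
        unsigned int hash;	/* hash of the flow that owns this slot */
        unsigned int cpu;	/* CPU the owning flow was steered to */
};

/* Steering may proceed unless the slot holds a recently active flow with
 * a different hash (overwriting would break that flow's aRFS state), or
 * the same flow is already steered to the desired CPU (nothing to do). */
static bool may_steer(const struct flow_slot *slot, unsigned int hash,
                      unsigned int next_cpu, bool slot_active)
{
        if (slot->filter == NO_FILTER || !slot_active)
                return true;
        return hash == slot->hash && next_cpu != slot->cpu;
}

int main(void)
{
        struct flow_slot slot = { .filter = 42, .hash = 0xabcd, .cpu = 2 };

        printf("%d\n", may_steer(&slot, 0x1234, 3, true));  /* 0: collision  */
        printf("%d\n", may_steer(&slot, 0xabcd, 2, true));  /* 0: already on CPU */
        printf("%d\n", may_steer(&slot, 0xabcd, 3, true));  /* 1: move same flow */
        printf("%d\n", may_steer(&slot, 0x1234, 3, false)); /* 1: stale slot */
        return 0;
}
```

The second patch's change is visible in the same hunks: get_rps_cpu() computes flow_id once via rfs_slot(hash, flow_table) and passes both hash and flow_id down, so set_rps_cpu() no longer recomputes them from skb_get_hash(skb).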
