author		David Wei <dw@davidwei.uk>	2026-04-03 02:10:23 +0300
committer	Jakub Kicinski <kuba@kernel.org>	2026-04-10 04:21:46 +0300
commit		5602ad61ebee99c83081fba1aaf5814736edc3e7 (patch)
tree		9eba10cdd163a89d4e48001f6a2b98c2c407a979
parent		1e91c98bc9a8ef8198e73151b2a118cd3748925d (diff)
download	linux-5602ad61ebee99c83081fba1aaf5814736edc3e7.tar.xz
net: Proxy netif_mp_{open,close}_rxq for leased queues
When a process in a container wants to set up a memory provider, it will
use the virtual netdev and a leased rxq, and call netif_mp_{open,close}_rxq
to try to restart the queue. At this point, proxy the queue restart onto
the real rxq in the physical netdev. For memory providers (io_uring
zero-copy rx and devmem), this causes the real rxq in the physical netdev
to be filled from a memory provider that has DMA-mapped memory from a
process within a container.

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-7-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
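For orientation, a minimal sketch of the proxied control flow this patch
adds, in the leased-queue case (the lease helpers are introduced earlier
in this series; all function names below appear in the diff, the concrete
devices and indices are illustrative):

	/* A memory provider consumer (io_uring zero-copy rx or devmem)
	 * binds to a leased rxq on the virtual netdev in the container:
	 *
	 *   netif_mp_open_rxq(virt_dev, idx, p, extack)
	 *     netif_rxq_is_leased(virt_dev, idx)            // true
	 *     netif_get_rx_queue_lease_locked(&dev, &idx)   // resolve + lock
	 *                                                   // physical dev/idx
	 *     __netif_mp_open_rxq(dev, idx, p, extack)      // restart real rxq
	 *     netif_put_rx_queue_lease_locked(virt_dev, dev)
	 */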
-rw-r--r--	net/core/dev.c			4
-rw-r--r--	net/core/dev.h			7
-rw-r--r--	net/core/netdev_rx_queue.c	104
3 files changed, 95 insertions, 20 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index cc7bcac892af..2df8a2a5ecf5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -12350,10 +12350,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct netdev_rx_queue *rxq = &dev->_rx[i];
- struct pp_memory_provider_params *p = &rxq->mp_params;
- if (p->mp_ops && p->mp_ops->uninstall)
- p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
+ __netif_mp_uninstall_rxq(rxq, &rxq->mp_params);
}
}
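The uninstall hook that the new __netif_mp_uninstall_rxq() helper
dispatches belongs to the memory provider ops. The hook's signature is
confirmed by the call site above; the surrounding struct is only sketched
here from contemporary kernels for context:

	struct memory_provider_ops {
		/* ... page_pool alloc/release/init/destroy hooks ... */
		void	(*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq);
	};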
diff --git a/net/core/dev.h b/net/core/dev.h
index 6516ce2b5517..95edb2d4eff8 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -12,6 +12,7 @@ struct net;
struct netlink_ext_ack;
struct netdev_queue_config;
struct cpumask;
+struct pp_memory_provider_params;
/* Random bits of netdevice that don't need to be exposed */
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
@@ -101,6 +102,12 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
+void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
+ const struct pp_memory_provider_params *p);
+
+void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
+ struct netdev_rx_queue *virt_rxq);
+
/* netdev management, shared between various uAPI entry points */
struct netdev_name_node {
struct hlist_node hlist;
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 06ac3bd5507f..1d6e7e47bf0a 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -28,6 +28,8 @@ void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
netdev_assert_locked(rxq_dst->dev);
netdev_assert_locked(rxq_src->dev);
+ netif_rxq_cleanup_unlease(rxq_src, rxq_dst);
+
WRITE_ONCE(rxq_src->lease, NULL);
WRITE_ONCE(rxq_dst->lease, NULL);
@@ -200,24 +202,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
-int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
- const struct pp_memory_provider_params *p,
- struct netlink_ext_ack *extack)
+static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack)
{
const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
struct netdev_queue_config qcfg[2];
struct netdev_rx_queue *rxq;
int ret;
- if (!netdev_need_ops_lock(dev))
- return -EOPNOTSUPP;
-
- if (rxq_idx >= dev->real_num_rx_queues) {
- NL_SET_ERR_MSG(extack, "rx queue index out of range");
- return -ERANGE;
- }
- rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
-
if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
return -EINVAL;
@@ -264,16 +257,48 @@ err_clear_mp:
return ret;
}
-void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
- const struct pp_memory_provider_params *old_p)
+int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *orig_dev = dev;
+ int ret;
+
+ if (!netdev_need_ops_lock(dev))
+ return -EOPNOTSUPP;
+
+ if (rxq_idx >= dev->real_num_rx_queues) {
+ NL_SET_ERR_MSG(extack, "rx queue index out of range");
+ return -ERANGE;
+ }
+ rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+
+ if (!netif_rxq_is_leased(dev, rxq_idx))
+ return __netif_mp_open_rxq(dev, rxq_idx, p, extack);
+
+ if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) {
+ NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev");
+ return -EBUSY;
+ }
+ if (!dev->dev.parent) {
+ NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev");
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack);
+out:
+ netif_put_rx_queue_lease_locked(orig_dev, dev);
+ return ret;
+}
+
+static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+ const struct pp_memory_provider_params *old_p)
{
struct netdev_queue_config qcfg[2];
struct netdev_rx_queue *rxq;
int err;
- if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
- return;
-
rxq = __netif_get_rx_queue(dev, ifq_idx);
/* Callers holding a netdev ref may get here after we already
 * went thru shutdown via dev_memory_provider_uninstall().
 */
@@ -294,3 +319,48 @@ void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]);
WARN_ON(err && err != -ENETDOWN);
}
+
+void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+ const struct pp_memory_provider_params *old_p)
+{
+ struct net_device *orig_dev = dev;
+
+ if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+ return;
+ if (!netif_rxq_is_leased(dev, ifq_idx))
+ return __netif_mp_close_rxq(dev, ifq_idx, old_p);
+
+ if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx)))
+ return;
+
+ __netif_mp_close_rxq(dev, ifq_idx, old_p);
+ netif_put_rx_queue_lease_locked(orig_dev, dev);
+}
+
+void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
+ const struct pp_memory_provider_params *p)
+{
+ if (p->mp_ops && p->mp_ops->uninstall)
+ p->mp_ops->uninstall(p->mp_priv, rxq);
+}
+
+/* Clean up memory provider state when a queue lease is torn down. If
+ * a memory provider was installed on the physical queue via the lease,
+ * close it now. The memory provider is a property of the queue itself,
+ * and it was _guaranteed_ to be installed on the physical queue via
+ * the lease redirection. The extra __netif_mp_close_rxq is needed
+ * since the physical queue can outlive the virtual queue in the lease
+ * case, so it needs to be reconfigured to clear the memory provider.
+ */
+void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
+ struct netdev_rx_queue *virt_rxq)
+{
+ struct pp_memory_provider_params *p = &phys_rxq->mp_params;
+ unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq);
+
+ if (!p->mp_ops)
+ return;
+
+ __netif_mp_uninstall_rxq(virt_rxq, p);
+ __netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p);
+}
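For completeness, a hypothetical sketch of the contract the proxying code
relies on: netif_get_rx_queue_lease_locked() and its put counterpart are
defined earlier in the series and are not part of this diff, so the body
below is an assumption for illustration only:

	/* Assumed behaviour: follow the rxq's lease pointer to the peer
	 * queue, lock the peer's netdev instance, and rewrite *dev and
	 * *rxq_idx in place so the caller operates on the backing queue.
	 */
	static bool example_get_rx_queue_lease_locked(struct net_device **dev,
						      unsigned int *rxq_idx)
	{
		struct netdev_rx_queue *rxq = __netif_get_rx_queue(*dev, *rxq_idx);
		struct netdev_rx_queue *peer = READ_ONCE(rxq->lease);

		if (!peer)
			return false;

		netdev_lock(peer->dev);
		*dev = peer->dev;
		*rxq_idx = get_netdev_rx_queue_index(peer);
		return true;
	}

netif_mp_open_rxq() then rejects the resolved device if it has no parent
(i.e. the lease resolved to a virtual netdev rather than a physical one),
so only real hardware queues are ever restarted through the proxy path.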