summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/netdevices.rst10
-rw-r--r--drivers/net/netdevsim/netdev.c259
-rw-r--r--drivers/net/netdevsim/netdevsim.h5
-rw-r--r--net/core/dev.c42
-rw-r--r--net/core/netdev_rx_queue.c1
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py19
6 files changed, 294 insertions, 42 deletions
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 857c9784f87e..1d37038e9fbe 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -297,3 +297,13 @@ napi->poll:
Context:
softirq
will be called with interrupts disabled by netconsole.
+
+NETDEV_INTERNAL symbol namespace
+================================
+
+Symbols exported as NETDEV_INTERNAL can only be used in networking
+core and drivers which exclusively flow via the main networking list and trees.
+Note that the inverse is not true, most symbols outside of NETDEV_INTERNAL
+are not expected to be used by random code outside netdev either.
+Symbols may lack the designation because they predate the namespaces,
+or simply due to an oversight.
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index e068a9761c09..d013b6498539 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -20,6 +20,7 @@
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/netlink.h>
#include <net/net_shaper.h>
@@ -29,6 +30,8 @@
#include "netdevsim.h"
+MODULE_IMPORT_NS("NETDEV_INTERNAL");
+
#define NSIM_RING_SIZE 256
static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
@@ -69,7 +72,7 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = skb_get_queue_mapping(skb);
if (rxq >= peer_dev->num_rx_queues)
rxq = rxq % peer_dev->num_rx_queues;
- rq = &peer_ns->rq[rxq];
+ rq = peer_ns->rq[rxq];
skb_tx_timestamp(skb);
if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
@@ -359,25 +362,24 @@ static int nsim_poll(struct napi_struct *napi, int budget)
return done;
}
-static int nsim_create_page_pool(struct nsim_rq *rq)
+static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
{
- struct page_pool_params p = {
+ struct page_pool_params params = {
.order = 0,
.pool_size = NSIM_RING_SIZE,
.nid = NUMA_NO_NODE,
- .dev = &rq->napi.dev->dev,
- .napi = &rq->napi,
+ .dev = &napi->dev->dev,
+ .napi = napi,
.dma_dir = DMA_BIDIRECTIONAL,
- .netdev = rq->napi.dev,
+ .netdev = napi->dev,
};
+ struct page_pool *pool;
- rq->page_pool = page_pool_create(&p);
- if (IS_ERR(rq->page_pool)) {
- int err = PTR_ERR(rq->page_pool);
+ pool = page_pool_create(&params);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
- rq->page_pool = NULL;
- return err;
- }
+ *p = pool;
return 0;
}
@@ -388,15 +390,15 @@ static int nsim_init_napi(struct netdevsim *ns)
int err, i;
for (i = 0; i < dev->num_rx_queues; i++) {
- rq = &ns->rq[i];
+ rq = ns->rq[i];
- netif_napi_add(dev, &rq->napi, nsim_poll);
+ netif_napi_add_config(dev, &rq->napi, nsim_poll, i);
}
for (i = 0; i < dev->num_rx_queues; i++) {
- rq = &ns->rq[i];
+ rq = ns->rq[i];
- err = nsim_create_page_pool(rq);
+ err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
if (err)
goto err_pp_destroy;
}
@@ -405,12 +407,12 @@ static int nsim_init_napi(struct netdevsim *ns)
err_pp_destroy:
while (i--) {
- page_pool_destroy(ns->rq[i].page_pool);
- ns->rq[i].page_pool = NULL;
+ page_pool_destroy(ns->rq[i]->page_pool);
+ ns->rq[i]->page_pool = NULL;
}
for (i = 0; i < dev->num_rx_queues; i++)
- __netif_napi_del(&ns->rq[i].napi);
+ __netif_napi_del(&ns->rq[i]->napi);
return err;
}
@@ -421,7 +423,7 @@ static void nsim_enable_napi(struct netdevsim *ns)
int i;
for (i = 0; i < dev->num_rx_queues; i++) {
- struct nsim_rq *rq = &ns->rq[i];
+ struct nsim_rq *rq = ns->rq[i];
netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
napi_enable(&rq->napi);
@@ -448,7 +450,7 @@ static void nsim_del_napi(struct netdevsim *ns)
int i;
for (i = 0; i < dev->num_rx_queues; i++) {
- struct nsim_rq *rq = &ns->rq[i];
+ struct nsim_rq *rq = ns->rq[i];
napi_disable(&rq->napi);
__netif_napi_del(&rq->napi);
@@ -456,8 +458,8 @@ static void nsim_del_napi(struct netdevsim *ns)
synchronize_net();
for (i = 0; i < dev->num_rx_queues; i++) {
- page_pool_destroy(ns->rq[i].page_pool);
- ns->rq[i].page_pool = NULL;
+ page_pool_destroy(ns->rq[i]->page_pool);
+ ns->rq[i]->page_pool = NULL;
}
}
@@ -595,6 +597,182 @@ static const struct netdev_stat_ops nsim_stat_ops = {
.get_base_stats = nsim_get_base_stats,
};
+static struct nsim_rq *nsim_queue_alloc(void)
+{
+ struct nsim_rq *rq;
+
+ rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
+ if (!rq)
+ return NULL;
+
+ skb_queue_head_init(&rq->skb_queue);
+ return rq;
+}
+
+static void nsim_queue_free(struct nsim_rq *rq)
+{
+ skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
+ kfree(rq);
+}
+
+/* Queue reset mode is controlled by ns->rq_reset_mode.
+ * - normal - new NAPI new pool (old NAPI enabled when new added)
+ * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
+ * - mode 2 - new NAPI new pool (old NAPI removed before new added)
+ * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
+ */
+struct nsim_queue_mem {
+ struct nsim_rq *rq;
+ struct page_pool *pp;
+};
+
+static int
+nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+ int err;
+
+ if (ns->rq_reset_mode > 3)
+ return -EINVAL;
+
+ if (ns->rq_reset_mode == 1)
+ return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
+
+ qmem->rq = nsim_queue_alloc();
+ if (!qmem->rq)
+ return -ENOMEM;
+
+ err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
+ if (err)
+ goto err_free;
+
+ if (!ns->rq_reset_mode)
+ netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
+
+ return 0;
+
+err_free:
+ nsim_queue_free(qmem->rq);
+ return err;
+}
+
+static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+
+ page_pool_destroy(qmem->pp);
+ if (qmem->rq) {
+ if (!ns->rq_reset_mode)
+ netif_napi_del(&qmem->rq->napi);
+ page_pool_destroy(qmem->rq->page_pool);
+ nsim_queue_free(qmem->rq);
+ }
+}
+
+static int
+nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (ns->rq_reset_mode == 1) {
+ ns->rq[idx]->page_pool = qmem->pp;
+ napi_enable(&ns->rq[idx]->napi);
+ return 0;
+ }
+
+ /* netif_napi_add()/_del() should normally be called from alloc/free,
+ * here we want to test various call orders.
+ */
+ if (ns->rq_reset_mode == 2) {
+ netif_napi_del(&ns->rq[idx]->napi);
+ netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
+ } else if (ns->rq_reset_mode == 3) {
+ netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
+ netif_napi_del(&ns->rq[idx]->napi);
+ }
+
+ ns->rq[idx] = qmem->rq;
+ napi_enable(&ns->rq[idx]->napi);
+
+ return 0;
+}
+
+static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+
+ napi_disable(&ns->rq[idx]->napi);
+
+ if (ns->rq_reset_mode == 1) {
+ qmem->pp = ns->rq[idx]->page_pool;
+ page_pool_disable_direct_recycling(qmem->pp);
+ } else {
+ qmem->rq = ns->rq[idx];
+ }
+
+ return 0;
+}
+
+static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
+ .ndo_queue_mem_size = sizeof(struct nsim_queue_mem),
+ .ndo_queue_mem_alloc = nsim_queue_mem_alloc,
+ .ndo_queue_mem_free = nsim_queue_mem_free,
+ .ndo_queue_start = nsim_queue_start,
+ .ndo_queue_stop = nsim_queue_stop,
+};
+
+static ssize_t
+nsim_qreset_write(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct netdevsim *ns = file->private_data;
+ unsigned int queue, mode;
+ char buf[32];
+ ssize_t ret;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+ if (copy_from_user(buf, data, count))
+ return -EFAULT;
+ buf[count] = '\0';
+
+ ret = sscanf(buf, "%u %u", &queue, &mode);
+ if (ret != 2)
+ return -EINVAL;
+
+ rtnl_lock();
+ if (!netif_running(ns->netdev)) {
+ ret = -ENETDOWN;
+ goto exit_unlock;
+ }
+
+ if (queue >= ns->netdev->real_num_rx_queues) {
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ ns->rq_reset_mode = mode;
+ ret = netdev_rx_queue_restart(ns->netdev, queue);
+ ns->rq_reset_mode = 0;
+ if (ret)
+ goto exit_unlock;
+
+ ret = count;
+exit_unlock:
+ rtnl_unlock();
+ return ret;
+}
+
+static const struct file_operations nsim_qreset_fops = {
+ .open = simple_open,
+ .write = nsim_qreset_write,
+ .owner = THIS_MODULE,
+};
+
static ssize_t
nsim_pp_hold_read(struct file *file, char __user *data,
size_t count, loff_t *ppos)
@@ -628,7 +806,7 @@ nsim_pp_hold_write(struct file *file, const char __user *data,
if (!netif_running(ns->netdev) && val) {
ret = -ENETDOWN;
} else if (val) {
- ns->page = page_pool_dev_alloc_pages(ns->rq[0].page_pool);
+ ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
if (!ns->page)
ret = -ENOMEM;
} else {
@@ -677,27 +855,35 @@ static int nsim_queue_init(struct netdevsim *ns)
struct net_device *dev = ns->netdev;
int i;
- ns->rq = kvcalloc(dev->num_rx_queues, sizeof(*ns->rq),
- GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+ ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
+ GFP_KERNEL_ACCOUNT);
if (!ns->rq)
return -ENOMEM;
- for (i = 0; i < dev->num_rx_queues; i++)
- skb_queue_head_init(&ns->rq[i].skb_queue);
+ for (i = 0; i < dev->num_rx_queues; i++) {
+ ns->rq[i] = nsim_queue_alloc();
+ if (!ns->rq[i])
+ goto err_free_prev;
+ }
return 0;
+
+err_free_prev:
+ while (i--)
+ kfree(ns->rq[i]);
+ kfree(ns->rq);
+ return -ENOMEM;
}
-static void nsim_queue_free(struct netdevsim *ns)
+static void nsim_queue_uninit(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
int i;
for (i = 0; i < dev->num_rx_queues; i++)
- skb_queue_purge_reason(&ns->rq[i].skb_queue,
- SKB_DROP_REASON_QUEUE_PURGE);
+ nsim_queue_free(ns->rq[i]);
- kvfree(ns->rq);
+ kfree(ns->rq);
ns->rq = NULL;
}
@@ -713,6 +899,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
ns->phc = phc;
ns->netdev->netdev_ops = &nsim_netdev_ops;
ns->netdev->stat_ops = &nsim_stat_ops;
+ ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
if (err)
@@ -741,7 +928,7 @@ err_ipsec_teardown:
nsim_macsec_teardown(ns);
nsim_bpf_uninit(ns);
err_rq_destroy:
- nsim_queue_free(ns);
+ nsim_queue_uninit(ns);
err_utn_destroy:
rtnl_unlock();
nsim_udp_tunnels_info_destroy(ns->netdev);
@@ -798,6 +985,9 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
ns, &nsim_pp_hold_fops);
+ ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
+ nsim_dev_port->ddir, ns,
+ &nsim_qreset_fops);
return ns;
@@ -811,6 +1001,7 @@ void nsim_destroy(struct netdevsim *ns)
struct net_device *dev = ns->netdev;
struct netdevsim *peer;
+ debugfs_remove(ns->qr_dfs);
debugfs_remove(ns->pp_dfs);
rtnl_lock();
@@ -823,7 +1014,7 @@ void nsim_destroy(struct netdevsim *ns)
nsim_macsec_teardown(ns);
nsim_ipsec_teardown(ns);
nsim_bpf_uninit(ns);
- nsim_queue_free(ns);
+ nsim_queue_uninit(ns);
}
rtnl_unlock();
if (nsim_dev_port_is_pf(ns->nsim_dev_port))
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index bf02efa10956..a70f62af4c88 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -101,7 +101,9 @@ struct netdevsim {
struct nsim_dev *nsim_dev;
struct nsim_dev_port *nsim_dev_port;
struct mock_phc *phc;
- struct nsim_rq *rq;
+ struct nsim_rq **rq;
+
+ int rq_reset_mode;
u64 tx_packets;
u64 tx_bytes;
@@ -134,6 +136,7 @@ struct netdevsim {
struct page *page;
struct dentry *pp_dfs;
+ struct dentry *qr_dfs;
struct nsim_ethtool ethtool;
struct netdevsim __rcu *peer;
diff --git a/net/core/dev.c b/net/core/dev.c
index 983c24927316..26f0c2fbb8aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6736,13 +6736,14 @@ static void napi_restore_config(struct napi_struct *n)
n->gro_flush_timeout = n->config->gro_flush_timeout;
n->irq_suspend_timeout = n->config->irq_suspend_timeout;
/* a NAPI ID might be stored in the config, if so use it. if not, use
- * napi_hash_add to generate one for us. It will be saved to the config
- * in napi_disable.
+ * napi_hash_add to generate one for us.
*/
- if (n->config->napi_id)
+ if (n->config->napi_id) {
napi_hash_add_with_id(n, n->config->napi_id);
- else
+ } else {
napi_hash_add(n);
+ n->config->napi_id = n->napi_id;
+ }
}
static void napi_save_config(struct napi_struct *n)
@@ -6750,10 +6751,39 @@ static void napi_save_config(struct napi_struct *n)
n->config->defer_hard_irqs = n->defer_hard_irqs;
n->config->gro_flush_timeout = n->gro_flush_timeout;
n->config->irq_suspend_timeout = n->irq_suspend_timeout;
- n->config->napi_id = n->napi_id;
napi_hash_del(n);
}
+/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will
+ * inherit an existing ID try to insert it at the right position.
+ */
+static void
+netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
+{
+ unsigned int new_id, pos_id;
+ struct list_head *higher;
+ struct napi_struct *pos;
+
+ new_id = UINT_MAX;
+ if (napi->config && napi->config->napi_id)
+ new_id = napi->config->napi_id;
+
+ higher = &dev->napi_list;
+ list_for_each_entry(pos, &dev->napi_list, dev_list) {
+ if (pos->napi_id >= MIN_NAPI_ID)
+ pos_id = pos->napi_id;
+ else if (pos->config)
+ pos_id = pos->config->napi_id;
+ else
+ pos_id = UINT_MAX;
+
+ if (pos_id <= new_id)
+ break;
+ higher = &pos->dev_list;
+ }
+ list_add_rcu(&napi->dev_list, higher); /* adds after higher */
+}
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6780,7 +6810,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
napi->list_owner = -1;
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
- list_add_rcu(&napi->dev_list, &dev->napi_list);
+ netif_napi_dev_list_add(dev, napi);
/* default settings from sysfs are applied to all NAPIs. any per-NAPI
* configuration will be loaded in napi_enable
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index e217a5838c87..db82786fa0c4 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -79,3 +79,4 @@ err_free_new_mem:
return err;
}
+EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93d9d914529b..93e8cb671c3d 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -18,6 +18,23 @@ def lo_check(nf) -> None:
ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+def napi_list_check(nf) -> None:
+ with NetdevSimDev(queue_count=100) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100)
+
+ for q in [50, 0, 99]:
+ for i in range(4):
+ nsim.dfs_write("queue_reset", f"{q} {i}")
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100,
+ comment=f"queue count after reset queue {q} mode {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -89,7 +106,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
args=(nf, ))
ksft_exit()