diff options
-rw-r--r-- | Documentation/networking/netdevices.rst | 10 | ||||
-rw-r--r-- | drivers/net/netdevsim/netdev.c | 259 | ||||
-rw-r--r-- | drivers/net/netdevsim/netdevsim.h | 5 | ||||
-rw-r--r-- | net/core/dev.c | 42 | ||||
-rw-r--r-- | net/core/netdev_rx_queue.c | 1 | ||||
-rwxr-xr-x | tools/testing/selftests/net/nl_netdev.py | 19 |
6 files changed, 294 insertions, 42 deletions
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 857c9784f87e..1d37038e9fbe 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -297,3 +297,13 @@ napi->poll: Context: softirq will be called with interrupts disabled by netconsole. + +NETDEV_INTERNAL symbol namespace +================================ + +Symbols exported as NETDEV_INTERNAL can only be used in networking +core and drivers which exclusively flow via the main networking list and trees. +Note that the inverse is not true, most symbols outside of NETDEV_INTERNAL +are not expected to be used by random code outside netdev either. +Symbols may lack the designation because they predate the namespaces, +or simply due to an oversight. diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index e068a9761c09..d013b6498539 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -20,6 +20,7 @@ #include <linux/netdevice.h> #include <linux/slab.h> #include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/netlink.h> #include <net/net_shaper.h> @@ -29,6 +30,8 @@ #include "netdevsim.h" +MODULE_IMPORT_NS("NETDEV_INTERNAL"); + #define NSIM_RING_SIZE 256 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb) @@ -69,7 +72,7 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = skb_get_queue_mapping(skb); if (rxq >= peer_dev->num_rx_queues) rxq = rxq % peer_dev->num_rx_queues; - rq = &peer_ns->rq[rxq]; + rq = peer_ns->rq[rxq]; skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) @@ -359,25 +362,24 @@ static int nsim_poll(struct napi_struct *napi, int budget) return done; } -static int nsim_create_page_pool(struct nsim_rq *rq) +static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi) { - struct page_pool_params p = { + struct page_pool_params params = { .order = 0, .pool_size = NSIM_RING_SIZE, .nid = NUMA_NO_NODE, - .dev = &rq->napi.dev->dev, - .napi = &rq->napi, + .dev = &napi->dev->dev, + .napi = napi, .dma_dir = DMA_BIDIRECTIONAL, - .netdev = rq->napi.dev, + .netdev = napi->dev, }; + struct page_pool *pool; - rq->page_pool = page_pool_create(&p); - if (IS_ERR(rq->page_pool)) { - int err = PTR_ERR(rq->page_pool); + pool = page_pool_create(¶ms); + if (IS_ERR(pool)) + return PTR_ERR(pool); - rq->page_pool = NULL; - return err; - } + *p = pool; return 0; } @@ -388,15 +390,15 @@ static int nsim_init_napi(struct netdevsim *ns) int err, i; for (i = 0; i < dev->num_rx_queues; i++) { - rq = &ns->rq[i]; + rq = ns->rq[i]; - netif_napi_add(dev, &rq->napi, nsim_poll); + netif_napi_add_config(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { - rq = &ns->rq[i]; + rq = ns->rq[i]; - err = nsim_create_page_pool(rq); + err = nsim_create_page_pool(&rq->page_pool, &rq->napi); if (err) goto err_pp_destroy; } @@ -405,12 +407,12 @@ static int nsim_init_napi(struct netdevsim *ns) err_pp_destroy: while (i--) { - page_pool_destroy(ns->rq[i].page_pool); - ns->rq[i].page_pool = NULL; + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; } for (i = 0; i < dev->num_rx_queues; i++) - __netif_napi_del(&ns->rq[i].napi); + __netif_napi_del(&ns->rq[i]->napi); return err; } @@ -421,7 +423,7 @@ static void nsim_enable_napi(struct netdevsim *ns) int i; for (i = 0; i < dev->num_rx_queues; i++) { - struct nsim_rq *rq = &ns->rq[i]; + struct nsim_rq *rq = ns->rq[i]; netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); napi_enable(&rq->napi); @@ -448,7 +450,7 @@ static void nsim_del_napi(struct netdevsim *ns) int i; for (i = 0; i < dev->num_rx_queues; i++) { - struct nsim_rq *rq = &ns->rq[i]; + struct nsim_rq *rq = ns->rq[i]; napi_disable(&rq->napi); __netif_napi_del(&rq->napi); @@ -456,8 +458,8 @@ static void nsim_del_napi(struct netdevsim *ns) synchronize_net(); for (i = 0; i < dev->num_rx_queues; i++) { - page_pool_destroy(ns->rq[i].page_pool); - ns->rq[i].page_pool = NULL; + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; } } @@ -595,6 +597,182 @@ static const struct netdev_stat_ops nsim_stat_ops = { .get_base_stats = nsim_get_base_stats, }; +static struct nsim_rq *nsim_queue_alloc(void) +{ + struct nsim_rq *rq; + + rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT); + if (!rq) + return NULL; + + skb_queue_head_init(&rq->skb_queue); + return rq; +} + +static void nsim_queue_free(struct nsim_rq *rq) +{ + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); + kfree(rq); +} + +/* Queue reset mode is controlled by ns->rq_reset_mode. + * - normal - new NAPI new pool (old NAPI enabled when new added) + * - mode 1 - allocate new pool (NAPI is only disabled / enabled) + * - mode 2 - new NAPI new pool (old NAPI removed before new added) + * - mode 3 - new NAPI new pool (old NAPI disabled when new added) + */ +struct nsim_queue_mem { + struct nsim_rq *rq; + struct page_pool *pp; +}; + +static int +nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + int err; + + if (ns->rq_reset_mode > 3) + return -EINVAL; + + if (ns->rq_reset_mode == 1) + return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + + qmem->rq = nsim_queue_alloc(); + if (!qmem->rq) + return -ENOMEM; + + err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi); + if (err) + goto err_free; + + if (!ns->rq_reset_mode) + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + + return 0; + +err_free: + nsim_queue_free(qmem->rq); + return err; +} + +static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + page_pool_destroy(qmem->pp); + if (qmem->rq) { + if (!ns->rq_reset_mode) + netif_napi_del(&qmem->rq->napi); + page_pool_destroy(qmem->rq->page_pool); + nsim_queue_free(qmem->rq); + } +} + +static int +nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + if (ns->rq_reset_mode == 1) { + ns->rq[idx]->page_pool = qmem->pp; + napi_enable(&ns->rq[idx]->napi); + return 0; + } + + /* netif_napi_add()/_del() should normally be called from alloc/free, + * here we want to test various call orders. + */ + if (ns->rq_reset_mode == 2) { + netif_napi_del(&ns->rq[idx]->napi); + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + } else if (ns->rq_reset_mode == 3) { + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_del(&ns->rq[idx]->napi); + } + + ns->rq[idx] = qmem->rq; + napi_enable(&ns->rq[idx]->napi); + + return 0; +} + +static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + napi_disable(&ns->rq[idx]->napi); + + if (ns->rq_reset_mode == 1) { + qmem->pp = ns->rq[idx]->page_pool; + page_pool_disable_direct_recycling(qmem->pp); + } else { + qmem->rq = ns->rq[idx]; + } + + return 0; +} + +static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct nsim_queue_mem), + .ndo_queue_mem_alloc = nsim_queue_mem_alloc, + .ndo_queue_mem_free = nsim_queue_mem_free, + .ndo_queue_start = nsim_queue_start, + .ndo_queue_stop = nsim_queue_stop, +}; + +static ssize_t +nsim_qreset_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + unsigned int queue, mode; + char buf[32]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -EINVAL; + if (copy_from_user(buf, data, count)) + return -EFAULT; + buf[count] = '\0'; + + ret = sscanf(buf, "%u %u", &queue, &mode); + if (ret != 2) + return -EINVAL; + + rtnl_lock(); + if (!netif_running(ns->netdev)) { + ret = -ENETDOWN; + goto exit_unlock; + } + + if (queue >= ns->netdev->real_num_rx_queues) { + ret = -EINVAL; + goto exit_unlock; + } + + ns->rq_reset_mode = mode; + ret = netdev_rx_queue_restart(ns->netdev, queue); + ns->rq_reset_mode = 0; + if (ret) + goto exit_unlock; + + ret = count; +exit_unlock: + rtnl_unlock(); + return ret; +} + +static const struct file_operations nsim_qreset_fops = { + .open = simple_open, + .write = nsim_qreset_write, + .owner = THIS_MODULE, +}; + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -628,7 +806,7 @@ nsim_pp_hold_write(struct file *file, const char __user *data, if (!netif_running(ns->netdev) && val) { ret = -ENETDOWN; } else if (val) { - ns->page = page_pool_dev_alloc_pages(ns->rq[0].page_pool); + ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool); if (!ns->page) ret = -ENOMEM; } else { @@ -677,27 +855,35 @@ static int nsim_queue_init(struct netdevsim *ns) struct net_device *dev = ns->netdev; int i; - ns->rq = kvcalloc(dev->num_rx_queues, sizeof(*ns->rq), - GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq), + GFP_KERNEL_ACCOUNT); if (!ns->rq) return -ENOMEM; - for (i = 0; i < dev->num_rx_queues; i++) - skb_queue_head_init(&ns->rq[i].skb_queue); + for (i = 0; i < dev->num_rx_queues; i++) { + ns->rq[i] = nsim_queue_alloc(); + if (!ns->rq[i]) + goto err_free_prev; + } return 0; + +err_free_prev: + while (i--) + kfree(ns->rq[i]); + kfree(ns->rq); + return -ENOMEM; } -static void nsim_queue_free(struct netdevsim *ns) +static void nsim_queue_uninit(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; for (i = 0; i < dev->num_rx_queues; i++) - skb_queue_purge_reason(&ns->rq[i].skb_queue, - SKB_DROP_REASON_QUEUE_PURGE); + nsim_queue_free(ns->rq[i]); - kvfree(ns->rq); + kfree(ns->rq); ns->rq = NULL; } @@ -713,6 +899,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns) ns->phc = phc; ns->netdev->netdev_ops = &nsim_netdev_ops; ns->netdev->stat_ops = &nsim_stat_ops; + ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) @@ -741,7 +928,7 @@ err_ipsec_teardown: nsim_macsec_teardown(ns); nsim_bpf_uninit(ns); err_rq_destroy: - nsim_queue_free(ns); + nsim_queue_uninit(ns); err_utn_destroy: rtnl_unlock(); nsim_udp_tunnels_info_destroy(ns->netdev); @@ -798,6 +985,9 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir, ns, &nsim_pp_hold_fops); + ns->qr_dfs = debugfs_create_file("queue_reset", 0200, + nsim_dev_port->ddir, ns, + &nsim_qreset_fops); return ns; @@ -811,6 +1001,7 @@ void nsim_destroy(struct netdevsim *ns) struct net_device *dev = ns->netdev; struct netdevsim *peer; + debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); rtnl_lock(); @@ -823,7 +1014,7 @@ void nsim_destroy(struct netdevsim *ns) nsim_macsec_teardown(ns); nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); - nsim_queue_free(ns); + nsim_queue_uninit(ns); } rtnl_unlock(); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index bf02efa10956..a70f62af4c88 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -101,7 +101,9 @@ struct netdevsim { struct nsim_dev *nsim_dev; struct nsim_dev_port *nsim_dev_port; struct mock_phc *phc; - struct nsim_rq *rq; + struct nsim_rq **rq; + + int rq_reset_mode; u64 tx_packets; u64 tx_bytes; @@ -134,6 +136,7 @@ struct netdevsim { struct page *page; struct dentry *pp_dfs; + struct dentry *qr_dfs; struct nsim_ethtool ethtool; struct netdevsim __rcu *peer; diff --git a/net/core/dev.c b/net/core/dev.c index 983c24927316..26f0c2fbb8aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6736,13 +6736,14 @@ static void napi_restore_config(struct napi_struct *n) n->gro_flush_timeout = n->config->gro_flush_timeout; n->irq_suspend_timeout = n->config->irq_suspend_timeout; /* a NAPI ID might be stored in the config, if so use it. if not, use - * napi_hash_add to generate one for us. It will be saved to the config - * in napi_disable. + * napi_hash_add to generate one for us. */ - if (n->config->napi_id) + if (n->config->napi_id) { napi_hash_add_with_id(n, n->config->napi_id); - else + } else { napi_hash_add(n); + n->config->napi_id = n->napi_id; + } } static void napi_save_config(struct napi_struct *n) @@ -6750,10 +6751,39 @@ static void napi_save_config(struct napi_struct *n) n->config->defer_hard_irqs = n->defer_hard_irqs; n->config->gro_flush_timeout = n->gro_flush_timeout; n->config->irq_suspend_timeout = n->irq_suspend_timeout; - n->config->napi_id = n->napi_id; napi_hash_del(n); } +/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will + * inherit an existing ID try to insert it at the right position. + */ +static void +netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) +{ + unsigned int new_id, pos_id; + struct list_head *higher; + struct napi_struct *pos; + + new_id = UINT_MAX; + if (napi->config && napi->config->napi_id) + new_id = napi->config->napi_id; + + higher = &dev->napi_list; + list_for_each_entry(pos, &dev->napi_list, dev_list) { + if (pos->napi_id >= MIN_NAPI_ID) + pos_id = pos->napi_id; + else if (pos->config) + pos_id = pos->config->napi_id; + else + pos_id = UINT_MAX; + + if (pos_id <= new_id) + break; + higher = &pos->dev_list; + } + list_add_rcu(&napi->dev_list, higher); /* adds after higher */ +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6780,7 +6810,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, napi->list_owner = -1; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); - list_add_rcu(&napi->dev_list, &dev->napi_list); + netif_napi_dev_list_add(dev, napi); /* default settings from sysfs are applied to all NAPIs. any per-NAPI * configuration will be loaded in napi_enable diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index e217a5838c87..db82786fa0c4 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -79,3 +79,4 @@ err_free_new_mem: return err; } +EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93d9d914529b..93e8cb671c3d 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -18,6 +18,23 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def napi_list_check(nf) -> None: + with NetdevSimDev(queue_count=100) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100) + + for q in [50, 0, 99]: + for i in range(4): + nsim.dfs_write("queue_reset", f"{q} {i}") + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100, + comment=f"queue count after reset queue {q} mode {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -89,7 +106,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], args=(nf, )) ksft_exit() |