From b9b8301d369b4c876de5255dbf067b19ba88ac71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Dec 2024 08:37:03 +0000 Subject: net: netdevsim: fix nsim_pp_hold_write() nsim_pp_hold_write() has two problems: 1) It may return with rtnl held, as found by syzbot. 2) Its return value does not propagate an error if any. Fixes: 1580cbcbfe77 ("net: netdevsim: add some fake page pool use") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241216083703.1859921-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0be47fed4efc..e068a9761c09 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -635,10 +635,10 @@ nsim_pp_hold_write(struct file *file, const char __user *data, page_pool_put_full_page(ns->page->pp, ns->page, false); ns->page = NULL; } - rtnl_unlock(); exit: - return count; + rtnl_unlock(); + return ret; } static const struct file_operations nsim_pp_hold_fops = { -- cgit v1.2.3 From 00adf88b186fa09a0b4005bdcf440aa2f926a75b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:41 -0800 Subject: netdevsim: support NAPI config Link the NAPI instances to their configs. This will be needed to test that NAPI config doesn't break list ordering. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index e068a9761c09..a4aacd372cdd 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -390,7 +390,7 @@ static int nsim_init_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { rq = &ns->rq[i]; - netif_napi_add(dev, &rq->napi, nsim_poll); + netif_napi_add_config(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { -- cgit v1.2.3 From 915c82f842f955429e347a53ce005604ad421343 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:42 -0800 Subject: netdevsim: allocate rqs individually Make nsim->rqs an array of pointers and allocate them individually so that we can swap them out one by one. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 49 +++++++++++++++++++++++++-------------- drivers/net/netdevsim/netdevsim.h | 2 +- 2 files changed, 32 insertions(+), 19 deletions(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a4aacd372cdd..7fa75f37ec49 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -69,7 +69,7 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = skb_get_queue_mapping(skb); if (rxq >= peer_dev->num_rx_queues) rxq = rxq % peer_dev->num_rx_queues; - rq = &peer_ns->rq[rxq]; + rq = peer_ns->rq[rxq]; skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) @@ -388,13 +388,13 @@ static int nsim_init_napi(struct netdevsim *ns) int err, i; for (i = 0; i < dev->num_rx_queues; i++) { - rq = &ns->rq[i]; + rq = ns->rq[i]; netif_napi_add_config(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { - rq = &ns->rq[i]; + rq = ns->rq[i]; err = nsim_create_page_pool(rq); if (err) @@ -405,12 +405,12 @@ static int nsim_init_napi(struct netdevsim *ns) err_pp_destroy: while (i--) { - page_pool_destroy(ns->rq[i].page_pool); - ns->rq[i].page_pool = NULL; + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; } for (i = 0; i < dev->num_rx_queues; i++) - __netif_napi_del(&ns->rq[i].napi); + __netif_napi_del(&ns->rq[i]->napi); return err; } @@ -421,7 +421,7 @@ static void nsim_enable_napi(struct netdevsim *ns) int i; for (i = 0; i < dev->num_rx_queues; i++) { - struct nsim_rq *rq = &ns->rq[i]; + struct nsim_rq *rq = ns->rq[i]; netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); napi_enable(&rq->napi); @@ -448,7 +448,7 @@ static void nsim_del_napi(struct netdevsim *ns) int i; for (i = 0; i < dev->num_rx_queues; i++) { - struct nsim_rq *rq = &ns->rq[i]; + struct nsim_rq *rq = ns->rq[i]; napi_disable(&rq->napi); __netif_napi_del(&rq->napi); @@ -456,8 +456,8 @@ static void nsim_del_napi(struct netdevsim *ns) synchronize_net(); for (i = 0; i < dev->num_rx_queues; i++) { - page_pool_destroy(ns->rq[i].page_pool); - ns->rq[i].page_pool = NULL; + page_pool_destroy(ns->rq[i]->page_pool); + ns->rq[i]->page_pool = NULL; } } @@ -628,7 +628,7 @@ nsim_pp_hold_write(struct file *file, const char __user *data, if (!netif_running(ns->netdev) && val) { ret = -ENETDOWN; } else if (val) { - ns->page = page_pool_dev_alloc_pages(ns->rq[0].page_pool); + ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool); if (!ns->page) ret = -ENOMEM; } else { @@ -677,15 +677,26 @@ static int nsim_queue_init(struct netdevsim *ns) struct net_device *dev = ns->netdev; int i; - ns->rq = kvcalloc(dev->num_rx_queues, sizeof(*ns->rq), - GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq), + GFP_KERNEL_ACCOUNT); if (!ns->rq) return -ENOMEM; - for (i = 0; i < dev->num_rx_queues; i++) - skb_queue_head_init(&ns->rq[i].skb_queue); + for (i = 0; i < dev->num_rx_queues; i++) { + ns->rq[i] = kzalloc(sizeof(**ns->rq), GFP_KERNEL_ACCOUNT); + if (!ns->rq[i]) + goto err_free_prev; + + skb_queue_head_init(&ns->rq[i]->skb_queue); + } return 0; + +err_free_prev: + while (i--) + kfree(ns->rq[i]); + kfree(ns->rq); + return -ENOMEM; } static void nsim_queue_free(struct netdevsim *ns) @@ -693,11 +704,13 @@ static void nsim_queue_free(struct netdevsim *ns) struct net_device *dev = ns->netdev; int i; - for (i = 0; i < dev->num_rx_queues; i++) - skb_queue_purge_reason(&ns->rq[i].skb_queue, + for (i = 0; i < dev->num_rx_queues; i++) { + skb_queue_purge_reason(&ns->rq[i]->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); + kfree(ns->rq[i]); + } - kvfree(ns->rq); + kfree(ns->rq); ns->rq = NULL; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index bf02efa10956..80fde64f4a7a 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -101,7 +101,7 @@ struct netdevsim { struct nsim_dev *nsim_dev; struct nsim_dev_port *nsim_dev_port; struct mock_phc *phc; - struct nsim_rq *rq; + struct nsim_rq **rq; u64 tx_packets; u64 tx_bytes; -- cgit v1.2.3 From a565dd04a120c03891b6fb504f342159b446f463 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:43 -0800 Subject: netdevsim: add queue alloc/free helpers We'll need the code to allocate and free queues in the queue management API, factor it out. Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 7fa75f37ec49..7b80796dbe26 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -595,6 +595,24 @@ static const struct netdev_stat_ops nsim_stat_ops = { .get_base_stats = nsim_get_base_stats, }; +static struct nsim_rq *nsim_queue_alloc(void) +{ + struct nsim_rq *rq; + + rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT); + if (!rq) + return NULL; + + skb_queue_head_init(&rq->skb_queue); + return rq; +} + +static void nsim_queue_free(struct nsim_rq *rq) +{ + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); + kfree(rq); +} + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -683,11 +701,9 @@ static int nsim_queue_init(struct netdevsim *ns) return -ENOMEM; for (i = 0; i < dev->num_rx_queues; i++) { - ns->rq[i] = kzalloc(sizeof(**ns->rq), GFP_KERNEL_ACCOUNT); + ns->rq[i] = nsim_queue_alloc(); if (!ns->rq[i]) goto err_free_prev; - - skb_queue_head_init(&ns->rq[i]->skb_queue); } return 0; @@ -699,16 +715,13 @@ err_free_prev: return -ENOMEM; } -static void nsim_queue_free(struct netdevsim *ns) +static void nsim_queue_uninit(struct netdevsim *ns) { struct net_device *dev = ns->netdev; int i; - for (i = 0; i < dev->num_rx_queues; i++) { - skb_queue_purge_reason(&ns->rq[i]->skb_queue, - SKB_DROP_REASON_QUEUE_PURGE); - kfree(ns->rq[i]); - } + for (i = 0; i < dev->num_rx_queues; i++) + nsim_queue_free(ns->rq[i]); kfree(ns->rq); ns->rq = NULL; @@ -754,7 +767,7 @@ err_ipsec_teardown: nsim_macsec_teardown(ns); nsim_bpf_uninit(ns); err_rq_destroy: - nsim_queue_free(ns); + nsim_queue_uninit(ns); err_utn_destroy: rtnl_unlock(); nsim_udp_tunnels_info_destroy(ns->netdev); @@ -836,7 +849,7 @@ void nsim_destroy(struct netdevsim *ns) nsim_macsec_teardown(ns); nsim_ipsec_teardown(ns); nsim_bpf_uninit(ns); - nsim_queue_free(ns); + nsim_queue_uninit(ns); } rtnl_unlock(); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) -- cgit v1.2.3 From 5bc8e8dbef27b73bd7b6d1fd5108b4fd4c6d469f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:44 -0800 Subject: netdevsim: add queue management API support Add queue management API support. We need a way to reset queues to test NAPI reordering, the queue management API provides a handy scaffolding for that. Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: Jakub Kicinski Reviewed-by: Mina Almasry Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 134 ++++++++++++++++++++++++++++++++++---- drivers/net/netdevsim/netdevsim.h | 2 + 2 files changed, 124 insertions(+), 12 deletions(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 7b80796dbe26..cfb079a34532 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -359,25 +359,24 @@ static int nsim_poll(struct napi_struct *napi, int budget) return done; } -static int nsim_create_page_pool(struct nsim_rq *rq) +static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi) { - struct page_pool_params p = { + struct page_pool_params params = { .order = 0, .pool_size = NSIM_RING_SIZE, .nid = NUMA_NO_NODE, - .dev = &rq->napi.dev->dev, - .napi = &rq->napi, + .dev = &napi->dev->dev, + .napi = napi, .dma_dir = DMA_BIDIRECTIONAL, - .netdev = rq->napi.dev, + .netdev = napi->dev, }; + struct page_pool *pool; - rq->page_pool = page_pool_create(&p); - if (IS_ERR(rq->page_pool)) { - int err = PTR_ERR(rq->page_pool); + pool = page_pool_create(¶ms); + if (IS_ERR(pool)) + return PTR_ERR(pool); - rq->page_pool = NULL; - return err; - } + *p = pool; return 0; } @@ -396,7 +395,7 @@ static int nsim_init_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { rq = ns->rq[i]; - err = nsim_create_page_pool(rq); + err = nsim_create_page_pool(&rq->page_pool, &rq->napi); if (err) goto err_pp_destroy; } @@ -613,6 +612,116 @@ static void nsim_queue_free(struct nsim_rq *rq) kfree(rq); } +/* Queue reset mode is controlled by ns->rq_reset_mode. + * - normal - new NAPI new pool (old NAPI enabled when new added) + * - mode 1 - allocate new pool (NAPI is only disabled / enabled) + * - mode 2 - new NAPI new pool (old NAPI removed before new added) + * - mode 3 - new NAPI new pool (old NAPI disabled when new added) + */ +struct nsim_queue_mem { + struct nsim_rq *rq; + struct page_pool *pp; +}; + +static int +nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + int err; + + if (ns->rq_reset_mode > 3) + return -EINVAL; + + if (ns->rq_reset_mode == 1) + return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + + qmem->rq = nsim_queue_alloc(); + if (!qmem->rq) + return -ENOMEM; + + err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi); + if (err) + goto err_free; + + if (!ns->rq_reset_mode) + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + + return 0; + +err_free: + nsim_queue_free(qmem->rq); + return err; +} + +static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + page_pool_destroy(qmem->pp); + if (qmem->rq) { + if (!ns->rq_reset_mode) + netif_napi_del(&qmem->rq->napi); + page_pool_destroy(qmem->rq->page_pool); + nsim_queue_free(qmem->rq); + } +} + +static int +nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + if (ns->rq_reset_mode == 1) { + ns->rq[idx]->page_pool = qmem->pp; + napi_enable(&ns->rq[idx]->napi); + return 0; + } + + /* netif_napi_add()/_del() should normally be called from alloc/free, + * here we want to test various call orders. + */ + if (ns->rq_reset_mode == 2) { + netif_napi_del(&ns->rq[idx]->napi); + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + } else if (ns->rq_reset_mode == 3) { + netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_del(&ns->rq[idx]->napi); + } + + ns->rq[idx] = qmem->rq; + napi_enable(&ns->rq[idx]->napi); + + return 0; +} + +static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) +{ + struct nsim_queue_mem *qmem = per_queue_mem; + struct netdevsim *ns = netdev_priv(dev); + + napi_disable(&ns->rq[idx]->napi); + + if (ns->rq_reset_mode == 1) { + qmem->pp = ns->rq[idx]->page_pool; + page_pool_disable_direct_recycling(qmem->pp); + } else { + qmem->rq = ns->rq[idx]; + } + + return 0; +} + +static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct nsim_queue_mem), + .ndo_queue_mem_alloc = nsim_queue_mem_alloc, + .ndo_queue_mem_free = nsim_queue_mem_free, + .ndo_queue_start = nsim_queue_start, + .ndo_queue_stop = nsim_queue_stop, +}; + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -739,6 +848,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns) ns->phc = phc; ns->netdev->netdev_ops = &nsim_netdev_ops; ns->netdev->stat_ops = &nsim_stat_ops; + ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 80fde64f4a7a..8c50969b1240 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -103,6 +103,8 @@ struct netdevsim { struct mock_phc *phc; struct nsim_rq **rq; + int rq_reset_mode; + u64 tx_packets; u64 tx_bytes; u64 tx_dropped; -- cgit v1.2.3 From 6917d207b469ee81e6dc7f8ccca29c234a16916d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jan 2025 08:08:45 -0800 Subject: netdevsim: add debugfs-triggered queue reset Support triggering queue reset via debugfs for an upcoming test. Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: Jakub Kicinski Reviewed-by: Mina Almasry Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/netdev.c | 55 +++++++++++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 56 insertions(+) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index cfb079a34532..d013b6498539 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,8 @@ #include "netdevsim.h" +MODULE_IMPORT_NS("NETDEV_INTERNAL"); + #define NSIM_RING_SIZE 256 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb) @@ -722,6 +725,54 @@ static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = { .ndo_queue_stop = nsim_queue_stop, }; +static ssize_t +nsim_qreset_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct netdevsim *ns = file->private_data; + unsigned int queue, mode; + char buf[32]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -EINVAL; + if (copy_from_user(buf, data, count)) + return -EFAULT; + buf[count] = '\0'; + + ret = sscanf(buf, "%u %u", &queue, &mode); + if (ret != 2) + return -EINVAL; + + rtnl_lock(); + if (!netif_running(ns->netdev)) { + ret = -ENETDOWN; + goto exit_unlock; + } + + if (queue >= ns->netdev->real_num_rx_queues) { + ret = -EINVAL; + goto exit_unlock; + } + + ns->rq_reset_mode = mode; + ret = netdev_rx_queue_restart(ns->netdev, queue); + ns->rq_reset_mode = 0; + if (ret) + goto exit_unlock; + + ret = count; +exit_unlock: + rtnl_unlock(); + return ret; +} + +static const struct file_operations nsim_qreset_fops = { + .open = simple_open, + .write = nsim_qreset_write, + .owner = THIS_MODULE, +}; + static ssize_t nsim_pp_hold_read(struct file *file, char __user *data, size_t count, loff_t *ppos) @@ -934,6 +985,9 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir, ns, &nsim_pp_hold_fops); + ns->qr_dfs = debugfs_create_file("queue_reset", 0200, + nsim_dev_port->ddir, ns, + &nsim_qreset_fops); return ns; @@ -947,6 +1001,7 @@ void nsim_destroy(struct netdevsim *ns) struct net_device *dev = ns->netdev; struct netdevsim *peer; + debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); rtnl_lock(); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 8c50969b1240..a70f62af4c88 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -136,6 +136,7 @@ struct netdevsim { struct page *page; struct dentry *pp_dfs; + struct dentry *qr_dfs; struct nsim_ethtool ethtool; struct netdevsim __rcu *peer; -- cgit v1.2.3 From f394d07b192b67a895dbed76253ce95dcbb5d17c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:51 +0000 Subject: netdevsim: add HDS feature HDS options(tcp-data-split, hds-thresh) have dependencies between other features like XDP. Basic dependencies are checked in the core API. netdevsim is very useful to check basic dependencies. The default tcp-data-split mode is UNKNOWN but netdevsim driver returns ENABLED when ethtool dumps tcp-data-split mode. The default value of HDS threshold is 0 and the maximum value is 1024. ethtool shows like this. ethtool -g eni1np1 Ring parameters for eni1np1: Pre-set maximums: ... HDS thresh: 1024 Current hardware settings: ... TCP data split: on HDS thresh: 0 ethtool -G eni1np1 tcp-data-split on hds-thresh 1024 ethtool -g eni1np1 Ring parameters for eni1np1: Pre-set maximums: ... HDS thresh: 1024 Current hardware settings: ... TCP data split: on HDS thresh: 1024 Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-10-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ethtool.c | 12 +++++++++++- drivers/net/netdevsim/netdev.c | 9 +++++++++ drivers/net/netdevsim/netdevsim.h | 3 +++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 5fe1eaef99b5..9e0df40c71e1 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -2,7 +2,6 @@ // Copyright (c) 2020 Facebook #include -#include #include #include "netdevsim.h" @@ -72,6 +71,12 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); + kernel_ring->tcp_data_split = dev->ethtool->hds_config; + kernel_ring->hds_thresh = dev->ethtool->hds_thresh; + kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; + + if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } static int nsim_set_ringparam(struct net_device *dev, @@ -161,6 +166,8 @@ static int nsim_get_ts_info(struct net_device *dev, static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .get_pause_stats = nsim_get_pause_stats, .get_pauseparam = nsim_get_pauseparam, .set_pauseparam = nsim_set_pauseparam, @@ -182,6 +189,9 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; + + ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + ns->netdev->ethtool->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index d013b6498539..f92b05ccdca9 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -54,6 +55,7 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct ethtool_netdev_state *ethtool; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; @@ -74,6 +76,13 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; + ethtool = peer_dev->ethtool; + if (skb_is_nonlinear(skb) && + (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + ethtool->hds_thresh > len))) + skb_linearize(skb); + skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) goto out_drop_cnt; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index a70f62af4c88..dcf073bc4802 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ #define NSIM_IPSEC_VALID BIT(31) #define NSIM_UDP_TUNNEL_N_PORTS 4 +#define NSIM_HDS_THRESHOLD_MAX 1024 + struct nsim_sa { struct xfrm_state *xs; __be32 ipaddr[4]; -- cgit v1.2.3 From 3c836451ca9041cfb32a7d8f59ea15b3b991bbb3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:11 -0800 Subject: net: move HDS config from ethtool state Separate the HDS config from the ethtool state struct. The HDS config contains just simple parameters, not state. Having it as a separate struct will make it easier to clone / copy and also long term potentially make it per-queue. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +++-- drivers/net/netdevsim/ethtool.c | 9 +++++---- drivers/net/netdevsim/netdev.c | 10 +++++----- include/linux/ethtool.h | 4 ---- include/linux/netdevice.h | 3 +++ include/net/netdev_queues.h | 10 ++++++++++ net/core/dev.c | 10 ++++++++-- net/core/devmem.c | 4 ++-- net/ethtool/rings.c | 8 +++++--- 10 files changed, 43 insertions(+), 24 deletions(-) (limited to 'drivers/net/netdevsim/netdev.c') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 748c9b1ea701..0998b20578b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4610,7 +4610,7 @@ void bnxt_set_tpa_flags(struct bnxt *bp) static void bnxt_init_ring_params(struct bnxt *bp) { bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; - bp->dev->ethtool->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; + bp->dev->cfg->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; } /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must @@ -6585,7 +6585,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - u16 hds_thresh = (u16)bp->dev->ethtool->hds_thresh; + u16 hds_thresh = (u16)bp->dev->cfg->hds_thresh; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 65a20931c579..0a6d47d4d66b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "bnxt_hsi.h" #include "bnxt.h" @@ -834,7 +835,7 @@ static void bnxt_get_ringparam(struct net_device *dev, ering->rx_jumbo_pending = bp->rx_agg_ring_size; ering->tx_pending = bp->tx_ring_size; - kernel_ering->hds_thresh = dev->ethtool->hds_thresh; + kernel_ering->hds_thresh = dev->cfg->hds_thresh; kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX; } @@ -852,7 +853,7 @@ static int bnxt_set_ringparam(struct net_device *dev, (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; - hds_config_mod = tcp_data_split != dev->ethtool->hds_config; + hds_config_mod = tcp_data_split != dev->cfg->hds_config; if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod) return -EINVAL; diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 12163635b759..189793debdb7 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -3,6 +3,7 @@ #include #include +#include #include "netdevsim.h" @@ -71,8 +72,8 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); - kernel_ring->tcp_data_split = dev->ethtool->hds_config; - kernel_ring->hds_thresh = dev->ethtool->hds_thresh; + kernel_ring->tcp_data_split = dev->cfg->hds_config; + kernel_ring->hds_thresh = dev->cfg->hds_thresh; kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) @@ -190,8 +191,8 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; - ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; - ns->netdev->ethtool->hds_thresh = 0; + ns->netdev->cfg->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + ns->netdev->cfg->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index f92b05ccdca9..42f247cbdcee 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -55,10 +55,10 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); - struct ethtool_netdev_state *ethtool; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; + struct netdev_config *cfg; struct nsim_rq *rq; int rxq; @@ -76,11 +76,11 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; - ethtool = peer_dev->ethtool; + cfg = peer_dev->cfg; if (skb_is_nonlinear(skb) && - (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || - (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && - ethtool->hds_thresh > len))) + (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + cfg->hds_thresh > len))) skb_linearize(skb); skb_tx_timestamp(skb); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 64301ddf2f59..870994cc3ef7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1171,16 +1171,12 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. - * @hds_thresh: HDS Threshold value. - * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; - u32 hds_thresh; - u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8308d9c75918..173a8b3a9eb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct dsa_port; struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; +struct netdev_config; struct netdev_name_node; struct sd_flow_limit; struct sfp_bus; @@ -2410,6 +2411,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + /** @cfg: net_device queue-related configuration */ + struct netdev_config *cfg; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 5ca019d294ca..b02bb9f109d5 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,16 @@ #include +/** + * struct netdev_config - queue-related configuration for a netdev + * @hds_thresh: HDS Threshold value. + * @hds_config: HDS value from userspace. + */ +struct netdev_config { + u32 hds_thresh; + u8 hds_config; +}; + /* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; diff --git a/net/core/dev.c b/net/core/dev.c index 3dab6699b1c1..e37d47cf476b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -106,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -9719,7 +9720,7 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; - if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && bpf->command == XDP_SETUP_PROG && bpf->prog && !bpf->prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(bpf->extack, @@ -9764,7 +9765,7 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; - if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && prog && !prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); return -EBUSY; @@ -11542,6 +11543,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + dev->cfg = kzalloc(sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); + if (!dev->cfg) + goto free_all; + napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); if (!dev->napi_config) @@ -11610,6 +11615,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->cfg); kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/core/devmem.c b/net/core/devmem.c index c971b8aceac8..3bba3f018df0 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -141,12 +141,12 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -ERANGE; } - if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; } - if (dev->ethtool->hds_thresh) { + if (dev->cfg->hds_thresh) { NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); return -EINVAL; } diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index d8cd4e4d7762..7a3c2a2dff12 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" @@ -219,7 +221,7 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) dev->ethtool_ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); - kernel_ringparam.tcp_data_split = dev->ethtool->hds_config; + kernel_ringparam.tcp_data_split = dev->cfg->hds_config; ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, @@ -295,8 +297,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); if (!ret) { - dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; - dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh; + dev->cfg->hds_config = kernel_ringparam.tcp_data_split; + dev->cfg->hds_thresh = kernel_ringparam.hds_thresh; } return ret < 0 ? ret : 1; -- cgit v1.2.3