From 4b623f9f0f59652ea71fcb27d60b4c3b65126dbb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:49 +0200 Subject: net-shapers: implement NL get operation Introduce the basic infrastructure to implement the net-shaper core functionality. Each network devices carries a net-shaper cache, the NL get() operation fetches the data from such cache. The cache is initially empty, will be fill by the set()/group() operation implemented later and is destroyed at device cleanup time. The net_shaper_fill_handle(), net_shaper_ctx_init(), and net_shaper_generic_pre() implementations handle generic index type attributes, despite the current caller always pass a constant value to avoid more noise in later patches using them with different attributes. Reviewed-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/ddd10fd645a9367803ad02fca4a5664ea5ace170.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ea5fbcd133ae..6e727c49a6f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11147,6 +11147,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, hash_init(dev->qdisc_hash); #endif + mutex_init(&dev->lock); + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11217,6 +11219,8 @@ void free_netdev(struct net_device *dev) return; } + mutex_destroy(&dev->lock); + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11426,6 +11430,8 @@ void unregister_netdevice_many_notify(struct list_head *head, mutex_destroy(&dev->ethtool->rss_lock); + net_shaper_flush_netdev(dev); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); -- cgit v1.2.3 From ff7d4deb1f3e18b983cb51fc2dcb7af57991d827 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:53 +0200 Subject: net-shapers: implement shaper cleanup on queue deletion hook into netif_set_real_num_tx_queues() to cleanup any shaper configured on top of the to-be-destroyed TX queues. Reviewed-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/6da4ee03cae2b2a757d7b59e88baf09cc94c5ef1.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ net/core/dev.h | 4 ++++ net/shaper/shaper.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6e727c49a6f7..b590eefce3b4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2949,6 +2949,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); + net_shaper_set_real_num_tx_queues(dev, txq); + dev_qdisc_change_real_num_tx(dev, txq); dev->real_num_tx_queues = txq; diff --git a/net/core/dev.h b/net/core/dev.h index 13c558874af3..d3ea92949ff3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -37,8 +37,12 @@ void dev_addr_check(struct net_device *dev); #if IS_ENABLED(CONFIG_NET_SHAPER) void net_shaper_flush_netdev(struct net_device *dev); +void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); #else static inline void net_shaper_flush_netdev(struct net_device *dev) {} +static inline void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq) {} #endif /* sysctls not referred to from outside net/core/ */ diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index ddd1999b3f27..85ad172833fc 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -1157,6 +1157,54 @@ void net_shaper_flush_netdev(struct net_device *dev) net_shaper_flush(&binding); } +void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding binding; + int i; + + binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + binding.netdev = dev; + hierarchy = net_shaper_hierarchy(&binding); + if (!hierarchy) + return; + + /* Only drivers implementing shapers support ensure + * the lock is acquired in advance. + */ + lockdep_assert_held(&dev->lock); + + /* Take action only when decreasing the tx queue number. */ + for (i = txq; i < dev->real_num_tx_queues; ++i) { + struct net_shaper_handle handle, parent_handle; + struct net_shaper *shaper; + u32 index; + + handle.scope = NET_SHAPER_SCOPE_QUEUE; + handle.id = i; + shaper = net_shaper_lookup(&binding, &handle); + if (!shaper) + continue; + + /* Don't touch the H/W for the queue shaper, the drivers already + * deleted the queue and related resources. + */ + parent_handle = shaper->parent; + index = net_shaper_handle_to_index(&handle); + xa_erase(&hierarchy->shapers, index); + kfree_rcu(shaper, rcu); + + /* The recursion on parent does the full job. */ + if (parent_handle.scope != NET_SHAPER_SCOPE_NODE) + continue; + + shaper = net_shaper_lookup(&binding, &parent_handle); + if (shaper && !--shaper->leaves) + __net_shaper_delete(&binding, shaper, NULL); + } +} + static int __init shaper_init(void) { return genl_register_family(&net_shaper_nl_family); -- cgit v1.2.3 From f15e3b3ddb9fab1c1731b6154e2cd6573fb54c4d Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:56 +0000 Subject: net: napi: Make napi_defer_hard_irqs per-NAPI Add defer_hard_irqs to napi_struct in preparation for per-NAPI settings. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device defer_hard_irq field. Reads from sysfs show the net_device field. The ability to set defer_hard_irqs on specific NAPI instances will be added in a later commit, via netdev-genl. Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 3 +- net/core/dev.c | 10 +++--- net/core/dev.h | 36 ++++++++++++++++++++++ net/core/net-sysfs.c | 2 +- 5 files changed, 45 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 1b018ac35e9a..5a7388b2ab6f 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -186,4 +186,5 @@ struct dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder u64 max_pacing_offload_horizon +u32 napi_defer_hard_irqs =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e6b93d01e631..2e7bc23660ec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -373,6 +373,7 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; @@ -2085,7 +2086,6 @@ struct net_device { unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; - u32 napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2413,6 +2413,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + u32 napi_defer_hard_irqs; /** * @lock: protects @net_shaper_hierarchy, feel free to use for other diff --git a/net/core/dev.c b/net/core/dev.c index b590eefce3b4..fbaa9eabf77f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6233,7 +6233,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) { if (n->gro_bitmask) timeout = READ_ONCE(n->dev->gro_flush_timeout); - n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); + n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); } if (n->defer_hard_irqs_count > 0) { n->defer_hard_irqs_count--; @@ -6371,7 +6371,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (flags & NAPI_F_PREFER_BUSY_POLL) { - napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); + napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); timeout = READ_ONCE(napi->dev->gro_flush_timeout); if (napi->defer_hard_irqs_count && timeout) { hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); @@ -6653,6 +6653,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, INIT_HLIST_NODE(&napi->napi_hash_node); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; + napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { dev->gro_flush_timeout = 20000; - dev->napi_defer_hard_irqs = 1; + netdev_set_defer_hard_irqs(dev, 1); } } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); @@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); @@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100); } /* diff --git a/net/core/dev.h b/net/core/dev.h index d3ea92949ff3..0716b1048261 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -148,6 +148,42 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, WRITE_ONCE(dev->gro_ipv4_max_size, size); } +/** + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs + * @n: napi struct to get the defer_hard_irqs field from + * + * Return: the per-NAPI value of the defar_hard_irqs field. + */ +static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n) +{ + return READ_ONCE(n->defer_hard_irqs); +} + +/** + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi + * @n: napi_struct to set the defer_hard_irqs field + * @defer: the value the field should be set to + */ +static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer) +{ + WRITE_ONCE(n->defer_hard_irqs, defer); +} + +/** + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev + * @netdev: the net_device for which all NAPIs will have defer_hard_irqs set + * @defer: the defer_hard_irqs value to set + */ +static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, + u32 defer) +{ + struct napi_struct *napi; + + WRITE_ONCE(netdev->napi_defer_hard_irqs, defer); + list_for_each_entry(napi, &netdev->napi_list, dev_list) + napi_set_defer_hard_irqs(napi, defer); +} + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 05cf5347f25e..25125f356a15 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -429,7 +429,7 @@ static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val if (val > S32_MAX) return -ERANGE; - WRITE_ONCE(dev->napi_defer_hard_irqs, val); + netdev_set_defer_hard_irqs(dev, (u32)val); return 0; } -- cgit v1.2.3 From acb8d4ed5661d05f794ef2ce34fd11e699e9ca32 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:58 +0000 Subject: net: napi: Make gro_flush_timeout per-NAPI Allow per-NAPI gro_flush_timeout setting. The existing sysfs parameter is respected; writes to sysfs will write to all NAPI structs for the device and the net_device gro_flush_timeout field. Reads from sysfs will read from the net_device field. The ability to set gro_flush_timeout on specific NAPI instances will be added in a later commit, via netdev-genl. Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-4-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 3 +- net/core/dev.c | 12 +++---- net/core/dev.h | 40 ++++++++++++++++++++++ net/core/net-sysfs.c | 2 +- 5 files changed, 50 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 5a7388b2ab6f..67910ea49160 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -186,5 +186,6 @@ struct dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder u64 max_pacing_offload_horizon +unsigned_long gro_flush_timeout u32 napi_defer_hard_irqs =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7bc23660ec..93241d4de437 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -373,6 +373,7 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + unsigned long gro_flush_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; @@ -2085,7 +2086,6 @@ struct net_device { int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; - unsigned long gro_flush_timeout; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2413,6 +2413,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; /** diff --git a/net/core/dev.c b/net/core/dev.c index fbaa9eabf77f..e21ace3551d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6232,12 +6232,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) { if (n->gro_bitmask) - timeout = READ_ONCE(n->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(n); n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); } if (n->defer_hard_irqs_count > 0) { n->defer_hard_irqs_count--; - timeout = READ_ONCE(n->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(n); if (timeout) ret = false; } @@ -6372,7 +6372,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); - timeout = READ_ONCE(napi->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(napi); if (napi->defer_hard_irqs_count && timeout) { hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); skip_schedule = true; @@ -6654,6 +6654,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); + napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) WARN_ON(dev->reg_state == NETREG_REGISTERED); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { - dev->gro_flush_timeout = 20000; + netdev_set_gro_flush_timeout(dev, 20000); netdev_set_defer_hard_irqs(dev, 1); } } @@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); @@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92); } /* diff --git a/net/core/dev.h b/net/core/dev.h index 0716b1048261..7d0aab7e3ef1 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -184,6 +184,46 @@ static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, napi_set_defer_hard_irqs(napi, defer); } +/** + * napi_get_gro_flush_timeout - get the gro_flush_timeout + * @n: napi struct to get the gro_flush_timeout from + * + * Return: the per-NAPI value of the gro_flush_timeout field. + */ +static inline unsigned long +napi_get_gro_flush_timeout(const struct napi_struct *n) +{ + return READ_ONCE(n->gro_flush_timeout); +} + +/** + * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi + * @n: napi struct to set the gro_flush_timeout + * @timeout: timeout value to set + * + * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout + */ +static inline void napi_set_gro_flush_timeout(struct napi_struct *n, + unsigned long timeout) +{ + WRITE_ONCE(n->gro_flush_timeout, timeout); +} + +/** + * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs + * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set + * @timeout: the timeout value to set + */ +static inline void netdev_set_gro_flush_timeout(struct net_device *netdev, + unsigned long timeout) +{ + struct napi_struct *napi; + + WRITE_ONCE(netdev->gro_flush_timeout, timeout); + list_for_each_entry(napi, &netdev->napi_list, dev_list) + napi_set_gro_flush_timeout(napi, timeout); +} + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 25125f356a15..2d9afc6e2161 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { - WRITE_ONCE(dev->gro_flush_timeout, val); + netdev_set_gro_flush_timeout(dev, val); return 0; } -- cgit v1.2.3 From 86e25f40aa1e9e54e081e55016f65b5c92523989 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:45:00 +0000 Subject: net: napi: Add napi_config Add a persistent NAPI config area for NAPI configuration to the core. Drivers opt-in to setting the persistent config for a NAPI by passing an index when calling netif_napi_add_config. napi_config is allocated in alloc_netdev_mqs, freed in free_netdev (after the NAPIs are deleted). Drivers which call netif_napi_add_config will have persistent per-NAPI settings: NAPI IDs, gro_flush_timeout, and defer_hard_irq settings. Per-NAPI settings are saved in napi_disable and restored in napi_enable. Co-developed-by: Martin Karsten Signed-off-by: Martin Karsten Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-6-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 36 +++++++++- net/core/dev.c | 80 ++++++++++++++++++++-- net/core/dev.h | 12 ++++ 4 files changed, 119 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 67910ea49160..db6192b2bb50 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -186,6 +186,7 @@ struct dpll_pin* dpll_pin struct hlist_head page_pools struct dim_irq_moder* irq_moder u64 max_pacing_offload_horizon +struct_napi_config* napi_config unsigned_long gro_flush_timeout u32 napi_defer_hard_irqs =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93241d4de437..8feaca12655e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -342,6 +342,15 @@ struct gro_list { */ #define GRO_HASH_BUCKETS 8 +/* + * Structure for per-NAPI config + */ +struct napi_config { + u64 gro_flush_timeout; + u32 defer_hard_irqs; + unsigned int napi_id; +}; + /* * Structure for NAPI scheduling similar to tasklet but with weighting */ @@ -379,6 +388,8 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + int index; + struct napi_config *config; }; enum { @@ -1868,9 +1879,6 @@ enum netdev_reg_state { * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer - * @gro_flush_timeout: timeout for GRO layer in NAPI - * @napi_defer_hard_irqs: If not zero, provides a counter that would - * allow to avoid NIC hard IRQ, on busy queues. * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one @@ -2020,6 +2028,11 @@ enum netdev_reg_state { * where the clock is recovered. * * @max_pacing_offload_horizon: max EDT offload horizon in nsec. + * @napi_config: An array of napi_config structures containing per-NAPI + * settings. + * @gro_flush_timeout: timeout for GRO layer in NAPI + * @napi_defer_hard_irqs: If not zero, provides a counter that would + * allow to avoid NIC hard IRQ, on busy queues. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2413,6 +2426,7 @@ struct net_device { struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; + struct napi_config *napi_config; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; @@ -2678,6 +2692,22 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +/** + * netif_napi_add_config - initialize a NAPI context with persistent config + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @index: the NAPI index + */ +static inline void +netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + napi->config = &dev->napi_config[index]; + netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); +} + /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device diff --git a/net/core/dev.c b/net/core/dev.c index e21ace3551d5..c682173a7642 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6505,6 +6505,23 @@ EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ +static void __napi_hash_add_with_id(struct napi_struct *napi, + unsigned int napi_id) +{ + napi->napi_id = napi_id; + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); +} + +static void napi_hash_add_with_id(struct napi_struct *napi, + unsigned int napi_id) +{ + spin_lock(&napi_hash_lock); + WARN_ON_ONCE(napi_by_id(napi_id)); + __napi_hash_add_with_id(napi, napi_id); + spin_unlock(&napi_hash_lock); +} + static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) @@ -6517,10 +6534,8 @@ static void napi_hash_add(struct napi_struct *napi) if (unlikely(++napi_gen_id < MIN_NAPI_ID)) napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); - napi->napi_id = napi_gen_id; - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + __napi_hash_add_with_id(napi, napi_gen_id); spin_unlock(&napi_hash_lock); } @@ -6643,6 +6658,28 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, } EXPORT_SYMBOL(netif_queue_set_napi); +static void napi_restore_config(struct napi_struct *n) +{ + n->defer_hard_irqs = n->config->defer_hard_irqs; + n->gro_flush_timeout = n->config->gro_flush_timeout; + /* a NAPI ID might be stored in the config, if so use it. if not, use + * napi_hash_add to generate one for us. It will be saved to the config + * in napi_disable. + */ + if (n->config->napi_id) + napi_hash_add_with_id(n, n->config->napi_id); + else + napi_hash_add(n); +} + +static void napi_save_config(struct napi_struct *n) +{ + n->config->defer_hard_irqs = n->defer_hard_irqs; + n->config->gro_flush_timeout = n->gro_flush_timeout; + n->config->napi_id = n->napi_id; + napi_hash_del(n); +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6653,8 +6690,6 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, INIT_HLIST_NODE(&napi->napi_hash_node); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; - napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); - napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -6672,7 +6707,13 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); - napi_hash_add(napi); + + /* default settings from sysfs are applied to all NAPIs. any per-NAPI + * configuration will be loaded in napi_enable + */ + napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); + napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); + napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that @@ -6704,6 +6745,11 @@ void napi_disable(struct napi_struct *n) hrtimer_cancel(&n->timer); + if (n->config) + napi_save_config(n); + else + napi_hash_del(n); + clear_bit(NAPI_STATE_DISABLE, &n->state); } EXPORT_SYMBOL(napi_disable); @@ -6719,6 +6765,11 @@ void napi_enable(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); + if (n->config) + napi_restore_config(n); + else + napi_hash_add(n); + do { BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); @@ -6748,7 +6799,11 @@ void __netif_napi_del(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; - napi_hash_del(napi); + if (napi->config) { + napi->index = -1; + napi->config = NULL; + } + list_del_rcu(&napi->dev_list); napi_free_frags(napi); @@ -11085,6 +11140,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; + size_t napi_config_sz; + unsigned int maxqs; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -11098,6 +11155,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } + maxqs = max(txqs, rxqs); + dev = kvzalloc(struct_size(dev, priv, sizeof_priv), GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!dev) @@ -11174,6 +11233,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); + dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); + if (!dev->napi_config) + goto free_all; + strscpy(dev->name, name); dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; @@ -11237,6 +11301,8 @@ void free_netdev(struct net_device *dev) list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); + kvfree(dev->napi_config); + ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); diff --git a/net/core/dev.h b/net/core/dev.h index 7d0aab7e3ef1..7881bced70a9 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -177,11 +177,17 @@ static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer) static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, u32 defer) { + unsigned int count = max(netdev->num_rx_queues, + netdev->num_tx_queues); struct napi_struct *napi; + int i; WRITE_ONCE(netdev->napi_defer_hard_irqs, defer); list_for_each_entry(napi, &netdev->napi_list, dev_list) napi_set_defer_hard_irqs(napi, defer); + + for (i = 0; i < count; i++) + netdev->napi_config[i].defer_hard_irqs = defer; } /** @@ -217,11 +223,17 @@ static inline void napi_set_gro_flush_timeout(struct napi_struct *n, static inline void netdev_set_gro_flush_timeout(struct net_device *netdev, unsigned long timeout) { + unsigned int count = max(netdev->num_rx_queues, + netdev->num_tx_queues); struct napi_struct *napi; + int i; WRITE_ONCE(netdev->gro_flush_timeout, timeout); list_for_each_entry(napi, &netdev->napi_list, dev_list) napi_set_gro_flush_timeout(napi, timeout); + + for (i = 0; i < count; i++) + netdev->napi_config[i].gro_flush_timeout = timeout; } int rps_cpumask_housekeeping(struct cpumask *mask); -- cgit v1.2.3 From 5dc51ec86df6e2214d8398079c1e31736593ab53 Mon Sep 17 00:00:00 2001 From: Martin Karsten Date: Sat, 9 Nov 2024 05:02:31 +0000 Subject: net: Add napi_struct parameter irq_suspend_timeout Add a per-NAPI IRQ suspension parameter, which can be get/set with netdev-genl. This patch doesn't change any behavior but prepares the code for other changes in the following commits which use irq_suspend_timeout as a timeout for IRQ suspension. Signed-off-by: Martin Karsten Co-developed-by: Joe Damato Signed-off-by: Joe Damato Tested-by: Joe Damato Tested-by: Martin Karsten Acked-by: Stanislav Fomichev Reviewed-by: Sridhar Samudrala Link: https://patch.msgid.link/20241109050245.191288-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 7 +++++++ include/linux/netdevice.h | 2 ++ include/uapi/linux/netdev.h | 1 + net/core/dev.c | 2 ++ net/core/dev.h | 25 +++++++++++++++++++++++++ net/core/netdev-genl-gen.c | 5 +++-- net/core/netdev-genl.c | 12 ++++++++++++ tools/include/uapi/linux/netdev.h | 1 + 8 files changed, 53 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index f9cb97d6106c..cbb544bd6c84 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -263,6 +263,11 @@ attribute-sets: the end of a NAPI cycle. This may add receive latency in exchange for reducing the number of frames processed by the network stack. type: uint + - + name: irq-suspend-timeout + doc: The timeout, in nanoseconds, of how long to suspend irq + processing, if event polling finds events + type: uint - name: queue attributes: @@ -653,6 +658,7 @@ operations: - pid - defer-hard-irqs - gro-flush-timeout + - irq-suspend-timeout dump: request: attributes: @@ -704,6 +710,7 @@ operations: - id - defer-hard-irqs - gro-flush-timeout + - irq-suspend-timeout kernel-family: headers: [ "linux/list.h"] diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df4483598628..0aae346d919e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -348,6 +348,7 @@ struct gro_list { */ struct napi_config { u64 gro_flush_timeout; + u64 irq_suspend_timeout; u32 defer_hard_irqs; unsigned int napi_id; }; @@ -384,6 +385,7 @@ struct napi_struct { struct hrtimer timer; struct task_struct *thread; unsigned long gro_flush_timeout; + unsigned long irq_suspend_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e3ebb49f60d2..e4be227d3ad6 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -124,6 +124,7 @@ enum { NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/net/core/dev.c b/net/core/dev.c index 6a31152e4606..4d910872963f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6666,6 +6666,7 @@ static void napi_restore_config(struct napi_struct *n) { n->defer_hard_irqs = n->config->defer_hard_irqs; n->gro_flush_timeout = n->config->gro_flush_timeout; + n->irq_suspend_timeout = n->config->irq_suspend_timeout; /* a NAPI ID might be stored in the config, if so use it. if not, use * napi_hash_add to generate one for us. It will be saved to the config * in napi_disable. @@ -6680,6 +6681,7 @@ static void napi_save_config(struct napi_struct *n) { n->config->defer_hard_irqs = n->defer_hard_irqs; n->config->gro_flush_timeout = n->gro_flush_timeout; + n->config->irq_suspend_timeout = n->irq_suspend_timeout; n->config->napi_id = n->napi_id; napi_hash_del(n); } diff --git a/net/core/dev.h b/net/core/dev.h index 7881bced70a9..d043dee25a68 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -236,6 +236,31 @@ static inline void netdev_set_gro_flush_timeout(struct net_device *netdev, netdev->napi_config[i].gro_flush_timeout = timeout; } +/** + * napi_get_irq_suspend_timeout - get the irq_suspend_timeout + * @n: napi struct to get the irq_suspend_timeout from + * + * Return: the per-NAPI value of the irq_suspend_timeout field. + */ +static inline unsigned long +napi_get_irq_suspend_timeout(const struct napi_struct *n) +{ + return READ_ONCE(n->irq_suspend_timeout); +} + +/** + * napi_set_irq_suspend_timeout - set the irq_suspend_timeout for a napi + * @n: napi struct to set the irq_suspend_timeout + * @timeout: timeout value to set + * + * napi_set_irq_suspend_timeout sets the per-NAPI irq_suspend_timeout + */ +static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, + unsigned long timeout) +{ + WRITE_ONCE(n->irq_suspend_timeout, timeout); +} + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 21de7e10be16..a89cbd8d87c3 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -92,10 +92,11 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] }; /* NETDEV_CMD_NAPI_SET - do */ -static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT + 1] = { +static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = { [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, + [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, }; /* Ops table for netdev */ @@ -186,7 +187,7 @@ static const struct genl_split_ops netdev_nl_ops[] = { .cmd = NETDEV_CMD_NAPI_SET, .doit = netdev_nl_napi_set_doit, .policy = netdev_napi_set_nl_policy, - .maxattr = NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, }; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index b49c3b4e5fbe..765ce7c9d73b 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -161,6 +161,7 @@ static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { + unsigned long irq_suspend_timeout; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; void *hdr; @@ -196,6 +197,11 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, napi_defer_hard_irqs)) goto nla_put_failure; + irq_suspend_timeout = napi_get_irq_suspend_timeout(napi); + if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + irq_suspend_timeout)) + goto nla_put_failure; + gro_flush_timeout = napi_get_gro_flush_timeout(napi); if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, gro_flush_timeout)) @@ -306,6 +312,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) static int netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) { + u64 irq_suspend_timeout = 0; u64 gro_flush_timeout = 0; u32 defer = 0; @@ -314,6 +321,11 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) napi_set_defer_hard_irqs(napi, defer); } + if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) { + irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]); + napi_set_irq_suspend_timeout(napi, irq_suspend_timeout); + } + if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); napi_set_gro_flush_timeout(napi, gro_flush_timeout); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e3ebb49f60d2..e4be227d3ad6 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -124,6 +124,7 @@ enum { NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 3fcbecbdeb048dfd1bea824f4276717fed02d10e Mon Sep 17 00:00:00 2001 From: Martin Karsten Date: Sat, 9 Nov 2024 05:02:32 +0000 Subject: net: Add control functions for irq suspension The napi_suspend_irqs routine bootstraps irq suspension by elongating the defer timeout to irq_suspend_timeout. The napi_resume_irqs routine effectively cancels irq suspension by forcing the napi to be scheduled immediately. Signed-off-by: Martin Karsten Co-developed-by: Joe Damato Signed-off-by: Joe Damato Tested-by: Joe Damato Tested-by: Martin Karsten Acked-by: Stanislav Fomichev Reviewed-by: Sridhar Samudrala Link: https://patch.msgid.link/20241109050245.191288-3-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/net/busy_poll.h | 3 +++ net/core/dev.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index f03040baaefd..c858270141bc 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -52,6 +52,9 @@ void napi_busy_loop_rcu(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget); +void napi_suspend_irqs(unsigned int napi_id); +void napi_resume_irqs(unsigned int napi_id); + #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { diff --git a/net/core/dev.c b/net/core/dev.c index 4d910872963f..13d00fc10f55 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6507,6 +6507,43 @@ void napi_busy_loop(unsigned int napi_id, } EXPORT_SYMBOL(napi_busy_loop); +void napi_suspend_irqs(unsigned int napi_id) +{ + struct napi_struct *napi; + + rcu_read_lock(); + napi = napi_by_id(napi_id); + if (napi) { + unsigned long timeout = napi_get_irq_suspend_timeout(napi); + + if (timeout) + hrtimer_start(&napi->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + } + rcu_read_unlock(); +} + +void napi_resume_irqs(unsigned int napi_id) +{ + struct napi_struct *napi; + + rcu_read_lock(); + napi = napi_by_id(napi_id); + if (napi) { + /* If irq_suspend_timeout is set to 0 between the call to + * napi_suspend_irqs and now, the original value still + * determines the safety timeout as intended and napi_watchdog + * will resume irq processing. + */ + if (napi_get_irq_suspend_timeout(napi)) { + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); + } + } + rcu_read_unlock(); +} + #endif /* CONFIG_NET_RX_BUSY_POLL */ static void __napi_hash_add_with_id(struct napi_struct *napi, -- cgit v1.2.3