From b400f4b87430c105d92550cee5a72aea01fdf3d6 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 11 Dec 2024 21:20:30 +0000 Subject: page_pool: Set `dma_sync` to false for devmem memory provider Move the `dma_map` and `dma_sync` checks to `page_pool_init` to make them generic. Set dma_sync to false for devmem memory provider because the dma_sync APIs should not be used for dma_buf backed devmem memory provider. Cc: Jason Gunthorpe Signed-off-by: Samiullah Khawaja Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/core/devmem.c') diff --git a/net/core/devmem.c b/net/core/devmem.c index 11b91c12ee11..3ebdeed2bf18 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -331,11 +331,10 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) if (!binding) return -EINVAL; - if (!pool->dma_map) - return -EOPNOTSUPP; - - if (pool->dma_sync) - return -EOPNOTSUPP; + /* dma-buf dma addresses do not need and should not be used with + * dma_sync_for_cpu/device. Force disable dma_sync. + */ + pool->dma_sync = false; if (pool->p.order != 0) return -E2BIG; -- cgit v1.2.3 From 7dba339faae991a23c54f7b93a58798c58f8c16f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:31 +0000 Subject: page_pool: disable sync for cpu for dmabuf memory provider dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically its the driver responsibility to dma_sync for CPU, but the driver should not dma_sync for CPU if the netmem is actually coming from a dmabuf memory provider. The page_pool already exposes a helper for dma_sync_for_cpu: page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle netmem, and have it skip dma_sync if the memory is from a dmabuf memory provider. Drivers should migrate to using this helper when adding support for netmem. Also minimize the impact on the dma syncing performance for pages. Special case the dma-sync path for pages to not go through the overhead checks for dma-syncing and conversion to netmem. Cc: Alexander Lobakin Cc: Jason Gunthorpe Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-5-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 35 ++++++++++++++++++++++++++++++----- include/net/page_pool/types.h | 3 ++- net/core/devmem.c | 1 + net/core/page_pool.c | 1 + 4 files changed, 34 insertions(+), 6 deletions(-) (limited to 'net/core/devmem.c') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 95af7f0b029e..e555921e5233 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -422,7 +422,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + +static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const dma_addr_t dma_addr, + u32 offset, u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, dma_addr, + offset + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); } /** @@ -441,10 +455,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, const struct page *page, u32 offset, u32 dma_sync_size) { - dma_sync_single_range_for_cpu(pool->p.dev, - page_pool_get_dma_addr(page), - offset + pool->p.offset, dma_sync_size, - page_pool_get_dma_dir(pool)); + __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset, + dma_sync_size); +} + +static inline void +page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, + const netmem_ref netmem, u32 offset, + u32 dma_sync_size) +{ + if (!pool->dma_sync_for_cpu) + return; + + __page_pool_dma_sync_for_cpu(pool, + page_pool_get_dma_addr_netmem(netmem), + offset, dma_sync_size); } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3270c92841b4..ed4cd114180a 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -164,7 +164,8 @@ struct page_pool { bool has_init_callback:1; /* slow::init_callback is set */ bool dma_map:1; /* Perform DMA mapping */ - bool dma_sync:1; /* Perform DMA sync */ + bool dma_sync:1; /* Perform DMA sync for device */ + bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */ #ifdef CONFIG_PAGE_POOL_STATS bool system:1; /* This is a global percpu pool */ #endif diff --git a/net/core/devmem.c b/net/core/devmem.c index 3ebdeed2bf18..0b6ed7525b22 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -335,6 +335,7 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) * dma_sync_for_cpu/device. Force disable dma_sync. */ pool->dma_sync = false; + pool->dma_sync_for_cpu = false; if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 275a7fd209d7..e07ad7315955 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool, memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); pool->cpuid = cpuid; + pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ if (pool->slow.flags & ~PP_FLAG_ALL) -- cgit v1.2.3 From a08a5c9484015a88c937aa0f9eaf3efb2123c3b8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:45 +0000 Subject: net: devmem: add ring parameter filtering If driver doesn't support ring parameter or tcp-data-split configuration is not sufficient, the devmem should not be set up. Before setup the devmem, tcp-data-split should be ON and hds-thresh value should be 0. Tested-by: Stanislav Fomichev Reviewed-by: Mina Almasry Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-4-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/core/devmem.c') diff --git a/net/core/devmem.c b/net/core/devmem.c index 0b6ed7525b22..c971b8aceac8 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -140,6 +141,16 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -ERANGE; } + if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); + return -EINVAL; + } + + if (dev->ethtool->hds_thresh) { + NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); + return -EINVAL; + } + rxq = __netif_get_rx_queue(dev, rxq_idx); if (rxq->mp_params.mp_priv) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); -- cgit v1.2.3 From 3c836451ca9041cfb32a7d8f59ea15b3b991bbb3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:11 -0800 Subject: net: move HDS config from ethtool state Separate the HDS config from the ethtool state struct. The HDS config contains just simple parameters, not state. Having it as a separate struct will make it easier to clone / copy and also long term potentially make it per-queue. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +++-- drivers/net/netdevsim/ethtool.c | 9 +++++---- drivers/net/netdevsim/netdev.c | 10 +++++----- include/linux/ethtool.h | 4 ---- include/linux/netdevice.h | 3 +++ include/net/netdev_queues.h | 10 ++++++++++ net/core/dev.c | 10 ++++++++-- net/core/devmem.c | 4 ++-- net/ethtool/rings.c | 8 +++++--- 10 files changed, 43 insertions(+), 24 deletions(-) (limited to 'net/core/devmem.c') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 748c9b1ea701..0998b20578b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4610,7 +4610,7 @@ void bnxt_set_tpa_flags(struct bnxt *bp) static void bnxt_init_ring_params(struct bnxt *bp) { bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; - bp->dev->ethtool->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; + bp->dev->cfg->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; } /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must @@ -6585,7 +6585,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - u16 hds_thresh = (u16)bp->dev->ethtool->hds_thresh; + u16 hds_thresh = (u16)bp->dev->cfg->hds_thresh; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 65a20931c579..0a6d47d4d66b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "bnxt_hsi.h" #include "bnxt.h" @@ -834,7 +835,7 @@ static void bnxt_get_ringparam(struct net_device *dev, ering->rx_jumbo_pending = bp->rx_agg_ring_size; ering->tx_pending = bp->tx_ring_size; - kernel_ering->hds_thresh = dev->ethtool->hds_thresh; + kernel_ering->hds_thresh = dev->cfg->hds_thresh; kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX; } @@ -852,7 +853,7 @@ static int bnxt_set_ringparam(struct net_device *dev, (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; - hds_config_mod = tcp_data_split != dev->ethtool->hds_config; + hds_config_mod = tcp_data_split != dev->cfg->hds_config; if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod) return -EINVAL; diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 12163635b759..189793debdb7 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -3,6 +3,7 @@ #include #include +#include #include "netdevsim.h" @@ -71,8 +72,8 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); - kernel_ring->tcp_data_split = dev->ethtool->hds_config; - kernel_ring->hds_thresh = dev->ethtool->hds_thresh; + kernel_ring->tcp_data_split = dev->cfg->hds_config; + kernel_ring->hds_thresh = dev->cfg->hds_thresh; kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) @@ -190,8 +191,8 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; - ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; - ns->netdev->ethtool->hds_thresh = 0; + ns->netdev->cfg->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + ns->netdev->cfg->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index f92b05ccdca9..42f247cbdcee 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -55,10 +55,10 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); - struct ethtool_netdev_state *ethtool; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; + struct netdev_config *cfg; struct nsim_rq *rq; int rxq; @@ -76,11 +76,11 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; - ethtool = peer_dev->ethtool; + cfg = peer_dev->cfg; if (skb_is_nonlinear(skb) && - (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || - (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && - ethtool->hds_thresh > len))) + (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + cfg->hds_thresh > len))) skb_linearize(skb); skb_tx_timestamp(skb); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 64301ddf2f59..870994cc3ef7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1171,16 +1171,12 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. - * @hds_thresh: HDS Threshold value. - * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; - u32 hds_thresh; - u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8308d9c75918..173a8b3a9eb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct dsa_port; struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; +struct netdev_config; struct netdev_name_node; struct sd_flow_limit; struct sfp_bus; @@ -2410,6 +2411,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + /** @cfg: net_device queue-related configuration */ + struct netdev_config *cfg; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 5ca019d294ca..b02bb9f109d5 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,16 @@ #include +/** + * struct netdev_config - queue-related configuration for a netdev + * @hds_thresh: HDS Threshold value. + * @hds_config: HDS value from userspace. + */ +struct netdev_config { + u32 hds_thresh; + u8 hds_config; +}; + /* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; diff --git a/net/core/dev.c b/net/core/dev.c index 3dab6699b1c1..e37d47cf476b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -106,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -9719,7 +9720,7 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; - if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && bpf->command == XDP_SETUP_PROG && bpf->prog && !bpf->prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(bpf->extack, @@ -9764,7 +9765,7 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; - if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && prog && !prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); return -EBUSY; @@ -11542,6 +11543,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + dev->cfg = kzalloc(sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); + if (!dev->cfg) + goto free_all; + napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); if (!dev->napi_config) @@ -11610,6 +11615,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->cfg); kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/core/devmem.c b/net/core/devmem.c index c971b8aceac8..3bba3f018df0 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -141,12 +141,12 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -ERANGE; } - if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; } - if (dev->ethtool->hds_thresh) { + if (dev->cfg->hds_thresh) { NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); return -EINVAL; } diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index d8cd4e4d7762..7a3c2a2dff12 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" @@ -219,7 +221,7 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) dev->ethtool_ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); - kernel_ringparam.tcp_data_split = dev->ethtool->hds_config; + kernel_ringparam.tcp_data_split = dev->cfg->hds_config; ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, @@ -295,8 +297,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); if (!ret) { - dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; - dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh; + dev->cfg->hds_config = kernel_ringparam.tcp_data_split; + dev->cfg->hds_thresh = kernel_ringparam.hds_thresh; } return ret < 0 ? ret : 1; -- cgit v1.2.3