From 024bfd2e9d80d7131f1178eb2235030b96f7ef0e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:31 +0100 Subject: page_pool: make page_pool_put_page_bulk() handle array of netmems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, page_pool_put_page_bulk() indeed takes an array of pointers to the data, not pages, despite the name. As one side effect, when you're freeing frags from &skb_shared_info, xdp_return_frame_bulk() converts page pointers to virtual addresses and then page_pool_put_page_bulk() converts them back. Moreover, data pointers assume every frag is placed in the host memory, making this function non-universal. Make page_pool_put_page_bulk() handle array of netmems. Pass frag netmems directly and use virt_to_netmem() when freeing xdpf->data, so that the PP core will then get the compound netmem and take care of the rest. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-9-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f89cf93f6eb4..4c85b77cfdac 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -840,22 +840,22 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, EXPORT_SYMBOL(page_pool_put_unrefed_page); /** - * page_pool_put_page_bulk() - release references on multiple pages + * page_pool_put_netmem_bulk() - release references on multiple netmems * @pool: pool from which pages were allocated - * @data: array holding page pointers - * @count: number of pages in @data + * @data: array holding netmem references + * @count: number of entries in @data * - * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring - * producer lock. If the ptr_ring is full, page_pool_put_page_bulk() - * will release leftover pages to the page allocator. - * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx + * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring + * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk() + * will release leftover netmems to the memory provider. + * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx * completion loop for the XDP_REDIRECT use case. * * Please note the caller must not use data area after running - * page_pool_put_page_bulk(), as this function overwrites it. + * page_pool_put_netmem_bulk(), as this function overwrites it. */ -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, + u32 count) { int i, bulk_len = 0; bool allow_direct; @@ -864,7 +864,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, allow_direct = page_pool_napi_local(pool); for (i = 0; i < count; i++) { - netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i])); + netmem_ref netmem = netmem_compound_head(data[i]); /* It is not the last user for the page frag case */ if (!page_pool_is_last_ref(netmem)) @@ -873,7 +873,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ if (netmem) - data[bulk_len++] = (__force void *)netmem; + data[bulk_len++] = netmem; } if (!bulk_len) @@ -882,7 +882,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ in_softirq = page_pool_producer_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) { + if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; @@ -899,9 +899,9 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, * since put_page() with refcnt == 1 can be an expensive operation */ for (; i < bulk_len; i++) - page_pool_return_page(pool, (__force netmem_ref)data[i]); + page_pool_return_page(pool, data[i]); } -EXPORT_SYMBOL(page_pool_put_page_bulk); +EXPORT_SYMBOL(page_pool_put_netmem_bulk); static netmem_ref page_pool_drain_frag(struct page_pool *pool, netmem_ref netmem) -- cgit v1.2.3 From fcc680a647ba77370480fe753664cc10d572b240 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:38 +0100 Subject: page_pool: allow mixing PPs within one bulk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main reason for this change was to allow mixing pages from different &page_pools within one &xdp_buff/&xdp_frame. Why not? With stuff like devmem and io_uring zerocopy Rx, it's required to have separate PPs for header buffers and payload buffers. Adjust xdp_return_frame_bulk() and page_pool_put_netmem_bulk(), so that they won't be tied to a particular pool. Let the latter create a separate bulk of pages which's PP is different from the first netmem of the bulk and process it after the main loop. This greatly optimizes xdp_return_frame_bulk(): no more hashtable lookups and forced flushes on PP mismatch. Also make xdp_flush_frame_bulk() inline, as it's just one if + function call + one u32 read, not worth extending the call ladder. Co-developed-by: Toke Høiland-Jørgensen # iterative Signed-off-by: Toke Høiland-Jørgensen Suggested-by: Jakub Kicinski # while (count) Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-2-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 6 +-- include/net/xdp.h | 16 +++++-- net/core/page_pool.c | 109 ++++++++++++++++++++++++++++-------------- net/core/xdp.c | 29 +---------- 4 files changed, 87 insertions(+), 73 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1ea16b0e9c79..05a864031271 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -259,8 +259,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count); +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -272,8 +271,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, { } -static inline void page_pool_put_netmem_bulk(struct page_pool *pool, - netmem_ref *data, u32 count) +static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { } #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index f4020b29122f..9e7eb8223513 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -11,6 +11,8 @@ #include #include /* skb_shared_info */ +#include + /** * DOC: XDP RX-queue information * @@ -193,14 +195,12 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; - void *xa; netmem_ref q[XDP_BULK_QUEUE_SIZE]; }; static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) { - /* bq->count will be zero'ed when bq->xa gets updated */ - bq->xa = NULL; + bq->count = 0; } static inline struct skb_shared_info * @@ -317,10 +317,18 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); +static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) +{ + if (unlikely(!bq->count)) + return; + + page_pool_put_netmem_bulk(bq->q, bq->count); + bq->count = 0; +} + static __always_inline unsigned int xdp_get_frame_len(const struct xdp_frame *xdpf) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 4c85b77cfdac..8292e3edbbfd 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -839,9 +839,41 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, } EXPORT_SYMBOL(page_pool_put_unrefed_page); +static void page_pool_recycle_ring_bulk(struct page_pool *pool, + netmem_ref *bulk, + u32 bulk_len) +{ + bool in_softirq; + u32 i; + + /* Bulk produce into ptr_ring page_pool cache */ + in_softirq = page_pool_producer_lock(pool); + + for (i = 0; i < bulk_len; i++) { + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } + } + + page_pool_producer_unlock(pool, in_softirq); + recycle_stat_add(pool, ring, i); + + /* Hopefully all pages were returned into ptr_ring */ + if (likely(i == bulk_len)) + return; + + /* + * ptr_ring cache is full, free remaining pages outside producer lock + * since put_page() with refcnt == 1 can be an expensive operation. + */ + for (; i < bulk_len; i++) + page_pool_return_page(pool, bulk[i]); +} + /** * page_pool_put_netmem_bulk() - release references on multiple netmems - * @pool: pool from which pages were allocated * @data: array holding netmem references * @count: number of entries in @data * @@ -854,52 +886,55 @@ EXPORT_SYMBOL(page_pool_put_unrefed_page); * Please note the caller must not use data area after running * page_pool_put_netmem_bulk(), as this function overwrites it. */ -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count) +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { - int i, bulk_len = 0; - bool allow_direct; - bool in_softirq; - - allow_direct = page_pool_napi_local(pool); + u32 bulk_len = 0; - for (i = 0; i < count; i++) { + for (u32 i = 0; i < count; i++) { netmem_ref netmem = netmem_compound_head(data[i]); - /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(netmem)) - continue; - - netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); - /* Approved for bulk recycling in ptr_ring cache */ - if (netmem) + if (page_pool_is_last_ref(netmem)) data[bulk_len++] = netmem; } - if (!bulk_len) - return; - - /* Bulk producer into ptr_ring page_pool cache */ - in_softirq = page_pool_producer_lock(pool); - for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { - /* ring full */ - recycle_stat_inc(pool, ring_full); - break; + count = bulk_len; + while (count) { + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; + struct page_pool *pool = NULL; + bool allow_direct; + u32 foreign = 0; + + bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + struct page_pool *netmem_pp; + netmem_ref netmem = data[i]; + + netmem_pp = netmem_get_pp(netmem); + if (unlikely(!pool)) { + pool = netmem_pp; + allow_direct = page_pool_napi_local(pool); + } else if (netmem_pp != pool) { + /* + * If the netmem belongs to a different + * page_pool, save it for another round. + */ + data[foreign++] = netmem; + continue; + } + + netmem = __page_pool_put_page(pool, netmem, -1, + allow_direct); + /* Approved for bulk recycling in ptr_ring cache */ + if (netmem) + bulk[bulk_len++] = netmem; } - } - recycle_stat_add(pool, ring, i); - page_pool_producer_unlock(pool, in_softirq); - /* Hopefully all pages was return into ptr_ring */ - if (likely(i == bulk_len)) - return; + if (bulk_len) + page_pool_recycle_ring_bulk(pool, bulk, bulk_len); - /* ptr_ring cache full, free remaining pages outside producer lock - * since put_page() with refcnt == 1 can be an expensive operation - */ - for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); + count = foreign; + } } EXPORT_SYMBOL(page_pool_put_netmem_bulk); diff --git a/net/core/xdp.c b/net/core/xdp.c index 938ad15c9857..56127e8ec85f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -511,46 +511,19 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); * xdp_frame_bulk is usually stored/allocated on the function * call-stack to avoid locking penalties. */ -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) -{ - struct xdp_mem_allocator *xa = bq->xa; - - if (unlikely(!xa || !bq->count)) - return; - - page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count); - /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */ - bq->count = 0; -} -EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); /* Must be called with rcu_read_lock held */ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq) { - struct xdp_mem_info *mem = &xdpf->mem; - struct xdp_mem_allocator *xa; - - if (mem->type != MEM_TYPE_PAGE_POOL) { + if (xdpf->mem.type != MEM_TYPE_PAGE_POOL) { xdp_return_frame(xdpf); return; } - xa = bq->xa; - if (unlikely(!xa)) { - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - bq->count = 0; - bq->xa = xa; - } - if (bq->count == XDP_BULK_QUEUE_SIZE) xdp_flush_frame_bulk(bq); - if (unlikely(mem->id != xa->mem.id)) { - xdp_flush_frame_bulk(bq); - bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - } - if (unlikely(xdp_frame_has_frags(xdpf))) { struct skb_shared_info *sinfo; int i; -- cgit v1.2.3 From 91a152cbb49c26609d217cf2f116d46143b9b8be Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:28 +0000 Subject: net: page_pool: rename page_pool_alloc_netmem to *_netmems page_pool_alloc_netmem (without an s) was the mirror of page_pool_alloc_pages (with an s), which was confusing. Rename to page_pool_alloc_netmems so it's the mirror of page_pool_alloc_pages. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241211212033.1684197-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 2 +- net/core/page_pool.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 05a864031271..3270c92841b4 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -242,7 +242,7 @@ struct page_pool { }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 8292e3edbbfd..7a17af286a9e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -574,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { netmem_ref netmem; @@ -590,11 +590,11 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; } -EXPORT_SYMBOL(page_pool_alloc_netmem); +EXPORT_SYMBOL(page_pool_alloc_netmems); struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { - return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); + return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); @@ -992,7 +992,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, } if (!netmem) { - netmem = page_pool_alloc_netmem(pool, gfp); + netmem = page_pool_alloc_netmems(pool, gfp); if (unlikely(!netmem)) { pool->frag_page = 0; return 0; -- cgit v1.2.3 From b400f4b87430c105d92550cee5a72aea01fdf3d6 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 11 Dec 2024 21:20:30 +0000 Subject: page_pool: Set `dma_sync` to false for devmem memory provider Move the `dma_map` and `dma_sync` checks to `page_pool_init` to make them generic. Set dma_sync to false for devmem memory provider because the dma_sync APIs should not be used for dma_buf backed devmem memory provider. Cc: Jason Gunthorpe Signed-off-by: Samiullah Khawaja Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 9 ++++----- net/core/page_pool.c | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/net/core/devmem.c b/net/core/devmem.c index 11b91c12ee11..3ebdeed2bf18 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -331,11 +331,10 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) if (!binding) return -EINVAL; - if (!pool->dma_map) - return -EOPNOTSUPP; - - if (pool->dma_sync) - return -EOPNOTSUPP; + /* dma-buf dma addresses do not need and should not be used with + * dma_sync_for_cpu/device. Force disable dma_sync. + */ + pool->dma_sync = false; if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 7a17af286a9e..275a7fd209d7 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -287,6 +287,9 @@ static int page_pool_init(struct page_pool *pool, } if (pool->mp_priv) { + if (!pool->dma_map || !pool->dma_sync) + return -EOPNOTSUPP; + err = mp_dmabuf_devmem_init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, -- cgit v1.2.3 From 7dba339faae991a23c54f7b93a58798c58f8c16f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:31 +0000 Subject: page_pool: disable sync for cpu for dmabuf memory provider dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically its the driver responsibility to dma_sync for CPU, but the driver should not dma_sync for CPU if the netmem is actually coming from a dmabuf memory provider. The page_pool already exposes a helper for dma_sync_for_cpu: page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle netmem, and have it skip dma_sync if the memory is from a dmabuf memory provider. Drivers should migrate to using this helper when adding support for netmem. Also minimize the impact on the dma syncing performance for pages. Special case the dma-sync path for pages to not go through the overhead checks for dma-syncing and conversion to netmem. Cc: Alexander Lobakin Cc: Jason Gunthorpe Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-5-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 35 ++++++++++++++++++++++++++++++----- include/net/page_pool/types.h | 3 ++- net/core/devmem.c | 1 + net/core/page_pool.c | 1 + 4 files changed, 34 insertions(+), 6 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 95af7f0b029e..e555921e5233 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -422,7 +422,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + +static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const dma_addr_t dma_addr, + u32 offset, u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, dma_addr, + offset + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); } /** @@ -441,10 +455,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, const struct page *page, u32 offset, u32 dma_sync_size) { - dma_sync_single_range_for_cpu(pool->p.dev, - page_pool_get_dma_addr(page), - offset + pool->p.offset, dma_sync_size, - page_pool_get_dma_dir(pool)); + __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset, + dma_sync_size); +} + +static inline void +page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, + const netmem_ref netmem, u32 offset, + u32 dma_sync_size) +{ + if (!pool->dma_sync_for_cpu) + return; + + __page_pool_dma_sync_for_cpu(pool, + page_pool_get_dma_addr_netmem(netmem), + offset, dma_sync_size); } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3270c92841b4..ed4cd114180a 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -164,7 +164,8 @@ struct page_pool { bool has_init_callback:1; /* slow::init_callback is set */ bool dma_map:1; /* Perform DMA mapping */ - bool dma_sync:1; /* Perform DMA sync */ + bool dma_sync:1; /* Perform DMA sync for device */ + bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */ #ifdef CONFIG_PAGE_POOL_STATS bool system:1; /* This is a global percpu pool */ #endif diff --git a/net/core/devmem.c b/net/core/devmem.c index 3ebdeed2bf18..0b6ed7525b22 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -335,6 +335,7 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) * dma_sync_for_cpu/device. Force disable dma_sync. */ pool->dma_sync = false; + pool->dma_sync_for_cpu = false; if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 275a7fd209d7..e07ad7315955 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool, memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); pool->cpuid = cpuid; + pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ if (pool->slow.flags & ~PP_FLAG_ALL) -- cgit v1.2.3 From d3c9510dc900e9ff3ea330189c0465c9f00fba18 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 15 Dec 2024 13:29:38 -0800 Subject: net: page_pool: rename page_pool_is_last_ref() page_pool_is_last_ref() releases a reference while the name, to me at least, suggests it just checks if the refcount is 1. The semantics of the function are the same as those of atomic_dec_and_test() and refcount_dec_and_test(), so just use the _and_test() suffix. Reviewed-by: Alexander Lobakin Reviewed-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20241215212938.99210-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 4 ++-- net/core/page_pool.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index e555921e5233..776a3008ac28 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -306,7 +306,7 @@ static inline void page_pool_ref_page(struct page *page) page_pool_ref_netmem(page_to_netmem(page)); } -static inline bool page_pool_is_last_ref(netmem_ref netmem) +static inline bool page_pool_unref_and_test(netmem_ref netmem) { /* If page_pool_unref_page() returns 0, we were the last user */ return page_pool_unref_netmem(netmem, 1) == 0; @@ -321,7 +321,7 @@ static inline void page_pool_put_netmem(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_ref(netmem)) + if (!page_pool_unref_and_test(netmem)) return; page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e07ad7315955..9733206d6406 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -897,7 +897,7 @@ void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) for (u32 i = 0; i < count; i++) { netmem_ref netmem = netmem_compound_head(data[i]); - if (page_pool_is_last_ref(netmem)) + if (page_pool_unref_and_test(netmem)) data[bulk_len++] = netmem; } -- cgit v1.2.3 From 8d20dcda404d48784f2359976811bfc189992aa0 Mon Sep 17 00:00:00 2001 From: John Daley Date: Wed, 15 Jan 2025 10:13:12 -0800 Subject: selftests: drv-net-hw: inject pp_alloc_fail errors in the right place The tool pp_alloc_fail.py tested error recovery by injecting errors into the function page_pool_alloc_pages(). The page pool allocation function page_pool_dev_alloc() does not end up calling page_pool_alloc_pages(). page_pool_alloc_netmems() seems to be the function that is called by all of the page pool alloc functions in the API, so move error injection to that function instead. Signed-off-by: John Daley Link: https://patch.msgid.link/20250115181312.3544-2-johndale@cisco.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 2 +- tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9733206d6406..a3de752c5178 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -595,13 +595,13 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) return netmem; } EXPORT_SYMBOL(page_pool_alloc_netmems); +ALLOW_ERROR_INJECTION(page_pool_alloc_netmems, NULL); struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); -ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); /* Calculate distance between two u32 values, valid if distance is below 2^(31) * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index 05b6fbb3fcdd..ad192fef3117 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -21,9 +21,9 @@ def _enable_pp_allocation_fail(): if not os.path.exists("/sys/kernel/debug/fail_function"): raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") - if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("page_pool_alloc_pages\n") + fp.write("page_pool_alloc_netmems\n") _write_fail_config({ "verbose": 0, @@ -37,7 +37,7 @@ def _disable_pp_allocation_fail(): if not os.path.exists("/sys/kernel/debug/fail_function"): return - if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): with open("/sys/kernel/debug/fail_function/inject", "w") as fp: fp.write("\n") -- cgit v1.2.3 From 6bf9b5b40af373690313f64a3935b2bf2e5d46d9 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 23 Dec 2024 17:00:38 -0500 Subject: mm: alloc_pages_bulk: rename API The previous commit removed the page_list argument from alloc_pages_bulk_noprof() along with the alloc_pages_bulk_list() function. Now that only the *_array() flavour of the API remains, we can do the following renaming (along with the _noprof() ones): alloc_pages_bulk_array -> alloc_pages_bulk alloc_pages_bulk_array_mempolicy -> alloc_pages_bulk_mempolicy alloc_pages_bulk_array_node -> alloc_pages_bulk_node Link: https://lkml.kernel.org/r/275a3bbc0be20fbe9002297d60045e67ab3d4ada.1734991165.git.luizcap@redhat.com Signed-off-by: Luiz Capitulino Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Yunsheng Lin Signed-off-by: Andrew Morton --- drivers/staging/media/atomisp/pci/hmm/hmm_bo.c | 4 ++-- drivers/vfio/pci/mlx5/cmd.c | 14 +++++++------- drivers/vfio/pci/virtio/migrate.c | 6 +++--- fs/btrfs/extent_io.c | 2 +- fs/erofs/zutil.c | 4 ++-- fs/splice.c | 2 +- fs/xfs/xfs_buf.c | 4 ++-- include/linux/gfp.h | 14 +++++++------- kernel/bpf/arena.c | 2 +- lib/alloc_tag.c | 4 ++-- lib/kunit_iov_iter.c | 2 +- lib/test_vmalloc.c | 2 +- mm/mempolicy.c | 14 +++++++------- mm/vmalloc.c | 4 ++-- net/core/page_pool.c | 7 +++---- net/sunrpc/svc.c | 4 ++-- net/sunrpc/svc_xprt.c | 3 +-- 17 files changed, 45 insertions(+), 47 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm_bo.c b/drivers/staging/media/atomisp/pci/hmm/hmm_bo.c index 07ed33464d71..224ca8d42721 100644 --- a/drivers/staging/media/atomisp/pci/hmm/hmm_bo.c +++ b/drivers/staging/media/atomisp/pci/hmm/hmm_bo.c @@ -624,10 +624,10 @@ static int alloc_private_pages(struct hmm_buffer_object *bo) const gfp_t gfp = __GFP_NOWARN | __GFP_RECLAIM | __GFP_FS; int ret; - ret = alloc_pages_bulk_array(gfp, bo->pgnr, bo->pages); + ret = alloc_pages_bulk(gfp, bo->pgnr, bo->pages); if (ret != bo->pgnr) { free_pages_bulk_array(ret, bo->pages); - dev_err(atomisp_dev, "alloc_pages_bulk_array() failed\n"); + dev_err(atomisp_dev, "alloc_pages_bulk() failed\n"); return -ENOMEM; } diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index eb7387ee6ebd..11eda6b207f1 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -408,7 +408,7 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) buf->dma_dir, 0); } - /* Undo alloc_pages_bulk_array() */ + /* Undo alloc_pages_bulk() */ for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) __free_page(sg_page_iter_page(&sg_iter)); sg_free_append_table(&buf->table); @@ -431,8 +431,8 @@ static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, return -ENOMEM; do { - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, - page_list); + filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill, + page_list); if (!filled) { ret = -ENOMEM; goto err; @@ -1342,7 +1342,7 @@ static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf) { int i; - /* Undo alloc_pages_bulk_array() */ + /* Undo alloc_pages_bulk() */ for (i = 0; i < recv_buf->npages; i++) __free_page(recv_buf->page_list[i]); @@ -1361,9 +1361,9 @@ static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf, return -ENOMEM; for (;;) { - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, - npages - done, - recv_buf->page_list + done); + filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, + npages - done, + recv_buf->page_list + done); if (!filled) goto err; diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c index ee54f4c17857..ba92bb4e9af9 100644 --- a/drivers/vfio/pci/virtio/migrate.c +++ b/drivers/vfio/pci/virtio/migrate.c @@ -77,8 +77,8 @@ static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf, return -ENOMEM; do { - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, - page_list); + filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill, + page_list); if (!filled) { ret = -ENOMEM; goto err; @@ -112,7 +112,7 @@ static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf) { struct sg_page_iter sg_iter; - /* Undo alloc_pages_bulk_array() */ + /* Undo alloc_pages_bulk() */ for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) __free_page(sg_page_iter_page(&sg_iter)); sg_free_append_table(&buf->table); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b923d0cec61c..d70e9461fea8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -632,7 +632,7 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, for (allocated = 0; allocated < nr_pages;) { unsigned int last = allocated; - allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array); + allocated = alloc_pages_bulk(gfp, nr_pages, page_array); if (unlikely(allocated == last)) { /* No progress, fail and do cleanup. */ for (int i = 0; i < allocated; i++) { diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 0dd65cefce33..9c5aa9d53682 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -87,8 +87,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) tmp_pages[j] = gbuf->pages[j]; do { last = j; - j = alloc_pages_bulk_array(GFP_KERNEL, nrpages, - tmp_pages); + j = alloc_pages_bulk(GFP_KERNEL, nrpages, + tmp_pages); if (last == j) goto out; } while (j != nrpages); diff --git a/fs/splice.c b/fs/splice.c index 2898fa1e9e63..28cfa63aa236 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -342,7 +342,7 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, return -ENOMEM; pages = (struct page **)(bv + npages); - npages = alloc_pages_bulk_array(GFP_USER, npages, pages); + npages = alloc_pages_bulk(GFP_USER, npages, pages); if (!npages) { kfree(bv); return -ENOMEM; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index aa63b8efd782..82db3ab0e8b4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -395,8 +395,8 @@ xfs_buf_alloc_pages( for (;;) { long last = filled; - filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count, - bp->b_pages); + filled = alloc_pages_bulk(gfp_mask, bp->b_page_count, + bp->b_pages); if (filled == bp->b_page_count) { XFS_STATS_INC(bp->b_mount, xb_page_found); break; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f8b33c5e7a14..6bb1a5a7a4ae 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -215,18 +215,18 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, struct page **page_array); #define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) -unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, +unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); -#define alloc_pages_bulk_array_mempolicy(...) \ - alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__)) +#define alloc_pages_bulk_mempolicy(...) \ + alloc_hooks(alloc_pages_bulk_mempolicy_noprof(__VA_ARGS__)) /* Bulk allocate order-0 pages */ -#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array) \ +#define alloc_pages_bulk(_gfp, _nr_pages, _page_array) \ __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _page_array) static inline unsigned long -alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, +alloc_pages_bulk_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) { if (nid == NUMA_NO_NODE) @@ -235,8 +235,8 @@ alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, page_array); } -#define alloc_pages_bulk_array_node(...) \ - alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__)) +#define alloc_pages_bulk_node(...) \ + alloc_hooks(alloc_pages_bulk_node_noprof(__VA_ARGS__)) static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 945a5680f6a5..9927cd4c9e0e 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -443,7 +443,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt return 0; } - /* zeroing is needed, since alloc_pages_bulk_array() only fills in non-zero entries */ + /* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */ pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL); if (!pages) return 0; diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 7dcebf118a3e..4bb778be4476 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -420,8 +420,8 @@ static int vm_module_tags_populate(void) unsigned long nr; more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; - nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, - NUMA_NO_NODE, more_pages, next_page); + nr = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages, next_page); if (nr < more_pages || vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, next_page, PAGE_SHIFT) < 0) { diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c index 13e15687675a..830bf3eca4c2 100644 --- a/lib/kunit_iov_iter.c +++ b/lib/kunit_iov_iter.c @@ -57,7 +57,7 @@ static void *__init iov_kunit_create_buffer(struct kunit *test, KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages); *ppages = pages; - got = alloc_pages_bulk_array(GFP_KERNEL, npages, pages); + got = alloc_pages_bulk(GFP_KERNEL, npages, pages); if (got != npages) { release_pages(pages, got); KUNIT_ASSERT_EQ(test, got, npages); diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 4ddf769861ff..f585949ff696 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -373,7 +373,7 @@ vm_map_ram_test(void) if (!pages) return -1; - nr_allocated = alloc_pages_bulk_array(GFP_KERNEL, map_nr_pages, pages); + nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages); if (nr_allocated != map_nr_pages) goto cleanup; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0da6cf950f7b..f83b73236ffe 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2372,7 +2372,7 @@ struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) } EXPORT_SYMBOL(folio_alloc_noprof); -static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, +static unsigned long alloc_pages_bulk_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { @@ -2407,7 +2407,7 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, return total_allocated; } -static unsigned long alloc_pages_bulk_array_weighted_interleave(gfp_t gfp, +static unsigned long alloc_pages_bulk_weighted_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { @@ -2522,7 +2522,7 @@ static unsigned long alloc_pages_bulk_array_weighted_interleave(gfp_t gfp, return total_allocated; } -static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, +static unsigned long alloc_pages_bulk_preferred_many(gfp_t gfp, int nid, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { @@ -2548,7 +2548,7 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, * It can accelerate memory allocation especially interleaving * allocate memory. */ -unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, +unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { struct mempolicy *pol = &default_policy; @@ -2559,15 +2559,15 @@ unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE) - return alloc_pages_bulk_array_interleave(gfp, pol, + return alloc_pages_bulk_interleave(gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_WEIGHTED_INTERLEAVE) - return alloc_pages_bulk_array_weighted_interleave( + return alloc_pages_bulk_weighted_interleave( gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_PREFERRED_MANY) - return alloc_pages_bulk_array_preferred_many(gfp, + return alloc_pages_bulk_preferred_many(gfp, numa_node_id(), pol, nr_pages, page_array); nid = numa_node_id(); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5c88d0e90c20..a6e7acebe9ad 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3562,11 +3562,11 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * but mempolicy wants to alloc memory by interleaving. */ if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE) - nr = alloc_pages_bulk_array_mempolicy_noprof(gfp, + nr = alloc_pages_bulk_mempolicy_noprof(gfp, nr_pages_request, pages + nr_allocated); else - nr = alloc_pages_bulk_array_node_noprof(gfp, nid, + nr = alloc_pages_bulk_node_noprof(gfp, nid, nr_pages_request, pages + nr_allocated); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f89cf93f6eb4..8a91c1972dc5 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -532,12 +532,11 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, if (unlikely(pool->alloc.count > 0)) return pool->alloc.cache[--pool->alloc.count]; - /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ + /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array_node(gfp, - pool->p.nid, bulk, - (struct page **)pool->alloc.cache); + nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk, + (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) return 0; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 79879b7d39cb..e7f9c295d13c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -651,8 +651,8 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) if (pages > RPCSVC_MAXPAGES) pages = RPCSVC_MAXPAGES; - ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages, - rqstp->rq_pages); + ret = alloc_pages_bulk_node(GFP_KERNEL, node, pages, + rqstp->rq_pages); return ret == pages; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 43c57124de52..aebc0d8ddff5 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -671,8 +671,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array(GFP_KERNEL, pages, - rqstp->rq_pages); + ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; -- cgit v1.2.3 From 67e4bb2ced0f2d8fbd414f932daea94ba63ae4c4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 23 Jan 2025 15:16:20 -0800 Subject: net: page_pool: don't try to stash the napi id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Page ppol tried to cache the NAPI ID in page pool info to avoid having a dependency on the life cycle of the NAPI instance. Since commit under Fixes the NAPI ID is not populated until napi_enable() and there's a good chance that page pool is created before NAPI gets enabled. Protect the NAPI pointer with the existing page pool mutex, the reading path already holds it. napi_id itself we need to READ_ONCE(), it's protected by netdev_lock() which are not holding in page pool. Before this patch napi IDs were missing for mlx5: # ./cli.py --spec netlink/specs/netdev.yaml --dump page-pool-get [{'id': 144, 'ifindex': 2, 'inflight': 3072, 'inflight-mem': 12582912}, {'id': 143, 'ifindex': 2, 'inflight': 5568, 'inflight-mem': 22806528}, {'id': 142, 'ifindex': 2, 'inflight': 5120, 'inflight-mem': 20971520}, {'id': 141, 'ifindex': 2, 'inflight': 4992, 'inflight-mem': 20447232}, ... After: [{'id': 144, 'ifindex': 2, 'inflight': 3072, 'inflight-mem': 12582912, 'napi-id': 565}, {'id': 143, 'ifindex': 2, 'inflight': 4224, 'inflight-mem': 17301504, 'napi-id': 525}, {'id': 142, 'ifindex': 2, 'inflight': 4288, 'inflight-mem': 17563648, 'napi-id': 524}, ... Fixes: 86e25f40aa1e ("net: napi: Add napi_config") Reviewed-by: Mina Almasry Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250123231620.1086401-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 1 - net/core/dev.c | 2 +- net/core/page_pool.c | 2 ++ net/core/page_pool_priv.h | 2 ++ net/core/page_pool_user.c | 15 +++++++++------ 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'net/core/page_pool.c') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index ed4cd114180a..7f405672b089 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -237,7 +237,6 @@ struct page_pool { struct { struct hlist_node list; u64 detach_time; - u32 napi_id; u32 id; } user; }; diff --git a/net/core/dev.c b/net/core/dev.c index afa2282f2604..07b2bb1ce64f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6708,7 +6708,7 @@ void napi_resume_irqs(unsigned int napi_id) static void __napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) { - napi->napi_id = napi_id; + WRITE_ONCE(napi->napi_id, napi_id); hlist_add_head_rcu(&napi->napi_hash_node, &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a3de752c5178..ed0f89373259 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1147,7 +1147,9 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); + mutex_unlock(&page_pools_lock); } EXPORT_SYMBOL(page_pool_disable_direct_recycling); diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 57439787b9c2..2fb06d5f6d55 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ -7,6 +7,8 @@ #include "netmem_priv.h" +extern struct mutex page_pools_lock; + s32 page_pool_inflight(const struct page_pool *pool, bool strict); int page_pool_list(struct page_pool *pool); diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 48335766c1bf..6677e0c2e256 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -14,10 +15,11 @@ #include "netdev-genl-gen.h" static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1); -/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user. +/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev, + * pool->user. * Ordering: inside rtnl_lock */ -static DEFINE_MUTEX(page_pools_lock); +DEFINE_MUTEX(page_pools_lock); /* Page pools are only reachable from user space (via netlink) if they are * linked to a netdev at creation time. Following page pool "visibility" @@ -216,6 +218,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; + unsigned int napi_id; void *hdr; hdr = genlmsg_iput(rsp, info); @@ -229,8 +232,10 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, pool->slow.netdev->ifindex)) goto err_cancel; - if (pool->user.napi_id && - nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) + + napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0; + if (napi_id >= MIN_NAPI_ID && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id)) goto err_cancel; inflight = page_pool_inflight(pool, false); @@ -319,8 +324,6 @@ int page_pool_list(struct page_pool *pool) if (pool->slow.netdev) { hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools); - pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0; - netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); } -- cgit v1.2.3