author      David S. Miller <davem@davemloft.net>    2020-01-03 02:37:53 +0300
committer   David S. Miller <davem@davemloft.net>    2020-01-03 02:37:53 +0300
commit      c9a2069b1dbadf7cb85da0fd9df62d25e3642f03 (patch)
tree        9284b79bba0aa682bb0019f1165a5fb50c6edace
parent      fe23d63422c83cd7c8154dc7faef6af97be4b948 (diff)
parent      f13fc10785bc04f83fe0d7730beb8ff4be563a20 (diff)
download    linux-c9a2069b1dbadf7cb85da0fd9df62d25e3642f03.tar.xz
Merge branch 'page_pool-NUMA-node-handling-fixes'
Jesper Dangaard Brouer says:
====================
page_pool: NUMA node handling fixes
The NUMA changes recently added to page_pool (merged for v5.5) both
contain a bug in handling the NUMA_NO_NODE condition and add code to
the fast-path.
This patchset fixes the bug and moves code out of the fast-path. The first
patch contains a fix that should be considered for 5.5. The second
patch reduces code size and overhead when CONFIG_NUMA is disabled.
Currently the NUMA_NO_NODE bug only affects the 'ti_cpsw' driver
(drivers/net/ethernet/ti/), but after this patchset we plan to move
other drivers (netsec and mvneta) to the NUMA_NO_NODE setting (see the
driver-side sketch after the sign-off below).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
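
For context (not part of the patchset): before this series, pool_page_reusable()
compared page_to_nid(page) against pool->p.nid, so a pool created with
nid = NUMA_NO_NODE (-1) could never recycle a page, because page_to_nid()
always returns a real node id. Below is a minimal, hypothetical sketch of the
driver-side setup the cover letter refers to; the function name, ring size and
device pointer are illustrative, and only struct page_pool_params,
page_pool_create() and NUMA_NO_NODE are real kernel interfaces.

/* Sketch only: a driver creating its RX page_pool with NUMA_NO_NODE so the
 * pool itself resolves the node (via numa_mem_id()) at refill time, as the
 * new page_pool_refill_alloc_cache() in the diff below does. The ring size
 * and helper name are hypothetical.
 */
#include <net/page_pool.h>

static struct page_pool *my_rx_pool_create(struct device *dev)
{
        struct page_pool_params pp_params = {
                .flags     = PP_FLAG_DMA_MAP,   /* let page_pool DMA-map pages */
                .order     = 0,                 /* order-0 pages */
                .pool_size = 256,               /* hypothetical RX ring size */
                .nid       = NUMA_NO_NODE,      /* defer NUMA decision to the pool */
                .dev       = dev,
                .dma_dir   = DMA_FROM_DEVICE,
        };

        /* Returns ERR_PTR() on failure */
        return page_pool_create(&pp_params);
}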
-rw-r--r--   net/core/page_pool.c   89
1 file changed, 70 insertions(+), 19 deletions(-)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a6aefe989043..9b7cbe35df37 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -96,10 +96,65 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
 }
 EXPORT_SYMBOL(page_pool_create);
 
+static void __page_pool_return_page(struct page_pool *pool, struct page *page);
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
+                                                 bool refill)
+{
+        struct ptr_ring *r = &pool->ring;
+        struct page *page;
+        int pref_nid; /* preferred NUMA node */
+
+        /* Quicker fallback, avoid locks when ring is empty */
+        if (__ptr_ring_empty(r))
+                return NULL;
+
+        /* Softirq guarantees the CPU and thus the NUMA node is stable. This
+         * assumes the CPU refilling the driver RX-ring will also run RX-NAPI.
+         */
+#ifdef CONFIG_NUMA
+        pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
+#else
+        /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
+        pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
+#endif
+
+        /* Slower-path: Get pages from locked ring queue */
+        spin_lock(&r->consumer_lock);
+
+        /* Refill alloc array, but only if NUMA match */
+        do {
+                page = __ptr_ring_consume(r);
+                if (unlikely(!page))
+                        break;
+
+                if (likely(page_to_nid(page) == pref_nid)) {
+                        pool->alloc.cache[pool->alloc.count++] = page;
+                } else {
+                        /* NUMA mismatch;
+                         * (1) release 1 page to page-allocator and
+                         * (2) break out to fall through to alloc_pages_node.
+                         * This limits stress on the page buddy allocator.
+                         */
+                        __page_pool_return_page(pool, page);
+                        page = NULL;
+                        break;
+                }
+        } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL &&
+                 refill);
+
+        /* Return last page */
+        if (likely(pool->alloc.count > 0))
+                page = pool->alloc.cache[--pool->alloc.count];
+
+        spin_unlock(&r->consumer_lock);
+        return page;
+}
+
 /* fast path */
 static struct page *__page_pool_get_cached(struct page_pool *pool)
 {
-        struct ptr_ring *r = &pool->ring;
         bool refill = false;
         struct page *page;
 
@@ -113,20 +168,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
                 refill = true;
         }
 
-        /* Quicker fallback, avoid locks when ring is empty */
-        if (__ptr_ring_empty(r))
-                return NULL;
-
-        /* Slow-path: Get page from locked ring queue,
-         * refill alloc array if requested.
-         */
-        spin_lock(&r->consumer_lock);
-        page = __ptr_ring_consume(r);
-        if (refill)
-                pool->alloc.count = __ptr_ring_consume_batched(r,
-                                                        pool->alloc.cache,
-                                                        PP_ALLOC_CACHE_REFILL);
-        spin_unlock(&r->consumer_lock);
+        page = page_pool_refill_alloc_cache(pool, refill);
 
         return page;
 }
@@ -163,7 +205,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
          */
 
         /* Cache was empty, do real allocation */
+#ifdef CONFIG_NUMA
         page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+#else
+        page = alloc_pages(gfp, pool->p.order);
+#endif
         if (!page)
                 return NULL;
 
@@ -311,13 +357,10 @@ static bool __page_pool_recycle_direct(struct page *page,
 
 /* page is NOT reusable when:
  * 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- * 2) belongs to a different NUMA node than pool->p.nid.
- *
- * To update pool->p.nid users must call page_pool_update_nid.
  */
 static bool pool_page_reusable(struct page_pool *pool, struct page *page)
 {
-        return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
+        return !page_is_pfmemalloc(page);
 }
 
 void __page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -484,7 +527,15 @@ EXPORT_SYMBOL(page_pool_destroy);
 
 /* Caller must provide appropriate safe context, e.g. NAPI. */
 void page_pool_update_nid(struct page_pool *pool, int new_nid)
 {
+        struct page *page;
+
         trace_page_pool_update_nid(pool, new_nid);
         pool->p.nid = new_nid;
+
+        /* Flush pool alloc cache, as refill will check NUMA node */
+        while (pool->alloc.count) {
+                page = pool->alloc.cache[--pool->alloc.count];
+                __page_pool_return_page(pool, page);
+        }
 }
 EXPORT_SYMBOL(page_pool_update_nid);
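
A hedged sketch of the caller side of page_pool_update_nid() after this merge:
the comment above requires a safe context such as NAPI, so a driver whose RX
processing has migrated to another NUMA node could do something like the
following from its poll function. Everything except page_pool_update_nid() and
numa_mem_id() is a hypothetical driver skeleton, not code from this commit.

#include <linux/netdevice.h>
#include <net/page_pool.h>

/* Hypothetical per-queue state; only the page_pool pointer is the real API. */
struct my_rx_queue {
        struct napi_struct napi;
        struct page_pool *page_pool;
        int pool_nid;           /* node the pool was last told about */
};

static int my_napi_poll(struct napi_struct *napi, int budget)
{
        struct my_rx_queue *rxq = container_of(napi, struct my_rx_queue, napi);
        int cur_nid = numa_mem_id();

        /* RX processing moved to another NUMA node: tell the pool, which
         * (after this merge) flushes its alloc cache and refills pages
         * from the new preferred node.
         */
        if (rxq->pool_nid != cur_nid) {
                page_pool_update_nid(rxq->page_pool, cur_nid);
                rxq->pool_nid = cur_nid;
        }

        /* ... normal RX work up to 'budget' packets would go here ... */
        return 0;
}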