summaryrefslogtreecommitdiff
path: root/mm/percpu-vm.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-10 15:26:02 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-10 15:26:02 +0400
commitc798360cd1438090d51eeaa8e67985da11362eba (patch)
tree0107d3b9ee7476264c3357287787d393545bd2d9 /mm/percpu-vm.c
parentb211e9d7c861bdb37b86d6384da9edfb80949ceb (diff)
parent6ae833c7fe0c6ef1f0ab13cc775da230d6f4c256 (diff)
downloadlinux-c798360cd1438090d51eeaa8e67985da11362eba.tar.xz
Merge branch 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu updates from Tejun Heo: "A lot of activities on percpu front. Notable changes are... - percpu allocator now can take @gfp. If @gfp doesn't contain GFP_KERNEL, it tries to allocate from what's already available to the allocator and a work item tries to keep the reserve around certain level so that these atomic allocations usually succeed. This will replace the ad-hoc percpu memory pool used by blk-throttle and also be used by the planned blkcg support for writeback IOs. Please note that I noticed a bug in how @gfp is interpreted while preparing this pull request and applied the fix 6ae833c7fe0c ("percpu: fix how @gfp is interpreted by the percpu allocator") just now. - percpu_ref now uses longs for percpu and global counters instead of ints. It leads to more sparse packing of the percpu counters on 64bit machines but the overhead should be negligible and this allows using percpu_ref for refcnting pages and in-memory objects directly. - The switching between percpu and single counter modes of a percpu_ref is made independent of putting the base ref and a percpu_ref can now optionally be initialized in single or killed mode. This allows avoiding percpu shutdown latency for cases where the refcounted objects may be synchronously created and destroyed in rapid succession with only a fraction of them reaching fully operational status (SCSI probing does this when combined with blk-mq support). It's also planned to be used to implement forced single mode to detect underflow more timely for debugging. There's a separate branch percpu/for-3.18-consistent-ops which cleans up the duplicate percpu accessors. That branch causes a number of conflicts with s390 and other trees. I'll send a separate pull request w/ resolutions once other branches are merged" * 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (33 commits) percpu: fix how @gfp is interpreted by the percpu allocator blk-mq, percpu_ref: start q->mq_usage_counter in atomic mode percpu_ref: make INIT_ATOMIC and switch_to_atomic() sticky percpu_ref: add PERCPU_REF_INIT_* flags percpu_ref: decouple switching to percpu mode and reinit percpu_ref: decouple switching to atomic mode and killing percpu_ref: add PCPU_REF_DEAD percpu_ref: rename things to prepare for decoupling percpu/atomic mode switch percpu_ref: replace pcpu_ prefix with percpu_ percpu_ref: minor code and comment updates percpu_ref: relocate percpu_ref_reinit() Revert "blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe" Revert "percpu: free percpu allocation info for uniprocessor system" percpu-refcount: make percpu_ref based on longs instead of ints percpu-refcount: improve WARN messages percpu: fix locking regression in the failure path of pcpu_alloc() percpu-refcount: add @gfp to percpu_ref_init() proportions: add @gfp to init functions percpu_counter: add @gfp to percpu_counter_init() percpu_counter: make percpu_counters_lock irq-safe ...
Diffstat (limited to 'mm/percpu-vm.c')
-rw-r--r--mm/percpu-vm.c162
1 files changed, 35 insertions, 127 deletions
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 51108165f829..538998a137d2 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -20,46 +20,25 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
}
/**
- * pcpu_get_pages_and_bitmap - get temp pages array and bitmap
+ * pcpu_get_pages - get temp pages array
* @chunk: chunk of interest
- * @bitmapp: output parameter for bitmap
- * @may_alloc: may allocate the array
*
- * Returns pointer to array of pointers to struct page and bitmap,
- * both of which can be indexed with pcpu_page_idx(). The returned
- * array is cleared to zero and *@bitmapp is copied from
- * @chunk->populated. Note that there is only one array and bitmap
- * and access exclusion is the caller's responsibility.
- *
- * CONTEXT:
- * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc.
- * Otherwise, don't care.
+ * Returns pointer to array of pointers to struct page which can be indexed
+ * with pcpu_page_idx(). Note that there is only one array and accesses
+ * should be serialized by pcpu_alloc_mutex.
*
* RETURNS:
- * Pointer to temp pages array on success, NULL on failure.
+ * Pointer to temp pages array on success.
*/
-static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
- unsigned long **bitmapp,
- bool may_alloc)
+static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
{
static struct page **pages;
- static unsigned long *bitmap;
size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
- size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) *
- sizeof(unsigned long);
-
- if (!pages || !bitmap) {
- if (may_alloc && !pages)
- pages = pcpu_mem_zalloc(pages_size);
- if (may_alloc && !bitmap)
- bitmap = pcpu_mem_zalloc(bitmap_size);
- if (!pages || !bitmap)
- return NULL;
- }
- bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
+ lockdep_assert_held(&pcpu_alloc_mutex);
- *bitmapp = bitmap;
+ if (!pages)
+ pages = pcpu_mem_zalloc(pages_size);
return pages;
}
@@ -67,7 +46,6 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
* pcpu_free_pages - free pages which were allocated for @chunk
* @chunk: chunk pages were allocated for
* @pages: array of pages to be freed, indexed by pcpu_page_idx()
- * @populated: populated bitmap
* @page_start: page index of the first page to be freed
* @page_end: page index of the last page to be freed + 1
*
@@ -75,8 +53,7 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
* The pages were allocated for @chunk.
*/
static void pcpu_free_pages(struct pcpu_chunk *chunk,
- struct page **pages, unsigned long *populated,
- int page_start, int page_end)
+ struct page **pages, int page_start, int page_end)
{
unsigned int cpu;
int i;
@@ -95,7 +72,6 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
* pcpu_alloc_pages - allocates pages for @chunk
* @chunk: target chunk
* @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
- * @populated: populated bitmap
* @page_start: page index of the first page to be allocated
* @page_end: page index of the last page to be allocated + 1
*
@@ -104,8 +80,7 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
* content of @pages and will pass it verbatim to pcpu_map_pages().
*/
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
- struct page **pages, unsigned long *populated,
- int page_start, int page_end)
+ struct page **pages, int page_start, int page_end)
{
const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
unsigned int cpu, tcpu;
@@ -164,7 +139,6 @@ static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
* pcpu_unmap_pages - unmap pages out of a pcpu_chunk
* @chunk: chunk of interest
* @pages: pages array which can be used to pass information to free
- * @populated: populated bitmap
* @page_start: page index of the first page to unmap
* @page_end: page index of the last page to unmap + 1
*
@@ -175,8 +149,7 @@ static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
* proper pre/post flush functions.
*/
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
- struct page **pages, unsigned long *populated,
- int page_start, int page_end)
+ struct page **pages, int page_start, int page_end)
{
unsigned int cpu;
int i;
@@ -192,8 +165,6 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
page_end - page_start);
}
-
- bitmap_clear(populated, page_start, page_end - page_start);
}
/**
@@ -228,7 +199,6 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages,
* pcpu_map_pages - map pages into a pcpu_chunk
* @chunk: chunk of interest
* @pages: pages array containing pages to be mapped
- * @populated: populated bitmap
* @page_start: page index of the first page to map
* @page_end: page index of the last page to map + 1
*
@@ -236,13 +206,11 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages,
* caller is responsible for calling pcpu_post_map_flush() after all
* mappings are complete.
*
- * This function is responsible for setting corresponding bits in
- * @chunk->populated bitmap and whatever is necessary for reverse
- * lookup (addr -> chunk).
+ * This function is responsible for setting up whatever is necessary for
+ * reverse lookup (addr -> chunk).
*/
static int pcpu_map_pages(struct pcpu_chunk *chunk,
- struct page **pages, unsigned long *populated,
- int page_start, int page_end)
+ struct page **pages, int page_start, int page_end)
{
unsigned int cpu, tcpu;
int i, err;
@@ -253,18 +221,12 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
page_end - page_start);
if (err < 0)
goto err;
- }
- /* mapping successful, link chunk and mark populated */
- for (i = page_start; i < page_end; i++) {
- for_each_possible_cpu(cpu)
+ for (i = page_start; i < page_end; i++)
pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
chunk);
- __set_bit(i, populated);
}
-
return 0;
-
err:
for_each_possible_cpu(tcpu) {
if (tcpu == cpu)
@@ -299,123 +261,69 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
/**
* pcpu_populate_chunk - populate and map an area of a pcpu_chunk
* @chunk: chunk of interest
- * @off: offset to the area to populate
- * @size: size of the area to populate in bytes
+ * @page_start: the start page
+ * @page_end: the end page
*
* For each cpu, populate and map pages [@page_start,@page_end) into
- * @chunk. The area is cleared on return.
+ * @chunk.
*
* CONTEXT:
* pcpu_alloc_mutex, does GFP_KERNEL allocation.
*/
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end)
{
- int page_start = PFN_DOWN(off);
- int page_end = PFN_UP(off + size);
- int free_end = page_start, unmap_end = page_start;
struct page **pages;
- unsigned long *populated;
- unsigned int cpu;
- int rs, re, rc;
-
- /* quick path, check whether all pages are already there */
- rs = page_start;
- pcpu_next_pop(chunk, &rs, &re, page_end);
- if (rs == page_start && re == page_end)
- goto clear;
- /* need to allocate and map pages, this chunk can't be immutable */
- WARN_ON(chunk->immutable);
-
- pages = pcpu_get_pages_and_bitmap(chunk, &populated, true);
+ pages = pcpu_get_pages(chunk);
if (!pages)
return -ENOMEM;
- /* alloc and map */
- pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
- rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
- if (rc)
- goto err_free;
- free_end = re;
- }
+ if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+ return -ENOMEM;
- pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
- rc = pcpu_map_pages(chunk, pages, populated, rs, re);
- if (rc)
- goto err_unmap;
- unmap_end = re;
+ if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
+ pcpu_free_pages(chunk, pages, page_start, page_end);
+ return -ENOMEM;
}
pcpu_post_map_flush(chunk, page_start, page_end);
- /* commit new bitmap */
- bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
-clear:
- for_each_possible_cpu(cpu)
- memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
return 0;
-
-err_unmap:
- pcpu_pre_unmap_flush(chunk, page_start, unmap_end);
- pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end)
- pcpu_unmap_pages(chunk, pages, populated, rs, re);
- pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end);
-err_free:
- pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end)
- pcpu_free_pages(chunk, pages, populated, rs, re);
- return rc;
}
/**
* pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
* @chunk: chunk to depopulate
- * @off: offset to the area to depopulate
- * @size: size of the area to depopulate in bytes
+ * @page_start: the start page
+ * @page_end: the end page
*
* For each cpu, depopulate and unmap pages [@page_start,@page_end)
- * from @chunk. If @flush is true, vcache is flushed before unmapping
- * and tlb after.
+ * from @chunk.
*
* CONTEXT:
* pcpu_alloc_mutex.
*/
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end)
{
- int page_start = PFN_DOWN(off);
- int page_end = PFN_UP(off + size);
struct page **pages;
- unsigned long *populated;
- int rs, re;
-
- /* quick path, check whether it's empty already */
- rs = page_start;
- pcpu_next_unpop(chunk, &rs, &re, page_end);
- if (rs == page_start && re == page_end)
- return;
-
- /* immutable chunks can't be depopulated */
- WARN_ON(chunk->immutable);
/*
* If control reaches here, there must have been at least one
* successful population attempt so the temp pages array must
* be available now.
*/
- pages = pcpu_get_pages_and_bitmap(chunk, &populated, false);
+ pages = pcpu_get_pages(chunk);
BUG_ON(!pages);
/* unmap and free */
pcpu_pre_unmap_flush(chunk, page_start, page_end);
- pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
- pcpu_unmap_pages(chunk, pages, populated, rs, re);
+ pcpu_unmap_pages(chunk, pages, page_start, page_end);
/* no need to flush tlb, vmalloc will handle it lazily */
- pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
- pcpu_free_pages(chunk, pages, populated, rs, re);
-
- /* commit new bitmap */
- bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
+ pcpu_free_pages(chunk, pages, page_start, page_end);
}
static struct pcpu_chunk *pcpu_create_chunk(void)