author    | Nicholas Piggin <npiggin@gmail.com>   | 2018-03-07 04:37:12 +0300
committer | Michael Ellerman <mpe@ellerman.id.au> | 2018-03-13 15:43:06 +0300
commit    | 5709f7cfd8305252dc327206bd674ad65ca4d77f (patch)
tree      | 3bff567b21774e60411dbc9722211e121983f9c6 /arch/powerpc/mm/slice.c
parent    | 830fd2d45aa11631325404904d02c7b530d40119 (diff)
download  | linux-5709f7cfd8305252dc327206bd674ad65ca4d77f.tar.xz
powerpc/mm/slice: implement a slice mask cache
Calculating the slice mask can become a significant overhead for
get_unmapped_area. This patch adds a struct slice_mask for
each page size in the mm_context, and keeps these in sync with
the slice psize arrays and slb_addr_limit.
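
As a rough illustration of the trade-off, here is a self-contained userspace sketch, not the kernel code: it models only the 16 low slices, packs one psize per byte instead of the kernel's two-per-byte nibbles, and the names slice_cache, recompute_mask and convert_slice are invented.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_LOW_SLICES 16
#define NUM_PSIZES     4

/* One cached bitmask of "slices with this page size" per page size. */
struct slice_cache {
	uint8_t  psize[NUM_LOW_SLICES]; /* page size index of each slice */
	uint16_t mask[NUM_PSIZES];      /* cached: bit i set if slice i has that psize */
};

/* The slow path the cache avoids: rebuild a mask by scanning every slice. */
static uint16_t recompute_mask(const struct slice_cache *c, int psize)
{
	uint16_t m = 0;
	for (int i = 0; i < NUM_LOW_SLICES; i++)
		if (c->psize[i] == psize)
			m |= 1u << i;
	return m;
}

/* Analogue of slice_convert(): change a slice's psize and keep caches in sync. */
static void convert_slice(struct slice_cache *c, int slice, int new_psize)
{
	c->mask[c->psize[slice]] &= ~(1u << slice);
	c->mask[new_psize] |= 1u << slice;
	c->psize[slice] = new_psize;
}

int main(void)
{
	/* All slices start at psize 0, so its cached mask starts full. */
	struct slice_cache c = { .mask = { [0] = 0xffff } };

	convert_slice(&c, 3, 1);
	convert_slice(&c, 7, 1);

	/* The cached mask matches a full recomputation, but costs O(1) to read. */
	for (int p = 0; p < NUM_PSIZES; p++)
		assert(c.mask[p] == recompute_mask(&c, p));
	printf("psize 1 mask: 0x%04x\n", (unsigned)c.mask[1]);
	return 0;
}
```

The kernel does the same bookkeeping for the high-slice bitmaps with __set_bit()/__clear_bit(), as the slice_convert() hunks below show.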
On Book3S/64 this adds 288 bytes to the mm_context_t for the
slice mask caches.
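
One plausible accounting for that figure, assuming a 64K-page Book3S/64 configuration where SLICE_NUM_HIGH is 512 (512TB tracked in 1TB slices): each struct slice_mask is a u64 of low slices plus a 512-bit high-slice bitmap, 8 + 64 = 72 bytes, and four masks are cached (4K, 64K, 16M, 16G), giving 4 * 72 = 288 bytes.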
On POWER8, this increases vfork+exec+exit performance by 9.9%
and reduces the time to mmap+munmap a 64kB page by 28%.
On 8xx, it reduces the time to mmap+munmap by about 10%.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/mm/slice.c')
-rw-r--r-- | arch/powerpc/mm/slice.c | 112
1 file changed, 70 insertions(+), 42 deletions(-)
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 420d791f0e18..3e199b9cbbfd 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,15 +37,6 @@
 #include <asm/hugetlb.h>
 
 static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
-        u64 low_slices;
-        DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
 
 #ifdef DEBUG
 int _slice_debug = 1;
@@ -149,36 +140,39 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
                         __set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
-                                unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
 {
-        unsigned char *hpsizes, *lpsizes;
-        int index, mask_index;
-        unsigned long i;
-
-        ret->low_slices = 0;
-        if (SLICE_NUM_HIGH)
-                bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
-
-        lpsizes = mm->context.low_slices_psize;
-        for (i = 0; i < SLICE_NUM_LOW; i++) {
-                mask_index = i & 0x1;
-                index = i >> 1;
-                if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-                        ret->low_slices |= 1u << i;
-        }
-
-        if (high_limit <= SLICE_LOW_TOP)
-                return;
-
-        hpsizes = mm->context.high_slices_psize;
-        for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
-                mask_index = i & 0x1;
-                index = i >> 1;
-                if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-                        __set_bit(i, ret->high_slices);
-        }
+#ifdef CONFIG_PPC_64K_PAGES
+        if (psize == MMU_PAGE_64K)
+                return &mm->context.mask_64k;
+#endif
+        if (psize == MMU_PAGE_4K)
+                return &mm->context.mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+        if (psize == MMU_PAGE_16M)
+                return &mm->context.mask_16m;
+        if (psize == MMU_PAGE_16G)
+                return &mm->context.mask_16g;
+#endif
+        BUG();
 }
+#elif defined(CONFIG_PPC_8xx)
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+        if (psize == mmu_virtual_psize)
+                return &mm->context.mask_base_psize;
+#ifdef CONFIG_HUGETLB_PAGE
+        if (psize == MMU_PAGE_512K)
+                return &mm->context.mask_512k;
+        if (psize == MMU_PAGE_8M)
+                return &mm->context.mask_8m;
+#endif
+        BUG();
+}
+#else
+#error "Must define the slice masks for page sizes supported by the platform"
+#endif
 
 static int slice_check_fit(struct mm_struct *mm,
                            const struct slice_mask *mask,
@@ -225,11 +219,15 @@ static void slice_convert(struct mm_struct *mm,
         int index, mask_index;
         /* Write the new slice psize bits */
         unsigned char *hpsizes, *lpsizes;
+        struct slice_mask *psize_mask, *old_mask;
         unsigned long i, flags;
+        int old_psize;
 
         slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
         slice_print_mask(" mask", mask);
 
+        psize_mask = slice_mask_for_size(mm, psize);
+
         /* We need to use a spinlock here to protect against
          * concurrent 64k -> 4k demotion ...
          */
@@ -242,6 +240,14 @@ static void slice_convert(struct mm_struct *mm,
 
                 mask_index = i & 0x1;
                 index = i >> 1;
+
+                /* Update the slice_mask */
+                old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+                old_mask = slice_mask_for_size(mm, old_psize);
+                old_mask->low_slices &= ~(1u << i);
+                psize_mask->low_slices |= 1u << i;
+
+                /* Update the sizes array */
                 lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
                         (((unsigned long)psize) << (mask_index * 4));
         }
@@ -253,6 +259,14 @@ static void slice_convert(struct mm_struct *mm,
 
                 mask_index = i & 0x1;
                 index = i >> 1;
+
+                /* Update the slice_mask */
+                old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+                old_mask = slice_mask_for_size(mm, old_psize);
+                __clear_bit(i, old_mask->high_slices);
+                __set_bit(i, psize_mask->high_slices);
+
+                /* Update the sizes array */
                 hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
                         (((unsigned long)psize) << (mask_index * 4));
         }
@@ -463,7 +477,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         }
 
         if (high_limit > mm->context.slb_addr_limit) {
+                /*
+                 * Increasing the slb_addr_limit does not require
+                 * slice mask cache to be recalculated because it should
+                 * be already initialised beyond the old address limit.
+                 */
                 mm->context.slb_addr_limit = high_limit;
+
                 on_each_cpu(slice_flush_segments, mm, 1);
         }
 
@@ -505,7 +525,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         /* First make up a "good" mask of slices that have the right size
          * already
          */
-        slice_mask_for_size(mm, psize, &good_mask, high_limit);
+        good_mask = *slice_mask_for_size(mm, psize);
         slice_print_mask(" good_mask", &good_mask);
 
         /*
@@ -530,7 +550,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
         /* If we support combo pages, we can allow 64k pages in 4k slices */
         if (psize == MMU_PAGE_64K) {
-                slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+                compat_mask = *slice_mask_for_size(mm, MMU_PAGE_4K);
                 if (fixed)
                         slice_or_mask(&good_mask, &compat_mask);
         }
@@ -682,6 +702,7 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
 void slice_init_new_context_exec(struct mm_struct *mm)
 {
         unsigned char *hpsizes, *lpsizes;
+        struct slice_mask *mask;
         unsigned int psize = mmu_virtual_psize;
 
         slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
@@ -707,6 +728,14 @@ void slice_init_new_context_exec(struct mm_struct *mm)
 
         hpsizes = mm->context.high_slices_psize;
         memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
+
+        /*
+         * Slice mask cache starts zeroed, fill the default size cache.
+         */
+        mask = slice_mask_for_size(mm, psize);
+        mask->low_slices = ~0UL;
+        if (SLICE_NUM_HIGH)
+                bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
 }
 
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -745,18 +774,17 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 {
         struct slice_mask mask, available;
         unsigned int psize = mm->context.user_psize;
-        unsigned long high_limit = mm->context.slb_addr_limit;
 
         if (radix_enabled())
                 return 0;
 
         slice_range_to_mask(addr, len, &mask);
-        slice_mask_for_size(mm, psize, &available, high_limit);
+        available = *slice_mask_for_size(mm, psize);
 #ifdef CONFIG_PPC_64K_PAGES
         /* We need to account for 4k slices too */
         if (psize == MMU_PAGE_64K) {
                 struct slice_mask compat_mask;
-                slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+                compat_mask = *slice_mask_for_size(mm, MMU_PAGE_4K);
                 slice_or_mask(&available, &compat_mask);
         }
 #endif
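
The diffstat above is limited to slice.c, so the companion mm_context_t change is not shown here. Going only by the fields the new slice_mask_for_size() dereferences, the Book3S/64 context presumably gains something along these lines (a sketch with an invented wrapper struct name, using a plain array in place of the kernel's DECLARE_BITMAP()):

```c
#include <stdint.h>

#define SLICE_NUM_HIGH 512 /* 512TB address space tracked in 1TB slices */

struct slice_mask {
	uint64_t low_slices;                       /* one bit per 256MB low slice */
	uint64_t high_slices[SLICE_NUM_HIGH / 64]; /* one bit per 1TB high slice */
};

/* Per-psize caches that slice_mask_for_size() hands out on Book3S/64. */
struct mm_context_slice_caches {
#ifdef CONFIG_PPC_64K_PAGES
	struct slice_mask mask_64k;
#endif
	struct slice_mask mask_4k;
#ifdef CONFIG_HUGETLB_PAGE
	struct slice_mask mask_16m;
	struct slice_mask mask_16g;
#endif
};
```

The 8xx side would instead carry mask_base_psize, mask_512k and mask_8m, per the #elif branch in the diff above.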