summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/hash_utils_64.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2014-12-04 08:30:14 +0300
committerMichael Ellerman <mpe@ellerman.id.au>2014-12-05 08:26:15 +0300
commitaefa5688c070727b8729de1aef85cad7b9933fc7 (patch)
treed6036007b0eca0b325aa3f050defb00f89341209 /arch/powerpc/mm/hash_utils_64.c
parentabb90ee7bca5af977b90a0c6be44f631fdacc932 (diff)
downloadlinux-aefa5688c070727b8729de1aef85cad7b9933fc7.tar.xz
powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault
upatepp can get called for a nohpte fault when we find from the linux page table that the translation was hashed before. In that case we are sure that there is no existing translation, hence we could avoid doing tlbie. We could possibly race with a parallel fault filling the TLB. But that should be ok because updatepp is only ever relaxing permissions. We also look at linux pte permission bits when filling hash pte permission bits. We also hold the linux pte busy bits while inserting/updating a hashpte entry, hence a paralle update of linux pte is not possible. On the other hand mprotect involves ptep_modify_prot_start which cause a hpte invalidate and not updatepp. Performance number: We use randbox_access_bench written by Anton. Kernel with THP disabled and smaller hash page table size. 86.60% random_access_b [kernel.kallsyms] [k] .native_hpte_updatepp 2.10% random_access_b random_access_bench [.] doit 1.99% random_access_b [kernel.kallsyms] [k] .do_raw_spin_lock 1.85% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 1.26% random_access_b [kernel.kallsyms] [k] .native_flush_hash_range 1.18% random_access_b [kernel.kallsyms] [k] .__delay 0.69% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 0.37% random_access_b [kernel.kallsyms] [k] .clear_user_page 0.34% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 0.32% random_access_b [kernel.kallsyms] [k] fast_exception_return 0.30% random_access_b [kernel.kallsyms] [k] .hash_page_mm With Fix: 27.54% random_access_b random_access_bench [.] doit 22.90% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 5.76% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 5.20% random_access_b [kernel.kallsyms] [k] fast_exception_return 5.12% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 4.80% random_access_b [kernel.kallsyms] [k] .hash_page_mm 3.31% random_access_b [kernel.kallsyms] [k] data_access_common 1.84% random_access_b [kernel.kallsyms] [k] .trace_hardirqs_on_caller Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/mm/hash_utils_64.c')
-rw-r--r--arch/powerpc/mm/hash_utils_64.c44
1 files changed, 28 insertions, 16 deletions
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 68211d398fdb..e56a307bc676 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
* -1 - critical hash insertion error
* -2 - access not permitted by subpage protection mechanism
*/
-int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap,
+ unsigned long flags)
{
enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
@@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
pte_t *ptep;
unsigned hugeshift;
const struct cpumask *tmp;
- int rc, user_region = 0, local = 0;
+ int rc, user_region = 0;
int psize, ssize;
DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
@@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
/* Check CPU locality */
tmp = cpumask_of(smp_processor_id());
if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
- local = 1;
+ flags |= HPTE_LOCAL_UPDATE;
#ifndef CONFIG_PPC_64K_PAGES
/* If we use 4K pages and our psize is not 4K, then we might
@@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
if (hugeshift) {
if (pmd_trans_huge(*(pmd_t *)ptep))
rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
- trap, local, ssize, psize);
+ trap, flags, ssize, psize);
#ifdef CONFIG_HUGETLB_PAGE
else
rc = __hash_page_huge(ea, access, vsid, ptep, trap,
- local, ssize, hugeshift, psize);
+ flags, ssize, hugeshift, psize);
#else
else {
/*
@@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
#ifdef CONFIG_PPC_HAS_HASH_64K
if (psize == MMU_PAGE_64K)
- rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+ flags, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
{
@@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
rc = -2;
else
rc = __hash_page_4K(ea, access, vsid, ptep, trap,
- local, ssize, spp);
+ flags, ssize, spp);
}
/* Dump some info in case of hash insertion failure, they should
@@ -1181,14 +1184,19 @@ bail:
}
EXPORT_SYMBOL_GPL(hash_page_mm);
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+ unsigned long dsisr)
{
+ unsigned long flags = 0;
struct mm_struct *mm = current->mm;
if (REGION_ID(ea) == VMALLOC_REGION_ID)
mm = &init_mm;
- return hash_page_mm(mm, ea, access, trap);
+ if (dsisr & DSISR_NOHPTE)
+ flags |= HPTE_NOHPTE_UPDATE;
+
+ return hash_page_mm(mm, ea, access, trap, flags);
}
EXPORT_SYMBOL_GPL(hash_page);
@@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
pgd_t *pgdir;
pte_t *ptep;
unsigned long flags;
- int rc, ssize, local = 0;
+ int rc, ssize, update_flags = 0;
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
@@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Is that local to this CPU ? */
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- local = 1;
+ update_flags |= HPTE_LOCAL_UPDATE;
/* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K
if (mm->context.user_psize == MMU_PAGE_64K)
- rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+ update_flags, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
- subpage_protection(mm, ea));
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags,
+ ssize, subpage_protection(mm, ea));
/* Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
@@ -1278,9 +1287,10 @@ out_exit:
* do not forget to update the assembly call site !
*/
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
- int local)
+ unsigned long flags)
{
unsigned long hash, index, shift, hidx, slot;
+ int local = flags & HPTE_LOCAL_UPDATE;
DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
@@ -1317,12 +1327,14 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
- pmd_t *pmdp, unsigned int psize, int ssize, int local)
+ pmd_t *pmdp, unsigned int psize, int ssize,
+ unsigned long flags)
{
int i, max_hpte_count, valid;
unsigned long s_addr;
unsigned char *hpte_slot_array;
unsigned long hidx, shift, vpn, hash, slot;
+ int local = flags & HPTE_LOCAL_UPDATE;
s_addr = addr & HPAGE_PMD_MASK;
hpte_slot_array = get_hpte_slot_array(pmdp);