summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/hash_utils_64.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2013-06-20 13:00:22 +0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-06-21 10:01:56 +0400
commit0ac52dd7666d5c0d0147d73a8e4b1d1ffd81cdf3 (patch)
tree124824b5deed8f2ee0c0c56305283496099a4774 /arch/powerpc/mm/hash_utils_64.c
parent6d492ecc6489113968ec269be1cf88942d4a5d29 (diff)
downloadlinux-0ac52dd7666d5c0d0147d73a8e4b1d1ffd81cdf3.tar.xz
powerpc: Make linux pagetable walk safe with THP enabled
We need to have irqs disabled to handle all the possible parallel updates to the Linux page table without holding locks. Events that we are interested in while walking page tables are 1) Page fault 2) unmap 3) THP split 4) THP collapse A) local_irq_disabled: ------------------------ 1) page fault: A none to valid transition via page fault is not an issue because we would either see a none or valid. If it is none, we would error out the page table walk. We may need to use on-stack values when checking for the type of page table elements, because if we do if (!is_hugepd()) { if (!pmd_none()) { if (pmd_bad()) { We could take that bad condition because the pmd got converted to a hugepd after the !is_hugepd check via a hugetlb fault. The right way would be to check for pmd_none higher up or use an on-stack value. 2) A valid to none conversion via unmap: We can safely walk the upper level table, because we don't remove the page table entries until the RCU grace period. So even if we followed a wrong pointer we still have the pointer valid till the grace period. A PTE pointer returned needs to be atomically checked for _PAGE_PRESENT and _PAGE_BUSY. A valid pointer returned could become none later. To prevent pte_clear we take _PAGE_BUSY. 3) THP split: A valid transparent hugepage is converted to a normal page. Before we split we do pmd_splitting_flush, which sets the hugepage PTE to _PAGE_SPLITTING. So when walking the page table we need to check for pmd_trans_splitting and handle that. The pte returned also needs to be checked for _PAGE_SPLITTING before setting _PAGE_BUSY, similar to _PAGE_PRESENT. We save the value of the PTE on stack and check for the flag in the local pte value. If we don't have the value set we can safely operate on the local pte value and we atomically set _PAGE_BUSY. 4) THP collapse: A normal page gets converted to a hugepage. In the collapse path, we mark the pmd none early (pmdp_clear_flush). 
With irqs disabled, if we are already walking the page table we would see the pmd_none and won't continue. If we see a valid PMD, we should still check for _PAGE_PRESENT before setting _PAGE_BUSY, to make sure we didn't collapse the PTE to a Huge PTE. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm/hash_utils_64.c')
-rw-r--r--arch/powerpc/mm/hash_utils_64.c27
1 files changed, 15 insertions, 12 deletions
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7a81e866e7b1..845231643987 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1180,13 +1180,25 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
pgdir = mm->pgd;
if (pgdir == NULL)
return;
+
+ /* Get VSID */
+ ssize = user_segment_size(ea);
+ vsid = get_vsid(mm->context.id, ea, ssize);
+ if (!vsid)
+ return;
+ /*
+ * Hash doesn't like irqs. Walking linux page table with irq disabled
+ * saves us from holding multiple locks.
+ */
+ local_irq_save(flags);
+
/*
* THP pages use update_mmu_cache_pmd. We don't do
* hash preload there. Hence can ignore THP here
*/
ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift);
if (!ptep)
- return;
+ goto out_exit;
WARN_ON(hugepage_shift);
#ifdef CONFIG_PPC_64K_PAGES
@@ -1197,18 +1209,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
* page size demotion here
*/
if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
- return;
+ goto out_exit;
#endif /* CONFIG_PPC_64K_PAGES */
- /* Get VSID */
- ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
- if (!vsid)
- return;
-
- /* Hash doesn't like irqs */
- local_irq_save(flags);
-
/* Is that local to this CPU ? */
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
local = 1;
@@ -1230,7 +1233,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
mm->context.user_psize,
mm->context.user_psize,
pte_val(*ptep));
-
+out_exit:
local_irq_restore(flags);
}