author      Jason Gunthorpe <jgg@nvidia.com>    2020-09-17 21:12:22 +0300
committer   Jason Gunthorpe <jgg@nvidia.com>    2020-09-18 16:31:45 +0300
commit      5dee5872f87552cfb173c899d35fc1413c2aa77f (patch)
tree        273a8551247785dbec22d1e7c55e2b636e1c9e8e /arch/x86/mm/fault.c
parent      c0a6b5ecc5b7dd028c2921415ea036074a8f8b00 (diff)
parent      376ceb31ff879c2ee6b48eef841d6fa7720f6f43 (diff)
download    linux-5dee5872f87552cfb173c899d35fc1413c2aa77f.tar.xz
Merge branch 'mlx5_active_speed' into rdma.git for-next
Leon Romanovsky says:
====================
IBTA declares speed as 16 bits, but the kernel stores it in a u8. This series
fixes the in-kernel declaration while keeping the external interface intact.
====================
Based on the mlx5-next branch at
git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
due to dependencies.
* branch 'mlx5_active_speed':
RDMA: Fix link active_speed size
RDMA/mlx5: Delete duplicated mlx5_ptys_width enum
net/mlx5: Refactor query port speed functions
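
For illustration, a minimal sketch of the type change the series describes
(hedged: a simplified model, not the exact kernel code; the struct and field
names mirror include/rdma/ib_verbs.h and include/uapi/rdma/ib_user_verbs.h,
and fill_port_resp() is a hypothetical helper for this sketch):

/*
 * IBTA defines the active link speed as a 16-bit quantity, so the
 * in-kernel attribute is widened to 16 bits, while the uverbs ABI
 * keeps its 8-bit field so existing userspace is unaffected.
 */
#include <stdint.h>

struct ib_port_attr {                 /* in-kernel representation */
	uint16_t active_speed;        /* widened from u8 to u16 */
	uint8_t  active_width;
};

struct ib_uverbs_query_port_resp {    /* external (uverbs) ABI */
	uint8_t active_speed;         /* unchanged: still 8 bits */
};

static void fill_port_resp(const struct ib_port_attr *attr,
			   struct ib_uverbs_query_port_resp *resp)
{
	/* The external interface stays intact: the wider in-kernel
	 * value is narrowed back to the legacy 8-bit field here. */
	resp->active_speed = (uint8_t)attr->active_speed;
}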
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--	arch/x86/mm/fault.c	78
1 file changed, 78 insertions, 0 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..6e3e8a124903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	return pmd_k;
 }
 
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This is needed because there is a race condition between the time
+ * when the vmalloc mapping code updates the PMD to the point in time
+ * where it synchronizes this update with the other page-tables in the
+ * system.
+ *
+ * In this race window another thread/CPU can map an area on the same
+ * PMD, finds it already present and does not synchronize it with the
+ * rest of the system yet. As a result v[mz]alloc might return areas
+ * which are not mapped in every page-table in the system, causing an
+ * unhandled page-fault when they are accessed.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+
+	/* Make sure we are in vmalloc area: */
+	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+		return -1;
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3_pa();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+
+	if (pmd_large(*pmd_k))
+		return 0;
+
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+
+	return 0;
+}
+NOKPROBE_SYMBOL(vmalloc_fault);
+
 void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
@@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	 */
 	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
 
+#ifdef CONFIG_X86_32
+	/*
+	 * We can fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * Before doing this on-demand faulting, ensure that the
+	 * fault is not any of the following:
+	 * 1. A fault on a PTE with a reserved bit set.
+	 * 2. A fault caused by a user-mode access.  (Do not demand-
+	 *    fault kernel memory due to user-mode accesses).
+	 * 3. A fault caused by a page-level protection violation.
+	 *    (A demand fault would be on a non-present page which
+	 *     would have X86_PF_PROT==0).
+	 *
+	 * This is only needed to close a race condition on x86-32 in
+	 * the vmalloc mapping/unmapping code. See the comment above
+	 * vmalloc_fault() for details. On x86-64 the race does not
+	 * exist as the vmalloc mappings don't need to be synchronized
+	 * there.
+	 */
+	if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+		if (vmalloc_fault(address) >= 0)
+			return;
+	}
+#endif
+
 	/* Was the fault spurious, caused by lazy TLB invalidation? */
 	if (spurious_kernel_fault(hw_error_code, address))
 		return;
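
For context on the x86 part of this merge, a hedged sketch of the race the
re-added vmalloc_fault() closes (an illustrative timeline in comment form,
not kernel code; the function names match the diff above):

/*
 * Conceptual timeline of the x86-32 race (a simplified view of the
 * scenario the comment above vmalloc_fault() describes):
 *
 *   CPU 0 (vmalloc/vzalloc)            CPU 1 (user of the mapping)
 *   -----------------------            ---------------------------
 *   populate a new PMD in the
 *   reference table (init_mm.pgd)
 *                                      touch the new vmalloc address;
 *                                      CPU 1's own pgd has no entry
 *                                      yet -> kernel page fault
 *   arch_sync_kernel_mappings()
 *   later propagates the PMD to
 *   the other page-tables
 *
 * In that window, do_kern_addr_fault() calls vmalloc_fault(), which
 * copies the missing PMD entry from the reference table via
 * vmalloc_sync_one() instead of treating the access as a fatal fault.
 */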