diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/fault.c | 37 | ||||
-rw-r--r-- | arch/s390/mm/gup.c | 14 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 2 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 1 | ||||
-rw-r--r-- | arch/s390/mm/maccess.c | 72 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 1 | ||||
-rw-r--r-- | arch/s390/mm/pageattr.c | 1 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 492 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 14 |
9 files changed, 595 insertions, 39 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index fe103e891e7a..a9a301866b3c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -299,13 +299,28 @@ static inline int do_exception(struct pt_regs *regs, int access, goto out; address = trans_exc_code & __FAIL_ADDR_MASK; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; -retry: down_read(&mm->mmap_sem); +#ifdef CONFIG_PGSTE + if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { + address = __gmap_fault(address, + (struct gmap *) S390_lowcore.gmap); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + } +#endif + +retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) @@ -345,17 +360,18 @@ retry: if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; + down_read(&mm->mmap_sem); goto retry; } } @@ -377,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, int fault; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code @@ -438,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) struct pt_regs regs; int access, fault; - regs.psw.mask = psw_kernel_bits; + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; if (!irqs_disabled()) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); @@ -571,8 +587,13 @@ static void pfault_interrupt(unsigned int ext_int_code, } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. */ - tsk->thread.pfault_wait = -1; + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } put_task_struct(tsk); } else { diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 45b405ca2567..65cb06e2af4e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page; + struct page *head, *page, *tail; int refs; result = write ? 0 : _SEGMENT_ENTRY_RO; @@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index a4d856db9154..597bb2d27c3c 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -35,7 +35,7 @@ int arch_prepare_hugepage(struct page *page) if (MACHINE_HAS_HPAGE) return 0; - ptep = (pte_t *) pte_alloc_one(&init_mm, address); + ptep = (pte_t *) pte_alloc_one(&init_mm, addr); if (!ptep) return -ENOMEM; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 59b663109d90..d4b9fb4d0042 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -26,6 +26,7 @@ #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> +#include <linux/export.h> #include <linux/gfp.h> #include <asm/processor.h> #include <asm/system.h> diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 51e5cd9b906a..1cb8427bedfb 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/gfp.h> #include <asm/system.h> /* @@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? -EFAULT : 0; } +/* + * Copy memory in real mode (kernel to kernel) + */ int memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; @@ -85,3 +89,71 @@ int memcpy_real(void *dest, void *src, size_t count) arch_local_irq_restore(flags); return rc; } + +/* + * Copy memory to absolute zero + */ +void copy_to_absolute_zero(void *dest, void *src, size_t count) +{ + unsigned long cr0; + + BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore)); + preempt_disable(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + memcpy_real(dest + store_prefix(), src, count); + __ctl_load(cr0, 0, 0); + preempt_enable(); +} + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f18188..f09c74881b7e 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> #include <asm/pgalloc.h> diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d013ed39743b..b36537a5f43e 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/hugetlb.h> +#include <asm/cacheflush.h> #include <asm/pgtable.h> static void change_page_attr(unsigned long addr, int numpages, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 37a23c223705..301c84d3b542 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007,2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/quicklist.h> #include <linux/rcupdate.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -133,30 +134,439 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) } #endif -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +#ifdef CONFIG_PGSTE + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * + * Returns a guest address space structure. + */ +struct gmap *gmap_alloc(struct mm_struct *mm) { - unsigned int old, new; + struct gmap *gmap; + struct page *page; + unsigned long *table; - do { - old = atomic_read(v); - new = old ^ bits; - } while (atomic_cmpxchg(v, old, new) != old); - return new; + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + if (!page) + goto out_free; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, _REGION1_ENTRY_EMPTY); + gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + list_add(&gmap->list, &mm->context.gmap_list); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) +{ + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + if (*table & _SEGMENT_ENTRY_INV) + return 0; + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry(rmap, &mp->mapper, list) { + if (rmap->entry != table) + continue; + list_del(&rmap->list); + kfree(rmap); + break; + } + *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + return 1; +} + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); } +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + unsigned long *table; + int i; + + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); + + /* Free all segment & region tables. */ + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { + table = (unsigned long *) page_to_phys(page); + if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) + /* Remove gmap rmap structures for segment table. */ + for (i = 0; i < PTRS_PER_PMD; i++, table++) + gmap_unlink_segment(gmap, table); + __free_pages(page, ALLOC_ORDER); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + list_del(&gmap->list); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + /* - * page table entry allocation/free routines. + * gmap_alloc_table is assumed to be called with mmap_sem held */ -#ifdef CONFIG_PGSTE -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) +static int gmap_alloc_table(struct gmap *gmap, + unsigned long *table, unsigned long init) { struct page *page; + unsigned long *new; + + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + if (*table & _REGION_ENTRY_INV) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + } else + __free_pages(page, ALLOC_ORDER); + return 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @addr: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ unsigned long *table; + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the guest addr space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if (*table & _REGION_ENTRY_INV) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if (*table & _REGION_ENTRY_INV) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if (*table & _REGION_ENTRY_INV) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Clear segment table entry in guest address space. */ + flush |= gmap_unlink_segment(gmap, table); + *table = _SEGMENT_ENTRY_INV; + } +out: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_mmap_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * + * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len > PGDIR_SIZE || + from + len < from || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the gmap address space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if ((*table & _REGION_ENTRY_INV) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if ((*table & _REGION_ENTRY_INV) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if ((*table & _REGION_ENTRY_INV) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Store 'from' address in an invalid segment table entry. */ + flush |= gmap_unlink_segment(gmap, table); + *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; + +out_unmap: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long *table, vmaddr, segment; + struct mm_struct *mm; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct vm_area_struct *vma; + struct page *page; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + current->thread.gmap_addr = address; + mm = gmap->mm; + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -EFAULT; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -EFAULT; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) + return -EFAULT; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + + /* Convert the gmap address to an mm address. */ + segment = *table; + if (likely(!(segment & _SEGMENT_ENTRY_INV))) { + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } else if (segment & _SEGMENT_ENTRY_RO) { + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); + if (!vma || vma->vm_start > vmaddr) + return -EFAULT; + + /* Walk the parent mm page table */ + pgd = pgd_offset(mm, vmaddr); + pud = pud_alloc(mm, pgd, vmaddr); + if (!pud) + return -ENOMEM; + pmd = pmd_alloc(mm, pud, vmaddr); + if (!pmd) + return -ENOMEM; + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, vmaddr)) + return -ENOMEM; + /* pmd now points to a valid segment table entry. */ + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); + if (!rmap) + return -ENOMEM; + /* Link gmap segment table entry location to page table. */ + page = pmd_page(*pmd); + mp = (struct gmap_pgtable *) page->index; + rmap->entry = table; + spin_lock(&mm->page_table_lock); + list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); + /* Set gmap segment table entry to page table. */ + *table = pmd_val(*pmd) & PAGE_MASK; + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) +{ + struct gmap_rmap *rmap, *next; + struct gmap_pgtable *mp; + struct page *page; + int flush; + + flush = 0; + spin_lock(&mm->page_table_lock); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { + *rmap->entry = + _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + list_del(&rmap->list); + kfree(rmap); + flush = 1; + } + spin_unlock(&mm->page_table_lock); + if (flush) + __tlb_flush_global(); +} + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + struct page *page; + unsigned long *table; + struct gmap_pgtable *mp; page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; + mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); + if (!mp) { + __free_page(page); + return NULL; + } pgtable_page_ctor(page); + mp->vmaddr = vmaddr & PMD_MASK; + INIT_LIST_HEAD(&mp->mapper); + page->index = (unsigned long) mp; atomic_set(&page->_mapcount, 3); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); @@ -167,24 +577,58 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) static inline void page_table_free_pgste(unsigned long *table) { struct page *page; + struct gmap_pgtable *mp; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + BUG_ON(!list_empty(&mp->mapper)); pgtable_page_ctor(page); atomic_set(&page->_mapcount, -1); + kfree(mp); __free_page(page); } -#endif -unsigned long *page_table_alloc(struct mm_struct *mm) +#else /* CONFIG_PGSTE */ + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + return NULL; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ +} + +static inline void gmap_unmap_notifier(struct mm_struct *mm, + unsigned long *table) +{ +} + +#endif /* CONFIG_PGSTE */ + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) { struct page *page; unsigned long *table; unsigned int mask, bit; -#ifdef CONFIG_PGSTE if (mm_has_pgste(mm)) - return page_table_alloc_pgste(mm); -#endif + return page_table_alloc_pgste(mm, vmaddr); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); mask = FRAG_MASK; @@ -222,10 +666,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; -#ifdef CONFIG_PGSTE - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { + gmap_unmap_notifier(mm, table); return page_table_free_pgste(table); -#endif + } /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); @@ -249,10 +693,8 @@ static void __page_table_free_rcu(void *table, unsigned bit) { struct page *page; -#ifdef CONFIG_PGSTE if (bit == FRAG_MASK) return page_table_free_pgste(table); -#endif /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); if (atomic_xor_bits(&page->_mapcount, bit) == 0) { @@ -269,13 +711,12 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) unsigned int bit, mask; mm = tlb->mm; -#ifdef CONFIG_PGSTE if (mm_has_pgste(mm)) { + gmap_unmap_notifier(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; } -#endif bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -291,8 +732,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) void __tlb_remove_table(void *_table) { - void *table = (void *)((unsigned long) _table & PAGE_MASK); - unsigned type = (unsigned long) _table & ~PAGE_MASK; + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; if (type) __page_table_free_rcu(table, type); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8c1970d1dd91..4799383e2df9 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -61,12 +61,12 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(void) +static pte_t __ref *vmem_pte_alloc(unsigned long address) { pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm); + pte = (pte_t *) page_table_alloc(&init_mm, address); else pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) @@ -120,7 +120,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } #endif if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); + pt_dir = vmem_pte_alloc(address); if (!pt_dir) goto out; pmd_populate(&init_mm, pm_dir, pt_dir); @@ -205,7 +205,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); + pt_dir = vmem_pte_alloc(address); if (!pt_dir) goto out; pmd_populate(&init_mm, pm_dir, pt_dir); @@ -335,6 +335,9 @@ void __init vmem_map_init(void) ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; if (start >= ro_end || end <= ro_start) @@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); |