Diffstat (limited to 'arch/s390/mm')
-rw-r--r--   arch/s390/mm/fault.c         37
-rw-r--r--   arch/s390/mm/gup.c           14
-rw-r--r--   arch/s390/mm/hugetlbpage.c    2
-rw-r--r--   arch/s390/mm/init.c           1
-rw-r--r--   arch/s390/mm/maccess.c       72
-rw-r--r--   arch/s390/mm/mmap.c           1
-rw-r--r--   arch/s390/mm/pageattr.c       1
-rw-r--r--   arch/s390/mm/pgtable.c      492
-rw-r--r--   arch/s390/mm/vmem.c          14
9 files changed, 595 insertions, 39 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fe103e891e7a..a9a301866b3c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -299,13 +299,28 @@ static inline int do_exception(struct pt_regs *regs, int access,
goto out;
address = trans_exc_code & __FAIL_ADDR_MASK;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_ALLOW_RETRY;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
-retry:
down_read(&mm->mmap_sem);
+#ifdef CONFIG_PGSTE
+ if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
+ address = __gmap_fault(address,
+ (struct gmap *) S390_lowcore.gmap);
+ if (address == -EFAULT) {
+ fault = VM_FAULT_BADMAP;
+ goto out_up;
+ }
+ if (address == -ENOMEM) {
+ fault = VM_FAULT_OOM;
+ goto out_up;
+ }
+ }
+#endif
+
+retry:
fault = VM_FAULT_BADMAP;
vma = find_vma(mm, address);
if (!vma)
@@ -345,17 +360,18 @@ retry:
if (flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
regs, address);
} else {
tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
regs, address);
}
if (fault & VM_FAULT_RETRY) {
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ down_read(&mm->mmap_sem);
goto retry;
}
}
@@ -377,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code,
int fault;
/* Protection exception is suppressing, decrement psw address. */
- regs->psw.addr -= (pgm_int_code >> 16);
+ regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16);
/*
* Check for low-address protection. This needs to be treated
* as a special case because the translation exception code
@@ -438,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
struct pt_regs regs;
int access, fault;
- regs.psw.mask = psw_kernel_bits;
+ regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
if (!irqs_disabled())
regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
regs.psw.addr = (unsigned long) __builtin_return_address(0);
@@ -571,8 +587,13 @@ static void pfault_interrupt(unsigned int ext_int_code,
} else {
/* Completion interrupt was faster than initial
* interrupt. Set pfault_wait to -1 so the initial
- * interrupt doesn't put the task to sleep. */
- tsk->thread.pfault_wait = -1;
+ * interrupt doesn't put the task to sleep.
+ * If the task is not running, ignore the completion
+ * interrupt since it must be a leftover of a PFAULT
+ * CANCEL operation which didn't remove all pending
+ * completion interrupts. */
+ if (tsk->state == TASK_RUNNING)
+ tsk->thread.pfault_wait = -1;
}
put_task_struct(tsk);
} else {
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 45b405ca2567..65cb06e2af4e 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask, result;
- struct page *head, *page;
+ struct page *head, *page, *tail;
int refs;
result = write ? 0 : _SEGMENT_ENTRY_RO;
@@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
refs = 0;
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
@@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
*nr -= refs;
while (refs--)
put_page(head);
+ return 0;
+ }
+
+ /*
+ * Any tail pages need their mapcount reference taken before we
+ * return.
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
}
return 1;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index a4d856db9154..597bb2d27c3c 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -35,7 +35,7 @@ int arch_prepare_hugepage(struct page *page)
if (MACHINE_HAS_HPAGE)
return 0;
- ptep = (pte_t *) pte_alloc_one(&init_mm, address);
+ ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
if (!ptep)
return -ENOMEM;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 59b663109d90..d4b9fb4d0042 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -26,6 +26,7 @@
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/initrd.h>
+#include <linux/export.h>
#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/system.h>
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 51e5cd9b906a..1cb8427bedfb 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/gfp.h>
#include <asm/system.h>
/*
@@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size)
return copied < 0 ? -EFAULT : 0;
}
+/*
+ * Copy memory in real mode (kernel to kernel)
+ */
int memcpy_real(void *dest, void *src, size_t count)
{
register unsigned long _dest asm("2") = (unsigned long) dest;
@@ -85,3 +89,71 @@ int memcpy_real(void *dest, void *src, size_t count)
arch_local_irq_restore(flags);
return rc;
}
+
+/*
+ * Copy memory to absolute zero
+ */
+void copy_to_absolute_zero(void *dest, void *src, size_t count)
+{
+ unsigned long cr0;
+
+ BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore));
+ preempt_disable();
+ __ctl_store(cr0, 0, 0);
+ __ctl_clear_bit(0, 28); /* disable lowcore protection */
+ memcpy_real(dest + store_prefix(), src, count);
+ __ctl_load(cr0, 0, 0);
+ preempt_enable();
+}
+
+/*
+ * Copy memory from kernel (real) to user (virtual)
+ */
+int copy_to_user_real(void __user *dest, void *src, size_t count)
+{
+ int offs = 0, size, rc;
+ char *buf;
+
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ rc = -EFAULT;
+ while (offs < count) {
+ size = min(PAGE_SIZE, count - offs);
+ if (memcpy_real(buf, src + offs, size))
+ goto out;
+ if (copy_to_user(dest + offs, buf, size))
+ goto out;
+ offs += size;
+ }
+ rc = 0;
+out:
+ free_page((unsigned long) buf);
+ return rc;
+}
+
+/*
+ * Copy memory from user (virtual) to kernel (real)
+ */
+int copy_from_user_real(void *dest, void __user *src, size_t count)
+{
+ int offs = 0, size, rc;
+ char *buf;
+
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ rc = -EFAULT;
+ while (offs < count) {
+ size = min(PAGE_SIZE, count - offs);
+ if (copy_from_user(buf, src + offs, size))
+ goto out;
+ if (memcpy_real(dest + offs, buf, size))
+ goto out;
+ offs += size;
+ }
+ rc = 0;
+out:
+ free_page((unsigned long) buf);
+ return rc;
+}
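
Taken together, copy_to_user_real() and copy_from_user_real() bounce the data through a single page obtained with __get_free_page() and move it in PAGE_SIZE chunks, so a caller does not have to worry about the size of a single real-mode copy. A minimal sketch of a hypothetical caller follows; the function name dump_real_range() is illustrative only and not part of this patch, and it assumes the copy_to_user_real() prototype is visible from an arch header:

	/*
	 * Hypothetical sketch: expose a range of real storage to a user
	 * buffer via the bounce-buffer helper added above.
	 */
	static int dump_real_range(void __user *ubuf, unsigned long real_addr,
				   size_t count)
	{
		/* Returns 0, -ENOMEM (no bounce page) or -EFAULT. */
		return copy_to_user_real(ubuf, (void *) real_addr, count);
	}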
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c9a9f7f18188..f09c74881b7e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -26,6 +26,7 @@
#include <linux/personality.h>
#include <linux/mm.h>
+#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <asm/pgalloc.h>
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index d013ed39743b..b36537a5f43e 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <asm/cacheflush.h>
#include <asm/pgtable.h>
static void change_page_attr(unsigned long addr, int numpages,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 37a23c223705..301c84d3b542 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,5 +1,5 @@
/*
- * Copyright IBM Corp. 2007,2009
+ * Copyright IBM Corp. 2007,2011
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
+#include <linux/slab.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -133,30 +134,439 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
}
#endif
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+#ifdef CONFIG_PGSTE
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm)
{
- unsigned int old, new;
+ struct gmap *gmap;
+ struct page *page;
+ unsigned long *table;
- do {
- old = atomic_read(v);
- new = old ^ bits;
- } while (atomic_cmpxchg(v, old, new) != old);
- return new;
+ gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+ if (!gmap)
+ goto out;
+ INIT_LIST_HEAD(&gmap->crst_list);
+ gmap->mm = mm;
+ page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ if (!page)
+ goto out_free;
+ list_add(&page->lru, &gmap->crst_list);
+ table = (unsigned long *) page_to_phys(page);
+ crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ gmap->table = table;
+ gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ list_add(&gmap->list, &mm->context.gmap_list);
+ return gmap;
+
+out_free:
+ kfree(gmap);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
+{
+ struct gmap_pgtable *mp;
+ struct gmap_rmap *rmap;
+ struct page *page;
+
+ if (*table & _SEGMENT_ENTRY_INV)
+ return 0;
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ list_for_each_entry(rmap, &mp->mapper, list) {
+ if (rmap->entry != table)
+ continue;
+ list_del(&rmap->list);
+ kfree(rmap);
+ break;
+ }
+ *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ return 1;
+}
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte((unsigned long) gmap->table |
+ _ASCE_TYPE_REGION1);
+ else
+ __tlb_flush_global();
}
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_free(struct gmap *gmap)
+{
+ struct page *page, *next;
+ unsigned long *table;
+ int i;
+
+
+ /* Flush tlb. */
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte((unsigned long) gmap->table |
+ _ASCE_TYPE_REGION1);
+ else
+ __tlb_flush_global();
+
+ /* Free all segment & region tables. */
+ down_read(&gmap->mm->mmap_sem);
+ spin_lock(&gmap->mm->page_table_lock);
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
+ table = (unsigned long *) page_to_phys(page);
+ if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
+ /* Remove gmap rmap structures for segment table. */
+ for (i = 0; i < PTRS_PER_PMD; i++, table++)
+ gmap_unlink_segment(gmap, table);
+ __free_pages(page, ALLOC_ORDER);
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ list_del(&gmap->list);
+ kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
/*
- * page table entry allocation/free routines.
+ * gmap_alloc_table is assumed to be called with mmap_sem held
*/
-#ifdef CONFIG_PGSTE
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+static int gmap_alloc_table(struct gmap *gmap,
+ unsigned long *table, unsigned long init)
{
struct page *page;
+ unsigned long *new;
+
+ page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ new = (unsigned long *) page_to_phys(page);
+ crst_table_init(new, init);
+ if (*table & _REGION_ENTRY_INV) {
+ list_add(&page->lru, &gmap->crst_list);
+ *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ (*table & _REGION_ENTRY_TYPE_MASK);
+ } else
+ __free_pages(page, ALLOC_ORDER);
+ return 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @addr: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
unsigned long *table;
+ unsigned long off;
+ int flush;
+
+ if ((to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_read(&gmap->mm->mmap_sem);
+ spin_lock(&gmap->mm->page_table_lock);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Walk the guest addr space page table */
+ table = gmap->table + (((to + off) >> 53) & 0x7ff);
+ if (*table & _REGION_ENTRY_INV)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 42) & 0x7ff);
+ if (*table & _REGION_ENTRY_INV)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 31) & 0x7ff);
+ if (*table & _REGION_ENTRY_INV)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 20) & 0x7ff);
+
+ /* Clear segment table entry in guest address space. */
+ flush |= gmap_unlink_segment(gmap, table);
+ *table = _SEGMENT_ENTRY_INV;
+ }
+out:
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ *
+ * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len)
+{
+ unsigned long *table;
+ unsigned long off;
+ int flush;
+
+ if ((from | to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || from + len > PGDIR_SIZE ||
+ from + len < from || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_read(&gmap->mm->mmap_sem);
+ spin_lock(&gmap->mm->page_table_lock);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Walk the gmap address space page table */
+ table = gmap->table + (((to + off) >> 53) & 0x7ff);
+ if ((*table & _REGION_ENTRY_INV) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
+ goto out_unmap;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 42) & 0x7ff);
+ if ((*table & _REGION_ENTRY_INV) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
+ goto out_unmap;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 31) & 0x7ff);
+ if ((*table & _REGION_ENTRY_INV) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
+ goto out_unmap;
+ table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 20) & 0x7ff);
+
+ /* Store 'from' address in an invalid segment table entry. */
+ flush |= gmap_unlink_segment(gmap, table);
+ *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+
+out_unmap:
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ gmap_unmap_segment(gmap, to, len);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+{
+ unsigned long *table, vmaddr, segment;
+ struct mm_struct *mm;
+ struct gmap_pgtable *mp;
+ struct gmap_rmap *rmap;
+ struct vm_area_struct *vma;
+ struct page *page;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ current->thread.gmap_addr = address;
+ mm = gmap->mm;
+ /* Walk the gmap address space page table */
+ table = gmap->table + ((address >> 53) & 0x7ff);
+ if (unlikely(*table & _REGION_ENTRY_INV))
+ return -EFAULT;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + ((address >> 42) & 0x7ff);
+ if (unlikely(*table & _REGION_ENTRY_INV))
+ return -EFAULT;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + ((address >> 31) & 0x7ff);
+ if (unlikely(*table & _REGION_ENTRY_INV))
+ return -EFAULT;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + ((address >> 20) & 0x7ff);
+
+ /* Convert the gmap address to an mm address. */
+ segment = *table;
+ if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
+ page = pfn_to_page(segment >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ return mp->vmaddr | (address & ~PMD_MASK);
+ } else if (segment & _SEGMENT_ENTRY_RO) {
+ vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+ vma = find_vma(mm, vmaddr);
+ if (!vma || vma->vm_start > vmaddr)
+ return -EFAULT;
+
+ /* Walk the parent mm page table */
+ pgd = pgd_offset(mm, vmaddr);
+ pud = pud_alloc(mm, pgd, vmaddr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = pmd_alloc(mm, pud, vmaddr);
+ if (!pmd)
+ return -ENOMEM;
+ if (!pmd_present(*pmd) &&
+ __pte_alloc(mm, vma, pmd, vmaddr))
+ return -ENOMEM;
+ /* pmd now points to a valid segment table entry. */
+ rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
+ if (!rmap)
+ return -ENOMEM;
+ /* Link gmap segment table entry location to page table. */
+ page = pmd_page(*pmd);
+ mp = (struct gmap_pgtable *) page->index;
+ rmap->entry = table;
+ spin_lock(&mm->page_table_lock);
+ list_add(&rmap->list, &mp->mapper);
+ spin_unlock(&mm->page_table_lock);
+ /* Set gmap segment table entry to page table. */
+ *table = pmd_val(*pmd) & PAGE_MASK;
+ return vmaddr | (address & ~PMD_MASK);
+ }
+ return -EFAULT;
+}
+
+unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
+{
+ unsigned long rc;
+
+ down_read(&gmap->mm->mmap_sem);
+ rc = __gmap_fault(address, gmap);
+ up_read(&gmap->mm->mmap_sem);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+{
+
+ unsigned long *table, address, size;
+ struct vm_area_struct *vma;
+ struct gmap_pgtable *mp;
+ struct page *page;
+
+ down_read(&gmap->mm->mmap_sem);
+ address = from;
+ while (address < to) {
+ /* Walk the gmap address space page table */
+ table = gmap->table + ((address >> 53) & 0x7ff);
+ if (unlikely(*table & _REGION_ENTRY_INV)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + ((address >> 42) & 0x7ff);
+ if (unlikely(*table & _REGION_ENTRY_INV)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + ((address >> 31) & 0x7ff);
+ if (unlikely(*table & _REGION_ENTRY_INV)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + ((address >> 20) & 0x7ff);
+ if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ vma = find_vma(gmap->mm, mp->vmaddr);
+ size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
+ zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
+ size, NULL);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ }
+ up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
+{
+ struct gmap_rmap *rmap, *next;
+ struct gmap_pgtable *mp;
+ struct page *page;
+ int flush;
+
+ flush = 0;
+ spin_lock(&mm->page_table_lock);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
+ *rmap->entry =
+ _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+ list_del(&rmap->list);
+ kfree(rmap);
+ flush = 1;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (flush)
+ __tlb_flush_global();
+}
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+ unsigned long vmaddr)
+{
+ struct page *page;
+ unsigned long *table;
+ struct gmap_pgtable *mp;
page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
+ mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
+ if (!mp) {
+ __free_page(page);
+ return NULL;
+ }
pgtable_page_ctor(page);
+ mp->vmaddr = vmaddr & PMD_MASK;
+ INIT_LIST_HEAD(&mp->mapper);
+ page->index = (unsigned long) mp;
atomic_set(&page->_mapcount, 3);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
@@ -167,24 +577,58 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
static inline void page_table_free_pgste(unsigned long *table)
{
struct page *page;
+ struct gmap_pgtable *mp;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ BUG_ON(!list_empty(&mp->mapper));
pgtable_page_ctor(page);
atomic_set(&page->_mapcount, -1);
+ kfree(mp);
__free_page(page);
}
-#endif
-unsigned long *page_table_alloc(struct mm_struct *mm)
+#else /* CONFIG_PGSTE */
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+ unsigned long vmaddr)
+{
+ return NULL;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+}
+
+static inline void gmap_unmap_notifier(struct mm_struct *mm,
+ unsigned long *table)
+{
+}
+
+#endif /* CONFIG_PGSTE */
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+ unsigned int old, new;
+
+ do {
+ old = atomic_read(v);
+ new = old ^ bits;
+ } while (atomic_cmpxchg(v, old, new) != old);
+ return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
struct page *page;
unsigned long *table;
unsigned int mask, bit;
-#ifdef CONFIG_PGSTE
if (mm_has_pgste(mm))
- return page_table_alloc_pgste(mm);
-#endif
+ return page_table_alloc_pgste(mm, vmaddr);
/* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
mask = FRAG_MASK;
@@ -222,10 +666,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned int bit, mask;
-#ifdef CONFIG_PGSTE
- if (mm_has_pgste(mm))
+ if (mm_has_pgste(mm)) {
+ gmap_unmap_notifier(mm, table);
return page_table_free_pgste(table);
-#endif
+ }
/* Free 1K/2K page table fragment of a 4K page */
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
@@ -249,10 +693,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
{
struct page *page;
-#ifdef CONFIG_PGSTE
if (bit == FRAG_MASK)
return page_table_free_pgste(table);
-#endif
/* Free 1K/2K page table fragment of a 4K page */
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
@@ -269,13 +711,12 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
unsigned int bit, mask;
mm = tlb->mm;
-#ifdef CONFIG_PGSTE
if (mm_has_pgste(mm)) {
+ gmap_unmap_notifier(mm, table);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
}
-#endif
bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
@@ -291,8 +732,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
void __tlb_remove_table(void *_table)
{
- void *table = (void *)((unsigned long) _table & PAGE_MASK);
- unsigned type = (unsigned long) _table & ~PAGE_MASK;
+ const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
+ void *table = (void *)((unsigned long) _table & ~mask);
+ unsigned type = (unsigned long) _table & mask;
if (type)
__page_table_free_rcu(table, type);
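
The repeated shift-and-mask walk above follows the four s390 table levels by hand: the region-first, region-second, region-third and segment indices are the 11-bit address fields selected with (addr >> 53), (addr >> 42), (addr >> 31) and (addr >> 20), each masked with 0x7ff, so one segment entry covers a 1 MB (PMD_SIZE) chunk. A minimal usage sketch of the exported gmap interface follows; the wrapper function and the sample addresses are assumptions for illustration, while the gmap_*() calls are the ones added and exported by this patch (their declarations live in the accompanying asm/pgtable.h changes, outside this diffstat):

	/*
	 * Illustrative sketch: create a guest address space, map one 1 MB
	 * segment of the parent mm into it, translate a guest address and
	 * tear everything down again.
	 */
	static int gmap_usage_sketch(struct mm_struct *mm)
	{
		struct gmap *gmap;
		/* assumed sample addresses, both PMD_SIZE aligned; 'from'
		 * must lie inside an existing VMA of the parent mm */
		unsigned long from = 0x10000000UL;
		unsigned long to   = 0x20000000UL;
		unsigned long addr;
		int rc;

		gmap = gmap_alloc(mm);		/* region-first table + ASCE */
		if (!gmap)
			return -ENOMEM;
		rc = gmap_map_segment(gmap, from, to, PMD_SIZE);
		if (rc) {
			gmap_free(gmap);
			return rc;
		}
		gmap_enable(gmap);		/* S390_lowcore.gmap := gmap */

		/* Resolve the guest address; lower tables are built lazily. */
		addr = gmap_fault(to, gmap);
		if (IS_ERR_VALUE(addr))		/* -EFAULT or -ENOMEM came back */
			rc = (int) addr;

		gmap_disable(gmap);
		gmap_unmap_segment(gmap, to, PMD_SIZE);
		gmap_free(gmap);
		return rc;
	}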
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 8c1970d1dd91..4799383e2df9 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -61,12 +61,12 @@ static inline pmd_t *vmem_pmd_alloc(void)
return pmd;
}
-static pte_t __ref *vmem_pte_alloc(void)
+static pte_t __ref *vmem_pte_alloc(unsigned long address)
{
pte_t *pte;
if (slab_is_available())
- pte = (pte_t *) page_table_alloc(&init_mm);
+ pte = (pte_t *) page_table_alloc(&init_mm, address);
else
pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
@@ -120,7 +120,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
}
#endif
if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc();
+ pt_dir = vmem_pte_alloc(address);
if (!pt_dir)
goto out;
pmd_populate(&init_mm, pm_dir, pt_dir);
@@ -205,7 +205,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
pm_dir = pmd_offset(pu_dir, address);
if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc();
+ pt_dir = vmem_pte_alloc(address);
if (!pt_dir)
goto out;
pmd_populate(&init_mm, pm_dir, pt_dir);
@@ -335,6 +335,9 @@ void __init vmem_map_init(void)
ro_start = ((unsigned long)&_stext) & PAGE_MASK;
ro_end = PFN_ALIGN((unsigned long)&_eshared);
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+ if (memory_chunk[i].type == CHUNK_CRASHK ||
+ memory_chunk[i].type == CHUNK_OLDMEM)
+ continue;
start = memory_chunk[i].addr;
end = memory_chunk[i].addr + memory_chunk[i].size;
if (start >= ro_end || end <= ro_start)
@@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void)
for (i = 0; i < MEMORY_CHUNKS; i++) {
if (!memory_chunk[i].size)
continue;
+ if (memory_chunk[i].type == CHUNK_CRASHK ||
+ memory_chunk[i].type == CHUNK_OLDMEM)
+ continue;
seg = kzalloc(sizeof(*seg), GFP_KERNEL);
if (!seg)
panic("Out of memory...\n");