diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/gmap.c | 37 | ||||
-rw-r--r-- | arch/s390/mm/gup.c | 2 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 84 | ||||
-rw-r--r-- | arch/s390/mm/page-states.c | 3 | ||||
-rw-r--r-- | arch/s390/mm/pageattr.c | 10 | ||||
-rw-r--r-- | arch/s390/mm/pgalloc.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 153 |
7 files changed, 204 insertions, 89 deletions
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a07b1ec1391d..7f6db1e6c048 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) + from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; @@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page); * Called with sg->parent->shadow_lock. */ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, - unsigned long offset, pte_t *pte) + unsigned long gaddr, pte_t *pte) { struct gmap_rmap *rmap, *rnext, *head; - unsigned long gaddr, start, end, bits, raddr; - unsigned long *table; + unsigned long start, end, bits, raddr; BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->parent->guest_table_lock); - table = radix_tree_lookup(&sg->parent->host_to_guest, - vmaddr >> PMD_SHIFT); - gaddr = table ? __gmap_segment_gaddr(table) + offset : 0; - spin_unlock(&sg->parent->guest_table_lock); - if (!table) - return; spin_lock(&sg->guest_table_lock); if (sg->removed) { @@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { - unsigned long offset, gaddr; + unsigned long offset, gaddr = 0; unsigned long *table; struct gmap *gmap, *sg, *next; @@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, offset = offset * (4096 / sizeof(pte_t)); rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { - if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { - spin_lock(&gmap->shadow_lock); - list_for_each_entry_safe(sg, next, - &gmap->children, list) - gmap_shadow_notify(sg, vmaddr, offset, pte); - spin_unlock(&gmap->shadow_lock); - } - if (!(bits & PGSTE_IN_BIT)) - continue; spin_lock(&gmap->guest_table_lock); table = radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); if (table) gaddr = __gmap_segment_gaddr(table) + offset; spin_unlock(&gmap->guest_table_lock); - if (table) + if (!table) + continue; + + if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { + spin_lock(&gmap->shadow_lock); + list_for_each_entry_safe(sg, next, + &gmap->children, list) + gmap_shadow_notify(sg, vmaddr, gaddr, pte); + spin_unlock(&gmap->shadow_lock); + } + if (bits & PGSTE_IN_BIT) gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); } rcu_read_unlock(); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 18d4107e10ee..b7b779c40a5b 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if ((end <= start) || (end > TASK_SIZE)) + if ((end <= start) || (end > mm->context.asce_limit)) return 0; /* * local_irq_save() doesn't prevent pagetable teardown, but does diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 50618614881f..b017daed6887 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct vm_unmapped_area_info info; + int rc; if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) - return addr; + goto check_asce_limit; if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) - return addr; + goto check_asce_limit; } info.flags = 0; @@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, else info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; - return vm_unmapped_area(&info); + addr = vm_unmapped_area(&info); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit) { + rc = crst_table_upgrade(mm); + if (rc) + return (unsigned long) rc; + } + + return addr; } unsigned long @@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; + int rc; /* requested length too big for entire address space */ if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) - return addr; + goto check_asce_limit; /* requesting a specific address */ if (addr) { @@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) - return addr; + goto check_asce_limit; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; @@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = TASK_SIZE; addr = vm_unmapped_area(&info); + if (addr & ~PAGE_MASK) + return addr; } - return addr; -} - -int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) -{ - if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE) - return 0; - if (!(flags & MAP_FIXED)) - addr = 0; - if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm); - return 0; -} - -static unsigned long -s390_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - unsigned long area; - int rc; - - area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); - if (!(area & ~PAGE_MASK)) - return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { - /* Upgrade the page table to 4 levels and retry. */ +check_asce_limit: + if (addr + len > current->mm->context.asce_limit) { rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; - area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); } - return area; -} - -static unsigned long -s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct mm_struct *mm = current->mm; - unsigned long area; - int rc; - area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); - if (!(area & ~PAGE_MASK)) - return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { - /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm); - if (rc) - return (unsigned long) rc; - area = arch_get_unmapped_area_topdown(filp, addr, len, - pgoff, flags); - } - return area; + return addr; } + /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: @@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = mmap_base_legacy(random_factor); - mm->get_unmapped_area = s390_get_unmapped_area; + mm->get_unmapped_area = arch_get_unmapped_area; } else { mm->mmap_base = mmap_base(random_factor); - mm->get_unmapped_area = s390_get_unmapped_area_topdown; + mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 3330ea124eec..69a7b01ae746 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -13,8 +13,7 @@ #include <linux/gfp.h> #include <linux/init.h> -#define ESSA_SET_STABLE 1 -#define ESSA_SET_UNUSED 2 +#include <asm/page-states.h> static int cmma_flag = 1; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index fc5dc33bb141..fc321c5ec30e 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -94,7 +94,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, new = pte_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pte_mkwrite(pte_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pte_val(new) |= _PAGE_NOEXEC; else if (flags & SET_MEMORY_X) pte_val(new) &= ~_PAGE_NOEXEC; @@ -144,7 +144,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, new = pmd_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pmd_mkwrite(pmd_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; else if (flags & SET_MEMORY_X) pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; @@ -221,7 +221,7 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, new = pud_wrprotect(new); else if (flags & SET_MEMORY_RW) new = pud_mkwrite(pud_mkdirty(new)); - if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) + if (flags & SET_MEMORY_NX) pud_val(new) |= _REGION_ENTRY_NOEXEC; else if (flags & SET_MEMORY_X) pud_val(new) &= ~_REGION_ENTRY_NOEXEC; @@ -288,6 +288,10 @@ static int change_page_attr(unsigned long addr, unsigned long end, int __set_memory(unsigned long addr, int numpages, unsigned long flags) { + if (!MACHINE_HAS_NX) + flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); + if (!flags) + return 0; addr &= PAGE_MASK; return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 995f78532cc2..f502cbe657af 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm) mm->context.asce_limit = 1UL << 53; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; - mm->task_size = mm->context.asce_limit; spin_unlock_bh(&mm->page_table_lock); on_each_cpu(__crst_table_upgrade, mm, 0); @@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm) mm->context.asce_limit = 1UL << 31; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; - mm->task_size = mm->context.asce_limit; crst_table_free(mm, (unsigned long *) pgd); if (current->active_mm == mm) @@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) struct page *page; unsigned long *table; - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (page) { table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 463e5ef02304..947b66a5cdba 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -23,6 +23,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/page-states.h> static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -787,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, return 0; } EXPORT_SYMBOL(get_guest_storage_key); + +/** + * pgste_perform_essa - perform ESSA actions on the PGSTE. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @orc: the specific action to perform, see the ESSA_SET_* macros. + * @oldpte: the PTE will be saved there if the pointer is not NULL. + * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL. + * + * Return: 1 if the page is to be added to the CBRL, otherwise 0, + * or < 0 in case of error. -EINVAL is returned for invalid values + * of orc, -EFAULT for invalid addresses. + */ +int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, + unsigned long *oldpte, unsigned long *oldpgste) +{ + unsigned long pgstev; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + int res = 0; + + WARN_ON_ONCE(orc > ESSA_MAX); + if (unlikely(orc > ESSA_MAX)) + return -EINVAL; + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + if (oldpte) + *oldpte = pte_val(*ptep); + if (oldpgste) + *oldpgste = pgstev; + + switch (orc) { + case ESSA_GET_STATE: + break; + case ESSA_SET_STABLE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + break; + case ESSA_SET_UNUSED: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_UNUSED; + if (pte_val(*ptep) & _PAGE_INVALID) + res = 1; + break; + case ESSA_SET_VOLATILE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + if (pte_val(*ptep) & _PAGE_INVALID) + res = 1; + break; + case ESSA_SET_POT_VOLATILE: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE; + break; + } + if (pgstev & _PGSTE_GPS_ZERO) { + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + break; + } + if (!(pgstev & PGSTE_GC_BIT)) { + pgstev |= _PGSTE_GPS_USAGE_VOLATILE; + res = 1; + break; + } + break; + case ESSA_SET_STABLE_RESIDENT: + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + /* + * Since the resident state can go away any time after this + * call, we will not make this page resident. We can revisit + * this decision if a guest will ever start using this. + */ + break; + case ESSA_SET_STABLE_IF_RESIDENT: + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + pgstev &= ~_PGSTE_GPS_USAGE_MASK; + pgstev |= _PGSTE_GPS_USAGE_STABLE; + } + break; + default: + /* we should never get here! */ + break; + } + /* If we are discarding a page, set it to logical zero */ + if (res) + pgstev |= _PGSTE_GPS_ZERO; + + pgste_val(pgste) = pgstev; + pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + return res; +} +EXPORT_SYMBOL(pgste_perform_essa); + +/** + * set_pgste_bits - set specific PGSTE bits. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @bits: a bitmask representing the bits that will be touched + * @value: the values of the bits to be written. Only the bits in the mask + * will be written. + * + * Return: 0 on success, < 0 in case of error. + */ +int set_pgste_bits(struct mm_struct *mm, unsigned long hva, + unsigned long bits, unsigned long value) +{ + spinlock_t *ptl; + pgste_t new; + pte_t *ptep; + + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + new = pgste_get_lock(ptep); + + pgste_val(new) &= ~bits; + pgste_val(new) |= value & bits; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(set_pgste_bits); + +/** + * get_pgste - get the current PGSTE for the given address. + * @mm: the memory context. It must have PGSTEs, no check is performed here! + * @hva: the host virtual address of the page whose PGSTE is to be processed + * @pgstep: will be written with the current PGSTE for the given address. + * + * Return: 0 on success, < 0 in case of error. + */ +int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) +{ + spinlock_t *ptl; + pte_t *ptep; + + ptep = get_locked_pte(mm, hva, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + *pgstep = pgste_val(pgste_get(ptep)); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(get_pgste); #endif |