summaryrefslogtreecommitdiff
path: root/arch/s390/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/gmap.c37
-rw-r--r--arch/s390/mm/gup.c2
-rw-r--r--arch/s390/mm/mmap.c84
-rw-r--r--arch/s390/mm/page-states.c3
-rw-r--r--arch/s390/mm/pageattr.c10
-rw-r--r--arch/s390/mm/pgalloc.c4
-rw-r--r--arch/s390/mm/pgtable.c153
7 files changed, 204 insertions, 89 deletions
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a07b1ec1391d..7f6db1e6c048 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
if (len == 0 || from + len < from || to + len < to ||
- from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+ from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
return -EINVAL;
flush = 0;
@@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
* Called with sg->parent->shadow_lock.
*/
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
- unsigned long offset, pte_t *pte)
+ unsigned long gaddr, pte_t *pte)
{
struct gmap_rmap *rmap, *rnext, *head;
- unsigned long gaddr, start, end, bits, raddr;
- unsigned long *table;
+ unsigned long start, end, bits, raddr;
BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->parent->guest_table_lock);
- table = radix_tree_lookup(&sg->parent->host_to_guest,
- vmaddr >> PMD_SHIFT);
- gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
- spin_unlock(&sg->parent->guest_table_lock);
- if (!table)
- return;
spin_lock(&sg->guest_table_lock);
if (sg->removed) {
@@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
- unsigned long offset, gaddr;
+ unsigned long offset, gaddr = 0;
unsigned long *table;
struct gmap *gmap, *sg, *next;
@@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
offset = offset * (4096 / sizeof(pte_t));
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
- if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
- spin_lock(&gmap->shadow_lock);
- list_for_each_entry_safe(sg, next,
- &gmap->children, list)
- gmap_shadow_notify(sg, vmaddr, offset, pte);
- spin_unlock(&gmap->shadow_lock);
- }
- if (!(bits & PGSTE_IN_BIT))
- continue;
spin_lock(&gmap->guest_table_lock);
table = radix_tree_lookup(&gmap->host_to_guest,
vmaddr >> PMD_SHIFT);
if (table)
gaddr = __gmap_segment_gaddr(table) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (table)
+ if (!table)
+ continue;
+
+ if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+ spin_lock(&gmap->shadow_lock);
+ list_for_each_entry_safe(sg, next,
+ &gmap->children, list)
+ gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+ spin_unlock(&gmap->shadow_lock);
+ }
+ if (bits & PGSTE_IN_BIT)
gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
}
rcu_read_unlock();
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..b7b779c40a5b 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
- if ((end <= start) || (end > TASK_SIZE))
+ if ((end <= start) || (end > mm->context.asce_limit))
return 0;
/*
* local_irq_save() doesn't prevent pagetable teardown, but does
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 50618614881f..b017daed6887 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
+ int rc;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
- return addr;
+ goto check_asce_limit;
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
- return addr;
+ goto check_asce_limit;
}
info.flags = 0;
@@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
else
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
- return vm_unmapped_area(&info);
+ addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit) {
+ rc = crst_table_upgrade(mm);
+ if (rc)
+ return (unsigned long) rc;
+ }
+
+ return addr;
}
unsigned long
@@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
+ int rc;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
- return addr;
+ goto check_asce_limit;
/* requesting a specific address */
if (addr) {
@@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
- return addr;
+ goto check_asce_limit;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
}
- return addr;
-}
-
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
-{
- if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
- return 0;
- if (!(flags & MAP_FIXED))
- addr = 0;
- if ((addr + len) >= TASK_SIZE)
- return crst_table_upgrade(current->mm);
- return 0;
-}
-
-static unsigned long
-s390_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- unsigned long area;
- int rc;
-
- area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
- if (!(area & ~PAGE_MASK))
- return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
- /* Upgrade the page table to 4 levels and retry. */
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit) {
rc = crst_table_upgrade(mm);
if (rc)
return (unsigned long) rc;
- area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
}
- return area;
-}
-
-static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- unsigned long area;
- int rc;
- area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
- if (!(area & ~PAGE_MASK))
- return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
- /* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm);
- if (rc)
- return (unsigned long) rc;
- area = arch_get_unmapped_area_topdown(filp, addr, len,
- pgoff, flags);
- }
- return area;
+ return addr;
}
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
*/
if (mmap_is_legacy()) {
mm->mmap_base = mmap_base_legacy(random_factor);
- mm->get_unmapped_area = s390_get_unmapped_area;
+ mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
- mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 3330ea124eec..69a7b01ae746 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,8 +13,7 @@
#include <linux/gfp.h>
#include <linux/init.h>
-#define ESSA_SET_STABLE 1
-#define ESSA_SET_UNUSED 2
+#include <asm/page-states.h>
static int cmma_flag = 1;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..fc321c5ec30e 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -94,7 +94,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
new = pte_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pte_mkwrite(pte_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pte_val(new) |= _PAGE_NOEXEC;
else if (flags & SET_MEMORY_X)
pte_val(new) &= ~_PAGE_NOEXEC;
@@ -144,7 +144,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
new = pmd_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pmd_mkwrite(pmd_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
else if (flags & SET_MEMORY_X)
pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
@@ -221,7 +221,7 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
new = pud_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pud_mkwrite(pud_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pud_val(new) |= _REGION_ENTRY_NOEXEC;
else if (flags & SET_MEMORY_X)
pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
@@ -288,6 +288,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
int __set_memory(unsigned long addr, int numpages, unsigned long flags)
{
+ if (!MACHINE_HAS_NX)
+ flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+ if (!flags)
+ return 0;
addr &= PAGE_MASK;
return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..f502cbe657af 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm)
mm->context.asce_limit = 1UL << 53;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- mm->task_size = mm->context.asce_limit;
spin_unlock_bh(&mm->page_table_lock);
on_each_cpu(__crst_table_upgrade, mm, 0);
@@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm)
mm->context.asce_limit = 1UL << 31;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- mm->task_size = mm->context.asce_limit;
crst_table_free(mm, (unsigned long *) pgd);
if (current->active_mm == mm)
@@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
struct page *page;
unsigned long *table;
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ page = alloc_page(GFP_KERNEL);
if (page) {
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 463e5ef02304..947b66a5cdba 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/page-states.h>
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
@@ -787,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ * or < 0 in case of error. -EINVAL is returned for invalid values
+ * of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste)
+{
+ unsigned long pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+ int res = 0;
+
+ WARN_ON_ONCE(orc > ESSA_MAX);
+ if (unlikely(orc > ESSA_MAX))
+ return -EINVAL;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ if (oldpte)
+ *oldpte = pte_val(*ptep);
+ if (oldpgste)
+ *oldpgste = pgstev;
+
+ switch (orc) {
+ case ESSA_GET_STATE:
+ break;
+ case ESSA_SET_STABLE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ break;
+ case ESSA_SET_UNUSED:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_POT_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+ break;
+ }
+ if (pgstev & _PGSTE_GPS_ZERO) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ break;
+ }
+ if (!(pgstev & PGSTE_GC_BIT)) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ res = 1;
+ break;
+ }
+ break;
+ case ESSA_SET_STABLE_RESIDENT:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ /*
+ * Since the resident state can go away any time after this
+ * call, we will not make this page resident. We can revisit
+ * this decision if a guest will ever start using this.
+ */
+ break;
+ case ESSA_SET_STABLE_IF_RESIDENT:
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ }
+ break;
+ default:
+ /* we should never get here! */
+ break;
+ }
+ /* If we are discarding a page, set it to logical zero */
+ if (res)
+ pgstev |= _PGSTE_GPS_ZERO;
+
+ pgste_val(pgste) = pgstev;
+ pgste_set_unlock(ptep, pgste);
+ pte_unmap_unlock(ptep, ptl);
+ return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ * will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+ unsigned long bits, unsigned long value)
+{
+ spinlock_t *ptl;
+ pgste_t new;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ new = pgste_get_lock(ptep);
+
+ pgste_val(new) &= ~bits;
+ pgste_val(new) |= value & bits;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ *pgstep = pgste_val(pgste_get(ptep));
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(get_pgste);
#endif