diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-05 19:45:46 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-05 19:45:46 +0300 |
commit | 9e85ae6af6e907975f68d82ff127073ec024cb05 (patch) | |
tree | 3d3349b03da858e53ef8f8dce467e4a691eabf88 /arch/s390/mm | |
parent | 6caffe21ddeaae4a9d18d46eed2445a8d269a1fe (diff) | |
parent | fa41ba0d08de7c975c3e94d0067553f9b934221f (diff) | |
download | linux-9e85ae6af6e907975f68d82ff127073ec024cb05.tar.xz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
"The first part of the s390 updates for 4.14:
- Add machine type 0x3906 for IBM z14
- Add IBM z14 TLB flushing improvements for KVM guests
- Exploit the TOD clock epoch extension to provide a continuous TOD
clock afer 2042/09/17
- Add NIAI spinlock hints for IBM z14
- Rework the vmcp driver and use CMA for the respone buffer of z/VM
CP commands
- Drop some s390 specific asm headers and use the generic version
- Add block discard for DASD-FBA devices under z/VM
- Add average request times to DASD statistics
- A few of those constify patches which seem to be in vogue right now
- Cleanup and bug fixes"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (50 commits)
s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs
s390/dasd: Add discard support for FBA devices
s390/zcrypt: make CPRBX const
s390/uaccess: avoid mvcos jump label
s390/mm: use generic mm_hooks
s390/facilities: fix typo
s390/vmcp: simplify vmcp_response_free()
s390/topology: Remove the unused parent_node() macro
s390/dasd: Change unsigned long long to unsigned long
s390/smp: convert cpuhp_setup_state() return code to zero on success
s390: fix 'novx' early parameter handling
s390/dasd: add average request times to dasd statistics
s390/scm: use common completion path
s390/pci: log changes to uid checking
s390/vmcp: simplify vmcp_ioctl()
s390/vmcp: return -ENOTTY for unknown ioctl commands
s390/vmcp: split vmcp header file and move to uapi
s390/vmcp: make use of contiguous memory allocator
s390/cpcmd,vmcp: avoid GFP_DMA allocations
s390/vmcp: fix uaccess check and avoid undefined behavior
...
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/fault.c | 10 | ||||
-rw-r--r-- | arch/s390/mm/gmap.c | 163 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 60 | ||||
-rw-r--r-- | arch/s390/mm/page-states.c | 192 | ||||
-rw-r--r-- | arch/s390/mm/pageattr.c | 5 | ||||
-rw-r--r-- | arch/s390/mm/pgalloc.c | 12 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 154 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 47 |
8 files changed, 476 insertions, 167 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 14f25798b001..bdabb013537b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) pr_alert("AS:%016lx ", asce); switch (asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: - table = table + ((address >> 53) & 0x7ff); + table += (address & _REGION1_INDEX) >> _REGION1_SHIFT; if (bad_address(table)) goto bad; pr_cont("R1:%016lx ", *table); @@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION2: - table = table + ((address >> 42) & 0x7ff); + table += (address & _REGION2_INDEX) >> _REGION2_SHIFT; if (bad_address(table)) goto bad; pr_cont("R2:%016lx ", *table); @@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION3: - table = table + ((address >> 31) & 0x7ff); + table += (address & _REGION3_INDEX) >> _REGION3_SHIFT; if (bad_address(table)) goto bad; pr_cont("R3:%016lx ", *table); @@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_SEGMENT: - table = table + ((address >> 20) & 0x7ff); + table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; if (bad_address(table)) goto bad; pr_cont("S:%016lx ", *table); @@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) goto out; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); } - table = table + ((address >> 12) & 0xff); + table += (address & _PAGE_INDEX) >> _PAGE_SHIFT; if (bad_address(table)) goto bad; pr_cont("P:%016lx ", *table); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 4fb3d3cdb370..9e1494e3d849 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit) unsigned long *table; unsigned long etype, atype; - if (limit < (1UL << 31)) { - limit = (1UL << 31) - 1; + if (limit < _REGION3_SIZE) { + limit = _REGION3_SIZE - 1; atype = _ASCE_TYPE_SEGMENT; etype = _SEGMENT_ENTRY_EMPTY; - } else if (limit < (1UL << 42)) { - limit = (1UL << 42) - 1; + } else if (limit < _REGION2_SIZE) { + limit = _REGION2_SIZE - 1; atype = _ASCE_TYPE_REGION3; etype = _REGION3_ENTRY_EMPTY; - } else if (limit < (1UL << 53)) { - limit = (1UL << 53) - 1; + } else if (limit < _REGION1_SIZE) { + limit = _REGION1_SIZE - 1; atype = _ASCE_TYPE_REGION2; etype = _REGION2_ENTRY_EMPTY; } else { @@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit) spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); atomic_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) goto out_free; page->index = 0; @@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap) gmap_flush_tlb(gmap); /* Free all segment & region tables. */ list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); @@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); @@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, } spin_unlock(&gmap->guest_table_lock); if (page) - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return 0; } @@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) /* Create higher level tables in the gmap page table */ table = gmap->table; if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { - table += (gaddr >> 53) & 0x7ff; + table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000UL)) + gaddr & _REGION1_MASK)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { - table += (gaddr >> 42) & 0x7ff; + table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000UL)) + gaddr & _REGION2_MASK)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { - table += (gaddr >> 31) & 0x7ff; + table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000UL)) + gaddr & _REGION3_MASK)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } - table += (gaddr >> 20) & 0x7ff; + table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; /* Walk the parent mm page table */ mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); @@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = gmap->table; switch (gmap->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: - table += (gaddr >> 53) & 0x7ff; + table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if (level == 4) break; if (*table & _REGION_ENTRY_INVALID) @@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* Fallthrough */ case _ASCE_TYPE_REGION2: - table += (gaddr >> 42) & 0x7ff; + table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; if (level == 3) break; if (*table & _REGION_ENTRY_INVALID) @@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* Fallthrough */ case _ASCE_TYPE_REGION3: - table += (gaddr >> 31) & 0x7ff; + table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; if (level == 2) break; if (*table & _REGION_ENTRY_INVALID) @@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* Fallthrough */ case _ASCE_TYPE_SEGMENT: - table += (gaddr >> 20) & 0x7ff; + table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; if (level == 1) break; if (*table & _REGION_ENTRY_INVALID) return NULL; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); - table += (gaddr >> 12) & 0xff; + table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT; } return table; } @@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr) table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ if (!table || *table & _PAGE_INVALID) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1); + gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1); ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); } @@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, int i; BUG_ON(!gmap_is_shadow(sg)); - for (i = 0; i < 256; i++, raddr += 1UL << 12) + for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE) pgt[i] = _PAGE_INVALID; } @@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1); - sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); + sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); *ste = _SEGMENT_ENTRY_EMPTY; @@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; - for (i = 0; i < 2048; i++, raddr += 1UL << 20) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); @@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1); - r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); + r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); *r3e = _REGION3_ENTRY_EMPTY; @@ -1231,7 +1231,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) /* Free segment table */ page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } /** @@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; - for (i = 0; i < 2048; i++, raddr += 1UL << 31) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); @@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, /* Free segment table */ page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1); - r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); + r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); *r2e = _REGION2_ENTRY_EMPTY; @@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) /* Free region 3 table */ page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } /** @@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; - for (i = 0; i < 2048; i++, raddr += 1UL << 42) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); @@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, /* Free region 3 table */ page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) return; - gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1); - r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff)); + gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); + r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); *r1e = _REGION1_ENTRY_EMPTY; @@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) /* Free region 2 table */ page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } /** @@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, BUG_ON(!gmap_is_shadow(sg)); asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; - for (i = 0; i < 2048; i++, raddr += 1UL << 53) { + for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) continue; r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); @@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, /* Free region 2 table */ page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); list_del(&page->lru); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, /* protect after insertion, so it will get properly invalidated */ down_read(&parent->mm->mmap_sem); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096, + ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, PROT_READ, PGSTE_VSIE_BIT); up_read(&parent->mm->mmap_sem); spin_lock(&parent->shadow_lock); @@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, } spin_unlock(&sg->guest_table_lock); /* Make r2t read-only in parent gmap page table */ - raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1; + raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1; origin = r2t & _REGION_ENTRY_ORIGIN; - offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096; - len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; + len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); spin_lock(&sg->guest_table_lock); if (!rc) { @@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, return rc; out_free: spin_unlock(&sg->guest_table_lock); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return rc; } EXPORT_SYMBOL_GPL(gmap_shadow_r2t); @@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, } spin_unlock(&sg->guest_table_lock); /* Make r3t read-only in parent gmap page table */ - raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2; + raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2; origin = r3t & _REGION_ENTRY_ORIGIN; - offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096; - len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; + len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); spin_lock(&sg->guest_table_lock); if (!rc) { @@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, return rc; out_free: spin_unlock(&sg->guest_table_lock); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return rc; } EXPORT_SYMBOL_GPL(gmap_shadow_r3t); @@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL, 2); + page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, } spin_unlock(&sg->guest_table_lock); /* Make sgt read-only in parent gmap page table */ - raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3; + raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3; origin = sgt & _REGION_ENTRY_ORIGIN; - offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096; - len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; + len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); spin_lock(&sg->guest_table_lock); if (!rc) { @@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, return rc; out_free: spin_unlock(&sg->guest_table_lock); - __free_pages(page, 2); + __free_pages(page, CRST_ALLOC_ORDER); return rc; } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); @@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, } spin_unlock(&sg->guest_table_lock); /* Make pgt read-only in parent gmap page table (not the pgste) */ - raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT; + raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT; origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); spin_lock(&sg->guest_table_lock); @@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, } /* Check for top level table */ start = sg->orig_asce & _ASCE_ORIGIN; - end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096; + end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE; if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start && gaddr < end) { /* The complete shadow table has to go */ @@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, return; } /* Remove the page table tree from on specific entry */ - head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12); + head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); gmap_for_each_rmap_safe(rmap, rnext, head) { bits = rmap->raddr & _SHADOW_RMAP_MASK; raddr = rmap->raddr ^ bits; @@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - offset = offset * (4096 / sizeof(pte_t)); + offset = offset * (PAGE_SIZE / sizeof(pte_t)); rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); @@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm) } /* + * Remove all empty zero pages from the mapping for lazy refaulting + * - This must be called after mm->context.has_pgste is set, to avoid + * future creation of zero pages + * - This must be called after THP was enabled + */ +static int __zap_zero_pages(pmd_t *pmd, unsigned long start, + unsigned long end, struct mm_walk *walk) +{ + unsigned long addr; + + for (addr = start; addr != end; addr += PAGE_SIZE) { + pte_t *ptep; + spinlock_t *ptl; + + ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (is_zero_pfn(pte_pfn(*ptep))) + ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_unmap_unlock(ptep, ptl); + } + return 0; +} + +static inline void zap_zero_pages(struct mm_struct *mm) +{ + struct mm_walk walk = { .pmd_entry = __zap_zero_pages }; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); +} + +/* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) @@ -2137,6 +2168,7 @@ int s390_enable_sie(void) mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); + zap_zero_pages(mm); up_write(&mm->mmap_sem); return 0; } @@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); static int __s390_enable_skey(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) - ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8111694ce55a..3b567838b905 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -26,6 +26,7 @@ #include <linux/poison.h> #include <linux/initrd.h> #include <linux/export.h> +#include <linux/cma.h> #include <linux/gfp.h> #include <linux/memblock.h> #include <asm/processor.h> @@ -84,7 +85,7 @@ void __init paging_init(void) psw_t psw; init_mm.pgd = swapper_pg_dir; - if (VMALLOC_END > (1UL << 42)) { + if (VMALLOC_END > _REGION2_SIZE) { asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; pgd_type = _REGION2_ENTRY_EMPTY; } else { @@ -93,8 +94,7 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; - clear_table((unsigned long *) init_mm.pgd, pgd_type, - sizeof(unsigned long)*2048); + crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); /* enable virtual mapping in kernel mode */ @@ -137,6 +137,8 @@ void __init mem_init(void) free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ + cmma_init_nodat(); + mem_init_print_info(NULL); } @@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void) } #ifdef CONFIG_MEMORY_HOTPLUG + +#ifdef CONFIG_CMA + +/* Prevent memory blocks which contain cma regions from going offline */ + +struct s390_cma_mem_data { + unsigned long start; + unsigned long end; +}; + +static int s390_cma_check_range(struct cma *cma, void *data) +{ + struct s390_cma_mem_data *mem_data; + unsigned long start, end; + + mem_data = data; + start = cma_get_base(cma); + end = start + cma_get_size(cma); + if (end < mem_data->start) + return 0; + if (start >= mem_data->end) + return 0; + return -EBUSY; +} + +static int s390_cma_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct s390_cma_mem_data mem_data; + struct memory_notify *arg; + int rc = 0; + + arg = data; + mem_data.start = arg->start_pfn << PAGE_SHIFT; + mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT); + if (action == MEM_GOING_OFFLINE) + rc = cma_for_each_area(s390_cma_check_range, &mem_data); + return notifier_from_errno(rc); +} + +static struct notifier_block s390_cma_mem_nb = { + .notifier_call = s390_cma_mem_notifier, +}; + +static int __init s390_cma_mem_init(void) +{ + return register_memory_notifier(&s390_cma_mem_nb); +} +device_initcall(s390_cma_mem_init); + +#endif /* CONFIG_CMA */ + int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 69a7b01ae746..07fa7b8ae233 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -10,9 +10,10 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/mm.h> +#include <linux/memblock.h> #include <linux/gfp.h> #include <linux/init.h> - +#include <asm/facility.h> #include <asm/page-states.h> static int cmma_flag = 1; @@ -36,14 +37,16 @@ __setup("cmma=", cmma); static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; - register int rc asm("1") = -EOPNOTSUPP; + register int rc asm("1"); + /* test ESSA_GET_STATE */ asm volatile( - " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + " .insn rrf,0xb9ab0000,%1,%1,%2,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "+&d" (rc), "+&d" (tmp)); + : "=&d" (rc), "+&d" (tmp) + : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP)); return rc; } @@ -51,11 +54,26 @@ void __init cmma_init(void) { if (!cmma_flag) return; - if (cmma_test_essa()) + if (cmma_test_essa()) { cmma_flag = 0; + return; + } + if (test_facility(147)) + cmma_flag = 2; } -static inline void set_page_unstable(struct page *page, int order) +static inline unsigned char get_page_state(struct page *page) +{ + unsigned char state; + + asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (state) + : "a" (page_to_phys(page)), + "i" (ESSA_GET_STATE)); + return state & 0x3f; +} + +static inline void set_page_unused(struct page *page, int order) { int i, rc; @@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order) "i" (ESSA_SET_UNUSED)); } -void arch_free_page(struct page *page, int order) +static inline void set_page_stable_dat(struct page *page, int order) { - if (!cmma_flag) - return; - set_page_unstable(page, order); + int i, rc; + + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_STABLE)); } -static inline void set_page_stable(struct page *page, int order) +static inline void set_page_stable_nodat(struct page *page, int order) { int i, rc; @@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order) asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" : "=&d" (rc) : "a" (page_to_phys(page + i)), - "i" (ESSA_SET_STABLE)); + "i" (ESSA_SET_STABLE_NODAT)); +} + +static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd) || pmd_large(*pmd)) + continue; + page = virt_to_page(pmd_val(*pmd)); + set_bit(PG_arch_1, &page->flags); + } while (pmd++, addr = next, addr != end); +} + +static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + pud_t *pud; + int i; + + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none(*pud) || pud_large(*pud)) + continue; + if (!pud_folded(*pud)) { + page = virt_to_page(pud_val(*pud)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_pmd(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long next; + struct page *page; + p4d_t *p4d; + int i; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none(*p4d)) + continue; + if (!p4d_folded(*p4d)) { + page = virt_to_page(p4d_val(*p4d)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_pud(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + +static void mark_kernel_pgd(void) +{ + unsigned long addr, next; + struct page *page; + pgd_t *pgd; + int i; + + addr = 0; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, MODULES_END); + if (pgd_none(*pgd)) + continue; + if (!pgd_folded(*pgd)) { + page = virt_to_page(pgd_val(*pgd)); + for (i = 0; i < 3; i++) + set_bit(PG_arch_1, &page[i].flags); + } + mark_kernel_p4d(pgd, addr, next); + } while (pgd++, addr = next, addr != MODULES_END); +} + +void __init cmma_init_nodat(void) +{ + struct memblock_region *reg; + struct page *page; + unsigned long start, end, ix; + + if (cmma_flag < 2) + return; + /* Mark pages used in kernel page tables */ + mark_kernel_pgd(); + + /* Set all kernel pages not used for page tables to stable/no-dat */ + for_each_memblock(memory, reg) { + start = memblock_region_memory_base_pfn(reg); + end = memblock_region_memory_end_pfn(reg); + page = pfn_to_page(start); + for (ix = start; ix < end; ix++, page++) { + if (__test_and_clear_bit(PG_arch_1, &page->flags)) + continue; /* skip page table pages */ + if (!list_empty(&page->lru)) + continue; /* skip free pages */ + set_page_stable_nodat(page, 0); + } + } +} + +void arch_free_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_unused(page, order); } void arch_alloc_page(struct page *page, int order) { if (!cmma_flag) return; - set_page_stable(page, order); + if (cmma_flag < 2) + set_page_stable_dat(page, order); + else + set_page_stable_nodat(page, order); +} + +void arch_set_page_dat(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_stable_dat(page, order); +} + +void arch_set_page_nodat(struct page *page, int order) +{ + if (cmma_flag < 2) + return; + set_page_stable_nodat(page, order); +} + +int arch_test_page_nodat(struct page *page) +{ + unsigned char state; + + if (cmma_flag < 2) + return 0; + state = get_page_state(page); + return !!(state & 0x20); } void arch_set_page_states(int make_stable) @@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable(page, order); + set_page_stable_dat(page, 0); else - set_page_unstable(page, order); + set_page_unused(page, order); } } spin_unlock_irqrestore(&zone->lock, flags); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 180481589246..552f898dfa74 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -7,6 +7,7 @@ #include <asm/cacheflush.h> #include <asm/facility.h> #include <asm/pgtable.h> +#include <asm/pgalloc.h> #include <asm/page.h> #include <asm/set_memory.h> @@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) pud_t new; int i, ro, nx; - pm_dir = vmem_pmd_alloc(); + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); if (!pm_dir) return -ENOMEM; pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; @@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) return; } for (i = 0; i < nr; i++) { - __ptep_ipte(address, pte, IPTE_GLOBAL); + __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 18918e394ce4..c5b74dd61197 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) if (!page) return NULL; + arch_set_page_dat(page, 2); return (unsigned long *) page_to_phys(page); } @@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) int rc, notify; /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ - BUG_ON(mm->context.asce_limit < (1UL << 42)); + BUG_ON(mm->context.asce_limit < _REGION2_SIZE); if (end >= TASK_SIZE_MAX) return -ENOMEM; rc = 0; @@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) } spin_lock_bh(&mm->page_table_lock); pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit == (1UL << 42)) { + if (mm->context.asce_limit == _REGION2_SIZE) { crst_table_init(table, _REGION2_ENTRY_EMPTY); p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); mm->pgd = (pgd_t *) table; - mm->context.asce_limit = 1UL << 53; + mm->context.asce_limit = _REGION1_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; } else { @@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm) pgd_t *pgd; /* downgrade should only happen from 3 to 2 levels (compat only) */ - BUG_ON(mm->context.asce_limit != (1UL << 42)); + BUG_ON(mm->context.asce_limit != _REGION2_SIZE); if (current->active_mm == mm) { clear_user_asce(); @@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm) pgd = mm->pgd; mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->context.asce_limit = 1UL << 31; + mm->context.asce_limit = _REGION3_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; crst_table_free(mm, (unsigned long *) pgd); @@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) __free_page(page); return NULL; } + arch_set_page_dat(page, 0); /* Initialize page table */ table = (unsigned long *) page_to_phys(page); if (mm_alloc_pgste(mm)) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4a1f7366b17a..4198a71b8fdd 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -25,8 +25,49 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> +static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int nodat) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL || nodat) + opt |= IPTE_NODAT; + if (asce != -1UL) { + asce = asce ? : mm->context.asce; + opt |= IPTE_GUEST_ASCE; + } + __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL); + } else { + __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL); + } +} + +static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int nodat) +{ + unsigned long opt, asce; + + if (MACHINE_HAS_TLB_GUEST) { + opt = 0; + asce = READ_ONCE(mm->context.gmap_asce); + if (asce == 0UL || nodat) + opt |= IPTE_NODAT; + if (asce != -1UL) { + asce = asce ? : mm->context.asce; + opt |= IPTE_GUEST_ASCE; + } + __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL); + } else { + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + } +} + static inline pte_t ptep_flush_direct(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + int nodat) { pte_t old; @@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte(addr, ptep, IPTE_LOCAL); + ptep_ipte_local(mm, addr, ptep, nodat); else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep, nodat); atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + int nodat) { pte_t old; @@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + ptep_ipte_global(mm, addr, ptep, nodat); atomic_dec(&mm->context.flush_count); return old; } @@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_direct(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_direct(mm, addr, ptep, nodat); old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; @@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_lazy(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_lazy(mm, addr, ptep, nodat); old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; @@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, { pgste_t pgste; pte_t old; + int nodat; preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); - old = ptep_flush_lazy(mm, addr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + old = ptep_flush_lazy(mm, addr, ptep, nodat); if (mm_has_pgste(mm)) { pgste = pgste_update_all(old, pgste, mm); pgste_set(ptep, pgste); @@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(ptep_modify_prot_commit); +static inline void pmdp_idte_local(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_LOCAL); + else + __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); +} + +static inline void pmdp_idte_global(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); +} + static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { @@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte(addr, pmdp, IDTE_LOCAL); + pmdp_idte_local(mm, addr, pmdp); else - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); + pmdp_idte_global(mm, addr, pmdp); atomic_dec(&mm->context.flush_count); return old; } @@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(addr, pmdp, IDTE_GLOBAL); - else - __pmdp_csp(pmdp); + } else { + pmdp_idte_global(mm, addr, pmdp); + } atomic_dec(&mm->context.flush_count); return old; } @@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(pmdp_xchg_lazy); -static inline pud_t pudp_flush_direct(struct mm_struct *mm, - unsigned long addr, pud_t *pudp) +static inline void pudp_idte_local(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) { - pud_t old; + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_LOCAL); + else + __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL); +} - old = *pudp; - if (pud_val(old) & _REGION_ENTRY_INVALID) - return old; - if (!MACHINE_HAS_IDTE) { +static inline void pudp_idte_global(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + if (MACHINE_HAS_TLB_GUEST) + __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, + mm->context.asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); + else /* * Invalid bit position is the same for pmd and pud, so we can * re-use _pmd_csp() here */ __pmdp_csp((pmd_t *) pudp); +} + +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) return old; - } atomic_inc(&mm->context.flush_count); if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pudp_idte(addr, pudp, IDTE_LOCAL); + pudp_idte_local(mm, addr, pudp); else - __pudp_idte(addr, pudp, IDTE_GLOBAL); + pudp_idte_global(mm, addr, pudp); atomic_dec(&mm->context.flush_count); return old; } @@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, { pte_t entry; pgste_t pgste; - int pte_i, pte_p; + int pte_i, pte_p, nodat; pgste = pgste_get_lock(ptep); entry = *ptep; @@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, return -EAGAIN; } /* Change access rights and set pgste bit */ + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); if (prot == PROT_NONE && !pte_i) { - ptep_flush_direct(mm, addr, ptep); + ptep_flush_direct(mm, addr, ptep, nodat); pgste = pgste_update_all(entry, pgste, mm); pte_val(entry) |= _PAGE_INVALID; } if (prot == PROT_READ && !pte_p) { - ptep_flush_direct(mm, addr, ptep); + ptep_flush_direct(mm, addr, ptep, nodat); pte_val(entry) &= ~_PAGE_INVALID; pte_val(entry) |= _PAGE_PROTECT; } @@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) { pgste_t pgste; + int nodat; pgste = pgste_get_lock(ptep); /* notifier is called by the caller */ - ptep_flush_direct(mm, saddr, ptep); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + ptep_flush_direct(mm, saddr, ptep, nodat); /* don't touch the storage key - it belongs to parent pgste */ pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); pgste_set_unlock(ptep, pgste); @@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte_t *ptep; pte_t pte; bool dirty; + int nodat; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); @@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep, IPTE_GLOBAL); + nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); + ptep_ipte_global(mm, addr, ptep, nodat); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; else diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d8398962a723..c0af0d7b6e5f 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_alloc(size, size); } -static inline p4d_t *vmem_p4d_alloc(void) +void *vmem_crst_alloc(unsigned long val) { - p4d_t *p4d = NULL; + unsigned long *table; - p4d = vmem_alloc_pages(2); - if (!p4d) - return NULL; - clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); - return p4d; -} - -static inline pud_t *vmem_pud_alloc(void) -{ - pud_t *pud = NULL; - - pud = vmem_alloc_pages(2); - if (!pud) - return NULL; - clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4); - return pud; -} - -pmd_t *vmem_pmd_alloc(void) -{ - pmd_t *pmd = NULL; - - pmd = vmem_alloc_pages(2); - if (!pmd) - return NULL; - clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4); - return pmd; + table = vmem_alloc_pages(CRST_ALLOC_ORDER); + if (table) + crst_table_init(table, val); + return table; } pte_t __ref *vmem_pte_alloc(void) @@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size) while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { - p4_dir = vmem_p4d_alloc(); + p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); if (!p4_dir) goto out; pgd_populate(&init_mm, pg_dir, p4_dir); } p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { - pu_dir = vmem_pud_alloc(); + pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); if (!pu_dir) goto out; p4d_populate(&init_mm, p4_dir, pu_dir); @@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) continue; } if (pud_none(*pu_dir)) { - pm_dir = vmem_pmd_alloc(); + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); if (!pm_dir) goto out; pud_populate(&init_mm, pu_dir, pm_dir); @@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { - p4_dir = vmem_p4d_alloc(); + p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); if (!p4_dir) goto out; pgd_populate(&init_mm, pg_dir, p4_dir); @@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { - pu_dir = vmem_pud_alloc(); + pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); if (!pu_dir) goto out; p4d_populate(&init_mm, p4_dir, pu_dir); @@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { - pm_dir = vmem_pmd_alloc(); + pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); if (!pm_dir) goto out; pud_populate(&init_mm, pu_dir, pm_dir); |