diff options
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/kvm_host.h | 8 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu_context.h | 19 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 16 | ||||
-rw-r--r-- | arch/s390/include/asm/processor.h | 2 | ||||
-rw-r--r-- | arch/s390/include/uapi/asm/socket.h | 2 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 4 | ||||
-rw-r--r-- | arch/s390/kvm/diag.c | 17 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 27 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 10 | ||||
-rw-r--r-- | arch/s390/kvm/priv.c | 32 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 24 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 188 |
14 files changed, 209 insertions, 146 deletions
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3238d4004e84..e87ecaa2c569 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -274,6 +274,14 @@ struct kvm_arch{ int css_support; }; +#define KVM_HVA_ERR_BAD (-1UL) +#define KVM_HVA_ERR_RO_BAD (-2UL) + +static inline bool kvm_is_error_hva(unsigned long addr) +{ + return IS_ERR_VALUE(addr); +} + extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 6340178748bf..ff132ac64ddd 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -12,8 +12,6 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - /* Cloned contexts will be created with extended page tables. */ - unsigned int alloc_pgste:1; /* The mmu context has extended page tables. */ unsigned int has_pgste:1; } mm_context_t; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 084e7755ed9b..4fb67a0e4ddf 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm && current->mm->context.alloc_pgste) { - /* - * alloc_pgste indicates, that any NEW context will be created - * with extended page tables. The old context is unchanged. The - * page table allocation and the page table operations will - * look at has_pgste to distinguish normal and extended page - * tables. The only way to create extended page tables is to - * set alloc_pgste and then create a new context (e.g. dup_mm). - * The page table allocation is called after init_new_context - * and if has_pgste is set, it will create extended page - * tables. - */ - mm->context.has_pgste = 1; - mm->context.alloc_pgste = 1; - } else { - mm->context.has_pgste = 0; - mm->context.alloc_pgste = 0; - } + mm->context.has_pgste = 0; mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0ea4e591fa78..7a60bb93e83c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1361,13 +1361,25 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ +static inline void pmdp_flush_lazy(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if ((atomic_read(&mm->context.attach_count) & 0xffff) > active) + __pmd_idte(address, pmdp); + else + mm->context.flush_mm = 1; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6b499870662f..83c85c217f5c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -43,6 +43,7 @@ extern void execve_tail(void); #ifndef CONFIG_64BIT #define TASK_SIZE (1UL << 31) +#define TASK_MAX_SIZE (1UL << 31) #define TASK_UNMAPPED_BASE (1UL << 30) #else /* CONFIG_64BIT */ @@ -51,6 +52,7 @@ extern void execve_tail(void); #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_MAX_SIZE (1UL << 53) #endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 2dacb306835c..92494494692e 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_BUSY_POLL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095104c9..497451ec5e26 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,10 +719,6 @@ static void reserve_oldmem(void) } create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); - if (OLDMEM_BASE + OLDMEM_SIZE == real_size) - saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; - else - saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 3074475c8ae0..3a74d8af0d69 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) * - gpr 3 contains the virtqueue index (passed as datamatch) + * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], - 8, &vcpu->run->s.regs.gprs[3]); + ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); srcu_read_unlock(&vcpu->kvm->srcu, idx); - /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + + /* + * Return cookie in gpr 2, but don't overwrite the register if the + * diagnose will be handled by userspace. + */ + if (ret != -EOPNOTSUPP) + vcpu->run->s.regs.gprs[2] = ret; + /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ return ret < 0 ? ret : 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ba694d2ba51e..ac8e6670c551 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -84,9 +85,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static unsigned long long *facilities; +unsigned long *vfacilities; static struct gmap_notifier gmap_notifier; +/* test availability of vfacility */ +static inline int test_vfacility(unsigned long nr) +{ + return __test_facility(nr, (void *) vfacilities); +} + /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) { @@ -387,7 +394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; - vcpu->arch.sie_block->fac = (int) (long) facilities; + vcpu->arch.sie_block->fac = (int) (long) vfacilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); @@ -1056,6 +1063,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) return 0; } +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1122,20 +1133,20 @@ static int __init kvm_s390_init(void) * to hold the maximum amount of facilities. On the other hand, we * only set facilities that are known to work in KVM. */ - facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); - if (!facilities) { + vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!vfacilities) { kvm_exit(); return -ENOMEM; } - memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff82fff3f47c0000ULL; - facilities[1] &= 0x001c000000000000ULL; + memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); + vfacilities[0] &= 0xff82fff3f47c0000UL; + vfacilities[1] &= 0x001c000000000000UL; return 0; } static void __exit kvm_s390_exit(void) { - free_page((unsigned long) facilities); + free_page((unsigned long) vfacilities); kvm_exit(); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 028ca9fd2158..dc99f1ca4267 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -24,6 +24,9 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); +/* declare vfacilities extern */ +extern unsigned long *vfacilities; + /* negativ values are error codes, positive values for internal conditions */ #define SIE_INTERCEPT_RERUNVCPU (1<<0) #define SIE_INTERCEPT_UCONTROL (1<<1) @@ -112,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +/* Set the condition code in the guest program status word */ +static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) +{ + vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); + vcpu->arch.sie_block->gpsw.mask |= cc << 44; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0da3e6eb6be6..8f8d8ee9b1fb 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -163,8 +163,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) kfree(inti); no_interrupt: /* Set condition code and we're done. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, cc); return 0; } @@ -219,15 +218,13 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) * Set condition code 3 to stop the guest from issueing channel * I/O instructions. */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, 3); return 0; } } static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; @@ -235,15 +232,13 @@ static int handle_stfl(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), - &facility_list, sizeof(facility_list)); + vfacilities, 4); if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); + VCPU_EVENT(vcpu, 5, "store facility list value %x", + *(unsigned int *) vfacilities); + trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); return 0; } @@ -386,7 +381,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (fc > 3) { - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ + kvm_s390_set_psw_cc(vcpu, 3); return 0; } @@ -396,7 +391,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ + kvm_s390_set_psw_cc(vcpu, 0); return 0; } @@ -430,12 +425,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu) } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + kvm_s390_set_psw_cc(vcpu, 0); vcpu->run->s.regs.gprs[0] = 0; return 0; out_no_data: - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + kvm_s390_set_psw_cc(vcpu, 3); out_exception: free_page(mem); return rc; @@ -493,12 +487,12 @@ static int handle_epsw(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ - vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; if (reg2) { - vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; vcpu->run->s.regs.gprs[reg2] |= - vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; } return 0; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4008f2..ce36ea80e4f9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -135,30 +135,17 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); @@ -166,13 +153,14 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 06bafec00278..40023290ee5b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } @@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = s390_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = s390_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 17bf4d3d303a..967d0bf1c059 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; - if (len == 0 || from + len > PGDIR_SIZE || + if (len == 0 || from + len > TASK_MAX_SIZE || from + len < from || to + len < to) return -EINVAL; @@ -731,6 +731,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) spin_unlock(&gmap_notifier_lock); } +static inline int page_table_with_pgste(struct page *page) +{ + return atomic_read(&page->_mapcount) == 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -750,7 +755,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, mp->vmaddr = vmaddr & PMD_MASK; INIT_LIST_HEAD(&mp->mapper); page->index = (unsigned long) mp; - atomic_set(&page->_mapcount, 3); + atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); @@ -821,6 +826,11 @@ EXPORT_SYMBOL(set_guest_storage_key); #else /* CONFIG_PGSTE */ +static inline int page_table_with_pgste(struct page *page) +{ + return 0; +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { @@ -897,12 +907,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); return page_table_free_pgste(table); } /* Free 1K/2K page table fragment of a 4K page */ - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) @@ -940,14 +950,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) unsigned int bit, mask; mm = tlb->mm; - if (mm_has_pgste(mm)) { + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { gmap_disconnect_pgtable(mm, table); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; } bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) list_del(&page->lru); @@ -1033,36 +1043,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -void thp_split_vma(struct vm_area_struct *vma) +static inline void thp_split_vma(struct vm_area_struct *vma) { unsigned long addr; - struct page *page; - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - page = follow_page(vma, addr, FOLL_SPLIT); - } + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); } -void thp_split_mm(struct mm_struct *mm) +static inline void thp_split_mm(struct mm_struct *mm) { - struct vm_area_struct *vma = mm->mmap; + struct vm_area_struct *vma; - while (vma != NULL) { + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { thp_split_vma(vma); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; - vma = vma->vm_next; } + mm->def_flags |= VM_NOHUGEPAGE; +} +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, + struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + unsigned long next, *table, *new; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); +again: + if (pmd_none_or_clear_bad(pmd)) + continue; + table = (unsigned long *) pmd_deref(*pmd); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) + continue; + /* Allocate new page table with pgstes */ + new = page_table_alloc_pgste(mm, addr); + if (!new) { + mm->context.has_pgste = 0; + continue; + } + spin_lock(&mm->page_table_lock); + if (likely((unsigned long *) pmd_deref(*pmd) == table)) { + /* Nuke pmd entry pointing to the "short" page table */ + pmdp_flush_lazy(mm, addr, pmd); + pmd_clear(pmd); + /* Copy ptes from old table to new table */ + memcpy(new, table, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + /* Establish new table */ + pmd_populate(mm, pmd, (pte_t *) new); + /* Free old table with rcu, there might be a walker! */ + page_table_free_rcu(tlb, table); + new = NULL; + } + spin_unlock(&mm->page_table_lock); + if (new) { + page_table_free_pgste(new); + goto again; + } + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, + struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_realloc_pmd(tlb, mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + + return addr; +} + +static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_realloc_pud(tlb, mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + /* * switch on pgstes for its userspace process (for kvm) */ int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm, *old_mm; + struct mm_struct *mm = tsk->mm; + struct mmu_gather tlb; /* Do we have switched amode? If no, we cannot do sie */ if (s390_user_mode == HOME_SPACE_MODE) @@ -1072,57 +1166,16 @@ int s390_enable_sie(void) if (mm_has_pgste(tsk->mm)) return 0; - /* lets check if we are allowed to replace the mm */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - task_unlock(tsk); - return -EINVAL; - } - task_unlock(tsk); - - /* we copy the mm and let dup_mm create the page tables with_pgstes */ - tsk->mm->context.alloc_pgste = 1; - /* make sure that both mms have a correct rss state */ - sync_mm_rss(tsk->mm); - mm = dup_mm(tsk); - tsk->mm->context.alloc_pgste = 0; - if (!mm) - return -ENOMEM; - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE + down_write(&mm->mmap_sem); /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); - mm->def_flags |= VM_NOHUGEPAGE; -#endif - - /* Now lets check again if something happened */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - mmput(mm); - task_unlock(tsk); - return -EINVAL; - } - - /* ok, we are alone. No ptrace, no threads, etc. */ - old_mm = tsk->mm; - tsk->mm = tsk->active_mm = mm; - preempt_disable(); - update_mm(mm, tsk); - atomic_inc(&mm->context.attach_count); - atomic_dec(&old_mm->context.attach_count); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - preempt_enable(); - task_unlock(tsk); - mmput(old_mm); - return 0; + /* Reallocate the page tables with pgstes */ + mm->context.has_pgste = 1; + tlb_gather_mmu(&tlb, mm, 0); + page_table_realloc(&tlb, mm, 0, TASK_SIZE); + tlb_finish_mmu(&tlb, 0, -1); + up_write(&mm->mmap_sem); + return mm->context.has_pgste ? 0 : -ENOMEM; } EXPORT_SYMBOL_GPL(s390_enable_sie); @@ -1165,7 +1218,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -1179,7 +1233,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; |