diff options
Diffstat (limited to 'arch/s390')
33 files changed, 464 insertions, 530 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ff2d2371b2e9..c03fef7a9c22 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -2,7 +2,7 @@ config MMU def_bool y config ZONE_DMA - def_bool y if 64BIT + def_bool y config LOCKDEP_SUPPORT def_bool y @@ -89,6 +89,7 @@ config S390 select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select HAVE_RCU_TABLE_FREE if SMP select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -578,6 +579,7 @@ config S390_GUEST def_bool y prompt "s390 guest support for KVM (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL + select VIRTUALIZATION select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index e43fe7537031..f7d3dc555bdb 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -92,9 +92,7 @@ static void appldata_get_mem_data(void *data) mem_data->pswpin = ev[PSWPIN]; mem_data->pswpout = ev[PSWPOUT]; mem_data->pgalloc = ev[PGALLOC_NORMAL]; -#ifdef CONFIG_ZONE_DMA mem_data->pgalloc += ev[PGALLOC_DMA]; -#endif mem_data->pgfault = ev[PGFAULT]; mem_data->pgmajfault = ev[PGMAJFAULT]; diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 8a096b83f51f..0e3b35f96be1 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -14,10 +14,12 @@ #ifndef _S390_DELAY_H #define _S390_DELAY_H -extern void __udelay(unsigned long long usecs); -extern void udelay_simple(unsigned long long usecs); -extern void __delay(unsigned long loops); +void __ndelay(unsigned long long nsecs); +void __udelay(unsigned long long usecs); +void udelay_simple(unsigned long long usecs); +void __delay(unsigned long loops); +#define ndelay(n) __ndelay((unsigned long long) (n)) #define udelay(n) __udelay((unsigned long long) (n)) #define mdelay(n) __udelay((unsigned long long) (n) * 1000) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 1544b90bd6d6..ba7b01c726a3 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -2,6 +2,7 @@ #define _ASM_IRQ_H #include <linux/hardirq.h> +#include <linux/types.h> enum interruption_class { EXTERNAL_INTERRUPT, @@ -31,4 +32,11 @@ enum interruption_class { NR_IRQS, }; +typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); + +int register_external_interrupt(u16 code, ext_int_handler_t handler); +int unregister_external_interrupt(u16 code, ext_int_handler_t handler); +void service_subclass_irq_register(void); +void service_subclass_irq_unregister(void); + #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f6314af3b354..38e71ebcd3c2 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -17,15 +17,15 @@ #include <linux/gfp.h> #include <linux/mm.h> -#define check_pgt_cache() do {} while (0) - unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); -void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mm_struct *, unsigned long *); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +void page_table_free_rcu(struct mmu_gather *, unsigned long *); +void __tlb_remove_table(void *_table); +#endif static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c4773a2ef3d3..801fbe1d837d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -293,19 +293,6 @@ extern unsigned long VMALLOC_START; * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. */ -/* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf000000000000000UL -#define RCP_FP_BIT 0x0800000000000000UL -#define RCP_PCL_BIT 0x0080000000000000UL -#define RCP_HR_BIT 0x0040000000000000UL -#define RCP_HC_BIT 0x0020000000000000UL -#define RCP_GR_BIT 0x0004000000000000UL -#define RCP_GC_BIT 0x0002000000000000UL - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x0000800000000000UL -#define KVM_UC_BIT 0x0000400000000000UL - #ifndef __s390x__ /* Bits in the segment table address-space-control-element */ @@ -325,6 +312,19 @@ extern unsigned long VMALLOC_START; #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +/* Page status table bits for virtualization */ +#define RCP_ACC_BITS 0xf0000000UL +#define RCP_FP_BIT 0x08000000UL +#define RCP_PCL_BIT 0x00800000UL +#define RCP_HR_BIT 0x00400000UL +#define RCP_HC_BIT 0x00200000UL +#define RCP_GR_BIT 0x00040000UL +#define RCP_GC_BIT 0x00020000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x00008000UL +#define KVM_UC_BIT 0x00004000UL + #else /* __s390x__ */ /* Bits in the segment/region table address-space-control-element */ @@ -367,6 +367,19 @@ extern unsigned long VMALLOC_START; #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +/* Page status table bits for virtualization */ +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL + #endif /* __s390x__ */ /* @@ -577,16 +590,16 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn, bits; + unsigned long address, bits; unsigned char skey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - skey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ if (bits) { skey ^= bits; - page_set_storage_key(pfn, skey, 1); + page_set_storage_key(address, skey, 1); } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ @@ -628,16 +641,16 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn; + unsigned long address; unsigned long okey, nkey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - okey = nkey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; if (okey != nkey) - page_set_storage_key(pfn, nkey, 1); + page_set_storage_key(address, nkey, 1); #endif } diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 350e7ee5952d..15c97625df8d 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -139,110 +139,47 @@ struct slib { struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(2048))); -/** - * struct sbal_flags - storage block address list flags - * @last: last entry - * @cont: contiguous storage - * @frag: fragmentation - */ -struct sbal_flags { - u8 : 1; - u8 last : 1; - u8 cont : 1; - u8 : 1; - u8 frag : 2; - u8 : 2; -} __attribute__ ((packed)); - -#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL -#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL -#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL -#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL -#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL +#define SBAL_EFLAGS_LAST_ENTRY 0x40 +#define SBAL_EFLAGS_CONTIGUOUS 0x20 +#define SBAL_EFLAGS_FIRST_FRAG 0x04 +#define SBAL_EFLAGS_MIDDLE_FRAG 0x08 +#define SBAL_EFLAGS_LAST_FRAG 0x0c +#define SBAL_EFLAGS_MASK 0x6f -#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL +#define SBAL_SFLAGS0_PCI_REQ 0x40 +#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20 /* Awesome OpenFCP extensions */ -#define SBAL_FLAGS0_TYPE_STATUS 0x00UL -#define SBAL_FLAGS0_TYPE_WRITE 0x08UL -#define SBAL_FLAGS0_TYPE_READ 0x10UL -#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL -#define SBAL_FLAGS0_MORE_SBALS 0x04UL -#define SBAL_FLAGS0_COMMAND 0x02UL -#define SBAL_FLAGS0_LAST_SBAL 0x00UL -#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS -#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_PCI 0x40 - -/** - * struct sbal_sbalf_0 - sbal flags for sbale 0 - * @pci: PCI indicator - * @cont: data continuation - * @sbtype: storage-block type (FCP) - */ -struct sbal_sbalf_0 { - u8 : 1; - u8 pci : 1; - u8 cont : 1; - u8 sbtype : 2; - u8 : 3; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_1 - sbal flags for sbale 1 - * @key: storage key - */ -struct sbal_sbalf_1 { - u8 : 4; - u8 key : 4; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_14 - sbal flags for sbale 14 - * @erridx: error index - */ -struct sbal_sbalf_14 { - u8 : 4; - u8 erridx : 4; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_15 - sbal flags for sbale 15 - * @reason: reason for error state - */ -struct sbal_sbalf_15 { - u8 reason; -} __attribute__ ((packed)); - -/** - * union sbal_sbalf - storage block address list flags - * @i0: sbalf0 - * @i1: sbalf1 - * @i14: sbalf14 - * @i15: sblaf15 - * @value: raw value - */ -union sbal_sbalf { - struct sbal_sbalf_0 i0; - struct sbal_sbalf_1 i1; - struct sbal_sbalf_14 i14; - struct sbal_sbalf_15 i15; - u8 value; -}; +#define SBAL_SFLAGS0_TYPE_STATUS 0x00 +#define SBAL_SFLAGS0_TYPE_WRITE 0x08 +#define SBAL_SFLAGS0_TYPE_READ 0x10 +#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18 +#define SBAL_SFLAGS0_MORE_SBALS 0x04 +#define SBAL_SFLAGS0_COMMAND 0x02 +#define SBAL_SFLAGS0_LAST_SBAL 0x00 +#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND +#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS +#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND) /** * struct qdio_buffer_element - SBAL entry - * @flags: flags + * @eflags: SBAL entry flags + * @scount: SBAL count + * @sflags: whole SBAL flags * @length: length * @addr: address */ struct qdio_buffer_element { - u32 flags; + u8 eflags; + /* private: */ + u8 res1; + /* public: */ + u8 scount; + u8 sflags; u32 length; #ifdef CONFIG_32BIT /* private: */ - void *reserved; + void *res2; /* public: */ #endif void *addr; diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h deleted file mode 100644 index 080876d5f196..000000000000 --- a/arch/s390/include/asm/s390_ext.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright IBM Corp. 1999,2010 - * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#ifndef _S390_EXTINT_H -#define _S390_EXTINT_H - -#include <linux/types.h> - -typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); - -int register_external_interrupt(__u16 code, ext_int_handler_t handler); -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler); - -#endif /* _S390_EXTINT_H */ diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h deleted file mode 100644 index dc75c616eafe..000000000000 --- a/arch/s390/include/asm/suspend.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_S390_SUSPEND_H -#define __ASM_S390_SUSPEND_H - -static inline int arch_prepare_suspend(void) -{ - return 0; -} - -#endif - diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 77eee5477a52..c687a2c83462 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -26,67 +26,60 @@ #include <linux/swap.h> #include <asm/processor.h> #include <asm/pgalloc.h> -#include <asm/smp.h> #include <asm/tlbflush.h> struct mmu_gather { struct mm_struct *mm; +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + struct mmu_table_batch *batch; +#endif unsigned int fullmm; - unsigned int nr_ptes; - unsigned int nr_pxds; - unsigned int max; - void **array; - void *local[8]; + unsigned int need_flush; }; -static inline void __tlb_alloc_page(struct mmu_gather *tlb) -{ - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +struct mmu_table_batch { + struct rcu_head rcu; + unsigned int nr; + void *tables[0]; +}; - if (addr) { - tlb->array = (void *) addr; - tlb->max = PAGE_SIZE / sizeof(void *); - } -} +#define MAX_TABLE_BATCH \ + ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) + +extern void tlb_table_flush(struct mmu_gather *tlb); +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +#endif static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) { tlb->mm = mm; - tlb->max = ARRAY_SIZE(tlb->local); - tlb->array = tlb->local; tlb->fullmm = full_mm_flush; + tlb->need_flush = 0; +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif if (tlb->fullmm) __tlb_flush_mm(mm); - else - __tlb_alloc_page(tlb); - tlb->nr_ptes = 0; - tlb->nr_pxds = tlb->max; } static inline void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max)) - __tlb_flush_mm(tlb->mm); - while (tlb->nr_ptes > 0) - page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); - while (tlb->nr_pxds < tlb->max) - crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); + if (!tlb->need_flush) + return; + tlb->need_flush = 0; + __tlb_flush_mm(tlb->mm); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); - - rcu_table_freelist_finish(); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - if (tlb->array != tlb->local) - free_pages((unsigned long) tlb->array, 0); } /* @@ -112,12 +105,11 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - if (!tlb->fullmm) { - tlb->array[tlb->nr_ptes++] = pte; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb); - } else - page_table_free(tlb->mm, (unsigned long *) pte); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + if (!tlb->fullmm) + return page_table_free_rcu(tlb, (unsigned long *) pte); +#endif + page_table_free(tlb->mm, (unsigned long *) pte); } /* @@ -133,12 +125,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 31)) return; - if (!tlb->fullmm) { - tlb->array[--tlb->nr_pxds] = pmd; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb); - } else - crst_table_free(tlb->mm, (unsigned long *) pmd); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + if (!tlb->fullmm) + return tlb_remove_table(tlb, pmd); +#endif + crst_table_free(tlb->mm, (unsigned long *) pmd); #endif } @@ -155,12 +146,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 42)) return; - if (!tlb->fullmm) { - tlb->array[--tlb->nr_pxds] = pud; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb); - } else - crst_table_free(tlb->mm, (unsigned long *) pud); +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + if (!tlb->fullmm) + return tlb_remove_table(tlb, pud); +#endif + crst_table_free(tlb->mm, (unsigned long *) pud); #endif } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index c5338834ddbd..005d77d8ae2a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -7,7 +7,7 @@ extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; -static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; } @@ -21,7 +21,7 @@ static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) extern unsigned char cpu_book_id[NR_CPUS]; extern cpumask_t cpu_book_map[NR_CPUS]; -static inline const struct cpumask *cpu_book_mask(unsigned int cpu) +static inline const struct cpumask *cpu_book_mask(int cpu) { return &cpu_book_map[cpu]; } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2d9ea11f919a..2b23885e81e9 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -49,12 +49,13 @@ #define segment_eq(a,b) ((a).ar4 == (b).ar4) +#define __access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + 1; \ +}) -static inline int __access_ok(const void __user *addr, unsigned long size) -{ - return 1; -} -#define access_ok(type,addr,size) __access_ok(addr,size) +#define access_ok(type, addr, size) __access_ok(addr, size) /* * The exception table consists of pairs of addresses: the first is the diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 9208e69245a0..404bdb9671b4 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -276,7 +276,8 @@ #define __NR_open_by_handle_at 336 #define __NR_clock_adjtime 337 #define __NR_syncfs 338 -#define NR_syscalls 339 +#define __NR_setns 339 +#define NR_syscalls 340 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 5ff15dacb571..df3732249baa 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -20,10 +20,10 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ - processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ - vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o +obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ + processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \ + debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ + sysinfo.o jump_label.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 1dc96ea08fa8..1f5eb789c3a7 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1904,3 +1904,9 @@ compat_sys_clock_adjtime_wrapper: sys_syncfs_wrapper: lgfr %r2,%r2 # int jg sys_syncfs + + .globl sys_setns_wrapper +sys_setns_wrapper: + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_setns diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 3d4a78fc1adc..1ca3d1d6a86c 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -30,9 +30,9 @@ #include <asm/atomic.h> #include <asm/mathemu.h> #include <asm/cpcmd.h> -#include <asm/s390_ext.h> #include <asm/lowcore.h> #include <asm/debug.h> +#include <asm/irq.h> #ifndef CONFIG_64BIT #define ONELONG "%08lx: " diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index e204f9597aaf..e3264f6a9720 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -1,19 +1,28 @@ /* - * Copyright IBM Corp. 2004,2010 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Thomas Spatzier (tspat@de.ibm.com) + * Copyright IBM Corp. 2004,2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Thomas Spatzier <tspat@de.ibm.com>, * * This file contains interrupt related functions. */ -#include <linux/module.h> -#include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/seq_file.h> -#include <linux/cpu.h> #include <linux/proc_fs.h> #include <linux/profile.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> +#include <asm/irq.h> +#include "entry.h" struct irq_class { char *name; @@ -82,8 +91,7 @@ int show_interrupts(struct seq_file *p, void *v) * For compatibilty only. S/390 specific setup of interrupts et al. is done * much later in init_channel_subsystem(). */ -void __init -init_IRQ(void) +void __init init_IRQ(void) { /* nothing... */ } @@ -134,3 +142,116 @@ void init_irq_proc(void) create_prof_cpu_mask(root_irq_dir); } #endif + +/* + * ext_int_hash[index] is the start of the list for all external interrupts + * that hash to this index. With the current set of external interrupts + * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000 + * iucv and 0x2603 pfault) this is always the first element. + */ + +struct ext_int_info { + struct ext_int_info *next; + ext_int_handler_t handler; + u16 code; +}; + +static struct ext_int_info *ext_int_hash[256]; + +static inline int ext_hash(u16 code) +{ + return (code + (code >> 9)) & 0xff; +} + +int register_external_interrupt(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + int index; + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; + index = ext_hash(code); + p->next = ext_int_hash[index]; + ext_int_hash[index] = p; + return 0; +} +EXPORT_SYMBOL(register_external_interrupt); + +int unregister_external_interrupt(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p, *q; + int index; + + index = ext_hash(code); + q = NULL; + p = ext_int_hash[index]; + while (p) { + if (p->code == code && p->handler == handler) + break; + q = p; + p = p->next; + } + if (!p) + return -ENOENT; + if (q) + q->next = p->next; + else + ext_int_hash[index] = p->next; + kfree(p); + return 0; +} +EXPORT_SYMBOL(unregister_external_interrupt); + +void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, + unsigned int param32, unsigned long param64) +{ + struct pt_regs *old_regs; + unsigned short code; + struct ext_int_info *p; + int index; + + code = (unsigned short) ext_int_code; + old_regs = set_irq_regs(regs); + s390_idle_check(regs, S390_lowcore.int_clock, + S390_lowcore.async_enter_timer); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; + if (code != 0x1004) + __get_cpu_var(s390_idle).nohz_delay = 1; + index = ext_hash(code); + for (p = ext_int_hash[index]; p; p = p->next) { + if (likely(p->code == code)) + p->handler(ext_int_code, param32, param64); + } + irq_exit(); + set_irq_regs(old_regs); +} + +static DEFINE_SPINLOCK(sc_irq_lock); +static int sc_irq_refcount; + +void service_subclass_irq_register(void) +{ + spin_lock(&sc_irq_lock); + if (!sc_irq_refcount) + ctl_set_bit(0, 9); + sc_irq_refcount++; + spin_unlock(&sc_irq_lock); +} +EXPORT_SYMBOL(service_subclass_irq_register); + +void service_subclass_irq_unregister(void) +{ + spin_lock(&sc_irq_lock); + sc_irq_refcount--; + if (!sc_irq_refcount) + ctl_clear_bit(0, 9); + spin_unlock(&sc_irq_lock); +} +EXPORT_SYMBOL(service_subclass_irq_unregister); diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c deleted file mode 100644 index 185029919c4d..000000000000 --- a/arch/s390/kernel/s390_ext.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright IBM Corp. 1999,2010 - * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#include <linux/kernel_stat.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/ftrace.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <asm/s390_ext.h> -#include <asm/irq_regs.h> -#include <asm/cputime.h> -#include <asm/lowcore.h> -#include <asm/irq.h> -#include "entry.h" - -struct ext_int_info { - struct ext_int_info *next; - ext_int_handler_t handler; - __u16 code; -}; - -/* - * ext_int_hash[index] is the start of the list for all external interrupts - * that hash to this index. With the current set of external interrupts - * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000 - * iucv and 0x2603 pfault) this is always the first element. - */ -static struct ext_int_info *ext_int_hash[256]; - -static inline int ext_hash(__u16 code) -{ - return (code + (code >> 9)) & 0xff; -} - -int register_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - struct ext_int_info *p; - int index; - - p = kmalloc(sizeof(*p), GFP_ATOMIC); - if (!p) - return -ENOMEM; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} -EXPORT_SYMBOL(register_external_interrupt); - -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - struct ext_int_info *p, *q; - int index; - - index = ext_hash(code); - q = NULL; - p = ext_int_hash[index]; - while (p) { - if (p->code == code && p->handler == handler) - break; - q = p; - p = p->next; - } - if (!p) - return -ENOENT; - if (q) - q->next = p->next; - else - ext_int_hash[index] = p->next; - kfree(p); - return 0; -} -EXPORT_SYMBOL(unregister_external_interrupt); - -void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, - unsigned int param32, unsigned long param64) -{ - struct pt_regs *old_regs; - unsigned short code; - struct ext_int_info *p; - int index; - - code = (unsigned short) ext_int_code; - old_regs = set_irq_regs(regs); - s390_idle_check(regs, S390_lowcore.int_clock, - S390_lowcore.async_enter_timer); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) - /* Serve timer interrupts first. */ - clock_comparator_work(); - kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; - if (code != 0x1004) - __get_cpu_var(s390_idle).nohz_delay = 1; - index = ext_hash(code); - for (p = ext_int_hash[index]; p; p = p->next) { - if (likely(p->code == code)) - p->handler(ext_int_code, param32, param64); - } - irq_exit(); - set_irq_regs(old_regs); -} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f8e85ecbc459..1d55c95f617c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -44,7 +44,6 @@ #include <asm/sigp.h> #include <asm/pgalloc.h> #include <asm/irq.h> -#include <asm/s390_ext.h> #include <asm/cpcmd.h> #include <asm/tlbflush.h> #include <asm/timer.h> @@ -263,7 +262,7 @@ void smp_ctl_set_bit(int cr, int bit) memset(&parms.orvals, 0, sizeof(parms.orvals)); memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.orvals[cr] = 1 << bit; + parms.orvals[cr] = 1UL << bit; on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -277,7 +276,7 @@ void smp_ctl_clear_bit(int cr, int bit) memset(&parms.orvals, 0, sizeof(parms.orvals)); memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.andvals[cr] = ~(1L << bit); + parms.andvals[cr] = ~(1UL << bit); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9c65fd4ddce0..6ee39ef8fe4a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -347,3 +347,4 @@ SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrappe SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper) SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) +SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a59557f1fb5f..dff933065ab6 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -41,7 +41,6 @@ #include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/delay.h> -#include <asm/s390_ext.h> #include <asm/div64.h> #include <asm/vdso.h> #include <asm/irq.h> diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 2eafb8c7a746..0cd340b72632 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -17,7 +17,6 @@ #include <linux/smp.h> #include <linux/cpuset.h> #include <asm/delay.h> -#include <asm/s390_ext.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index b5a4a739b477..a65d2e82f61d 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -39,7 +39,6 @@ #include <asm/atomic.h> #include <asm/mathemu.h> #include <asm/cpcmd.h> -#include <asm/s390_ext.h> #include <asm/lowcore.h> #include <asm/debug.h> #include "entry.h" diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 5e8ead4b4aba..2d6228f60cd6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -22,10 +22,10 @@ #include <linux/cpu.h> #include <linux/kprobes.h> -#include <asm/s390_ext.h> #include <asm/timer.h> #include <asm/irq_regs.h> #include <asm/cputime.h> +#include <asm/irq.h> static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 30ca85cce314..67345ae7ce8d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -731,6 +731,7 @@ static int __init kvm_s390_init(void) } memcpy(facilities, S390_lowcore.stfle_fac_list, 16); facilities[0] &= 0xff00fff3f47c0000ULL; + facilities[1] &= 0x201c000000000000ULL; return 0; } diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S index ab0e041ac54c..5faa1b1b23fa 100644 --- a/arch/s390/kvm/sie64a.S +++ b/arch/s390/kvm/sie64a.S @@ -93,4 +93,6 @@ sie_err: .section __ex_table,"a" .quad sie_inst,sie_err + .quad sie_exit,sie_err + .quad sie_reenter,sie_err .previous diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 0f53110e1d09..a65229d91c92 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/irqflags.h> #include <linux/interrupt.h> +#include <asm/div64.h> void __delay(unsigned long loops) { @@ -116,3 +117,17 @@ void udelay_simple(unsigned long long usecs) while (get_clock() < end) cpu_relax(); } + +void __ndelay(unsigned long long nsecs) +{ + u64 end; + + nsecs <<= 9; + do_div(nsecs, 125); + end = get_clock() + nsecs; + if (nsecs & ~0xfffUL) + __udelay(nsecs >> 12); + while (get_clock() < end) + barrier(); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a0f9e730f26a..fe103e891e7a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -34,7 +34,7 @@ #include <asm/asm-offsets.h> #include <asm/system.h> #include <asm/pgtable.h> -#include <asm/s390_ext.h> +#include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/compat.h> #include "../kernel/entry.h" @@ -245,9 +245,12 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, do_no_context(regs, int_code, trans_exc_code); break; default: /* fault & VM_FAULT_ERROR */ - if (fault & VM_FAULT_OOM) - pagefault_out_of_memory(); - else if (fault & VM_FAULT_SIGBUS) { + if (fault & VM_FAULT_OOM) { + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + do_no_context(regs, int_code, trans_exc_code); + else + pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) do_no_context(regs, int_code, trans_exc_code); @@ -277,7 +280,8 @@ static inline int do_exception(struct pt_regs *regs, int access, struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int fault, write; + unsigned int flags; + int fault; if (notify_page_fault(regs)) return 0; @@ -296,6 +300,10 @@ static inline int do_exception(struct pt_regs *regs, int access, address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + flags = FAULT_FLAG_ALLOW_RETRY; + if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + flags |= FAULT_FLAG_WRITE; +retry: down_read(&mm->mmap_sem); fault = VM_FAULT_BADMAP; @@ -325,21 +333,31 @@ static inline int do_exception(struct pt_regs *regs, int access, * make sure we exit gracefully rather than endlessly redo * the fault. */ - write = (access == VM_WRITE || - (trans_exc_code & store_indication) == 0x400) ? - FAULT_FLAG_WRITE : 0; - fault = handle_mm_fault(mm, vma, address, write); + fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + goto retry; + } } /* * The instruction that caused the program check will @@ -429,10 +447,9 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) access = write ? VM_WRITE : VM_READ; fault = do_exception(®s, access, uaddr | 2); if (unlikely(fault)) { - if (fault & VM_FAULT_OOM) { - pagefault_out_of_memory(); - fault = 0; - } else if (fault & VM_FAULT_SIGBUS) + if (fault & VM_FAULT_OOM) + return -EFAULT; + else if (fault & VM_FAULT_SIGBUS) do_sigbus(®s, pgm_int_code, uaddr); } return fault ? -EFAULT : 0; @@ -485,7 +502,6 @@ int pfault_init(void) "2:\n" EX_TABLE(0b,1b) : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); - __ctl_set_bit(0, 9); return rc; } @@ -500,7 +516,6 @@ void pfault_fini(void) if (!MACHINE_IS_VM || pfault_disable) return; - __ctl_clear_bit(0,9); asm volatile( " diag %0,0,0x258\n" "0:\n" @@ -615,6 +630,7 @@ static int __init pfault_irq_init(void) rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; if (rc) goto out_pfault; + service_subclass_irq_register(); hotcpu_notifier(pfault_cpu_notify, 0); return 0; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index dfefc2171691..59b663109d90 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -119,9 +119,7 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); -#ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); -#endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); fault_init(); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 71a4b0d34be0..51e5cd9b906a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -19,7 +19,7 @@ * using the stura instruction. * Returns the number of bytes copied or -EFAULT. */ -static long probe_kernel_write_odd(void *dst, void *src, size_t size) +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long count, aligned; int offset, mask; @@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) return rc ? rc : count; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { long copied = 0; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 14c6fae6fe6b..37a23c223705 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,91 +24,12 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> -struct rcu_table_freelist { - struct rcu_head rcu; - struct mm_struct *mm; - unsigned int pgt_index; - unsigned int crst_index; - unsigned long *table[0]; -}; - -#define RCU_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ - / sizeof(unsigned long)) - -static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); - -static void __page_table_free(struct mm_struct *mm, unsigned long *table); - -static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) -{ - struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); - struct rcu_table_freelist *batch = *batchp; - - if (batch) - return batch; - batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); - if (batch) { - batch->mm = mm; - batch->pgt_index = 0; - batch->crst_index = RCU_FREELIST_SIZE; - *batchp = batch; - } - return batch; -} - -static void rcu_table_freelist_callback(struct rcu_head *head) -{ - struct rcu_table_freelist *batch = - container_of(head, struct rcu_table_freelist, rcu); - - while (batch->pgt_index > 0) - __page_table_free(batch->mm, batch->table[--batch->pgt_index]); - while (batch->crst_index < RCU_FREELIST_SIZE) - crst_table_free(batch->mm, batch->table[batch->crst_index++]); - free_page((unsigned long) batch); -} - -void rcu_table_freelist_finish(void) -{ - struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); - - if (!batch) - return; - call_rcu(&batch->rcu, rcu_table_freelist_callback); - __get_cpu_var(rcu_table_freelist) = NULL; -} - -static void smp_sync(void *arg) -{ -} - #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 -#define TABLES_PER_PAGE 4 -#define FRAG_MASK 15UL -#define SECOND_HALVES 10UL - -void clear_table_pgstes(unsigned long *table) -{ - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); - memset(table + 256, 0, PAGE_SIZE/4); - clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); - memset(table + 768, 0, PAGE_SIZE/4); -} - +#define FRAG_MASK 0x0f #else #define ALLOC_ORDER 2 -#define TABLES_PER_PAGE 2 -#define FRAG_MASK 3UL -#define SECOND_HALVES 2UL - -void clear_table_pgstes(unsigned long *table) -{ - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); - memset(table + 256, 0, PAGE_SIZE/2); -} - +#define FRAG_MASK 0x03 #endif unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; @@ -137,26 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) free_pages((unsigned long) table, ALLOC_ORDER); } -void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) -{ - struct rcu_table_freelist *batch; - - if (atomic_read(&mm->mm_users) < 2 && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - crst_table_free(mm, table); - return; - } - batch = rcu_table_freelist_get(mm); - if (!batch) { - smp_call_function(smp_sync, NULL, 1); - crst_table_free(mm, table); - return; - } - batch->table[--batch->crst_index] = table; - if (batch->pgt_index >= batch->crst_index) - rcu_table_freelist_finish(); -} - #ifdef CONFIG_64BIT int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) { @@ -232,121 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) } #endif +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + /* * page table entry allocation/free routines. */ +#ifdef CONFIG_PGSTE +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) +{ + struct page *page; + unsigned long *table; + + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + pgtable_page_ctor(page); + atomic_set(&page->_mapcount, 3); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + return table; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ + struct page *page; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + pgtable_page_ctor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); +} +#endif + unsigned long *page_table_alloc(struct mm_struct *mm) { struct page *page; unsigned long *table; - unsigned long bits; + unsigned int mask, bit; - bits = (mm->context.has_pgste) ? 3UL : 1UL; +#ifdef CONFIG_PGSTE + if (mm_has_pgste(mm)) + return page_table_alloc_pgste(mm); +#endif + /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); - page = NULL; + mask = FRAG_MASK; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) - page = NULL; + table = (unsigned long *) page_to_phys(page); + mask = atomic_read(&page->_mapcount); + mask = mask | (mask >> 4); } - if (!page) { + if ((mask & FRAG_MASK) == FRAG_MASK) { spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; pgtable_page_ctor(page); - page->flags &= ~FRAG_MASK; + atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - if (mm->context.has_pgste) - clear_table_pgstes(table); - else - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); + } else { + for (bit = 1; mask & bit; bit <<= 1) + table += PTRS_PER_PTE; + mask = atomic_xor_bits(&page->_mapcount, bit); + if ((mask & FRAG_MASK) == FRAG_MASK) + list_del(&page->lru); } - table = (unsigned long *) page_to_phys(page); - while (page->flags & bits) { - table += 256; - bits <<= 1; - } - page->flags |= bits; - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) - list_move_tail(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.list_lock); return table; } -static void __page_table_free(struct mm_struct *mm, unsigned long *table) +void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; - unsigned long bits; + unsigned int bit, mask; - bits = ((unsigned long) table) & 15; - table = (unsigned long *)(((unsigned long) table) ^ bits); +#ifdef CONFIG_PGSTE + if (mm_has_pgste(mm)) + return page_table_free_pgste(table); +#endif + /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - page->flags ^= bits; - if (!(page->flags & FRAG_MASK)) { + bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit); + if (mask & FRAG_MASK) + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + if (mask == 0) { pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); __free_page(page); } } -void page_table_free(struct mm_struct *mm, unsigned long *table) +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +static void __page_table_free_rcu(void *table, unsigned bit) { struct page *page; - unsigned long bits; - bits = (mm->context.has_pgste) ? 3UL : 1UL; - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); +#ifdef CONFIG_PGSTE + if (bit == FRAG_MASK) + return page_table_free_pgste(table); +#endif + /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock_bh(&mm->context.list_lock); - page->flags ^= bits; - if (page->flags & FRAG_MASK) { - /* Page now has some free pgtable fragments. */ - if (!list_empty(&page->lru)) - list_move(&page->lru, &mm->context.pgtable_list); - page = NULL; - } else - /* All fragments of the 4K page have been freed. */ - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (page) { + if (atomic_xor_bits(&page->_mapcount, bit) == 0) { pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); __free_page(page); } } -void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) { - struct rcu_table_freelist *batch; + struct mm_struct *mm; struct page *page; - unsigned long bits; + unsigned int bit, mask; - if (atomic_read(&mm->mm_users) < 2 && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - page_table_free(mm, table); - return; - } - batch = rcu_table_freelist_get(mm); - if (!batch) { - smp_call_function(smp_sync, NULL, 1); - page_table_free(mm, table); + mm = tlb->mm; +#ifdef CONFIG_PGSTE + if (mm_has_pgste(mm)) { + table = (unsigned long *) (__pa(table) | FRAG_MASK); + tlb_remove_table(tlb, table); return; } - bits = (mm->context.has_pgste) ? 3UL : 1UL; - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); +#endif + bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); - /* Delayed freeing with rcu prevents reuse of pgtable fragments */ - list_del_init(&page->lru); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); + if (mask & FRAG_MASK) + list_add_tail(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *)(((unsigned long) table) | bits); - batch->table[batch->pgt_index++] = table; - if (batch->pgt_index >= batch->crst_index) - rcu_table_freelist_finish(); + table = (unsigned long *) (__pa(table) | (bit << 4)); + tlb_remove_table(tlb, table); } +void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long) _table & PAGE_MASK); + unsigned type = (unsigned long) _table & ~PAGE_MASK; + + if (type) + __page_table_free_rcu(table, type); + else + free_pages((unsigned long) table, ALLOC_ORDER); +} + +#endif + /* * switch on pgstes for its userspace process (for kvm) */ @@ -360,7 +315,7 @@ int s390_enable_sie(void) return -EINVAL; /* Do we have pgstes? if yes, we are done */ - if (tsk->mm->context.has_pgste) + if (mm_has_pgste(tsk->mm)) return 0; /* lets check if we are allowed to replace the mm */ diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 053caa0fd276..4552ce40c81a 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -19,7 +19,7 @@ #include <linux/oprofile.h> #include <asm/lowcore.h> -#include <asm/s390_ext.h> +#include <asm/irq.h> #include "hwsampler.h" @@ -580,7 +580,7 @@ static int hws_cpu_callback(struct notifier_block *nfb, { /* We do not have sampler space available for all possible CPUs. All CPUs should be online when hw sampling is activated. */ - return NOTIFY_BAD; + return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; } static struct notifier_block hws_cpu_notifier = { diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 5995e9bc72d9..0e358c2cffeb 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -25,7 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #include "hwsampler.h" -#define DEFAULT_INTERVAL 4096 +#define DEFAULT_INTERVAL 4127518 #define DEFAULT_SDBT_BLOCKS 1 #define DEFAULT_SDB_BLOCKS 511 @@ -151,6 +151,12 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) if (oprofile_max_interval == 0) return -ENODEV; + /* The initial value should be sane */ + if (oprofile_hw_interval < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + if (oprofile_hw_interval > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + if (oprofile_timer_init(ops)) return -ENODEV; |