summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig4
-rw-r--r--arch/s390/appldata/appldata_mem.c2
-rw-r--r--arch/s390/include/asm/delay.h8
-rw-r--r--arch/s390/include/asm/irq.h8
-rw-r--r--arch/s390/include/asm/pgalloc.h8
-rw-r--r--arch/s390/include/asm/pgtable.h55
-rw-r--r--arch/s390/include/asm/qdio.h119
-rw-r--r--arch/s390/include/asm/s390_ext.h17
-rw-r--r--arch/s390/include/asm/suspend.h10
-rw-r--r--arch/s390/include/asm/tlb.h94
-rw-r--r--arch/s390/include/asm/topology.h4
-rw-r--r--arch/s390/include/asm/uaccess.h11
-rw-r--r--arch/s390/include/asm/unistd.h3
-rw-r--r--arch/s390/kernel/Makefile8
-rw-r--r--arch/s390/kernel/compat_wrapper.S6
-rw-r--r--arch/s390/kernel/dis.c2
-rw-r--r--arch/s390/kernel/irq.c137
-rw-r--r--arch/s390/kernel/s390_ext.c108
-rw-r--r--arch/s390/kernel/smp.c5
-rw-r--r--arch/s390/kernel/syscalls.S1
-rw-r--r--arch/s390/kernel/time.c1
-rw-r--r--arch/s390/kernel/topology.c1
-rw-r--r--arch/s390/kernel/traps.c1
-rw-r--r--arch/s390/kernel/vtime.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/sie64a.S2
-rw-r--r--arch/s390/lib/delay.c15
-rw-r--r--arch/s390/mm/fault.c62
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/s390/mm/maccess.c4
-rw-r--r--arch/s390/mm/pgtable.c281
-rw-r--r--arch/s390/oprofile/hwsampler.c4
-rw-r--r--arch/s390/oprofile/init.c8
33 files changed, 464 insertions, 530 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ff2d2371b2e9..c03fef7a9c22 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -2,7 +2,7 @@ config MMU
def_bool y
config ZONE_DMA
- def_bool y if 64BIT
+ def_bool y
config LOCKDEP_SUPPORT
def_bool y
@@ -89,6 +89,7 @@ config S390
select HAVE_GET_USER_PAGES_FAST
select HAVE_ARCH_MUTEX_CPU_RELAX
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+ select HAVE_RCU_TABLE_FREE if SMP
select ARCH_INLINE_SPIN_TRYLOCK
select ARCH_INLINE_SPIN_TRYLOCK_BH
select ARCH_INLINE_SPIN_LOCK
@@ -578,6 +579,7 @@ config S390_GUEST
def_bool y
prompt "s390 guest support for KVM (EXPERIMENTAL)"
depends on 64BIT && EXPERIMENTAL
+ select VIRTUALIZATION
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index e43fe7537031..f7d3dc555bdb 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -92,9 +92,7 @@ static void appldata_get_mem_data(void *data)
mem_data->pswpin = ev[PSWPIN];
mem_data->pswpout = ev[PSWPOUT];
mem_data->pgalloc = ev[PGALLOC_NORMAL];
-#ifdef CONFIG_ZONE_DMA
mem_data->pgalloc += ev[PGALLOC_DMA];
-#endif
mem_data->pgfault = ev[PGFAULT];
mem_data->pgmajfault = ev[PGMAJFAULT];
diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h
index 8a096b83f51f..0e3b35f96be1 100644
--- a/arch/s390/include/asm/delay.h
+++ b/arch/s390/include/asm/delay.h
@@ -14,10 +14,12 @@
#ifndef _S390_DELAY_H
#define _S390_DELAY_H
-extern void __udelay(unsigned long long usecs);
-extern void udelay_simple(unsigned long long usecs);
-extern void __delay(unsigned long loops);
+void __ndelay(unsigned long long nsecs);
+void __udelay(unsigned long long usecs);
+void udelay_simple(unsigned long long usecs);
+void __delay(unsigned long loops);
+#define ndelay(n) __ndelay((unsigned long long) (n))
#define udelay(n) __udelay((unsigned long long) (n))
#define mdelay(n) __udelay((unsigned long long) (n) * 1000)
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 1544b90bd6d6..ba7b01c726a3 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -2,6 +2,7 @@
#define _ASM_IRQ_H
#include <linux/hardirq.h>
+#include <linux/types.h>
enum interruption_class {
EXTERNAL_INTERRUPT,
@@ -31,4 +32,11 @@ enum interruption_class {
NR_IRQS,
};
+typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long);
+
+int register_external_interrupt(u16 code, ext_int_handler_t handler);
+int unregister_external_interrupt(u16 code, ext_int_handler_t handler);
+void service_subclass_irq_register(void);
+void service_subclass_irq_unregister(void);
+
#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f6314af3b354..38e71ebcd3c2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -17,15 +17,15 @@
#include <linux/gfp.h>
#include <linux/mm.h>
-#define check_pgt_cache() do {} while (0)
-
unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
-void crst_table_free_rcu(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mm_struct *, unsigned long *);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void __tlb_remove_table(void *_table);
+#endif
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c4773a2ef3d3..801fbe1d837d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -293,19 +293,6 @@ extern unsigned long VMALLOC_START;
* swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
*/
-/* Page status table bits for virtualization */
-#define RCP_ACC_BITS 0xf000000000000000UL
-#define RCP_FP_BIT 0x0800000000000000UL
-#define RCP_PCL_BIT 0x0080000000000000UL
-#define RCP_HR_BIT 0x0040000000000000UL
-#define RCP_HC_BIT 0x0020000000000000UL
-#define RCP_GR_BIT 0x0004000000000000UL
-#define RCP_GC_BIT 0x0002000000000000UL
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT 0x0000800000000000UL
-#define KVM_UC_BIT 0x0000400000000000UL
-
#ifndef __s390x__
/* Bits in the segment table address-space-control-element */
@@ -325,6 +312,19 @@ extern unsigned long VMALLOC_START;
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
+/* Page status table bits for virtualization */
+#define RCP_ACC_BITS 0xf0000000UL
+#define RCP_FP_BIT 0x08000000UL
+#define RCP_PCL_BIT 0x00800000UL
+#define RCP_HR_BIT 0x00400000UL
+#define RCP_HC_BIT 0x00200000UL
+#define RCP_GR_BIT 0x00040000UL
+#define RCP_GC_BIT 0x00020000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT 0x00008000UL
+#define KVM_UC_BIT 0x00004000UL
+
#else /* __s390x__ */
/* Bits in the segment/region table address-space-control-element */
@@ -367,6 +367,19 @@ extern unsigned long VMALLOC_START;
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+/* Page status table bits for virtualization */
+#define RCP_ACC_BITS 0xf000000000000000UL
+#define RCP_FP_BIT 0x0800000000000000UL
+#define RCP_PCL_BIT 0x0080000000000000UL
+#define RCP_HR_BIT 0x0040000000000000UL
+#define RCP_HC_BIT 0x0020000000000000UL
+#define RCP_GR_BIT 0x0004000000000000UL
+#define RCP_GC_BIT 0x0002000000000000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT 0x0000800000000000UL
+#define KVM_UC_BIT 0x0000400000000000UL
+
#endif /* __s390x__ */
/*
@@ -577,16 +590,16 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- unsigned long pfn, bits;
+ unsigned long address, bits;
unsigned char skey;
- pfn = pte_val(*ptep) >> PAGE_SHIFT;
- skey = page_get_storage_key(pfn);
+ address = pte_val(*ptep) & PAGE_MASK;
+ skey = page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
/* Clear page changed & referenced bit in the storage key */
if (bits) {
skey ^= bits;
- page_set_storage_key(pfn, skey, 1);
+ page_set_storage_key(address, skey, 1);
}
/* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
@@ -628,16 +641,16 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- unsigned long pfn;
+ unsigned long address;
unsigned long okey, nkey;
- pfn = pte_val(*ptep) >> PAGE_SHIFT;
- okey = nkey = page_get_storage_key(pfn);
+ address = pte_val(*ptep) & PAGE_MASK;
+ okey = nkey = page_get_storage_key(address);
nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set page access key and fetch protection bit from pgste */
nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
if (okey != nkey)
- page_set_storage_key(pfn, nkey, 1);
+ page_set_storage_key(address, nkey, 1);
#endif
}
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 350e7ee5952d..15c97625df8d 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -139,110 +139,47 @@ struct slib {
struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
} __attribute__ ((packed, aligned(2048)));
-/**
- * struct sbal_flags - storage block address list flags
- * @last: last entry
- * @cont: contiguous storage
- * @frag: fragmentation
- */
-struct sbal_flags {
- u8 : 1;
- u8 last : 1;
- u8 cont : 1;
- u8 : 1;
- u8 frag : 2;
- u8 : 2;
-} __attribute__ ((packed));
-
-#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL
-#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL
-#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL
-#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL
-#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL
+#define SBAL_EFLAGS_LAST_ENTRY 0x40
+#define SBAL_EFLAGS_CONTIGUOUS 0x20
+#define SBAL_EFLAGS_FIRST_FRAG 0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG 0x08
+#define SBAL_EFLAGS_LAST_FRAG 0x0c
+#define SBAL_EFLAGS_MASK 0x6f
-#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL
+#define SBAL_SFLAGS0_PCI_REQ 0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20
/* Awesome OpenFCP extensions */
-#define SBAL_FLAGS0_TYPE_STATUS 0x00UL
-#define SBAL_FLAGS0_TYPE_WRITE 0x08UL
-#define SBAL_FLAGS0_TYPE_READ 0x10UL
-#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL
-#define SBAL_FLAGS0_MORE_SBALS 0x04UL
-#define SBAL_FLAGS0_COMMAND 0x02UL
-#define SBAL_FLAGS0_LAST_SBAL 0x00UL
-#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS
-#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_PCI 0x40
-
-/**
- * struct sbal_sbalf_0 - sbal flags for sbale 0
- * @pci: PCI indicator
- * @cont: data continuation
- * @sbtype: storage-block type (FCP)
- */
-struct sbal_sbalf_0 {
- u8 : 1;
- u8 pci : 1;
- u8 cont : 1;
- u8 sbtype : 2;
- u8 : 3;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_1 - sbal flags for sbale 1
- * @key: storage key
- */
-struct sbal_sbalf_1 {
- u8 : 4;
- u8 key : 4;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_14 - sbal flags for sbale 14
- * @erridx: error index
- */
-struct sbal_sbalf_14 {
- u8 : 4;
- u8 erridx : 4;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_15 - sbal flags for sbale 15
- * @reason: reason for error state
- */
-struct sbal_sbalf_15 {
- u8 reason;
-} __attribute__ ((packed));
-
-/**
- * union sbal_sbalf - storage block address list flags
- * @i0: sbalf0
- * @i1: sbalf1
- * @i14: sbalf14
- * @i15: sblaf15
- * @value: raw value
- */
-union sbal_sbalf {
- struct sbal_sbalf_0 i0;
- struct sbal_sbalf_1 i1;
- struct sbal_sbalf_14 i14;
- struct sbal_sbalf_15 i15;
- u8 value;
-};
+#define SBAL_SFLAGS0_TYPE_STATUS 0x00
+#define SBAL_SFLAGS0_TYPE_WRITE 0x08
+#define SBAL_SFLAGS0_TYPE_READ 0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18
+#define SBAL_SFLAGS0_MORE_SBALS 0x04
+#define SBAL_SFLAGS0_COMMAND 0x02
+#define SBAL_SFLAGS0_LAST_SBAL 0x00
+#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
/**
* struct qdio_buffer_element - SBAL entry
- * @flags: flags
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
* @length: length
* @addr: address
*/
struct qdio_buffer_element {
- u32 flags;
+ u8 eflags;
+ /* private: */
+ u8 res1;
+ /* public: */
+ u8 scount;
+ u8 sflags;
u32 length;
#ifdef CONFIG_32BIT
/* private: */
- void *reserved;
+ void *res2;
/* public: */
#endif
void *addr;
diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h
deleted file mode 100644
index 080876d5f196..000000000000
--- a/arch/s390/include/asm/s390_ext.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright IBM Corp. 1999,2010
- * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
- * Martin Schwidefsky <schwidefsky@de.ibm.com>,
- */
-
-#ifndef _S390_EXTINT_H
-#define _S390_EXTINT_H
-
-#include <linux/types.h>
-
-typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long);
-
-int register_external_interrupt(__u16 code, ext_int_handler_t handler);
-int unregister_external_interrupt(__u16 code, ext_int_handler_t handler);
-
-#endif /* _S390_EXTINT_H */
diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h
deleted file mode 100644
index dc75c616eafe..000000000000
--- a/arch/s390/include/asm/suspend.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __ASM_S390_SUSPEND_H
-#define __ASM_S390_SUSPEND_H
-
-static inline int arch_prepare_suspend(void)
-{
- return 0;
-}
-
-#endif
-
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 77eee5477a52..c687a2c83462 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -26,67 +26,60 @@
#include <linux/swap.h>
#include <asm/processor.h>
#include <asm/pgalloc.h>
-#include <asm/smp.h>
#include <asm/tlbflush.h>
struct mmu_gather {
struct mm_struct *mm;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ struct mmu_table_batch *batch;
+#endif
unsigned int fullmm;
- unsigned int nr_ptes;
- unsigned int nr_pxds;
- unsigned int max;
- void **array;
- void *local[8];
+ unsigned int need_flush;
};
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+struct mmu_table_batch {
+ struct rcu_head rcu;
+ unsigned int nr;
+ void *tables[0];
+};
- if (addr) {
- tlb->array = (void *) addr;
- tlb->max = PAGE_SIZE / sizeof(void *);
- }
-}
+#define MAX_TABLE_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#endif
static inline void tlb_gather_mmu(struct mmu_gather *tlb,
struct mm_struct *mm,
unsigned int full_mm_flush)
{
tlb->mm = mm;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->array = tlb->local;
tlb->fullmm = full_mm_flush;
+ tlb->need_flush = 0;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb->batch = NULL;
+#endif
if (tlb->fullmm)
__tlb_flush_mm(mm);
- else
- __tlb_alloc_page(tlb);
- tlb->nr_ptes = 0;
- tlb->nr_pxds = tlb->max;
}
static inline void tlb_flush_mmu(struct mmu_gather *tlb)
{
- if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max))
- __tlb_flush_mm(tlb->mm);
- while (tlb->nr_ptes > 0)
- page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]);
- while (tlb->nr_pxds < tlb->max)
- crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]);
+ if (!tlb->need_flush)
+ return;
+ tlb->need_flush = 0;
+ __tlb_flush_mm(tlb->mm);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
}
static inline void tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
tlb_flush_mmu(tlb);
-
- rcu_table_freelist_finish();
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->array != tlb->local)
- free_pages((unsigned long) tlb->array, 0);
}
/*
@@ -112,12 +105,11 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long address)
{
- if (!tlb->fullmm) {
- tlb->array[tlb->nr_ptes++] = pte;
- if (tlb->nr_ptes >= tlb->nr_pxds)
- tlb_flush_mmu(tlb);
- } else
- page_table_free(tlb->mm, (unsigned long *) pte);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ if (!tlb->fullmm)
+ return page_table_free_rcu(tlb, (unsigned long *) pte);
+#endif
+ page_table_free(tlb->mm, (unsigned long *) pte);
}
/*
@@ -133,12 +125,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
#ifdef __s390x__
if (tlb->mm->context.asce_limit <= (1UL << 31))
return;
- if (!tlb->fullmm) {
- tlb->array[--tlb->nr_pxds] = pmd;
- if (tlb->nr_ptes >= tlb->nr_pxds)
- tlb_flush_mmu(tlb);
- } else
- crst_table_free(tlb->mm, (unsigned long *) pmd);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ if (!tlb->fullmm)
+ return tlb_remove_table(tlb, pmd);
+#endif
+ crst_table_free(tlb->mm, (unsigned long *) pmd);
#endif
}
@@ -155,12 +146,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#ifdef __s390x__
if (tlb->mm->context.asce_limit <= (1UL << 42))
return;
- if (!tlb->fullmm) {
- tlb->array[--tlb->nr_pxds] = pud;
- if (tlb->nr_ptes >= tlb->nr_pxds)
- tlb_flush_mmu(tlb);
- } else
- crst_table_free(tlb->mm, (unsigned long *) pud);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ if (!tlb->fullmm)
+ return tlb_remove_table(tlb, pud);
+#endif
+ crst_table_free(tlb->mm, (unsigned long *) pud);
#endif
}
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c5338834ddbd..005d77d8ae2a 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -7,7 +7,7 @@
extern unsigned char cpu_core_id[NR_CPUS];
extern cpumask_t cpu_core_map[NR_CPUS];
-static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_core_map[cpu];
}
@@ -21,7 +21,7 @@ static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
extern unsigned char cpu_book_id[NR_CPUS];
extern cpumask_t cpu_book_map[NR_CPUS];
-static inline const struct cpumask *cpu_book_mask(unsigned int cpu)
+static inline const struct cpumask *cpu_book_mask(int cpu)
{
return &cpu_book_map[cpu];
}
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 2d9ea11f919a..2b23885e81e9 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -49,12 +49,13 @@
#define segment_eq(a,b) ((a).ar4 == (b).ar4)
+#define __access_ok(addr, size) \
+({ \
+ __chk_user_ptr(addr); \
+ 1; \
+})
-static inline int __access_ok(const void __user *addr, unsigned long size)
-{
- return 1;
-}
-#define access_ok(type,addr,size) __access_ok(addr,size)
+#define access_ok(type, addr, size) __access_ok(addr, size)
/*
* The exception table consists of pairs of addresses: the first is the
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 9208e69245a0..404bdb9671b4 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -276,7 +276,8 @@
#define __NR_open_by_handle_at 336
#define __NR_clock_adjtime 337
#define __NR_syncfs 338
-#define NR_syscalls 339
+#define __NR_setns 339
+#define NR_syscalls 340
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 5ff15dacb571..df3732249baa 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -20,10 +20,10 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
-obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \
- processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
- s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
- vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o
+obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \
+ processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \
+ debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \
+ sysinfo.o jump_label.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 1dc96ea08fa8..1f5eb789c3a7 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1904,3 +1904,9 @@ compat_sys_clock_adjtime_wrapper:
sys_syncfs_wrapper:
lgfr %r2,%r2 # int
jg sys_syncfs
+
+ .globl sys_setns_wrapper
+sys_setns_wrapper:
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ jg sys_setns
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 3d4a78fc1adc..1ca3d1d6a86c 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -30,9 +30,9 @@
#include <asm/atomic.h>
#include <asm/mathemu.h>
#include <asm/cpcmd.h>
-#include <asm/s390_ext.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
+#include <asm/irq.h>
#ifndef CONFIG_64BIT
#define ONELONG "%08lx: "
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e204f9597aaf..e3264f6a9720 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -1,19 +1,28 @@
/*
- * Copyright IBM Corp. 2004,2010
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Thomas Spatzier (tspat@de.ibm.com)
+ * Copyright IBM Corp. 2004,2011
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Thomas Spatzier <tspat@de.ibm.com>,
*
* This file contains interrupt related functions.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
-#include <linux/cpu.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include "entry.h"
struct irq_class {
char *name;
@@ -82,8 +91,7 @@ int show_interrupts(struct seq_file *p, void *v)
* For compatibilty only. S/390 specific setup of interrupts et al. is done
* much later in init_channel_subsystem().
*/
-void __init
-init_IRQ(void)
+void __init init_IRQ(void)
{
/* nothing... */
}
@@ -134,3 +142,116 @@ void init_irq_proc(void)
create_prof_cpu_mask(root_irq_dir);
}
#endif
+
+/*
+ * ext_int_hash[index] is the start of the list for all external interrupts
+ * that hash to this index. With the current set of external interrupts
+ * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000
+ * iucv and 0x2603 pfault) this is always the first element.
+ */
+
+struct ext_int_info {
+ struct ext_int_info *next;
+ ext_int_handler_t handler;
+ u16 code;
+};
+
+static struct ext_int_info *ext_int_hash[256];
+
+static inline int ext_hash(u16 code)
+{
+ return (code + (code >> 9)) & 0xff;
+}
+
+int register_external_interrupt(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p;
+ int index;
+
+ p = kmalloc(sizeof(*p), GFP_ATOMIC);
+ if (!p)
+ return -ENOMEM;
+ p->code = code;
+ p->handler = handler;
+ index = ext_hash(code);
+ p->next = ext_int_hash[index];
+ ext_int_hash[index] = p;
+ return 0;
+}
+EXPORT_SYMBOL(register_external_interrupt);
+
+int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p, *q;
+ int index;
+
+ index = ext_hash(code);
+ q = NULL;
+ p = ext_int_hash[index];
+ while (p) {
+ if (p->code == code && p->handler == handler)
+ break;
+ q = p;
+ p = p->next;
+ }
+ if (!p)
+ return -ENOENT;
+ if (q)
+ q->next = p->next;
+ else
+ ext_int_hash[index] = p->next;
+ kfree(p);
+ return 0;
+}
+EXPORT_SYMBOL(unregister_external_interrupt);
+
+void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct pt_regs *old_regs;
+ unsigned short code;
+ struct ext_int_info *p;
+ int index;
+
+ code = (unsigned short) ext_int_code;
+ old_regs = set_irq_regs(regs);
+ s390_idle_check(regs, S390_lowcore.int_clock,
+ S390_lowcore.async_enter_timer);
+ irq_enter();
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
+ if (code != 0x1004)
+ __get_cpu_var(s390_idle).nohz_delay = 1;
+ index = ext_hash(code);
+ for (p = ext_int_hash[index]; p; p = p->next) {
+ if (likely(p->code == code))
+ p->handler(ext_int_code, param32, param64);
+ }
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+static DEFINE_SPINLOCK(sc_irq_lock);
+static int sc_irq_refcount;
+
+void service_subclass_irq_register(void)
+{
+ spin_lock(&sc_irq_lock);
+ if (!sc_irq_refcount)
+ ctl_set_bit(0, 9);
+ sc_irq_refcount++;
+ spin_unlock(&sc_irq_lock);
+}
+EXPORT_SYMBOL(service_subclass_irq_register);
+
+void service_subclass_irq_unregister(void)
+{
+ spin_lock(&sc_irq_lock);
+ sc_irq_refcount--;
+ if (!sc_irq_refcount)
+ ctl_clear_bit(0, 9);
+ spin_unlock(&sc_irq_lock);
+}
+EXPORT_SYMBOL(service_subclass_irq_unregister);
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
deleted file mode 100644
index 185029919c4d..000000000000
--- a/arch/s390/kernel/s390_ext.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright IBM Corp. 1999,2010
- * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
- * Martin Schwidefsky <schwidefsky@de.ibm.com>,
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ftrace.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <asm/s390_ext.h>
-#include <asm/irq_regs.h>
-#include <asm/cputime.h>
-#include <asm/lowcore.h>
-#include <asm/irq.h>
-#include "entry.h"
-
-struct ext_int_info {
- struct ext_int_info *next;
- ext_int_handler_t handler;
- __u16 code;
-};
-
-/*
- * ext_int_hash[index] is the start of the list for all external interrupts
- * that hash to this index. With the current set of external interrupts
- * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000
- * iucv and 0x2603 pfault) this is always the first element.
- */
-static struct ext_int_info *ext_int_hash[256];
-
-static inline int ext_hash(__u16 code)
-{
- return (code + (code >> 9)) & 0xff;
-}
-
-int register_external_interrupt(__u16 code, ext_int_handler_t handler)
-{
- struct ext_int_info *p;
- int index;
-
- p = kmalloc(sizeof(*p), GFP_ATOMIC);
- if (!p)
- return -ENOMEM;
- p->code = code;
- p->handler = handler;
- index = ext_hash(code);
- p->next = ext_int_hash[index];
- ext_int_hash[index] = p;
- return 0;
-}
-EXPORT_SYMBOL(register_external_interrupt);
-
-int unregister_external_interrupt(__u16 code, ext_int_handler_t handler)
-{
- struct ext_int_info *p, *q;
- int index;
-
- index = ext_hash(code);
- q = NULL;
- p = ext_int_hash[index];
- while (p) {
- if (p->code == code && p->handler == handler)
- break;
- q = p;
- p = p->next;
- }
- if (!p)
- return -ENOENT;
- if (q)
- q->next = p->next;
- else
- ext_int_hash[index] = p->next;
- kfree(p);
- return 0;
-}
-EXPORT_SYMBOL(unregister_external_interrupt);
-
-void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code,
- unsigned int param32, unsigned long param64)
-{
- struct pt_regs *old_regs;
- unsigned short code;
- struct ext_int_info *p;
- int index;
-
- code = (unsigned short) ext_int_code;
- old_regs = set_irq_regs(regs);
- s390_idle_check(regs, S390_lowcore.int_clock,
- S390_lowcore.async_enter_timer);
- irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
- /* Serve timer interrupts first. */
- clock_comparator_work();
- kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
- if (code != 0x1004)
- __get_cpu_var(s390_idle).nohz_delay = 1;
- index = ext_hash(code);
- for (p = ext_int_hash[index]; p; p = p->next) {
- if (likely(p->code == code))
- p->handler(ext_int_code, param32, param64);
- }
- irq_exit();
- set_irq_regs(old_regs);
-}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index f8e85ecbc459..1d55c95f617c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -44,7 +44,6 @@
#include <asm/sigp.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
-#include <asm/s390_ext.h>
#include <asm/cpcmd.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
@@ -263,7 +262,7 @@ void smp_ctl_set_bit(int cr, int bit)
memset(&parms.orvals, 0, sizeof(parms.orvals));
memset(&parms.andvals, 0xff, sizeof(parms.andvals));
- parms.orvals[cr] = 1 << bit;
+ parms.orvals[cr] = 1UL << bit;
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_set_bit);
@@ -277,7 +276,7 @@ void smp_ctl_clear_bit(int cr, int bit)
memset(&parms.orvals, 0, sizeof(parms.orvals));
memset(&parms.andvals, 0xff, sizeof(parms.andvals));
- parms.andvals[cr] = ~(1L << bit);
+ parms.andvals[cr] = ~(1UL << bit);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_clear_bit);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9c65fd4ddce0..6ee39ef8fe4a 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -347,3 +347,4 @@ SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrappe
SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
+SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index a59557f1fb5f..dff933065ab6 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -41,7 +41,6 @@
#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
-#include <asm/s390_ext.h>
#include <asm/div64.h>
#include <asm/vdso.h>
#include <asm/irq.h>
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 2eafb8c7a746..0cd340b72632 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -17,7 +17,6 @@
#include <linux/smp.h>
#include <linux/cpuset.h>
#include <asm/delay.h>
-#include <asm/s390_ext.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index b5a4a739b477..a65d2e82f61d 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -39,7 +39,6 @@
#include <asm/atomic.h>
#include <asm/mathemu.h>
#include <asm/cpcmd.h>
-#include <asm/s390_ext.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
#include "entry.h"
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 5e8ead4b4aba..2d6228f60cd6 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -22,10 +22,10 @@
#include <linux/cpu.h>
#include <linux/kprobes.h>
-#include <asm/s390_ext.h>
#include <asm/timer.h>
#include <asm/irq_regs.h>
#include <asm/cputime.h>
+#include <asm/irq.h>
static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 30ca85cce314..67345ae7ce8d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -731,6 +731,7 @@ static int __init kvm_s390_init(void)
}
memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
facilities[0] &= 0xff00fff3f47c0000ULL;
+ facilities[1] &= 0x201c000000000000ULL;
return 0;
}
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index ab0e041ac54c..5faa1b1b23fa 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -93,4 +93,6 @@ sie_err:
.section __ex_table,"a"
.quad sie_inst,sie_err
+ .quad sie_exit,sie_err
+ .quad sie_reenter,sie_err
.previous
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 0f53110e1d09..a65229d91c92 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/irqflags.h>
#include <linux/interrupt.h>
+#include <asm/div64.h>
void __delay(unsigned long loops)
{
@@ -116,3 +117,17 @@ void udelay_simple(unsigned long long usecs)
while (get_clock() < end)
cpu_relax();
}
+
+void __ndelay(unsigned long long nsecs)
+{
+ u64 end;
+
+ nsecs <<= 9;
+ do_div(nsecs, 125);
+ end = get_clock() + nsecs;
+ if (nsecs & ~0xfffUL)
+ __udelay(nsecs >> 12);
+ while (get_clock() < end)
+ barrier();
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a0f9e730f26a..fe103e891e7a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -34,7 +34,7 @@
#include <asm/asm-offsets.h>
#include <asm/system.h>
#include <asm/pgtable.h>
-#include <asm/s390_ext.h>
+#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/compat.h>
#include "../kernel/entry.h"
@@ -245,9 +245,12 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code,
do_no_context(regs, int_code, trans_exc_code);
break;
default: /* fault & VM_FAULT_ERROR */
- if (fault & VM_FAULT_OOM)
- pagefault_out_of_memory();
- else if (fault & VM_FAULT_SIGBUS) {
+ if (fault & VM_FAULT_OOM) {
+ if (!(regs->psw.mask & PSW_MASK_PSTATE))
+ do_no_context(regs, int_code, trans_exc_code);
+ else
+ pagefault_out_of_memory();
+ } else if (fault & VM_FAULT_SIGBUS) {
/* Kernel mode? Handle exceptions or die */
if (!(regs->psw.mask & PSW_MASK_PSTATE))
do_no_context(regs, int_code, trans_exc_code);
@@ -277,7 +280,8 @@ static inline int do_exception(struct pt_regs *regs, int access,
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
- int fault, write;
+ unsigned int flags;
+ int fault;
if (notify_page_fault(regs))
return 0;
@@ -296,6 +300,10 @@ static inline int do_exception(struct pt_regs *regs, int access,
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ flags = FAULT_FLAG_ALLOW_RETRY;
+ if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
+ flags |= FAULT_FLAG_WRITE;
+retry:
down_read(&mm->mmap_sem);
fault = VM_FAULT_BADMAP;
@@ -325,21 +333,31 @@ static inline int do_exception(struct pt_regs *regs, int access,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- write = (access == VM_WRITE ||
- (trans_exc_code & store_indication) == 0x400) ?
- FAULT_FLAG_WRITE : 0;
- fault = handle_mm_fault(mm, vma, address, write);
+ fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
- regs, address);
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ goto retry;
+ }
}
/*
* The instruction that caused the program check will
@@ -429,10 +447,9 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
access = write ? VM_WRITE : VM_READ;
fault = do_exception(&regs, access, uaddr | 2);
if (unlikely(fault)) {
- if (fault & VM_FAULT_OOM) {
- pagefault_out_of_memory();
- fault = 0;
- } else if (fault & VM_FAULT_SIGBUS)
+ if (fault & VM_FAULT_OOM)
+ return -EFAULT;
+ else if (fault & VM_FAULT_SIGBUS)
do_sigbus(&regs, pgm_int_code, uaddr);
}
return fault ? -EFAULT : 0;
@@ -485,7 +502,6 @@ int pfault_init(void)
"2:\n"
EX_TABLE(0b,1b)
: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
- __ctl_set_bit(0, 9);
return rc;
}
@@ -500,7 +516,6 @@ void pfault_fini(void)
if (!MACHINE_IS_VM || pfault_disable)
return;
- __ctl_clear_bit(0,9);
asm volatile(
" diag %0,0,0x258\n"
"0:\n"
@@ -615,6 +630,7 @@ static int __init pfault_irq_init(void)
rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
if (rc)
goto out_pfault;
+ service_subclass_irq_register();
hotcpu_notifier(pfault_cpu_notify, 0);
return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index dfefc2171691..59b663109d90 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -119,9 +119,7 @@ void __init paging_init(void)
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
-#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
free_area_init_nodes(max_zone_pfns);
fault_init();
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 71a4b0d34be0..51e5cd9b906a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -19,7 +19,7 @@
* using the stura instruction.
* Returns the number of bytes copied or -EFAULT.
*/
-static long probe_kernel_write_odd(void *dst, void *src, size_t size)
+static long probe_kernel_write_odd(void *dst, const void *src, size_t size)
{
unsigned long count, aligned;
int offset, mask;
@@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size)
return rc ? rc : count;
}
-long probe_kernel_write(void *dst, void *src, size_t size)
+long probe_kernel_write(void *dst, const void *src, size_t size)
{
long copied = 0;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 14c6fae6fe6b..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,91 +24,12 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-struct rcu_table_freelist {
- struct rcu_head rcu;
- struct mm_struct *mm;
- unsigned int pgt_index;
- unsigned int crst_index;
- unsigned long *table[0];
-};
-
-#define RCU_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
- / sizeof(unsigned long))
-
-static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
-
-static void __page_table_free(struct mm_struct *mm, unsigned long *table);
-
-static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
-{
- struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
- struct rcu_table_freelist *batch = *batchp;
-
- if (batch)
- return batch;
- batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
- if (batch) {
- batch->mm = mm;
- batch->pgt_index = 0;
- batch->crst_index = RCU_FREELIST_SIZE;
- *batchp = batch;
- }
- return batch;
-}
-
-static void rcu_table_freelist_callback(struct rcu_head *head)
-{
- struct rcu_table_freelist *batch =
- container_of(head, struct rcu_table_freelist, rcu);
-
- while (batch->pgt_index > 0)
- __page_table_free(batch->mm, batch->table[--batch->pgt_index]);
- while (batch->crst_index < RCU_FREELIST_SIZE)
- crst_table_free(batch->mm, batch->table[batch->crst_index++]);
- free_page((unsigned long) batch);
-}
-
-void rcu_table_freelist_finish(void)
-{
- struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist);
-
- if (!batch)
- return;
- call_rcu(&batch->rcu, rcu_table_freelist_callback);
- __get_cpu_var(rcu_table_freelist) = NULL;
-}
-
-static void smp_sync(void *arg)
-{
-}
-
#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
-#define TABLES_PER_PAGE 4
-#define FRAG_MASK 15UL
-#define SECOND_HALVES 10UL
-
-void clear_table_pgstes(unsigned long *table)
-{
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
- memset(table + 256, 0, PAGE_SIZE/4);
- clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
- memset(table + 768, 0, PAGE_SIZE/4);
-}
-
+#define FRAG_MASK 0x0f
#else
#define ALLOC_ORDER 2
-#define TABLES_PER_PAGE 2
-#define FRAG_MASK 3UL
-#define SECOND_HALVES 2UL
-
-void clear_table_pgstes(unsigned long *table)
-{
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
- memset(table + 256, 0, PAGE_SIZE/2);
-}
-
+#define FRAG_MASK 0x03
#endif
unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -137,26 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
free_pages((unsigned long) table, ALLOC_ORDER);
}
-void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
-{
- struct rcu_table_freelist *batch;
-
- if (atomic_read(&mm->mm_users) < 2 &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
- crst_table_free(mm, table);
- return;
- }
- batch = rcu_table_freelist_get(mm);
- if (!batch) {
- smp_call_function(smp_sync, NULL, 1);
- crst_table_free(mm, table);
- return;
- }
- batch->table[--batch->crst_index] = table;
- if (batch->pgt_index >= batch->crst_index)
- rcu_table_freelist_finish();
-}
-
#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
@@ -232,121 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
}
#endif
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+ unsigned int old, new;
+
+ do {
+ old = atomic_read(v);
+ new = old ^ bits;
+ } while (atomic_cmpxchg(v, old, new) != old);
+ return new;
+}
+
/*
* page table entry allocation/free routines.
*/
+#ifdef CONFIG_PGSTE
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+ struct page *page;
+ unsigned long *table;
+
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
+ return NULL;
+ pgtable_page_ctor(page);
+ atomic_set(&page->_mapcount, 3);
+ table = (unsigned long *) page_to_phys(page);
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ return table;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+ struct page *page;
+
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ pgtable_page_ctor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+}
+#endif
+
unsigned long *page_table_alloc(struct mm_struct *mm)
{
struct page *page;
unsigned long *table;
- unsigned long bits;
+ unsigned int mask, bit;
- bits = (mm->context.has_pgste) ? 3UL : 1UL;
+#ifdef CONFIG_PGSTE
+ if (mm_has_pgste(mm))
+ return page_table_alloc_pgste(mm);
+#endif
+ /* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
- page = NULL;
+ mask = FRAG_MASK;
if (!list_empty(&mm->context.pgtable_list)) {
page = list_first_entry(&mm->context.pgtable_list,
struct page, lru);
- if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
- page = NULL;
+ table = (unsigned long *) page_to_phys(page);
+ mask = atomic_read(&page->_mapcount);
+ mask = mask | (mask >> 4);
}
- if (!page) {
+ if ((mask & FRAG_MASK) == FRAG_MASK) {
spin_unlock_bh(&mm->context.list_lock);
page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
pgtable_page_ctor(page);
- page->flags &= ~FRAG_MASK;
+ atomic_set(&page->_mapcount, 1);
table = (unsigned long *) page_to_phys(page);
- if (mm->context.has_pgste)
- clear_table_pgstes(table);
- else
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
+ } else {
+ for (bit = 1; mask & bit; bit <<= 1)
+ table += PTRS_PER_PTE;
+ mask = atomic_xor_bits(&page->_mapcount, bit);
+ if ((mask & FRAG_MASK) == FRAG_MASK)
+ list_del(&page->lru);
}
- table = (unsigned long *) page_to_phys(page);
- while (page->flags & bits) {
- table += 256;
- bits <<= 1;
- }
- page->flags |= bits;
- if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
- list_move_tail(&page->lru, &mm->context.pgtable_list);
spin_unlock_bh(&mm->context.list_lock);
return table;
}
-static void __page_table_free(struct mm_struct *mm, unsigned long *table)
+void page_table_free(struct mm_struct *mm, unsigned long *table)
{
struct page *page;
- unsigned long bits;
+ unsigned int bit, mask;
- bits = ((unsigned long) table) & 15;
- table = (unsigned long *)(((unsigned long) table) ^ bits);
+#ifdef CONFIG_PGSTE
+ if (mm_has_pgste(mm))
+ return page_table_free_pgste(table);
+#endif
+ /* Free 1K/2K page table fragment of a 4K page */
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- page->flags ^= bits;
- if (!(page->flags & FRAG_MASK)) {
+ bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
+ spin_lock_bh(&mm->context.list_lock);
+ if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+ list_del(&page->lru);
+ mask = atomic_xor_bits(&page->_mapcount, bit);
+ if (mask & FRAG_MASK)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask == 0) {
pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
__free_page(page);
}
}
-void page_table_free(struct mm_struct *mm, unsigned long *table)
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+static void __page_table_free_rcu(void *table, unsigned bit)
{
struct page *page;
- unsigned long bits;
- bits = (mm->context.has_pgste) ? 3UL : 1UL;
- bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#ifdef CONFIG_PGSTE
+ if (bit == FRAG_MASK)
+ return page_table_free_pgste(table);
+#endif
+ /* Free 1K/2K page table fragment of a 4K page */
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- spin_lock_bh(&mm->context.list_lock);
- page->flags ^= bits;
- if (page->flags & FRAG_MASK) {
- /* Page now has some free pgtable fragments. */
- if (!list_empty(&page->lru))
- list_move(&page->lru, &mm->context.pgtable_list);
- page = NULL;
- } else
- /* All fragments of the 4K page have been freed. */
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- if (page) {
+ if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
__free_page(page);
}
}
-void page_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
- struct rcu_table_freelist *batch;
+ struct mm_struct *mm;
struct page *page;
- unsigned long bits;
+ unsigned int bit, mask;
- if (atomic_read(&mm->mm_users) < 2 &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
- page_table_free(mm, table);
- return;
- }
- batch = rcu_table_freelist_get(mm);
- if (!batch) {
- smp_call_function(smp_sync, NULL, 1);
- page_table_free(mm, table);
+ mm = tlb->mm;
+#ifdef CONFIG_PGSTE
+ if (mm_has_pgste(mm)) {
+ table = (unsigned long *) (__pa(table) | FRAG_MASK);
+ tlb_remove_table(tlb, table);
return;
}
- bits = (mm->context.has_pgste) ? 3UL : 1UL;
- bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#endif
+ bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
- /* Delayed freeing with rcu prevents reuse of pgtable fragments */
- list_del_init(&page->lru);
+ if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+ list_del(&page->lru);
+ mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+ if (mask & FRAG_MASK)
+ list_add_tail(&page->lru, &mm->context.pgtable_list);
spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *)(((unsigned long) table) | bits);
- batch->table[batch->pgt_index++] = table;
- if (batch->pgt_index >= batch->crst_index)
- rcu_table_freelist_finish();
+ table = (unsigned long *) (__pa(table) | (bit << 4));
+ tlb_remove_table(tlb, table);
}
+void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long) _table & PAGE_MASK);
+ unsigned type = (unsigned long) _table & ~PAGE_MASK;
+
+ if (type)
+ __page_table_free_rcu(table, type);
+ else
+ free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+#endif
+
/*
* switch on pgstes for its userspace process (for kvm)
*/
@@ -360,7 +315,7 @@ int s390_enable_sie(void)
return -EINVAL;
/* Do we have pgstes? if yes, we are done */
- if (tsk->mm->context.has_pgste)
+ if (mm_has_pgste(tsk->mm))
return 0;
/* lets check if we are allowed to replace the mm */
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 053caa0fd276..4552ce40c81a 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -19,7 +19,7 @@
#include <linux/oprofile.h>
#include <asm/lowcore.h>
-#include <asm/s390_ext.h>
+#include <asm/irq.h>
#include "hwsampler.h"
@@ -580,7 +580,7 @@ static int hws_cpu_callback(struct notifier_block *nfb,
{
/* We do not have sampler space available for all possible CPUs.
All CPUs should be online when hw sampling is activated. */
- return NOTIFY_BAD;
+ return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD;
}
static struct notifier_block hws_cpu_notifier = {
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 5995e9bc72d9..0e358c2cffeb 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -25,7 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
#include "hwsampler.h"
-#define DEFAULT_INTERVAL 4096
+#define DEFAULT_INTERVAL 4127518
#define DEFAULT_SDBT_BLOCKS 1
#define DEFAULT_SDB_BLOCKS 511
@@ -151,6 +151,12 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
if (oprofile_max_interval == 0)
return -ENODEV;
+ /* The initial value should be sane */
+ if (oprofile_hw_interval < oprofile_min_interval)
+ oprofile_hw_interval = oprofile_min_interval;
+ if (oprofile_hw_interval > oprofile_max_interval)
+ oprofile_hw_interval = oprofile_max_interval;
+
if (oprofile_timer_init(ops))
return -ENODEV;