summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig15
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/include/asm/atomic.h8
-rw-r--r--arch/s390/include/asm/ccwdev.h4
-rw-r--r--arch/s390/include/asm/mmu_context.h4
-rw-r--r--arch/s390/include/asm/pgalloc.h3
-rw-r--r--arch/s390/include/asm/pgtable.h4
-rw-r--r--arch/s390/include/asm/setup.h17
-rw-r--r--arch/s390/include/asm/smp.h54
-rw-r--r--arch/s390/include/asm/sockios.h21
-rw-r--r--arch/s390/include/asm/termbits.h206
-rw-r--r--arch/s390/include/asm/todclk.h23
-rw-r--r--arch/s390/include/asm/uaccess.h2
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/compat_linux.c6
-rw-r--r--arch/s390/kernel/compat_linux.h4
-rw-r--r--arch/s390/kernel/head64.S3
-rw-r--r--arch/s390/kernel/setup.c36
-rw-r--r--arch/s390/kernel/time.c4
-rw-r--r--arch/s390/kernel/vdso.c9
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/lib/uaccess_mvcos.c4
-rw-r--r--arch/s390/lib/uaccess_pt.c147
-rw-r--r--arch/s390/mm/cmm.c61
-rw-r--r--arch/s390/mm/fault.c378
-rw-r--r--arch/s390/mm/pgtable.c2
-rw-r--r--arch/s390/mm/vmem.c11
27 files changed, 371 insertions, 658 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 16c673096a22..c80235206c01 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -220,23 +220,8 @@ config AUDIT_ARCH
bool
default y
-config S390_SWITCH_AMODE
- bool "Switch kernel/user addressing modes"
- help
- This option allows to switch the addressing modes of kernel and user
- space. The kernel parameter switch_amode=on will enable this feature,
- default is disabled. Enabling this (via kernel parameter) on machines
- earlier than IBM System z9-109 EC/BC will reduce system performance.
-
- Note that this option will also be selected by selecting the execute
- protection option below. Enabling the execute protection via the
- noexec kernel parameter will also switch the addressing modes,
- independent of the switch_amode kernel parameter.
-
-
config S390_EXEC_PROTECT
bool "Data execute protection"
- select S390_SWITCH_AMODE
help
This option allows to enable a buffer overflow protection for user
space programs and it also selects the addressing mode option above.
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index ab4464486b7a..f4e53c6708dc 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -185,7 +185,6 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_COMPAT=y
CONFIG_SYSVIPC_COMPAT=y
CONFIG_AUDIT_ARCH=y
-CONFIG_S390_SWITCH_AMODE=y
CONFIG_S390_EXEC_PROTECT=y
#
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index ae7c8f9f94a5..2a113d6a7dfd 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -21,7 +21,7 @@
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
#define __CS_LOOP(ptr, op_val, op_string) ({ \
- typeof(ptr->counter) old_val, new_val; \
+ int old_val, new_val; \
asm volatile( \
" l %0,%2\n" \
"0: lr %1,%0\n" \
@@ -38,7 +38,7 @@
#else /* __GNUC__ */
#define __CS_LOOP(ptr, op_val, op_string) ({ \
- typeof(ptr->counter) old_val, new_val; \
+ int old_val, new_val; \
asm volatile( \
" l %0,0(%3)\n" \
"0: lr %1,%0\n" \
@@ -143,7 +143,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
#define __CSG_LOOP(ptr, op_val, op_string) ({ \
- typeof(ptr->counter) old_val, new_val; \
+ long long old_val, new_val; \
asm volatile( \
" lg %0,%2\n" \
"0: lgr %1,%0\n" \
@@ -160,7 +160,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
#else /* __GNUC__ */
#define __CSG_LOOP(ptr, op_val, op_string) ({ \
- typeof(ptr->counter) old_val, new_val; \
+ long long old_val, new_val; \
asm volatile( \
" lg %0,0(%3)\n" \
"0: lgr %1,%0\n" \
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 2a5419551176..f4bd346a52d3 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -142,6 +142,8 @@ struct ccw1;
extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long);
extern int ccw_device_set_options(struct ccw_device *, unsigned long);
extern void ccw_device_clear_options(struct ccw_device *, unsigned long);
+int ccw_device_is_pathgroup(struct ccw_device *cdev);
+int ccw_device_is_multipath(struct ccw_device *cdev);
/* Allow for i/o completion notification after primary interrupt status. */
#define CCWDEV_EARLY_NOTIFICATION 0x0001
@@ -151,6 +153,8 @@ extern void ccw_device_clear_options(struct ccw_device *, unsigned long);
#define CCWDEV_DO_PATHGROUP 0x0004
/* Allow forced onlining of boxed devices. */
#define CCWDEV_ALLOW_FORCE 0x0008
+/* Try to use multipath mode. */
+#define CCWDEV_DO_MULTIPATH 0x0010
extern int ccw_device_start(struct ccw_device *, struct ccw1 *,
unsigned long, __u8, unsigned long);
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fc7edd6f41b6..976e273988c2 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -36,7 +36,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 1;
mm->context.alloc_pgste = 1;
} else {
- mm->context.noexec = s390_noexec;
+ mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE);
mm->context.has_pgste = 0;
mm->context.alloc_pgste = 0;
}
@@ -58,7 +58,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
pgd_t *pgd = mm->pgd;
S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
- if (switch_amode) {
+ if (user_mode != HOME_SPACE_MODE) {
/* Load primary space page table origin. */
pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd;
S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd);
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index ddad5903341c..68940d0bad91 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -143,7 +143,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
spin_lock_init(&mm->context.list_lock);
INIT_LIST_HEAD(&mm->context.crst_list);
INIT_LIST_HEAD(&mm->context.pgtable_list);
- return (pgd_t *) crst_table_alloc(mm, s390_noexec);
+ return (pgd_t *)
+ crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE);
}
#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 60a7b1a1702f..e2fa79cf0614 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -169,12 +169,13 @@ extern unsigned long VMALLOC_START;
* STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048)
*
* A 64 bit pagetable entry of S390 has following format:
- * | PFRA |0IP0| OS |
+ * | PFRA |0IPC| OS |
* 0000000000111111111122222222223333333333444444444455555555556666
* 0123456789012345678901234567890123456789012345678901234567890123
*
* I Page-Invalid Bit: Page is not available for address-translation
* P Page-Protection Bit: Store access not possible for page
+ * C Change-bit override: HW is not required to set change bit
*
* A 64 bit segmenttable entry of S390 has following format:
* | P-table origin | TT
@@ -218,6 +219,7 @@ extern unsigned long VMALLOC_START;
*/
/* Hardware bits in the page table entry */
+#define _PAGE_CO 0x100 /* HW Change-bit override */
#define _PAGE_RO 0x200 /* HW read-only bit */
#define _PAGE_INVALID 0x400 /* HW invalid bit */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index e37478e87286..52a779c337e8 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -49,17 +49,12 @@ extern unsigned long memory_end;
void detect_memory_layout(struct mem_chunk chunk[]);
-#ifdef CONFIG_S390_SWITCH_AMODE
-extern unsigned int switch_amode;
-#else
-#define switch_amode (0)
-#endif
-
-#ifdef CONFIG_S390_EXEC_PROTECT
-extern unsigned int s390_noexec;
-#else
-#define s390_noexec (0)
-#endif
+#define PRIMARY_SPACE_MODE 0
+#define ACCESS_REGISTER_MODE 1
+#define SECONDARY_SPACE_MODE 2
+#define HOME_SPACE_MODE 3
+
+extern unsigned int user_mode;
/*
* Machine features detected in head.S
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index a868b272c257..2ab1141eeb50 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -1,57 +1,22 @@
/*
- * include/asm-s390/smp.h
- *
- * S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- * Martin Schwidefsky (schwidefsky@de.ibm.com)
- * Heiko Carstens (heiko.carstens@de.ibm.com)
+ * Copyright IBM Corp. 1999,2009
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/bitops.h>
+#ifdef CONFIG_SMP
-#if defined(__KERNEL__) && defined(CONFIG_SMP) && !defined(__ASSEMBLY__)
-
-#include <asm/lowcore.h>
-#include <asm/sigp.h>
-#include <asm/ptrace.h>
#include <asm/system.h>
-
-/*
- s390 specific smp.c headers
- */
-typedef struct
-{
- int intresting;
- sigp_ccode ccode;
- __u32 status;
- __u16 cpu;
-} sigp_info;
+#include <asm/sigp.h>
extern void machine_restart_smp(char *);
extern void machine_halt_smp(void);
extern void machine_power_off_smp(void);
-#define NO_PROC_ID 0xFF /* No processor magic marker */
-
-/*
- * This magic constant controls our willingness to transfer
- * a process across CPUs. Such a transfer incurs misses on the L1
- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- * gut feeling is this will vary by board in value. For a board
- * with separate L2 cache it probably depends also on the RSS, and
- * for a board with shared L2 cache it ought to decay fast as other
- * processes are run.
- */
-
-#define PROC_CHANGE_PENALTY 20 /* Schedule penalty */
-
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
-#define cpu_logical_map(cpu) (cpu)
extern int __cpu_disable (void);
extern void __cpu_die (unsigned int cpu);
@@ -64,7 +29,9 @@ extern int smp_cpu_polarization[];
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-#endif
+extern union save_area *zfcpdump_save_areas[NR_CPUS + 1];
+
+#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
extern int smp_rescan_cpus(void);
@@ -72,5 +39,4 @@ extern int smp_rescan_cpus(void);
static inline int smp_rescan_cpus(void) { return 0; }
#endif
-extern union save_area *zfcpdump_save_areas[NR_CPUS + 1];
-#endif
+#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/sockios.h b/arch/s390/include/asm/sockios.h
index f4fc16c7da59..6f60eee73242 100644
--- a/arch/s390/include/asm/sockios.h
+++ b/arch/s390/include/asm/sockios.h
@@ -1,21 +1,6 @@
-/*
- * include/asm-s390/sockios.h
- *
- * S390 version
- *
- * Derived from "include/asm-i386/sockios.h"
- */
+#ifndef _ASM_S390_SOCKIOS_H
+#define _ASM_S390_SOCKIOS_H
-#ifndef __ARCH_S390_SOCKIOS__
-#define __ARCH_S390_SOCKIOS__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#include <asm-generic/sockios.h>
#endif
diff --git a/arch/s390/include/asm/termbits.h b/arch/s390/include/asm/termbits.h
index 58731853d529..71bf6ac6a2b9 100644
--- a/arch/s390/include/asm/termbits.h
+++ b/arch/s390/include/asm/termbits.h
@@ -1,206 +1,6 @@
-/*
- * include/asm-s390/termbits.h
- *
- * S390 version
- *
- * Derived from "include/asm-i386/termbits.h"
- */
+#ifndef _ASM_S390_TERMBITS_H
+#define _ASM_S390_TERMBITS_H
-#ifndef __ARCH_S390_TERMBITS_H__
-#define __ARCH_S390_TERMBITS_H__
-
-#include <linux/posix_types.h>
-
-typedef unsigned char cc_t;
-typedef unsigned int speed_t;
-typedef unsigned int tcflag_t;
-
-#define NCCS 19
-struct termios {
- tcflag_t c_iflag; /* input mode flags */
- tcflag_t c_oflag; /* output mode flags */
- tcflag_t c_cflag; /* control mode flags */
- tcflag_t c_lflag; /* local mode flags */
- cc_t c_line; /* line discipline */
- cc_t c_cc[NCCS]; /* control characters */
-};
-
-struct termios2 {
- tcflag_t c_iflag; /* input mode flags */
- tcflag_t c_oflag; /* output mode flags */
- tcflag_t c_cflag; /* control mode flags */
- tcflag_t c_lflag; /* local mode flags */
- cc_t c_line; /* line discipline */
- cc_t c_cc[NCCS]; /* control characters */
- speed_t c_ispeed; /* input speed */
- speed_t c_ospeed; /* output speed */
-};
-
-struct ktermios {
- tcflag_t c_iflag; /* input mode flags */
- tcflag_t c_oflag; /* output mode flags */
- tcflag_t c_cflag; /* control mode flags */
- tcflag_t c_lflag; /* local mode flags */
- cc_t c_line; /* line discipline */
- cc_t c_cc[NCCS]; /* control characters */
- speed_t c_ispeed; /* input speed */
- speed_t c_ospeed; /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-/* c_iflag bits */
-#define IGNBRK 0000001
-#define BRKINT 0000002
-#define IGNPAR 0000004
-#define PARMRK 0000010
-#define INPCK 0000020
-#define ISTRIP 0000040
-#define INLCR 0000100
-#define IGNCR 0000200
-#define ICRNL 0000400
-#define IUCLC 0001000
-#define IXON 0002000
-#define IXANY 0004000
-#define IXOFF 0010000
-#define IMAXBEL 0020000
-#define IUTF8 0040000
-
-/* c_oflag bits */
-#define OPOST 0000001
-#define OLCUC 0000002
-#define ONLCR 0000004
-#define OCRNL 0000010
-#define ONOCR 0000020
-#define ONLRET 0000040
-#define OFILL 0000100
-#define OFDEL 0000200
-#define NLDLY 0000400
-#define NL0 0000000
-#define NL1 0000400
-#define CRDLY 0003000
-#define CR0 0000000
-#define CR1 0001000
-#define CR2 0002000
-#define CR3 0003000
-#define TABDLY 0014000
-#define TAB0 0000000
-#define TAB1 0004000
-#define TAB2 0010000
-#define TAB3 0014000
-#define XTABS 0014000
-#define BSDLY 0020000
-#define BS0 0000000
-#define BS1 0020000
-#define VTDLY 0040000
-#define VT0 0000000
-#define VT1 0040000
-#define FFDLY 0100000
-#define FF0 0000000
-#define FF1 0100000
-
-/* c_cflag bit meaning */
-#define CBAUD 0010017
-#define B0 0000000 /* hang up */
-#define B50 0000001
-#define B75 0000002
-#define B110 0000003
-#define B134 0000004
-#define B150 0000005
-#define B200 0000006
-#define B300 0000007
-#define B600 0000010
-#define B1200 0000011
-#define B1800 0000012
-#define B2400 0000013
-#define B4800 0000014
-#define B9600 0000015
-#define B19200 0000016
-#define B38400 0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE 0000060
-#define CS5 0000000
-#define CS6 0000020
-#define CS7 0000040
-#define CS8 0000060
-#define CSTOPB 0000100
-#define CREAD 0000200
-#define PARENB 0000400
-#define PARODD 0001000
-#define HUPCL 0002000
-#define CLOCAL 0004000
-#define CBAUDEX 0010000
-#define BOTHER 0010000
-#define B57600 0010001
-#define B115200 0010002
-#define B230400 0010003
-#define B460800 0010004
-#define B500000 0010005
-#define B576000 0010006
-#define B921600 0010007
-#define B1000000 0010010
-#define B1152000 0010011
-#define B1500000 0010012
-#define B2000000 0010013
-#define B2500000 0010014
-#define B3000000 0010015
-#define B3500000 0010016
-#define B4000000 0010017
-#define CIBAUD 002003600000 /* input baud rate */
-#define CMSPAR 010000000000 /* mark or space (stick) parity */
-#define CRTSCTS 020000000000 /* flow control */
-
-#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */
-
-/* c_lflag bits */
-#define ISIG 0000001
-#define ICANON 0000002
-#define XCASE 0000004
-#define ECHO 0000010
-#define ECHOE 0000020
-#define ECHOK 0000040
-#define ECHONL 0000100
-#define NOFLSH 0000200
-#define TOSTOP 0000400
-#define ECHOCTL 0001000
-#define ECHOPRT 0002000
-#define ECHOKE 0004000
-#define FLUSHO 0010000
-#define PENDIN 0040000
-#define IEXTEN 0100000
-
-/* tcflow() and TCXONC use these */
-#define TCOOFF 0
-#define TCOON 1
-#define TCIOFF 2
-#define TCION 3
-
-/* tcflush() and TCFLSH use these */
-#define TCIFLUSH 0
-#define TCOFLUSH 1
-#define TCIOFLUSH 2
-
-/* tcsetattr uses these */
-#define TCSANOW 0
-#define TCSADRAIN 1
-#define TCSAFLUSH 2
+#include <asm-generic/termbits.h>
#endif
diff --git a/arch/s390/include/asm/todclk.h b/arch/s390/include/asm/todclk.h
deleted file mode 100644
index c7f62055488a..000000000000
--- a/arch/s390/include/asm/todclk.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * File...........: linux/include/asm/todclk.h
- * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
- * Bugreports.to..: <Linux390@de.ibm.com>
- * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
- *
- * History of changes (starts July 2000)
- */
-
-#ifndef __ASM_TODCLK_H
-#define __ASM_TODCLK_H
-
-#ifdef __KERNEL__
-
-#define TOD_uSEC (0x1000ULL)
-#define TOD_mSEC (1000 * TOD_uSEC)
-#define TOD_SEC (1000 * TOD_mSEC)
-#define TOD_MIN (60 * TOD_SEC)
-#define TOD_HOUR (60 * TOD_MIN)
-
-#endif
-
-#endif
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 8377e91533d2..cbf0a8745bf4 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -93,6 +93,8 @@ extern struct uaccess_ops uaccess_mvcos;
extern struct uaccess_ops uaccess_mvcos_switch;
extern struct uaccess_ops uaccess_pt;
+extern int __handle_fault(unsigned long, unsigned long, int);
+
static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
{
size = uaccess.copy_to_user_small(size, ptr, x);
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index c7be8e10b87e..683f6381cc59 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
# Kexec part
S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index fda1a8123f9b..25c31d681402 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -31,14 +31,8 @@
#include <linux/shm.h>
#include <linux/slab.h>
#include <linux/uio.h>
-#include <linux/nfs_fs.h>
#include <linux/quota.h>
#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 45e9092b3aad..cb97afc85c94 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -4,10 +4,6 @@
#include <linux/compat.h>
#include <linux/socket.h>
#include <linux/syscalls.h>
-#include <linux/nfs_fs.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/export.h>
/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
/* to a 64 bit pointer */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 6a250808092b..d984a2a380c3 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -83,6 +83,8 @@ startup_continue:
slr %r0,%r0 # set cpuid to zero
sigp %r1,%r0,0x12 # switch to esame mode
sam64 # switch to 64 bit mode
+ llgfr %r13,%r13 # clear high-order half of base reg
+ lmh %r0,%r15,.Lzero64-.LPG1(%r13) # clear high-order half
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
@@ -127,6 +129,7 @@ startup_continue:
.L4malign:.quad 0xffffffffffc00000
.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
.Lnop: .long 0x07000700
+.Lzero64:.fill 16,4,0x0
#ifdef CONFIG_ZFCPDUMP
.Lcurrent_cpu:
.long 0x0
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 061479ff029f..0663287fa1b3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,9 +305,8 @@ static int __init early_parse_mem(char *p)
}
early_param("mem", early_parse_mem);
-#ifdef CONFIG_S390_SWITCH_AMODE
-unsigned int switch_amode = 0;
-EXPORT_SYMBOL_GPL(switch_amode);
+unsigned int user_mode = HOME_SPACE_MODE;
+EXPORT_SYMBOL_GPL(user_mode);
static int set_amode_and_uaccess(unsigned long user_amode,
unsigned long user32_amode)
@@ -340,23 +339,29 @@ static int set_amode_and_uaccess(unsigned long user_amode,
*/
static int __init early_parse_switch_amode(char *p)
{
- switch_amode = 1;
+ if (user_mode != SECONDARY_SPACE_MODE)
+ user_mode = PRIMARY_SPACE_MODE;
return 0;
}
early_param("switch_amode", early_parse_switch_amode);
-#else /* CONFIG_S390_SWITCH_AMODE */
-static inline int set_amode_and_uaccess(unsigned long user_amode,
- unsigned long user32_amode)
+static int __init early_parse_user_mode(char *p)
{
+ if (p && strcmp(p, "primary") == 0)
+ user_mode = PRIMARY_SPACE_MODE;
+#ifdef CONFIG_S390_EXEC_PROTECT
+ else if (p && strcmp(p, "secondary") == 0)
+ user_mode = SECONDARY_SPACE_MODE;
+#endif
+ else if (!p || strcmp(p, "home") == 0)
+ user_mode = HOME_SPACE_MODE;
+ else
+ return 1;
return 0;
}
-#endif /* CONFIG_S390_SWITCH_AMODE */
+early_param("user_mode", early_parse_user_mode);
#ifdef CONFIG_S390_EXEC_PROTECT
-unsigned int s390_noexec = 0;
-EXPORT_SYMBOL_GPL(s390_noexec);
-
/*
* Enable execute protection?
*/
@@ -364,8 +369,7 @@ static int __init early_parse_noexec(char *p)
{
if (!strncmp(p, "off", 3))
return 0;
- switch_amode = 1;
- s390_noexec = 1;
+ user_mode = SECONDARY_SPACE_MODE;
return 0;
}
early_param("noexec", early_parse_noexec);
@@ -373,7 +377,7 @@ early_param("noexec", early_parse_noexec);
static void setup_addressing_mode(void)
{
- if (s390_noexec) {
+ if (user_mode == SECONDARY_SPACE_MODE) {
if (set_amode_and_uaccess(PSW_ASC_SECONDARY,
PSW32_ASC_SECONDARY))
pr_info("Execute protection active, "
@@ -381,7 +385,7 @@ static void setup_addressing_mode(void)
else
pr_info("Execute protection active, "
"mvcos not available\n");
- } else if (switch_amode) {
+ } else if (user_mode == PRIMARY_SPACE_MODE) {
if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY))
pr_info("Address spaces switched, "
"mvcos available\n");
@@ -411,7 +415,7 @@ setup_lowcore(void)
lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
lc->restart_psw.addr =
PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
- if (switch_amode)
+ if (user_mode != HOME_SPACE_MODE)
lc->restart_psw.mask |= PSW_ASC_HOME;
lc->external_new_psw.mask = psw_kernel_bits;
lc->external_new_psw.addr =
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 68e1ecf5ebab..65065ac48ed3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -335,7 +335,7 @@ int get_sync_clock(unsigned long long *clock)
sw0 = atomic_read(sw_ptr);
*clock = get_clock();
sw1 = atomic_read(sw_ptr);
- put_cpu_var(clock_sync_sync);
+ put_cpu_var(clock_sync_word);
if (sw0 == sw1 && (sw0 & 0x80000000U))
/* Success: time is in sync. */
return 0;
@@ -385,7 +385,7 @@ static inline int check_sync_clock(void)
sw_ptr = &get_cpu_var(clock_sync_word);
rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
- put_cpu_var(clock_sync_sync);
+ put_cpu_var(clock_sync_word);
return rc;
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index adfb32aa6d59..5f99e66c51c3 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -86,7 +86,8 @@ static void vdso_init_data(struct vdso_data *vd)
unsigned int facility_list;
facility_list = stfl();
- vd->ectg_available = switch_amode && (facility_list & 1);
+ vd->ectg_available =
+ user_mode != HOME_SPACE_MODE && (facility_list & 1);
}
#ifdef CONFIG_64BIT
@@ -114,7 +115,7 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
lowcore->vdso_per_cpu_data = __LC_PASTE;
- if (!switch_amode || !vdso_enabled)
+ if (user_mode == HOME_SPACE_MODE || !vdso_enabled)
return 0;
segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
@@ -160,7 +161,7 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
- if (!switch_amode || !vdso_enabled)
+ if (user_mode == HOME_SPACE_MODE || !vdso_enabled)
return;
psal = (u32 *)(addr_t) lowcore->paste[4];
@@ -184,7 +185,7 @@ static void __vdso_init_cr5(void *dummy)
static void vdso_init_cr5(void)
{
- if (switch_amode && vdso_enabled)
+ if (user_mode != HOME_SPACE_MODE && vdso_enabled)
on_each_cpu(__vdso_init_cr5, NULL, 1);
}
#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index bf164fc21864..6ee55ae84ce2 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
depends on HAVE_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS
select ANON_INODES
- select S390_SWITCH_AMODE
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
index 58da3f461214..60455f104ea3 100644
--- a/arch/s390/lib/uaccess_mvcos.c
+++ b/arch/s390/lib/uaccess_mvcos.c
@@ -162,7 +162,6 @@ static size_t clear_user_mvcos(size_t size, void __user *to)
return size;
}
-#ifdef CONFIG_S390_SWITCH_AMODE
static size_t strnlen_user_mvcos(size_t count, const char __user *src)
{
char buf[256];
@@ -200,7 +199,6 @@ static size_t strncpy_from_user_mvcos(size_t count, const char __user *src,
} while ((len_str == len) && (done < count));
return done;
}
-#endif /* CONFIG_S390_SWITCH_AMODE */
struct uaccess_ops uaccess_mvcos = {
.copy_from_user = copy_from_user_mvcos_check,
@@ -215,7 +213,6 @@ struct uaccess_ops uaccess_mvcos = {
.futex_atomic_cmpxchg = futex_atomic_cmpxchg_std,
};
-#ifdef CONFIG_S390_SWITCH_AMODE
struct uaccess_ops uaccess_mvcos_switch = {
.copy_from_user = copy_from_user_mvcos,
.copy_from_user_small = copy_from_user_mvcos,
@@ -228,4 +225,3 @@ struct uaccess_ops uaccess_mvcos_switch = {
.futex_atomic_op = futex_atomic_op_pt,
.futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
};
-#endif
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index cb5d59eab0ee..404f2de296dc 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -23,86 +23,21 @@ static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr)
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return NULL;
+ return (pte_t *) 0x3a;
pud = pud_offset(pgd, addr);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- return NULL;
+ return (pte_t *) 0x3b;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- return NULL;
+ return (pte_t *) 0x10;
return pte_offset_map(pmd, addr);
}
-static int __handle_fault(struct mm_struct *mm, unsigned long address,
- int write_access)
-{
- struct vm_area_struct *vma;
- int ret = -EFAULT;
- int fault;
-
- if (in_atomic())
- return ret;
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, address);
- if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > address)) {
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (expand_stack(vma, address))
- goto out;
- }
-
- if (!write_access) {
- /* page not present, check vm flags */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto out;
- } else {
- if (!(vma->vm_flags & VM_WRITE))
- goto out;
- }
-
-survive:
- fault = handle_mm_fault(mm, vma, address, write_access ? FAULT_FLAG_WRITE : 0);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto out_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
- ret = 0;
-out:
- up_read(&mm->mmap_sem);
- return ret;
-
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_global_init(current)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", current->comm);
- return ret;
-
-out_sigbus:
- up_read(&mm->mmap_sem);
- current->thread.prot_addr = address;
- current->thread.trap_no = 0x11;
- force_sig(SIGBUS, current);
- return ret;
-}
-
-static size_t __user_copy_pt(unsigned long uaddr, void *kptr,
- size_t n, int write_user)
+static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
+ size_t n, int write_user)
{
struct mm_struct *mm = current->mm;
unsigned long offset, pfn, done, size;
@@ -114,12 +49,17 @@ retry:
spin_lock(&mm->page_table_lock);
do {
pte = follow_table(mm, uaddr);
- if (!pte || !pte_present(*pte) ||
- (write_user && !pte_write(*pte)))
+ if ((unsigned long) pte < 0x1000)
goto fault;
+ if (!pte_present(*pte)) {
+ pte = (pte_t *) 0x11;
+ goto fault;
+ } else if (write_user && !pte_write(*pte)) {
+ pte = (pte_t *) 0x04;
+ goto fault;
+ }
pfn = pte_pfn(*pte);
-
offset = uaddr & (PAGE_SIZE - 1);
size = min(n - done, PAGE_SIZE - offset);
if (write_user) {
@@ -137,7 +77,7 @@ retry:
return n - done;
fault:
spin_unlock(&mm->page_table_lock);
- if (__handle_fault(mm, uaddr, write_user))
+ if (__handle_fault(uaddr, (unsigned long) pte, write_user))
return n - done;
goto retry;
}
@@ -146,30 +86,31 @@ fault:
* Do DAT for user address by page table walk, return kernel address.
* This function needs to be called with current->mm->page_table_lock held.
*/
-static unsigned long __dat_user_addr(unsigned long uaddr)
+static __always_inline unsigned long __dat_user_addr(unsigned long uaddr)
{
struct mm_struct *mm = current->mm;
- unsigned long pfn, ret;
+ unsigned long pfn;
pte_t *pte;
int rc;
- ret = 0;
retry:
pte = follow_table(mm, uaddr);
- if (!pte || !pte_present(*pte))
+ if ((unsigned long) pte < 0x1000)
goto fault;
+ if (!pte_present(*pte)) {
+ pte = (pte_t *) 0x11;
+ goto fault;
+ }
pfn = pte_pfn(*pte);
- ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
-out:
- return ret;
+ return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
fault:
spin_unlock(&mm->page_table_lock);
- rc = __handle_fault(mm, uaddr, 0);
+ rc = __handle_fault(uaddr, (unsigned long) pte, 0);
spin_lock(&mm->page_table_lock);
- if (rc)
- goto out;
- goto retry;
+ if (!rc)
+ goto retry;
+ return 0;
}
size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
@@ -234,8 +175,12 @@ retry:
spin_lock(&mm->page_table_lock);
do {
pte = follow_table(mm, uaddr);
- if (!pte || !pte_present(*pte))
+ if ((unsigned long) pte < 0x1000)
+ goto fault;
+ if (!pte_present(*pte)) {
+ pte = (pte_t *) 0x11;
goto fault;
+ }
pfn = pte_pfn(*pte);
offset = uaddr & (PAGE_SIZE-1);
@@ -249,9 +194,8 @@ retry:
return done + 1;
fault:
spin_unlock(&mm->page_table_lock);
- if (__handle_fault(mm, uaddr, 0)) {
+ if (__handle_fault(uaddr, (unsigned long) pte, 0))
return 0;
- }
goto retry;
}
@@ -284,7 +228,7 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
{
struct mm_struct *mm = current->mm;
unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
- uaddr, done, size;
+ uaddr, done, size, error_code;
unsigned long uaddr_from = (unsigned long) from;
unsigned long uaddr_to = (unsigned long) to;
pte_t *pte_from, *pte_to;
@@ -298,17 +242,28 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
retry:
spin_lock(&mm->page_table_lock);
do {
+ write_user = 0;
+ uaddr = uaddr_from;
pte_from = follow_table(mm, uaddr_from);
- if (!pte_from || !pte_present(*pte_from)) {
- uaddr = uaddr_from;
- write_user = 0;
+ error_code = (unsigned long) pte_from;
+ if (error_code < 0x1000)
+ goto fault;
+ if (!pte_present(*pte_from)) {
+ error_code = 0x11;
goto fault;
}
+ write_user = 1;
+ uaddr = uaddr_to;
pte_to = follow_table(mm, uaddr_to);
- if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) {
- uaddr = uaddr_to;
- write_user = 1;
+ error_code = (unsigned long) pte_to;
+ if (error_code < 0x1000)
+ goto fault;
+ if (!pte_present(*pte_to)) {
+ error_code = 0x11;
+ goto fault;
+ } else if (!pte_write(*pte_to)) {
+ error_code = 0x04;
goto fault;
}
@@ -329,7 +284,7 @@ retry:
return n - done;
fault:
spin_unlock(&mm->page_table_lock);
- if (__handle_fault(mm, uaddr, write_user))
+ if (__handle_fault(uaddr, error_code, write_user))
return n - done;
goto retry;
}
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index ff58779bf7e9..76a3637b88e0 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -18,6 +18,7 @@
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/oom.h>
+#include <linux/suspend.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -44,6 +45,7 @@ static volatile long cmm_pages_target;
static volatile long cmm_timed_pages_target;
static long cmm_timeout_pages;
static long cmm_timeout_seconds;
+static int cmm_suspended;
static struct cmm_page_array *cmm_page_list;
static struct cmm_page_array *cmm_timed_page_list;
@@ -147,9 +149,9 @@ cmm_thread(void *dummy)
while (1) {
rc = wait_event_interruptible(cmm_thread_wait,
- (cmm_pages != cmm_pages_target ||
- cmm_timed_pages != cmm_timed_pages_target ||
- kthread_should_stop()));
+ (!cmm_suspended && (cmm_pages != cmm_pages_target ||
+ cmm_timed_pages != cmm_timed_pages_target)) ||
+ kthread_should_stop());
if (kthread_should_stop() || rc == -ERESTARTSYS) {
cmm_pages_target = cmm_pages;
cmm_timed_pages_target = cmm_timed_pages;
@@ -410,6 +412,38 @@ cmm_smsg_target(char *from, char *msg)
static struct ctl_table_header *cmm_sysctl_header;
+static int cmm_suspend(void)
+{
+ cmm_suspended = 1;
+ cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+ cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
+ return 0;
+}
+
+static int cmm_resume(void)
+{
+ cmm_suspended = 0;
+ cmm_kick_thread();
+ return 0;
+}
+
+static int cmm_power_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ switch (event) {
+ case PM_POST_HIBERNATION:
+ return cmm_resume();
+ case PM_HIBERNATION_PREPARE:
+ return cmm_suspend();
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block cmm_power_notifier = {
+ .notifier_call = cmm_power_event,
+};
+
static int
cmm_init (void)
{
@@ -418,7 +452,7 @@ cmm_init (void)
#ifdef CONFIG_CMM_PROC
cmm_sysctl_header = register_sysctl_table(cmm_dir_table);
if (!cmm_sysctl_header)
- goto out;
+ goto out_sysctl;
#endif
#ifdef CONFIG_CMM_IUCV
rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
@@ -428,17 +462,21 @@ cmm_init (void)
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
goto out_oom_notify;
+ rc = register_pm_notifier(&cmm_power_notifier);
+ if (rc)
+ goto out_pm;
init_waitqueue_head(&cmm_thread_wait);
init_timer(&cmm_timer);
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0;
- if (!rc)
- goto out;
- /*
- * kthread_create failed. undo all the stuff from above again.
- */
- unregister_oom_notifier(&cmm_oom_nb);
+ if (rc)
+ goto out_kthread;
+ return 0;
+out_kthread:
+ unregister_pm_notifier(&cmm_power_notifier);
+out_pm:
+ unregister_oom_notifier(&cmm_oom_nb);
out_oom_notify:
#ifdef CONFIG_CMM_IUCV
smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
@@ -446,8 +484,8 @@ out_smsg:
#endif
#ifdef CONFIG_CMM_PROC
unregister_sysctl_table(cmm_sysctl_header);
+out_sysctl:
#endif
-out:
return rc;
}
@@ -455,6 +493,7 @@ static void
cmm_exit(void)
{
kthread_stop(cmm_thread_ptr);
+ unregister_pm_notifier(&cmm_power_notifier);
unregister_oom_notifier(&cmm_oom_nb);
cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 6d507462967a..fc102e70d9c2 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -34,16 +34,15 @@
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
#include <asm/mmu_context.h>
+#include <asm/compat.h>
#include "../kernel/entry.h"
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
-#define __FIXUP_MASK 0x7fffffff
#define __SUBCODE_MASK 0x0200
#define __PF_RES_FIELD 0ULL
#else /* CONFIG_64BIT */
#define __FAIL_ADDR_MASK -4096L
-#define __FIXUP_MASK ~0L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
#endif /* CONFIG_64BIT */
@@ -52,11 +51,15 @@
extern int sysctl_userprocess_debug;
#endif
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, long err)
+#define VM_FAULT_BADCONTEXT 0x010000
+#define VM_FAULT_BADMAP 0x020000
+#define VM_FAULT_BADACCESS 0x040000
+
+static inline int notify_page_fault(struct pt_regs *regs)
{
int ret = 0;
+#ifdef CONFIG_KPROBES
/* kprobe_running() needs smp_processor_id() */
if (!user_mode(regs)) {
preempt_disable();
@@ -64,15 +67,9 @@ static inline int notify_page_fault(struct pt_regs *regs, long err)
ret = 1;
preempt_enable();
}
-
+#endif
return ret;
}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, long err)
-{
- return 0;
-}
-#endif
/*
@@ -100,57 +97,50 @@ void bust_spinlocks(int yes)
/*
* Returns the address space associated with the fault.
- * Returns 0 for kernel space, 1 for user space and
- * 2 for code execution in user space with noexec=on.
+ * Returns 0 for kernel space and 1 for user space.
*/
-static inline int check_space(struct task_struct *tsk)
+static inline int user_space_fault(unsigned long trans_exc_code)
{
/*
- * The lowest two bits of S390_lowcore.trans_exc_code
- * indicate which paging table was used.
+ * The lowest two bits of the translation exception
+ * identification indicate which paging table was used.
*/
- int desc = S390_lowcore.trans_exc_code & 3;
-
- if (desc == 3) /* Home Segment Table Descriptor */
- return switch_amode == 0;
- if (desc == 2) /* Secondary Segment Table Descriptor */
- return tsk->thread.mm_segment.ar4;
-#ifdef CONFIG_S390_SWITCH_AMODE
- if (unlikely(desc == 1)) { /* STD determined via access register */
- /* %a0 always indicates primary space. */
- if (S390_lowcore.exc_access_id != 0) {
- save_access_regs(tsk->thread.acrs);
- /*
- * An alet of 0 indicates primary space.
- * An alet of 1 indicates secondary space.
- * Any other alet values generate an
- * alen-translation exception.
- */
- if (tsk->thread.acrs[S390_lowcore.exc_access_id])
- return tsk->thread.mm_segment.ar4;
- }
- }
-#endif
- /* Primary Segment Table Descriptor */
- return switch_amode << s390_noexec;
+ trans_exc_code &= 3;
+ if (trans_exc_code == 2)
+ /* Access via secondary space, set_fs setting decides */
+ return current->thread.mm_segment.ar4;
+ if (user_mode == HOME_SPACE_MODE)
+ /* User space if the access has been done via home space. */
+ return trans_exc_code == 3;
+ /*
+ * If the user space is not the home space the kernel runs in home
+ * space. Access via secondary space has already been covered,
+ * access via primary space or access register is from user space
+ * and access via home space is from the kernel.
+ */
+ return trans_exc_code != 3;
}
/*
* Send SIGSEGV to task. This is an external routine
* to keep the stack usage of do_page_fault small.
*/
-static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
- int si_code, unsigned long address)
+static noinline void do_sigsegv(struct pt_regs *regs, long int_code,
+ int si_code, unsigned long trans_exc_code)
{
struct siginfo si;
+ unsigned long address;
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+ current->thread.prot_addr = address;
+ current->thread.trap_no = int_code;
#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
#if defined(CONFIG_SYSCTL)
if (sysctl_userprocess_debug)
#endif
{
printk("User process fault: interruption code 0x%lX\n",
- error_code);
+ int_code);
printk("failing address: %lX\n", address);
show_regs(regs);
}
@@ -161,13 +151,14 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
force_sig_info(SIGSEGV, &si, current);
}
-static void do_no_context(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+static noinline void do_no_context(struct pt_regs *regs, long int_code,
+ unsigned long trans_exc_code)
{
const struct exception_table_entry *fixup;
+ unsigned long address;
/* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
+ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (fixup) {
regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
return;
@@ -177,129 +168,149 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code,
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- if (check_space(current) == 0)
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+ if (!user_space_fault(trans_exc_code))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" at virtual kernel address %p\n", (void *)address);
else
printk(KERN_ALERT "Unable to handle kernel paging request"
" at virtual user address %p\n", (void *)address);
- die("Oops", regs, error_code);
+ die("Oops", regs, int_code);
do_exit(SIGKILL);
}
-static void do_low_address(struct pt_regs *regs, unsigned long error_code)
+static noinline void do_low_address(struct pt_regs *regs, long int_code,
+ unsigned long trans_exc_code)
{
/* Low-address protection hit in kernel mode means
NULL pointer write access in kernel mode. */
if (regs->psw.mask & PSW_MASK_PSTATE) {
/* Low-address protection hit in user mode 'cannot happen'. */
- die ("Low-address protection", regs, error_code);
+ die ("Low-address protection", regs, int_code);
do_exit(SIGKILL);
}
- do_no_context(regs, error_code, 0);
+ do_no_context(regs, int_code, trans_exc_code);
}
-static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+static noinline void do_sigbus(struct pt_regs *regs, long int_code,
+ unsigned long trans_exc_code)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- tsk->thread.prot_addr = address;
- tsk->thread.trap_no = error_code;
+ tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK;
+ tsk->thread.trap_no = int_code;
force_sig(SIGBUS, tsk);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(regs->psw.mask & PSW_MASK_PSTATE))
- do_no_context(regs, error_code, address);
}
#ifdef CONFIG_S390_EXEC_PROTECT
-static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
- unsigned long address, unsigned long error_code)
+static noinline int signal_return(struct pt_regs *regs, long int_code,
+ unsigned long trans_exc_code)
{
u16 instruction;
int rc;
-#ifdef CONFIG_COMPAT
- int compat;
-#endif
- pagefault_disable();
rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
- pagefault_enable();
- if (rc)
- return -EFAULT;
- up_read(&mm->mmap_sem);
- clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
-#ifdef CONFIG_COMPAT
- compat = is_compat_task();
- if (compat && instruction == 0x0a77)
- sys32_sigreturn();
- else if (compat && instruction == 0x0aad)
- sys32_rt_sigreturn();
- else
-#endif
- if (instruction == 0x0a77)
- sys_sigreturn();
- else if (instruction == 0x0aad)
- sys_rt_sigreturn();
- else {
- current->thread.prot_addr = address;
- current->thread.trap_no = error_code;
- do_sigsegv(regs, error_code, SEGV_MAPERR, address);
- }
+ if (!rc && instruction == 0x0a77) {
+ clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
+ if (is_compat_task())
+ sys32_sigreturn();
+ else
+ sys_sigreturn();
+ } else if (!rc && instruction == 0x0aad) {
+ clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
+ if (is_compat_task())
+ sys32_rt_sigreturn();
+ else
+ sys_rt_sigreturn();
+ } else
+ do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code);
return 0;
}
#endif /* CONFIG_S390_EXEC_PROTECT */
+static noinline void do_fault_error(struct pt_regs *regs, long int_code,
+ unsigned long trans_exc_code, int fault)
+{
+ int si_code;
+
+ switch (fault) {
+ case VM_FAULT_BADACCESS:
+#ifdef CONFIG_S390_EXEC_PROTECT
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
+ (trans_exc_code & 3) == 0) {
+ signal_return(regs, int_code, trans_exc_code);
+ break;
+ }
+#endif /* CONFIG_S390_EXEC_PROTECT */
+ case VM_FAULT_BADMAP:
+ /* Bad memory access. Check if it is kernel or user space. */
+ if (regs->psw.mask & PSW_MASK_PSTATE) {
+ /* User mode accesses just cause a SIGSEGV */
+ si_code = (fault == VM_FAULT_BADMAP) ?
+ SEGV_MAPERR : SEGV_ACCERR;
+ do_sigsegv(regs, int_code, si_code, trans_exc_code);
+ return;
+ }
+ case VM_FAULT_BADCONTEXT:
+ do_no_context(regs, int_code, trans_exc_code);
+ break;
+ default: /* fault & VM_FAULT_ERROR */
+ if (fault & VM_FAULT_OOM)
+ pagefault_out_of_memory();
+ else if (fault & VM_FAULT_SIGBUS) {
+ do_sigbus(regs, int_code, trans_exc_code);
+ /* Kernel mode? Handle exceptions or die */
+ if (!(regs->psw.mask & PSW_MASK_PSTATE))
+ do_no_context(regs, int_code, trans_exc_code);
+ } else
+ BUG();
+ break;
+ }
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*
- * error_code:
+ * interruption code (int_code):
* 04 Protection -> Write-Protection (suprression)
* 10 Segment translation -> Not present (nullification)
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
*/
-static inline void
-do_exception(struct pt_regs *regs, unsigned long error_code, int write)
+static inline int do_exception(struct pt_regs *regs, int access,
+ unsigned long trans_exc_code)
{
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
- int space;
- int si_code;
int fault;
- if (notify_page_fault(regs, error_code))
- return;
+ if (notify_page_fault(regs))
+ return 0;
tsk = current;
mm = tsk->mm;
- /* get the failing address and the affected space */
- address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
- space = check_space(tsk);
-
/*
* Verify that the fault happened in user space, that
* we are not in an interrupt and that there is a
* user context.
*/
- if (unlikely(space == 0 || in_atomic() || !mm))
- goto no_context;
+ fault = VM_FAULT_BADCONTEXT;
+ if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
+ goto out;
+ address = trans_exc_code & __FAIL_ADDR_MASK;
/*
* When we get here, the fault happened in the current
* task's user address space, so we can switch on the
@@ -309,42 +320,26 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
down_read(&mm->mmap_sem);
- si_code = SEGV_MAPERR;
+ fault = VM_FAULT_BADMAP;
vma = find_vma(mm, address);
if (!vma)
- goto bad_area;
-
-#ifdef CONFIG_S390_EXEC_PROTECT
- if (unlikely((space == 2) && !(vma->vm_flags & VM_EXEC)))
- if (!signal_return(mm, regs, address, error_code))
- /*
- * signal_return() has done an up_read(&mm->mmap_sem)
- * if it returns 0.
- */
- return;
-#endif
+ goto out_up;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- si_code = SEGV_ACCERR;
- if (!write) {
- /* page not present, check vm flags */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- } else {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
+ if (unlikely(vma->vm_start > address)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto out_up;
+ if (expand_stack(vma, address))
+ goto out_up;
}
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+ fault = VM_FAULT_BADACCESS;
+ if (unlikely(!(vma->vm_flags & access)))
+ goto out_up;
+
if (is_vm_hugetlb_page(vma))
address &= HPAGE_MASK;
/*
@@ -352,18 +347,11 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM) {
- up_read(&mm->mmap_sem);
- pagefault_out_of_memory();
- return;
- } else if (fault & VM_FAULT_SIGBUS) {
- do_sigbus(regs, error_code, address);
- return;
- }
- BUG();
- }
+ fault = handle_mm_fault(mm, vma, address,
+ (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0);
+ if (unlikely(fault & VM_FAULT_ERROR))
+ goto out_up;
+
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
@@ -373,74 +361,69 @@ good_area:
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
- up_read(&mm->mmap_sem);
/*
* The instruction that caused the program check will
* be repeated. Don't signal single step via SIGTRAP.
*/
clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
+ fault = 0;
+out_up:
up_read(&mm->mmap_sem);
-
- /* User mode accesses just cause a SIGSEGV */
- if (regs->psw.mask & PSW_MASK_PSTATE) {
- tsk->thread.prot_addr = address;
- tsk->thread.trap_no = error_code;
- do_sigsegv(regs, error_code, si_code, address);
- return;
- }
-
-no_context:
- do_no_context(regs, error_code, address);
+out:
+ return fault;
}
-void __kprobes do_protection_exception(struct pt_regs *regs,
- long error_code)
+void __kprobes do_protection_exception(struct pt_regs *regs, long int_code)
{
+ unsigned long trans_exc_code = S390_lowcore.trans_exc_code;
+ int fault;
+
/* Protection exception is supressing, decrement psw address. */
- regs->psw.addr -= (error_code >> 16);
+ regs->psw.addr -= (int_code >> 16);
/*
* Check for low-address protection. This needs to be treated
* as a special case because the translation exception code
* field is not guaranteed to contain valid data in this case.
*/
- if (unlikely(!(S390_lowcore.trans_exc_code & 4))) {
- do_low_address(regs, error_code);
+ if (unlikely(!(trans_exc_code & 4))) {
+ do_low_address(regs, int_code, trans_exc_code);
return;
}
- do_exception(regs, 4, 1);
+ fault = do_exception(regs, VM_WRITE, trans_exc_code);
+ if (unlikely(fault))
+ do_fault_error(regs, 4, trans_exc_code, fault);
}
-void __kprobes do_dat_exception(struct pt_regs *regs, long error_code)
+void __kprobes do_dat_exception(struct pt_regs *regs, long int_code)
{
- do_exception(regs, error_code & 0xff, 0);
+ unsigned long trans_exc_code = S390_lowcore.trans_exc_code;
+ int access, fault;
+
+ access = VM_READ | VM_EXEC | VM_WRITE;
+#ifdef CONFIG_S390_EXEC_PROTECT
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
+ (trans_exc_code & 3) == 0)
+ access = VM_EXEC;
+#endif
+ fault = do_exception(regs, access, trans_exc_code);
+ if (unlikely(fault))
+ do_fault_error(regs, int_code & 255, trans_exc_code, fault);
}
#ifdef CONFIG_64BIT
-void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
+void __kprobes do_asce_exception(struct pt_regs *regs, long int_code)
{
- struct mm_struct *mm;
+ unsigned long trans_exc_code = S390_lowcore.trans_exc_code;
+ struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long address;
- int space;
-
- mm = current->mm;
- address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
- space = check_space(current);
- if (unlikely(space == 0 || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
goto no_context;
local_irq_enable();
down_read(&mm->mmap_sem);
- vma = find_vma(mm, address);
+ vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK);
up_read(&mm->mmap_sem);
if (vma) {
@@ -450,17 +433,38 @@ void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
/* User mode accesses just cause a SIGSEGV */
if (regs->psw.mask & PSW_MASK_PSTATE) {
- current->thread.prot_addr = address;
- current->thread.trap_no = error_code;
- do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+ do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code);
return;
}
no_context:
- do_no_context(regs, error_code, address);
+ do_no_context(regs, int_code, trans_exc_code);
}
#endif
+int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user)
+{
+ struct pt_regs regs;
+ int access, fault;
+
+ regs.psw.mask = psw_kernel_bits;
+ if (!irqs_disabled())
+ regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
+ regs.psw.addr = (unsigned long) __builtin_return_address(0);
+ regs.psw.addr |= PSW_ADDR_AMODE;
+ uaddr &= PAGE_MASK;
+ access = write_user ? VM_WRITE : VM_READ;
+ fault = do_exception(&regs, access, uaddr | 2);
+ if (unlikely(fault)) {
+ if (fault & VM_FAULT_OOM) {
+ pagefault_out_of_memory();
+ fault = 0;
+ } else if (fault & VM_FAULT_SIGBUS)
+ do_sigbus(&regs, int_code, uaddr);
+ }
+ return fault ? -EFAULT : 0;
+}
+
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
@@ -522,7 +526,7 @@ void pfault_fini(void)
: : "a" (&refbk), "m" (refbk) : "cc");
}
-static void pfault_interrupt(__u16 error_code)
+static void pfault_interrupt(__u16 int_code)
{
struct task_struct *tsk;
__u16 subcode;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2757c5616a07..ad621e06ada3 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -269,7 +269,7 @@ int s390_enable_sie(void)
struct mm_struct *mm, *old_mm;
/* Do we have switched amode? If no, we cannot do sie */
- if (!switch_amode)
+ if (user_mode == HOME_SPACE_MODE)
return -EINVAL;
/* Do we have pgstes? if yes, we are done */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 5f91a38d7592..300ab012b0fd 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -70,8 +70,12 @@ static pte_t __ref *vmem_pte_alloc(void)
pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
- PTRS_PER_PTE * sizeof(pte_t));
+ if (MACHINE_HAS_HPAGE)
+ clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY | _PAGE_CO,
+ PTRS_PER_PTE * sizeof(pte_t));
+ else
+ clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -112,7 +116,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
(address + HPAGE_SIZE <= start + size) &&
(address >= HPAGE_SIZE)) {
- pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
+ pte_val(pte) |= _SEGMENT_ENTRY_LARGE |
+ _SEGMENT_ENTRY_CO;
pmd_val(*pm_dir) = pte_val(pte);
address += HPAGE_SIZE - PAGE_SIZE;
continue;