From fa968ee215c0ca91e4a9c3a69ac2405aae6e5d2f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 Nov 2012 10:44:08 +0100 Subject: s390/signal: set correct address space control If user space is running in primary mode it can switch to secondary or access register mode, this is used e.g. in the clock_gettime code of the vdso. If a signal is delivered to the user space process while it has been running in access register mode the signal handler is executed in access register mode as well which will result in a crash most of the time. Set the address space control bits in the PSW to the default for the execution of the signal handler and make sure that the previous address space control is restored on signal return. Take care that user space can not switch to the kernel address space by modifying the registers in the signal frame. Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 2 +- arch/s390/include/uapi/asm/ptrace.h | 4 ++-- arch/s390/kernel/compat_signal.c | 14 ++++++++++++-- arch/s390/kernel/signal.c | 14 ++++++++++++-- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index a34a9d612fc0..18cd6b592650 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -20,7 +20,7 @@ #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL -#define PSW32_MASK_USER 0x00003F00UL +#define PSW32_MASK_USER 0x0000FF00UL #define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 705588a16d70..a5ca214b34fd 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -239,7 +239,7 @@ typedef struct #define PSW_MASK_EA 0x00000000UL #define PSW_MASK_BA 0x00000000UL -#define PSW_MASK_USER 0x00003F00UL +#define PSW_MASK_USER 0x0000FF00UL #define PSW_ADDR_AMODE 0x80000000UL 
#define PSW_ADDR_INSN 0x7FFFFFFFUL @@ -269,7 +269,7 @@ typedef struct #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL -#define PSW_MASK_USER 0x00003F8180000000UL +#define PSW_MASK_USER 0x0000FF8180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a1e8a8694bb7..593fcc9253fc 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -309,6 +309,10 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC)) + regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; @@ -481,7 +485,10 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (__force __u64) frame; - regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (psw_user_bits & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); @@ -549,7 +556,10 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (__force __u64) frame; - regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + /* Force 31 bit amode and default user address space control. 
*/ + regs->psw.mask = PSW_MASK_BA | + (psw_user_bits & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index c13a2a37ef00..d1259d875074 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -136,6 +136,10 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC)) + regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); /* Check for invalid amode */ if (regs->psw.mask & PSW_MASK_EA) regs->psw.mask |= PSW_MASK_BA; @@ -273,7 +277,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; - regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (psw_user_bits & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -346,7 +353,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; - regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ + /* Force default amode and default user address space control. 
*/ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (psw_user_bits & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); -- cgit v1.2.3 From 658e5ce705f2a09ab681eb61ca7c8619bb7a783d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 10 Nov 2012 11:04:27 +0100 Subject: s390/topology: fix core id vs physical package id mix-up The current topology code confuses core id vs physical package id. In other words /sys/devices/system/cpu/cpuX/topology/core_id displays the physical_package_id (aka socket id) instead of the core id. The physical_package_id sysfs attribute always displays "-1" instead of the socket id. Fix this mix-up with a small patch which defines and initializes topology_physical_package_id correctly and fixes the broken core id handling. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/topology.h | 3 +++ arch/s390/kernel/topology.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 9ca305383760..9935cbd6a46f 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -8,6 +8,9 @@ struct cpu; #ifdef CONFIG_SCHED_BOOK +extern unsigned char cpu_socket_id[NR_CPUS]; +#define topology_physical_package_id(cpu) (cpu_socket_id[cpu]) + extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 54d93f4b6818..dd55f7c20104 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -40,6 +40,7 @@ static DEFINE_SPINLOCK(topology_lock); static struct mask_info core_info; cpumask_t cpu_core_map[NR_CPUS]; unsigned char cpu_core_id[NR_CPUS]; +unsigned char cpu_socket_id[NR_CPUS]; static struct mask_info book_info; cpumask_t cpu_book_map[NR_CPUS]; @@ -83,11 +84,12 @@ static struct mask_info 
*add_cpus_to_mask(struct topology_cpu *tl_cpu, cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; cpumask_set_cpu(lcpu, &core->mask); + cpu_core_id[lcpu] = rcpu; if (one_core_per_cpu) { - cpu_core_id[lcpu] = rcpu; + cpu_socket_id[lcpu] = rcpu; core = core->next; } else { - cpu_core_id[lcpu] = core->id; + cpu_socket_id[lcpu] = core->id; } smp_cpu_set_polarization(lcpu, tl_cpu->pp); } -- cgit v1.2.3 From d55c4c613fc4d4ad2ba0fc6fa2b57176d420f7e4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Oct 2012 15:49:02 +0200 Subject: s390/gup: add missing TASK_SIZE check to get_user_pages_fast() When walking page tables we need to make sure that everything is within bounds of the ASCE limit of the task's address space. Otherwise we might calculate e.g. a pud pointer which is not within a pud and dereference it. So check against TASK_SIZE (which is the ASCE limit) before walking page tables. Reviewed-by: Gerald Schaefer Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 8b8285310b5a..16fb3c1615dc 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -229,7 +229,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (end < start) + if ((end < start) || (end > TASK_SIZE)) goto slow_irqon; /* -- cgit v1.2.3 From 516bad44b9f3bdcb0be6be0252b7557bf7a149e4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Oct 2012 15:58:26 +0200 Subject: s390/gup: fix access_ok() usage in __get_user_pages_fast() access_ok() returns always "true" on s390. Therefore all access_ok() invocations are rather pointless. However when walking page tables we need to make sure that everything is within bounds of the ASCE limit of the task's address space. 
So remove the access_ok() call and add the same check we have in get_user_pages_fast(). Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/gup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 16fb3c1615dc..1f5315d1215c 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -180,8 +180,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - (void __user *)start, len))) + if ((end < start) || (end > TASK_SIZE)) return 0; local_irq_save(flags); -- cgit v1.2.3 From 4bffbb3455372a26816e364fb4448810f7014452 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 8 Nov 2012 14:18:47 +0100 Subject: s390/mm: have 16 byte aligned struct pages Select HAVE_ALIGNED_STRUCT_PAGE on s390, so that the slub allocator can make use of compare and swap double for lockless updates. This increases the size of struct page to 64 bytes (instead of 56 bytes), however the performance gain justifies the increased size: - now exactly four struct pages fit into a single cache line; the case that accessing a struct page causes two cache line loads does not exist anymore. - calculating the offset of a struct page within the memmap array is only a simple shift instead of a more expensive multiplication. A "hackbench 200 process 200" run on a 32 cpu system did show an 8% runtime improvement. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5dba755a43e6..d385f396dfee 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -96,6 +96,7 @@ config S390 select HAVE_MEMBLOCK_NODE_MAP select HAVE_CMPXCHG_LOCAL select HAVE_CMPXCHG_DOUBLE + select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_VIRT_CPU_ACCOUNTING select VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK -- cgit v1.2.3 From ae289dc1f474ff380e9d7601f02e4d766cbba408 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 15 Nov 2012 09:22:40 +0100 Subject: s390/3215: fix tty close handling The 3215 console always has the RAW3215_FIXED flag set, which causes raw3215_shutdown() not to wait for outstanding I/O requests if an attached tty gets closed. The flag however can be simply removed, so we can guarantee that all requests belonging to the tty have been processed when the tty is closed. However the tasklet that belongs to the 3215 device may be scheduled even if there is no tty attached anymore, since we have a race between console and tty processing. 
Therefore the unconditional tty_wakeup() call in raw3215_wakeup() can cause the following NULL pointer dereference: 3.465368 Unable to handle kernel pointer dereference at virtual kernel address (null) 3.465448 Oops: 0004 #1 SMP 3.465454 Modules linked in: 3.465459 CPU: 1 Not tainted 3.6.0 #1 3.465462 Process swapper/1 (pid: 0, task: 000000003ffa4428, ksp: 000000003ffb7ce0) 3.465466 Krnl PSW : 0404100180000000 0000000000162f86 (__wake_up+0x46/0xb8) 3.465480 R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:1 PM:0 EA:3 Krnl GPRS: fffffffffffffffe 0000000000000000 0000000000000160 0000000000000001 3.465492 0000000000000001 0000000000000004 0000000000000004 000000000096b490 3.465499 0000000000000001 0000000000000100 0000000000000001 0000000000000001 3.465506 070000003fc87d60 0000000000000160 000000003fc87d68 000000003fc87d00 3.465526 Krnl Code: 0000000000162f76: e3c0f0a80004 lg %r12,168(%r15) 0000000000162f7c: 58000370 l %r0,880 #0000000000162f80: c007ffffffff00 xilf %r0,4294967295 >0000000000162f86: ba102000 cs %r1,%r0,0(%r2) 0000000000162f8a: 1211 ltr %r1,%r1 0000000000162f8c: a774002f brc 7,162fea 0000000000162f90: b904002d lgr %r2,%r13 0000000000162f94: b904003a lgr %r3,%r10 3.465597 Call Trace: 3.465599 (<0400000000000000> 0x400000000000000) 3.465602 <000000000048c77e> raw3215_wakeup+0x2e/0x40 3.465607 <0000000000134d66> tasklet_action+0x96/0x168 3.465612 <000000000013423c> __do_softirq+0xd8/0x21c 3.465615 <0000000000134678> irq_exit+0xa8/0xac 3.465617 <000000000046c232> do_IRQ+0x182/0x248 3.465621 <00000000005c8296> io_return+0x0/0x8 3.465625 <00000000005c7cac> vtime_stop_cpu+0x4c/0xb8 3.465629 (<0000000000194e06> tick_nohz_idle_enter+0x4e/0x74) 3.465633 <0000000000104760> cpu_idle+0x170/0x184 3.465636 <00000000005b5182> smp_start_secondary+0xd6/0xe0 3.465641 <00000000005c86be> restart_int_handler+0x56/0x6c 3.465643 <0000000000000000> 0x0 3.465645 Last Breaking-Event-Address: 3.465647 <0000000000403136> tty_wakeup+0x46/0x98 3.465652 3.465654 Kernel panic - not syncing: Fatal 
exception in interrupt 01: HCPGIR450W CP entered; disabled wait PSW 00020001 80000000 00000000 0010F63C The easiest solution is simply to check if tty is NULL in the tasklet. If it is NULL there is nothing to do (no tty attached), otherwise tty_wakeup() can be called, since we hold a reference to the tty. This is not nice... but it is a small patch and it works. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3215.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 9ffb6d5f17aa..4ed343e4eb41 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -44,7 +44,6 @@ #define RAW3215_NR_CCWS 3 #define RAW3215_TIMEOUT HZ/10 /* time for delayed output */ -#define RAW3215_FIXED 1 /* 3215 console device is not be freed */ #define RAW3215_WORKING 4 /* set if a request is being worked on */ #define RAW3215_THROTTLED 8 /* set if reading is disabled */ #define RAW3215_STOPPED 16 /* set if writing is disabled */ @@ -339,8 +338,10 @@ static void raw3215_wakeup(unsigned long data) struct tty_struct *tty; tty = tty_port_tty_get(&raw->port); - tty_wakeup(tty); - tty_kref_put(tty); + if (tty) { + tty_wakeup(tty); + tty_kref_put(tty); + } } /* @@ -629,8 +630,7 @@ static void raw3215_shutdown(struct raw3215_info *raw) DECLARE_WAITQUEUE(wait, current); unsigned long flags; - if (!(raw->port.flags & ASYNC_INITIALIZED) || - (raw->flags & RAW3215_FIXED)) + if (!(raw->port.flags & ASYNC_INITIALIZED)) return; /* Wait for outstanding requests, then free irq */ spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); @@ -926,8 +926,6 @@ static int __init con3215_init(void) dev_set_drvdata(&cdev->dev, raw); cdev->handler = raw3215_irq; - raw->flags |= RAW3215_FIXED; - /* Request the console irq */ if (raw3215_startup(raw) != 0) { raw3215_free_info(raw); -- cgit v1.2.3